maoliangran / rpms / kernel

Forked from rpms/kernel 4 months ago
Clone
Blob Blame History Raw
From d5f9669c68f4d484fd9e035cff5f909413b10ccb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:07 +0100
Subject: [PATCH 001/100] crypto: lib - tidy up lib/crypto Kconfig and Makefile
commit 746b2e024c67aa605ac12d135cd7085a49cf9dc4 upstream.
In preparation of introducing a set of crypto library interfaces, tidy
up the Makefile and split off the Kconfig symbols into a separate file.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/Kconfig | 13 +------------
lib/crypto/Kconfig | 15 +++++++++++++++
lib/crypto/Makefile | 16 ++++++++--------
3 files changed, 24 insertions(+), 20 deletions(-)
create mode 100644 lib/crypto/Kconfig
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b2cc0ad3792a..7d19b46a7ef7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -878,9 +878,6 @@ config CRYPTO_SHA1_PPC_SPE
SHA-1 secure hash standard (DFIPS 180-4) implemented
using powerpc SPE SIMD instruction set.
-config CRYPTO_LIB_SHA256
- tristate
-
config CRYPTO_SHA256
tristate "SHA224 and SHA256 digest algorithm"
select CRYPTO_HASH
@@ -1019,9 +1016,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
comment "Ciphers"
-config CRYPTO_LIB_AES
- tristate
-
config CRYPTO_AES
tristate "AES cipher algorithms"
select CRYPTO_ALGAPI
@@ -1150,9 +1144,6 @@ config CRYPTO_ANUBIS
<https://www.cosic.esat.kuleuven.be/nessie/reports/>
<http://www.larc.usp.br/~pbarreto/AnubisPage.html>
-config CRYPTO_LIB_ARC4
- tristate
-
config CRYPTO_ARC4
tristate "ARC4 cipher algorithm"
select CRYPTO_BLKCIPHER
@@ -1339,9 +1330,6 @@ config CRYPTO_CAST6_AVX_X86_64
This module provides the Cast6 cipher algorithm that processes
eight blocks parallel using the AVX instruction set.
-config CRYPTO_LIB_DES
- tristate
-
config CRYPTO_DES
tristate "DES and Triple DES EDE cipher algorithms"
select CRYPTO_ALGAPI
@@ -1845,6 +1833,7 @@ config CRYPTO_STATS
config CRYPTO_HASH_INFO
bool
+source "lib/crypto/Kconfig"
source "drivers/crypto/Kconfig"
source "crypto/asymmetric_keys/Kconfig"
source "certs/Kconfig"
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
new file mode 100644
index 000000000000..261430051595
--- /dev/null
+++ b/lib/crypto/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+comment "Crypto library routines"
+
+config CRYPTO_LIB_AES
+ tristate
+
+config CRYPTO_LIB_ARC4
+ tristate
+
+config CRYPTO_LIB_DES
+ tristate
+
+config CRYPTO_LIB_SHA256
+ tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index cbe0b6a6450d..63de4cb3fcf8 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
-libaes-y := aes.o
+obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
+libaes-y := aes.o
-obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
-libarc4-y := arc4.o
+obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
+libarc4-y := arc4.o
-obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
-libdes-y := des.o
+obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
+libdes-y := des.o
-obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
-libsha256-y := sha256.o
+obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
+libsha256-y := sha256.o
--
2.18.2
From df871ee7dba8582ca3b6f414ae9b615df113ac99 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:08 +0100
Subject: [PATCH 002/100] crypto: chacha - move existing library code into
lib/crypto
commit 5fb8ef25803ef33e2eb60b626435828b937bed75 upstream.
Currently, our generic ChaCha implementation consists of a permute
function in lib/chacha.c that operates on the 64-byte ChaCha state
directly [and which is always included into the core kernel since it
is used by the /dev/random driver], and the crypto API plumbing to
expose it as a skcipher.
In order to support in-kernel users that need the ChaCha streamcipher
but have no need [or tolerance] for going through the abstractions of
the crypto API, let's expose the streamcipher bits via a library API
as well, in a way that permits the implementation to be superseded by
an architecture specific one if provided.
So move the streamcipher code into a separate module in lib/crypto,
and expose the init() and crypt() routines to users of the library.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/chacha-neon-glue.c | 2 +-
arch/arm64/crypto/chacha-neon-glue.c | 2 +-
arch/x86/crypto/chacha_glue.c | 2 +-
crypto/Kconfig | 1 +
crypto/chacha_generic.c | 60 ++--------------------
include/crypto/chacha.h | 77 ++++++++++++++++++++++------
include/crypto/internal/chacha.h | 53 +++++++++++++++++++
lib/Makefile | 3 +-
lib/crypto/Kconfig | 26 ++++++++++
lib/crypto/Makefile | 4 ++
lib/{ => crypto}/chacha.c | 20 ++++----
lib/crypto/libchacha.c | 35 +++++++++++++
12 files changed, 199 insertions(+), 86 deletions(-)
create mode 100644 include/crypto/internal/chacha.h
rename lib/{ => crypto}/chacha.c (88%)
create mode 100644 lib/crypto/libchacha.c
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
index a8e9b534c8da..26576772f18b 100644
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -20,7 +20,7 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 1495d2b18518..d4cc61bfe79d 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -20,7 +20,7 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 388f95a4ec24..bc62daa8dafd 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -7,7 +7,7 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 7d19b46a7ef7..f29bf10c0462 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20
config CRYPTO_CHACHA20
tristate "ChaCha stream cipher algorithms"
+ select CRYPTO_LIB_CHACHA_GENERIC
select CRYPTO_BLKCIPHER
help
The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
index 085d8d219987..ebae6d9d9b32 100644
--- a/crypto/chacha_generic.c
+++ b/crypto/chacha_generic.c
@@ -8,29 +8,10 @@
#include <asm/unaligned.h>
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
-static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- /* aligned to potentially speed up crypto_xor() */
- u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
-
- while (bytes >= CHACHA_BLOCK_SIZE) {
- chacha_block(state, stream, nrounds);
- crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
- bytes -= CHACHA_BLOCK_SIZE;
- dst += CHACHA_BLOCK_SIZE;
- src += CHACHA_BLOCK_SIZE;
- }
- if (bytes) {
- chacha_block(state, stream, nrounds);
- crypto_xor_cpy(dst, src, stream, bytes);
- }
-}
-
static int chacha_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
- chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skcipher_request *req,
void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
{
- state[0] = 0x61707865; /* "expa" */
- state[1] = 0x3320646e; /* "nd 3" */
- state[2] = 0x79622d32; /* "2-by" */
- state[3] = 0x6b206574; /* "te k" */
- state[4] = ctx->key[0];
- state[5] = ctx->key[1];
- state[6] = ctx->key[2];
- state[7] = ctx->key[3];
- state[8] = ctx->key[4];
- state[9] = ctx->key[5];
- state[10] = ctx->key[6];
- state[11] = ctx->key[7];
- state[12] = get_unaligned_le32(iv + 0);
- state[13] = get_unaligned_le32(iv + 4);
- state[14] = get_unaligned_le32(iv + 8);
- state[15] = get_unaligned_le32(iv + 12);
+ chacha_init_generic(state, ctx->key, iv);
}
EXPORT_SYMBOL_GPL(crypto_chacha_init);
-static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize, int nrounds)
-{
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- int i;
-
- if (keysize != CHACHA_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
- ctx->nrounds = nrounds;
- return 0;
-}
-
int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize)
{
@@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
/* Compute the subkey given the original key and first 128 nonce bits */
crypto_chacha_init(state, ctx, req->iv);
- hchacha_block(state, subctx.key, ctx->nrounds);
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
/* Build the real IV */
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index d1e723c6a37d..5c662f8fecac 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -15,9 +15,8 @@
#ifndef _CRYPTO_CHACHA_H
#define _CRYPTO_CHACHA_H
-#include <crypto/skcipher.h>
+#include <asm/unaligned.h>
#include <linux/types.h>
-#include <linux/crypto.h>
/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
#define CHACHA_IV_SIZE 16
@@ -29,26 +28,70 @@
/* 192-bit nonce, then 64-bit stream position */
#define XCHACHA_IV_SIZE 32
-struct chacha_ctx {
- u32 key[8];
- int nrounds;
-};
-
-void chacha_block(u32 *state, u8 *stream, int nrounds);
+void chacha_block_generic(u32 *state, u8 *stream, int nrounds);
static inline void chacha20_block(u32 *state, u8 *stream)
{
- chacha_block(state, stream, 20);
+ chacha_block_generic(state, stream, 20);
}
-void hchacha_block(const u32 *in, u32 *out, int nrounds);
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
+void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
+void hchacha_block_generic(const u32 *state, u32 *out, int nrounds);
+
+static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
+ hchacha_block_arch(state, out, nrounds);
+ else
+ hchacha_block_generic(state, out, nrounds);
+}
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize);
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize);
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
+static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv)
+{
+ state[0] = 0x61707865; /* "expa" */
+ state[1] = 0x3320646e; /* "nd 3" */
+ state[2] = 0x79622d32; /* "2-by" */
+ state[3] = 0x6b206574; /* "te k" */
+ state[4] = key[0];
+ state[5] = key[1];
+ state[6] = key[2];
+ state[7] = key[3];
+ state[8] = key[4];
+ state[9] = key[5];
+ state[10] = key[6];
+ state[11] = key[7];
+ state[12] = get_unaligned_le32(iv + 0);
+ state[13] = get_unaligned_le32(iv + 4);
+ state[14] = get_unaligned_le32(iv + 8);
+ state[15] = get_unaligned_le32(iv + 12);
+}
-int crypto_chacha_crypt(struct skcipher_request *req);
-int crypto_xchacha_crypt(struct skcipher_request *req);
+static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
+ chacha_init_arch(state, key, iv);
+ else
+ chacha_init_generic(state, key, iv);
+}
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+
+static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
+ chacha_crypt_arch(state, dst, src, bytes, nrounds);
+ else
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes)
+{
+ chacha_crypt(state, dst, src, bytes, 20);
+}
#endif /* _CRYPTO_CHACHA_H */
diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
new file mode 100644
index 000000000000..c0e40b245431
--- /dev/null
+++ b/include/crypto/internal/chacha.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CRYPTO_INTERNAL_CHACHA_H
+#define _CRYPTO_INTERNAL_CHACHA_H
+
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/crypto.h>
+
+struct chacha_ctx {
+ u32 key[8];
+ int nrounds;
+};
+
+void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
+
+static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize, int nrounds)
+{
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int i;
+
+ if (keysize != CHACHA_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ ctx->nrounds = nrounds;
+ return 0;
+}
+
+static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 20);
+}
+
+static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 12);
+}
+
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize);
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize);
+
+int crypto_chacha_crypt(struct skcipher_request *req);
+int crypto_xchacha_crypt(struct skcipher_request *req);
+
+#endif /* _CRYPTO_CHACHA_H */
diff --git a/lib/Makefile b/lib/Makefile
index c5892807e06f..5af38fd5cc60 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,8 +26,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o timerqueue.o xarray.o \
- idr.o extable.o \
- sha1.o chacha.o irq_regs.o argv_split.o \
+ idr.o extable.o sha1.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 261430051595..6a11931ae105 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -8,6 +8,32 @@ config CRYPTO_LIB_AES
config CRYPTO_LIB_ARC4
tristate
+config CRYPTO_ARCH_HAVE_LIB_CHACHA
+ tristate
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the ChaCha library interface,
+ either builtin or as a module.
+
+config CRYPTO_LIB_CHACHA_GENERIC
+ tristate
+ select CRYPTO_ALGAPI
+ help
+ This symbol can be depended upon by arch implementations of the
+ ChaCha library interface that require the generic code as a
+ fallback, e.g., for SIMD implementations. If no arch specific
+ implementation is enabled, this implementation serves the users
+ of CRYPTO_LIB_CHACHA.
+
+config CRYPTO_LIB_CHACHA
+ tristate "ChaCha library interface"
+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
+ help
+ Enable the ChaCha library interface. This interface may be fulfilled
+ by either the generic implementation or an arch-specific one, if one
+ is available and enabled.
+
config CRYPTO_LIB_DES
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 63de4cb3fcf8..0ce40604e104 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,5 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
+# chacha is used by the /dev/random driver which is always builtin
+obj-y += chacha.o
+obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
+
obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
libaes-y := aes.o
diff --git a/lib/chacha.c b/lib/crypto/chacha.c
similarity index 88%
rename from lib/chacha.c
rename to lib/crypto/chacha.c
index c7c9826564d3..65ead6b0c7e0 100644
--- a/lib/chacha.c
+++ b/lib/crypto/chacha.c
@@ -5,9 +5,11 @@
* Copyright (C) 2015 Martin Willi
*/
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
+#include <linux/string.h>
#include <linux/cryptohash.h>
#include <asm/unaligned.h>
#include <crypto/chacha.h>
@@ -72,7 +74,7 @@ static void chacha_permute(u32 *x, int nrounds)
* The caller has already converted the endianness of the input. This function
* also handles incrementing the block counter in the input matrix.
*/
-void chacha_block(u32 *state, u8 *stream, int nrounds)
+void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
{
u32 x[16];
int i;
@@ -86,11 +88,11 @@ void chacha_block(u32 *state, u8 *stream, int nrounds)
state[12]++;
}
-EXPORT_SYMBOL(chacha_block);
+EXPORT_SYMBOL(chacha_block_generic);
/**
- * hchacha_block - abbreviated ChaCha core, for XChaCha
- * @in: input state matrix (16 32-bit words)
+ * hchacha_block_generic - abbreviated ChaCha core, for XChaCha
+ * @state: input state matrix (16 32-bit words)
* @out: output (8 32-bit words)
* @nrounds: number of rounds (20 or 12; 20 is recommended)
*
@@ -99,15 +101,15 @@ EXPORT_SYMBOL(chacha_block);
* skips the final addition of the initial state, and outputs only certain words
* of the state. It should not be used for streaming directly.
*/
-void hchacha_block(const u32 *in, u32 *out, int nrounds)
+void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds)
{
u32 x[16];
- memcpy(x, in, 64);
+ memcpy(x, state, 64);
chacha_permute(x, nrounds);
- memcpy(&out[0], &x[0], 16);
- memcpy(&out[4], &x[12], 16);
+ memcpy(&stream[0], &x[0], 16);
+ memcpy(&stream[4], &x[12], 16);
}
-EXPORT_SYMBOL(hchacha_block);
+EXPORT_SYMBOL(hchacha_block_generic);
diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c
new file mode 100644
index 000000000000..dabc3accae05
--- /dev/null
+++ b/lib/crypto/libchacha.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * The ChaCha stream cipher (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/module.h>
+
+#include <crypto/algapi.h> // for crypto_xor_cpy
+#include <crypto/chacha.h>
+
+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ /* aligned to potentially speed up crypto_xor() */
+ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block_generic(state, stream, nrounds);
+ crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
+ bytes -= CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ src += CHACHA_BLOCK_SIZE;
+ }
+ if (bytes) {
+ chacha_block_generic(state, stream, nrounds);
+ crypto_xor_cpy(dst, src, stream, bytes);
+ }
+}
+EXPORT_SYMBOL(chacha_crypt_generic);
+
+MODULE_LICENSE("GPL");
--
2.18.2
From 23aa6c8d945743ff123207351ee2a27612c6486f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:09 +0100
Subject: [PATCH 003/100] crypto: x86/chacha - depend on generic chacha library
instead of crypto driver
commit 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 upstream.
In preparation of extending the x86 ChaCha driver to also expose the ChaCha
library interface, drop the dependency on the chacha_generic crypto driver
as a non-SIMD fallback, and depend on the generic ChaCha library directly.
This way, we only pull in the code we actually need, without registering
a set of ChaCha skciphers that we will never use.
Since turning the FPU on and off is cheap these days, simplify the SIMD
routine by dropping the per-page yield, which makes for a cleaner switch
to the library API as well. This also allows use to invoke the skcipher
walk routines in non-atomic mode.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/chacha_glue.c | 90 ++++++++++++++---------------------
crypto/Kconfig | 2 +-
2 files changed, 36 insertions(+), 56 deletions(-)
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index bc62daa8dafd..0aabb382edce 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}
-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
+static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
- int next_yield = 4096; /* bytes until next FPU yield */
- int err = 0;
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, iv);
+ chacha_init_generic(state, ctx->key, iv);
- while (walk->nbytes > 0) {
- unsigned int nbytes = walk->nbytes;
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
- if (nbytes < walk->total) {
- nbytes = round_down(nbytes, walk->stride);
- next_yield -= nbytes;
- }
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
- nbytes, ctx->nrounds);
-
- if (next_yield <= 0) {
- /* temporarily allow preemption */
- kernel_fpu_end();
+ if (!crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
kernel_fpu_begin();
- next_yield = 4096;
+ chacha_dosimd(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ kernel_fpu_end();
}
-
- err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
@@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- int err;
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
-
- kernel_fpu_begin();
- err = chacha_simd_stream_xor(&walk, ctx, req->iv);
- kernel_fpu_end();
- return err;
+ return chacha_simd_stream_xor(req, ctx, req->iv);
}
static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- struct chacha_ctx subctx;
u32 *state, state_buf[16 + 2] __aligned(8);
+ struct chacha_ctx subctx;
u8 real_iv[16];
- int err;
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_fpu_begin();
-
- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ kernel_fpu_end();
+ } else {
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
+ }
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
-
- kernel_fpu_end();
-
- return err;
+ return chacha_simd_stream_xor(req, &subctx, real_iv);
}
static struct skcipher_alg algs[] = {
@@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_simd,
.decrypt = chacha_simd,
}, {
@@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
}, {
@@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
},
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f29bf10c0462..564a3f7b40b8 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1417,7 +1417,7 @@ config CRYPTO_CHACHA20_X86_64
tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
depends on X86 && 64BIT
select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ select CRYPTO_LIB_CHACHA_GENERIC
help
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
XChaCha20, and XChaCha12 stream ciphers.
--
2.18.2
From eae0a3dc41f16fa4829ea9fe4b5520402a54d0eb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:10 +0100
Subject: [PATCH 004/100] crypto: x86/chacha - expose SIMD ChaCha routine as
library function
commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream.
Wire the existing x86 SIMD ChaCha code into the new ChaCha library
interface, so that users of the library interface will get the
accelerated version when available.
Given that calls into the library API will always go through the
routines in this module if it is enabled, switch to static keys
to select the optimal implementation available (which may be none
at all, in which case we defer to the generic implementation for
all invocations).
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++----------
crypto/Kconfig | 1 +
include/crypto/chacha.h | 6 +++
3 files changed, 73 insertions(+), 25 deletions(-)
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 0aabb382edce..b391e13a9e41 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-#ifdef CONFIG_AS_AVX2
+
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx2;
-#ifdef CONFIG_AS_AVX512
+
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx512vl;
-#endif
-#endif
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
-#ifdef CONFIG_AS_AVX2
-#ifdef CONFIG_AS_AVX512
- if (chacha_use_avx512vl) {
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&chacha_use_avx512vl)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
- if (chacha_use_avx2) {
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ static_branch_likely(&chacha_use_avx2)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
+
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, stream, nrounds);
+ kernel_fpu_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE)
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, bytes, nrounds);
+ kernel_fpu_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
- if (!crypto_simd_usable()) {
+ if (!static_branch_likely(&chacha_use_simd) ||
+ !crypto_simd_usable()) {
chacha_crypt_generic(state, walk.dst.virt.addr,
walk.src.virt.addr, nbytes,
ctx->nrounds);
@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifdef CONFIG_AS_AVX512
- chacha_use_avx512vl = chacha_use_avx2 &&
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
-#endif
-#endif
+ return 0;
+
+ static_branch_enable(&chacha_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+ static_branch_enable(&chacha_use_avx2);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+ static_branch_enable(&chacha_use_avx512vl);
+ }
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 564a3f7b40b8..649dc564f242 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64
depends on X86 && 64BIT
select CRYPTO_BLKCIPHER
select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
help
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
XChaCha20, and XChaCha12 stream ciphers.
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index 5c662f8fecac..2676f4fbd4c1 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -25,6 +25,12 @@
#define CHACHA_BLOCK_SIZE 64
#define CHACHAPOLY_IV_SIZE 12
+#ifdef CONFIG_X86_64
+#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
+#else
+#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
+#endif
+
/* 192-bit nonce, then 64-bit stream position */
#define XCHACHA_IV_SIZE 32
--
2.18.2
From 00be77bf975db1c79b616faa27f89a1625c8a8f8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:11 +0100
Subject: [PATCH 005/100] crypto: arm64/chacha - depend on generic chacha
library instead of crypto driver
commit c77da4867cbb7841177275dbb250f5c09679fae4 upstream.
Depend on the generic ChaCha library routines instead of pulling in the
generic ChaCha skcipher driver, which is more than we need, and makes
managing the dependencies between the generic library, generic driver,
accelerated library and driver more complicated.
While at it, drop the logic to prefer the scalar code on short inputs.
Turning the NEON on and off is cheap these days, and one major use case
for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar
path upon every invocation (when doing the Poly1305 nonce generation)
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm64/crypto/Kconfig | 2 +-
arch/arm64/crypto/chacha-neon-glue.c | 40 +++++++++++++++-------------
2 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 4922c4451e7c..fdf52d5f18f9 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON
tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ select CRYPTO_LIB_CHACHA_GENERIC
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index d4cc61bfe79d..cae2cb92eca8 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
err = skcipher_walk_virt(&walk, req, false);
- crypto_chacha_init(state, ctx, iv);
+ chacha_init_generic(state, ctx->key, iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
@@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = rounddown(nbytes, walk.stride);
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
+ if (!crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
return chacha_neon_stream_xor(req, ctx, req->iv);
}
@@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_request *req)
u32 state[16];
u8 real_iv[16];
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
+ chacha_init_generic(state, ctx->key, req->iv);
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
+ if (crypto_simd_usable()) {
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ } else {
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
+ }
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
@@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = {
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_neon,
.decrypt = chacha_neon,
}, {
@@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}, {
@@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}
--
2.18.2
From 9612659d26c1a94ab65d28c6e95e8efabc44555c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:12 +0100
Subject: [PATCH 006/100] crypto: arm64/chacha - expose arm64 ChaCha routine as
library function
commit b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c upstream.
Expose the accelerated NEON ChaCha routine directly as a symbol
export so that users of the ChaCha library API can use it directly.
Given that calls into the library API will always go through the
routines in this module if it is enabled, switch to static keys
to select the optimal implementation available (which may be none
at all, in which case we defer to the generic implementation for
all invocations).
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm64/crypto/Kconfig | 1 +
arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++------
2 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index fdf52d5f18f9..17bada4b9dd2 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index cae2cb92eca8..46cd4297761c 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -23,6 +23,7 @@
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
int bytes, int nrounds)
{
@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static int chacha_neon_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = rounddown(nbytes, walk.stride);
- if (!crypto_simd_usable()) {
+ if (!static_branch_likely(&have_neon) ||
+ !crypto_simd_usable()) {
chacha_crypt_generic(state, walk.dst.virt.addr,
walk.src.virt.addr, nbytes,
ctx->nrounds);
@@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_request *req)
u8 real_iv[16];
chacha_init_generic(state, ctx->key, req->iv);
-
- if (crypto_simd_usable()) {
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
- } else {
- hchacha_block_generic(state, subctx.key, ctx->nrounds);
- }
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
@@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!cpu_have_named_feature(ASIMD))
- return -ENODEV;
+ return 0;
+
+ static_branch_enable(&have_neon);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
--
2.18.2
From eb04a17f747282ec72b45430b18595e44ace9fac Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:13 +0100
Subject: [PATCH 007/100] crypto: arm/chacha - import Eric Biggers's scalar
accelerated ChaCha code
commit 29621d099f9c642b22a69dc8e7e20c108473a392 upstream.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++
1 file changed, 461 insertions(+)
create mode 100644 arch/arm/crypto/chacha-scalar-core.S
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
new file mode 100644
index 000000000000..2140319b64a0
--- /dev/null
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Design notes:
+ *
+ * 16 registers would be needed to hold the state matrix, but only 14 are
+ * available because 'sp' and 'pc' cannot be used. So we spill the elements
+ * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
+ * 'ldrd' and one 'strd' instruction per round.
+ *
+ * All rotates are performed using the implicit rotate operand accepted by the
+ * 'add' and 'eor' instructions. This is faster than using explicit rotate
+ * instructions. To make this work, we allow the values in the second and last
+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
+ * wrong rotation amount. The rotation amount is then fixed up just in time
+ * when the values are used. 'brot' is the number of bits the values in row 'b'
+ * need to be rotated right to arrive at the correct values, and 'drot'
+ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
+ * that they end up as (25, 24) after every round.
+ */
+
+ // ChaCha state registers
+ X0 .req r0
+ X1 .req r1
+ X2 .req r2
+ X3 .req r3
+ X4 .req r4
+ X5 .req r5
+ X6 .req r6
+ X7 .req r7
+ X8_X10 .req r8 // shared by x8 and x10
+ X9_X11 .req r9 // shared by x9 and x11
+ X12 .req r10
+ X13 .req r11
+ X14 .req r12
+ X15 .req r14
+
+.Lexpand_32byte_k:
+ // "expand 32-byte k"
+ .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+
+#ifdef __thumb2__
+# define adrl adr
+#endif
+
+.macro __rev out, in, t0, t1, t2
+.if __LINUX_ARM_ARCH__ >= 6
+ rev \out, \in
+.else
+ lsl \t0, \in, #24
+ and \t1, \in, #0xff00
+ and \t2, \in, #0xff0000
+ orr \out, \t0, \in, lsr #24
+ orr \out, \out, \t1, lsl #8
+ orr \out, \out, \t2, lsr #8
+.endif
+.endm
+
+.macro _le32_bswap x, t0, t1, t2
+#ifdef __ARMEB__
+ __rev \x, \x, \t0, \t1, \t2
+#endif
+.endm
+
+.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
+ _le32_bswap \a, \t0, \t1, \t2
+ _le32_bswap \b, \t0, \t1, \t2
+ _le32_bswap \c, \t0, \t1, \t2
+ _le32_bswap \d, \t0, \t1, \t2
+.endm
+
+.macro __ldrd a, b, src, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ ldrd \a, \b, [\src, #\offset]
+#else
+ ldr \a, [\src, #\offset]
+ ldr \b, [\src, #\offset + 4]
+#endif
+.endm
+
+.macro __strd a, b, dst, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ strd \a, \b, [\dst, #\offset]
+#else
+ str \a, [\dst, #\offset]
+ str \b, [\dst, #\offset + 4]
+#endif
+.endm
+
+.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
+
+ // a += b; d ^= a; d = rol(d, 16);
+ add \a1, \a1, \b1, ror #brot
+ add \a2, \a2, \b2, ror #brot
+ eor \d1, \a1, \d1, ror #drot
+ eor \d2, \a2, \d2, ror #drot
+ // drot == 32 - 16 == 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ add \c1, \c1, \d1, ror #16
+ add \c2, \c2, \d2, ror #16
+ eor \b1, \c1, \b1, ror #brot
+ eor \b2, \c2, \b2, ror #brot
+ // brot == 32 - 12 == 20
+
+ // a += b; d ^= a; d = rol(d, 8);
+ add \a1, \a1, \b1, ror #20
+ add \a2, \a2, \b2, ror #20
+ eor \d1, \a1, \d1, ror #16
+ eor \d2, \a2, \d2, ror #16
+ // drot == 32 - 8 == 24
+
+ // c += d; b ^= c; b = rol(b, 7);
+ add \c1, \c1, \d1, ror #24
+ add \c2, \c2, \d2, ror #24
+ eor \b1, \c1, \b1, ror #20
+ eor \b2, \c2, \b2, ror #20
+ // brot == 32 - 7 == 25
+.endm
+
+.macro _doubleround
+
+ // column round
+
+ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
+ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
+
+ // save (x8, x9); restore (x10, x11)
+ __strd X8_X10, X9_X11, sp, 0
+ __ldrd X8_X10, X9_X11, sp, 8
+
+ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
+ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
+
+ .set brot, 25
+ .set drot, 24
+
+ // diagonal round
+
+ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
+ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
+
+ // save (x10, x11); restore (x8, x9)
+ __strd X8_X10, X9_X11, sp, 8
+ __ldrd X8_X10, X9_X11, sp, 0
+
+ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
+ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
+.endm
+
+.macro _chacha_permute nrounds
+ .set brot, 0
+ .set drot, 0
+ .rept \nrounds / 2
+ _doubleround
+ .endr
+.endm
+
+.macro _chacha nrounds
+
+.Lnext_block\@:
+ // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+
+ // Do the core ChaCha permutation to update x0-x15.
+ _chacha_permute \nrounds
+
+ add sp, #8
+ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
+ push {X8_X10, X9_X11, X12, X13, X14, X15}
+
+ // Load (OUT, IN, LEN).
+ ldr r14, [sp, #96]
+ ldr r12, [sp, #100]
+ ldr r11, [sp, #104]
+
+ orr r10, r14, r12
+
+ // Use slow path if fewer than 64 bytes remain.
+ cmp r11, #64
+ blt .Lxor_slowpath\@
+
+ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
+ // ARMv6+, since ldmia and stmia (used below) still require alignment.
+ tst r10, #3
+ bne .Lxor_slowpath\@
+
+ // Fast path: XOR 64 bytes of aligned data.
+
+ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // x0-x3
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor X0, X0, r8
+ eor X1, X1, r9
+ eor X2, X2, r10
+ eor X3, X3, r11
+ stmia r14!, {X0-X3}
+
+ // x4-x7
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ ldmia r12!, {X0-X3}
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ eor X4, X4, X0
+ eor X5, X5, X1
+ eor X6, X6, X2
+ eor X7, X7, X3
+ stmia r14!, {X4-X7}
+
+ // x8-x15
+ pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor r0, r0, r8 // x8
+ eor r1, r1, r9 // x9
+ eor r6, r6, r10 // x10
+ eor r7, r7, r11 // x11
+ stmia r14!, {r0,r1,r6,r7}
+ ldmia r12!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ ldr r9, [sp, #72] // load LEN
+ eor r2, r2, r0 // x12
+ eor r3, r3, r1 // x13
+ eor r4, r4, r6 // x14
+ eor r5, r5, r7 // x15
+ subs r9, #64 // decrement and check LEN
+ stmia r14!, {r2-r5}
+
+ beq .Ldone\@
+
+.Lprepare_for_next_block\@:
+
+ // Stack: x0-x15 OUT IN LEN
+
+ // Increment block counter (x12)
+ add r8, #1
+
+ // Store updated (OUT, IN, LEN)
+ str r14, [sp, #64]
+ str r12, [sp, #68]
+ str r9, [sp, #72]
+
+ mov r14, sp
+
+ // Store updated block counter (x12)
+ str r8, [sp, #48]
+
+ sub sp, #16
+
+ // Reload state and do next block
+ ldmia r14!, {r0-r11} // load x0-x11
+ __strd r10, r11, sp, 8 // store x10-x11 before state
+ ldmia r14, {r10-r12,r14} // load x12-x15
+ b .Lnext_block\@
+
+.Lxor_slowpath\@:
+ // Slow path: < 64 bytes remaining, or unaligned input or output buffer.
+ // We handle it by storing the 64 bytes of keystream to the stack, then
+ // XOR-ing the needed portion with the data.
+
+ // Allocate keystream buffer
+ sub sp, #64
+ mov r14, sp
+
+ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Save keystream for x0-x3
+ __ldrd r8, r9, sp, 96
+ __ldrd r10, r11, sp, 104
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ stmia r14!, {X0-X3}
+
+ // Save keystream for x4-x7
+ __ldrd r8, r9, sp, 112
+ __ldrd r10, r11, sp, 120
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ add r8, sp, #64
+ stmia r14!, {X4-X7}
+
+ // Save keystream for x8-x15
+ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 128
+ __ldrd r10, r11, sp, 136
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ stmia r14!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 144
+ __ldrd r10, r11, sp, 152
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ stmia r14, {r2-r5}
+
+ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
+ // Registers: r8 is block counter, r12 is IN.
+
+ ldr r9, [sp, #168] // LEN
+ ldr r14, [sp, #160] // OUT
+ cmp r9, #64
+ mov r0, sp
+ movle r1, r9
+ movgt r1, #64
+ // r1 is number of bytes to XOR, in range [1, 64]
+
+.if __LINUX_ARM_ARCH__ < 6
+ orr r2, r12, r14
+ tst r2, #3 // IN or OUT misaligned?
+ bne .Lxor_next_byte\@
+.endif
+
+ // XOR a word at a time
+.rept 16
+ subs r1, #4
+ blt .Lxor_words_done\@
+ ldr r2, [r12], #4
+ ldr r3, [r0], #4
+ eor r2, r2, r3
+ str r2, [r14], #4
+.endr
+ b .Lxor_slowpath_done\@
+.Lxor_words_done\@:
+ ands r1, r1, #3
+ beq .Lxor_slowpath_done\@
+
+ // XOR a byte at a time
+.Lxor_next_byte\@:
+ ldrb r2, [r12], #1
+ ldrb r3, [r0], #1
+ eor r2, r2, r3
+ strb r2, [r14], #1
+ subs r1, #1
+ bne .Lxor_next_byte\@
+
+.Lxor_slowpath_done\@:
+ subs r9, #64
+ add sp, #96
+ bgt .Lprepare_for_next_block\@
+
+.Ldone\@:
+.endm // _chacha
+
+/*
+ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
+ * const u32 iv[4]);
+ */
+ENTRY(chacha20_arm)
+ cmp r2, #0 // len == 0?
+ reteq lr
+
+ push {r0-r2,r4-r11,lr}
+
+ // Push state x0-x15 onto stack.
+ // Also store an extra copy of x10-x11 just before the state.
+
+ ldr r4, [sp, #48] // iv
+ mov r0, sp
+ sub sp, #80
+
+ // iv: x12-x15
+ ldm r4, {X12,X13,X14,X15}
+ stmdb r0!, {X12,X13,X14,X15}
+
+ // key: x4-x11
+ __ldrd X8_X10, X9_X11, r3, 24
+ __strd X8_X10, X9_X11, sp, 8
+ stmdb r0!, {X8_X10, X9_X11}
+ ldm r3, {X4-X9_X11}
+ stmdb r0!, {X4-X9_X11}
+
+ // constants: x0-x3
+ adrl X3, .Lexpand_32byte_k
+ ldm X3, {X0-X3}
+ __strd X0, X1, sp, 16
+ __strd X2, X3, sp, 24
+
+ _chacha 20
+
+ add sp, #76
+ pop {r4-r11, pc}
+ENDPROC(chacha20_arm)
+
+/*
+ * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ */
+ENTRY(hchacha20_arm)
+ push {r1,r4-r11,lr}
+
+ mov r14, r0
+ ldmia r14!, {r0-r11} // load x0-x11
+ push {r10-r11} // store x10-x11 to stack
+ ldm r14, {r10-r12,r14} // load x12-x15
+ sub sp, #8
+
+ _chacha_permute 20
+
+ // Skip over (unused0-unused1, x10-x11)
+ add sp, #16
+
+ // Fix up rotations of x12-x15
+ ror X12, X12, #drot
+ ror X13, X13, #drot
+ pop {r4} // load 'out'
+ ror X14, X14, #drot
+ ror X15, X15, #drot
+
+ // Store (x0-x3,x12-x15) to 'out'
+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
+
+ pop {r4-r11,pc}
+ENDPROC(hchacha20_arm)
--
2.18.2
From 6240a7104c8ec4bf92a7c5b7554f2f504f6c8bfe Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:14 +0100
Subject: [PATCH 008/100] crypto: arm/chacha - remove dependency on generic
ChaCha driver
commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.
Instead of falling back to the generic ChaCha skcipher driver for
non-SIMD cases, use a fast scalar implementation for ARM authored
by Eric Biggers. This removes the module dependency on chacha-generic
altogether, which also simplifies things when we expose the ChaCha
library interface from this module.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/Kconfig | 4 +-
arch/arm/crypto/Makefile | 3 +-
arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++
arch/arm/crypto/chacha-neon-glue.c | 202 ------------------
arch/arm/crypto/chacha-scalar-core.S | 65 +++---
arch/arm64/crypto/chacha-neon-glue.c | 2 +-
6 files changed, 340 insertions(+), 240 deletions(-)
create mode 100644 arch/arm/crypto/chacha-glue.c
delete mode 100644 arch/arm/crypto/chacha-neon-glue.c
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 043b0b18bf7e..cee414afeabc 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha stream cipher algorithms"
- depends on KERNEL_MODE_NEON
+ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4180f3a13512..6b97dffcf90f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
new file mode 100644
index 000000000000..eb40efb3eb34
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ const u32 *state, int nrounds);
+
+static inline bool neon_usable(void)
+{
+ return crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE;
+ src += CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv,
+ bool neon)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ if (!neon) {
+ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, state, ctx->nrounds);
+ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+ return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+ return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (!neon) {
+ hchacha_block_arm(state, subctx.key, ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ }
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+ return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_arm,
+ .decrypt = chacha_arm,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ },
+};
+
+static struct skcipher_alg neon_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_neon,
+ .decrypt = chacha_neon,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ int err;
+
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+ int i;
+
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ case ARM_CPU_PART_CORTEX_A5:
+ /*
+ * The Cortex-A7 and Cortex-A5 do not perform well with
+ * the NEON implementation but do incredibly with the
+ * scalar one and use less power.
+ */
+ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+ neon_algs[i].base.cra_priority = 0;
+ break;
+ }
+
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ if (err)
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ }
+ return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
deleted file mode 100644
index 26576772f18b..000000000000
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- u8 buf[CHACHA_BLOCK_SIZE];
-
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha_4block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 4;
- src += CHACHA_BLOCK_SIZE * 4;
- dst += CHACHA_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA_BLOCK_SIZE) {
- chacha_block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE;
- src += CHACHA_BLOCK_SIZE;
- dst += CHACHA_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha_block_xor_neon(state, buf, buf, nrounds);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- crypto_chacha_init(state, ctx, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
- return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha_neon,
- .decrypt = chacha_neon,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
index 2140319b64a0..2985b80a45b5 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,14 +41,6 @@
X14 .req r12
X15 .req r14
-.Lexpand_32byte_k:
- // "expand 32-byte k"
- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
-
-#ifdef __thumb2__
-# define adrl adr
-#endif
-
.macro __rev out, in, t0, t1, t2
.if __LINUX_ARM_ARCH__ >= 6
rev \out, \in
@@ -391,61 +383,65 @@
.endm // _chacha
/*
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
- * const u32 iv[4]);
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ * const u32 *state, int nrounds);
*/
-ENTRY(chacha20_arm)
+ENTRY(chacha_doarm)
cmp r2, #0 // len == 0?
reteq lr
+ ldr ip, [sp]
+ cmp ip, #12
+
push {r0-r2,r4-r11,lr}
// Push state x0-x15 onto stack.
// Also store an extra copy of x10-x11 just before the state.
- ldr r4, [sp, #48] // iv
- mov r0, sp
- sub sp, #80
-
- // iv: x12-x15
- ldm r4, {X12,X13,X14,X15}
- stmdb r0!, {X12,X13,X14,X15}
+ add X12, r3, #48
+ ldm X12, {X12,X13,X14,X15}
+ push {X12,X13,X14,X15}
+ sub sp, sp, #64
- // key: x4-x11
- __ldrd X8_X10, X9_X11, r3, 24
+ __ldrd X8_X10, X9_X11, r3, 40
__strd X8_X10, X9_X11, sp, 8
- stmdb r0!, {X8_X10, X9_X11}
- ldm r3, {X4-X9_X11}
- stmdb r0!, {X4-X9_X11}
-
- // constants: x0-x3
- adrl X3, .Lexpand_32byte_k
- ldm X3, {X0-X3}
+ __strd X8_X10, X9_X11, sp, 56
+ ldm r3, {X0-X9_X11}
__strd X0, X1, sp, 16
__strd X2, X3, sp, 24
+ __strd X4, X5, sp, 32
+ __strd X6, X7, sp, 40
+ __strd X8_X10, X9_X11, sp, 48
+ beq 1f
_chacha 20
- add sp, #76
+0: add sp, #76
pop {r4-r11, pc}
-ENDPROC(chacha20_arm)
+
+1: _chacha 12
+ b 0b
+ENDPROC(chacha_doarm)
/*
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
*/
-ENTRY(hchacha20_arm)
+ENTRY(hchacha_block_arm)
push {r1,r4-r11,lr}
+ cmp r2, #12 // ChaCha12 ?
+
mov r14, r0
ldmia r14!, {r0-r11} // load x0-x11
push {r10-r11} // store x10-x11 to stack
ldm r14, {r10-r12,r14} // load x12-x15
sub sp, #8
+ beq 1f
_chacha_permute 20
// Skip over (unused0-unused1, x10-x11)
- add sp, #16
+0: add sp, #16
// Fix up rotations of x12-x15
ror X12, X12, #drot
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
pop {r4-r11,pc}
-ENDPROC(hchacha20_arm)
+
+1: _chacha_permute 12
+ b 0b
+ENDPROC(hchacha_block_arm)
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 46cd4297761c..b08029d7bde6 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
* including ChaCha20 (RFC7539)
*
* Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
--
2.18.2
From d3944704ef3870fb15fc8801ef9a71f00b34babd Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:15 +0100
Subject: [PATCH 009/100] crypto: arm/chacha - expose ARM ChaCha routine as
library function
commit a44a3430d71bad4ee56788a59fff099b291ea54c upstream.
Expose the accelerated NEON ChaCha routine directly as a symbol
export so that users of the ChaCha library API can use it directly.
Given that calls into the library API will always go through the
routines in this module if it is enabled, switch to static keys
to select the optimal implementation available (which may be none
at all, in which case we defer to the generic implementation for
all invocations).
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/Kconfig | 1 +
arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index cee414afeabc..b25ffec04417 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -129,6 +129,7 @@ config CRYPTO_CRC32_ARM_CE
config CRYPTO_CHACHA20_NEON
tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
select CRYPTO_BLKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index eb40efb3eb34..3f0c057aa050 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -11,6 +11,7 @@
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
const u32 *state, int nrounds);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
static inline bool neon_usable(void)
{
- return crypto_simd_usable();
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
}
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
@@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+ hchacha_block_arm(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE) {
+ chacha_doarm(dst, src, bytes, state, nrounds);
+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+ return;
+ }
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static int chacha_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv,
bool neon)
@@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(void)
for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
neon_algs[i].base.cra_priority = 0;
break;
+ default:
+ static_branch_enable(&use_neon);
}
err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
--
2.18.2
From f2718bb79f639d39f1f2ec04b6746ed5e8e70af9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:16 +0100
Subject: [PATCH 010/100] crypto: mips/chacha - import 32r2 ChaCha code from
Zinc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 upstream.
This imports the accelerated MIPS 32r2 ChaCha20 implementation from the
Zinc patch set.
Co-developed-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++
1 file changed, 424 insertions(+)
create mode 100644 arch/mips/crypto/chacha-core.S
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
new file mode 100644
index 000000000000..a81e02db95e7
--- /dev/null
+++ b/arch/mips/crypto/chacha-core.S
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define MASK_U32 0x3c
+#define CHACHA20_BLOCK_SIZE 64
+#define STACK_SIZE 32
+
+#define X0 $t0
+#define X1 $t1
+#define X2 $t2
+#define X3 $t3
+#define X4 $t4
+#define X5 $t5
+#define X6 $t6
+#define X7 $t7
+#define X8 $t8
+#define X9 $t9
+#define X10 $v1
+#define X11 $s6
+#define X12 $s5
+#define X13 $s4
+#define X14 $s3
+#define X15 $s2
+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
+#define T0 $s1
+#define T1 $s0
+#define T(n) T ## n
+#define X(n) X ## n
+
+/* Input arguments */
+#define STATE $a0
+#define OUT $a1
+#define IN $a2
+#define BYTES $a3
+
+/* Output argument */
+/* NONCE[0] is kept in a register and not in memory.
+ * We don't want to touch original value in memory.
+ * Must be incremented every loop iteration.
+ */
+#define NONCE_0 $v0
+
+/* SAVED_X and SAVED_CA are set in the jump table.
+ * Use regs which are overwritten on exit else we don't leak clear data.
+ * They are used to handling the last bytes which are not multiple of 4.
+ */
+#define SAVED_X X15
+#define SAVED_CA $s7
+
+#define IS_UNALIGNED $s7
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MSB 0
+#define LSB 3
+#define ROTx rotl
+#define ROTR(n) rotr n, 24
+#define CPU_TO_LE32(n) \
+ wsbh n; \
+ rotr n, 16;
+#else
+#define MSB 3
+#define LSB 0
+#define ROTx rotr
+#define CPU_TO_LE32(n)
+#define ROTR(n)
+#endif
+
+#define FOR_EACH_WORD(x) \
+ x( 0); \
+ x( 1); \
+ x( 2); \
+ x( 3); \
+ x( 4); \
+ x( 5); \
+ x( 6); \
+ x( 7); \
+ x( 8); \
+ x( 9); \
+ x(10); \
+ x(11); \
+ x(12); \
+ x(13); \
+ x(14); \
+ x(15);
+
+#define FOR_EACH_WORD_REV(x) \
+ x(15); \
+ x(14); \
+ x(13); \
+ x(12); \
+ x(11); \
+ x(10); \
+ x( 9); \
+ x( 8); \
+ x( 7); \
+ x( 6); \
+ x( 5); \
+ x( 4); \
+ x( 3); \
+ x( 2); \
+ x( 1); \
+ x( 0);
+
+#define PLUS_ONE_0 1
+#define PLUS_ONE_1 2
+#define PLUS_ONE_2 3
+#define PLUS_ONE_3 4
+#define PLUS_ONE_4 5
+#define PLUS_ONE_5 6
+#define PLUS_ONE_6 7
+#define PLUS_ONE_7 8
+#define PLUS_ONE_8 9
+#define PLUS_ONE_9 10
+#define PLUS_ONE_10 11
+#define PLUS_ONE_11 12
+#define PLUS_ONE_12 13
+#define PLUS_ONE_13 14
+#define PLUS_ONE_14 15
+#define PLUS_ONE_15 16
+#define PLUS_ONE(x) PLUS_ONE_ ## x
+#define _CONCAT3(a,b,c) a ## b ## c
+#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
+
+#define STORE_UNALIGNED(x) \
+CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lwl T1, (x*4)+MSB ## (IN); \
+ lwr T1, (x*4)+LSB ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ swl X ## x, (x*4)+MSB ## (OUT); \
+ swr X ## x, (x*4)+LSB ## (OUT);
+
+#define STORE_ALIGNED(x) \
+CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lw T1, (x*4) ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ sw X ## x, (x*4) ## (OUT);
+
+/* Jump table macro.
+ * Used for setup and handling the last bytes, which are not multiple of 4.
+ * X15 is free to store Xn
+ * Every jumptable entry must be equal in size.
+ */
+#define JMPTBL_ALIGNED(x) \
+.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define JMPTBL_UNALIGNED(x) \
+.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
+ addu X(A), X(K); \
+ addu X(B), X(L); \
+ addu X(C), X(M); \
+ addu X(D), X(N); \
+ xor X(V), X(A); \
+ xor X(W), X(B); \
+ xor X(Y), X(C); \
+ xor X(Z), X(D); \
+ rotl X(V), S; \
+ rotl X(W), S; \
+ rotl X(Y), S; \
+ rotl X(Z), S;
+
+.text
+.set reorder
+.set noat
+.globl chacha20_mips
+.ent chacha20_mips
+chacha20_mips:
+ .frame $sp, STACK_SIZE, $ra
+
+ addiu $sp, -STACK_SIZE
+
+ /* Return bytes = 0. */
+ beqz BYTES, .Lchacha20_mips_end
+
+ lw NONCE_0, 48(STATE)
+
+ /* Save s0-s7 */
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+ sw $s4, 16($sp)
+ sw $s5, 20($sp)
+ sw $s6, 24($sp)
+ sw $s7, 28($sp)
+
+ /* Test IN or OUT is unaligned.
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
+ */
+ or IS_UNALIGNED, IN, OUT
+ andi IS_UNALIGNED, 0x3
+
+ /* Set number of rounds */
+ li $at, 20
+
+ b .Lchacha20_rounds_start
+
+.align 4
+.Loop_chacha20_rounds:
+ addiu IN, CHACHA20_BLOCK_SIZE
+ addiu OUT, CHACHA20_BLOCK_SIZE
+ addiu NONCE_0, 1
+
+.Lchacha20_rounds_start:
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+
+ move X12, NONCE_0
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_chacha20_xor_rounds:
+ addiu $at, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $at, .Loop_chacha20_xor_rounds
+
+ addiu BYTES, -(CHACHA20_BLOCK_SIZE)
+
+ /* Is data src/dst unaligned? Jump */
+ bnez IS_UNALIGNED, .Loop_chacha20_unaligned
+
+ /* Set number rounds here to fill delayslot. */
+ li $at, 20
+
+ /* BYTES < 0, it has no full block. */
+ bltz BYTES, .Lchacha20_mips_no_full_block_aligned
+
+ FOR_EACH_WORD_REV(STORE_ALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha20_rounds
+
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+
+ /* BYTES < 0? Handle last bytes */
+ bltz BYTES, .Lchacha20_mips_xor_bytes
+
+.Lchacha20_mips_xor_done:
+ /* Restore used registers */
+ lw $s0, 0($sp)
+ lw $s1, 4($sp)
+ lw $s2, 8($sp)
+ lw $s3, 12($sp)
+ lw $s4, 16($sp)
+ lw $s5, 20($sp)
+ lw $s6, 24($sp)
+ lw $s7, 28($sp)
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+.Lchacha20_mips_end:
+ addiu $sp, STACK_SIZE
+ jr $ra
+
+.Lchacha20_mips_no_full_block_aligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_ALIGNED)
+
+
+.Loop_chacha20_unaligned:
+ /* Set number rounds here to fill delayslot. */
+ li $at, 20
+
+ /* BYTES > 0, it has no full block. */
+ bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
+
+ FOR_EACH_WORD_REV(STORE_UNALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha20_rounds
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+ .set noreorder
+ /* Fall through to byte handling */
+ bgez BYTES, .Lchacha20_mips_xor_done
+.Lchacha20_mips_xor_unaligned_0_b:
+.Lchacha20_mips_xor_aligned_0_b:
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+ .set reorder
+
+.Lchacha20_mips_xor_bytes:
+ addu IN, $at
+ addu OUT, $at
+ /* First byte */
+ lbu T1, 0(IN)
+ addiu $at, BYTES, 1
+ CPU_TO_LE32(SAVED_X)
+ ROTR(SAVED_X)
+ xor T1, SAVED_X
+ sb T1, 0(OUT)
+ beqz $at, .Lchacha20_mips_xor_done
+ /* Second byte */
+ lbu T1, 1(IN)
+ addiu $at, BYTES, 2
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 1(OUT)
+ beqz $at, .Lchacha20_mips_xor_done
+ /* Third byte */
+ lbu T1, 2(IN)
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 2(OUT)
+ b .Lchacha20_mips_xor_done
+
+.Lchacha20_mips_no_full_block_unaligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_UNALIGNED)
+.end chacha20_mips
+.set at
--
2.18.2
From a171197a29c587a90f42392a3e23afc7b790576b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:17 +0100
Subject: [PATCH 011/100] crypto: mips/chacha - wire up accelerated 32r2 code
from Zinc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream.
This integrates the accelerated MIPS 32r2 implementation of ChaCha
into both the API and library interfaces of the kernel crypto stack.
The significance of this is that, in addition to becoming available
as an accelerated library implementation, it can also be used by
existing crypto API code such as Adiantum (for block encryption on
ultra low performance cores) or IPsec using chacha20poly1305. These
are use cases that have already opted into using the abstract crypto
API. In order to support Adiantum, the core assembler routine has
been adapted to take the round count as a function argument rather
than hardcoding it to 20.
Co-developed-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/mips/Makefile | 2 +-
arch/mips/crypto/Makefile | 4 +
arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++---------
arch/mips/crypto/chacha-glue.c | 150 +++++++++++++++++++++++++++++++
crypto/Kconfig | 6 ++
5 files changed, 277 insertions(+), 44 deletions(-)
create mode 100644 arch/mips/crypto/chacha-glue.c
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 5403a91ce098..573409c85c81 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
# See arch/mips/Kbuild for content of core part of the kernel
core-y += arch/mips/
-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
+drivers-y += arch/mips/crypto/
drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
# suspend and hibernation support
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index e07aca572c2e..b528b9d300f1 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -4,3 +4,7 @@
#
obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
+chacha-mips-y := chacha-core.o chacha-glue.o
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
index a81e02db95e7..5755f69cfe00 100644
--- a/arch/mips/crypto/chacha-core.S
+++ b/arch/mips/crypto/chacha-core.S
@@ -125,7 +125,7 @@
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
#define STORE_UNALIGNED(x) \
-CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
swr X ## x, (x*4)+LSB ## (OUT);
#define STORE_ALIGNED(x) \
-CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
* Every jumptable entry must be equal in size.
*/
#define JMPTBL_ALIGNED(x) \
-.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
.set noreorder; \
- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.set reorder
#define JMPTBL_UNALIGNED(x) \
-.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
.set noreorder; \
- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.text
.set reorder
.set noat
-.globl chacha20_mips
-.ent chacha20_mips
-chacha20_mips:
+.globl chacha_crypt_arch
+.ent chacha_crypt_arch
+chacha_crypt_arch:
.frame $sp, STACK_SIZE, $ra
+ /* Load number of rounds */
+ lw $at, 16($sp)
+
addiu $sp, -STACK_SIZE
/* Return bytes = 0. */
- beqz BYTES, .Lchacha20_mips_end
+ beqz BYTES, .Lchacha_mips_end
lw NONCE_0, 48(STATE)
@@ -228,18 +231,15 @@ chacha20_mips:
or IS_UNALIGNED, IN, OUT
andi IS_UNALIGNED, 0x3
- /* Set number of rounds */
- li $at, 20
-
- b .Lchacha20_rounds_start
+ b .Lchacha_rounds_start
.align 4
-.Loop_chacha20_rounds:
+.Loop_chacha_rounds:
addiu IN, CHACHA20_BLOCK_SIZE
addiu OUT, CHACHA20_BLOCK_SIZE
addiu NONCE_0, 1
-.Lchacha20_rounds_start:
+.Lchacha_rounds_start:
lw X0, 0(STATE)
lw X1, 4(STATE)
lw X2, 8(STATE)
@@ -259,7 +259,7 @@ chacha20_mips:
lw X14, 56(STATE)
lw X15, 60(STATE)
-.Loop_chacha20_xor_rounds:
+.Loop_chacha_xor_rounds:
addiu $at, -2
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
@@ -269,31 +269,31 @@ chacha20_mips:
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
- bnez $at, .Loop_chacha20_xor_rounds
+ bnez $at, .Loop_chacha_xor_rounds
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
/* Is data src/dst unaligned? Jump */
- bnez IS_UNALIGNED, .Loop_chacha20_unaligned
+ bnez IS_UNALIGNED, .Loop_chacha_unaligned
/* Set number rounds here to fill delayslot. */
- li $at, 20
+ lw $at, (STACK_SIZE+16)($sp)
/* BYTES < 0, it has no full block. */
- bltz BYTES, .Lchacha20_mips_no_full_block_aligned
+ bltz BYTES, .Lchacha_mips_no_full_block_aligned
FOR_EACH_WORD_REV(STORE_ALIGNED)
/* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha20_rounds
+ bgtz BYTES, .Loop_chacha_rounds
/* Place this here to fill delay slot */
addiu NONCE_0, 1
/* BYTES < 0? Handle last bytes */
- bltz BYTES, .Lchacha20_mips_xor_bytes
+ bltz BYTES, .Lchacha_mips_xor_bytes
-.Lchacha20_mips_xor_done:
+.Lchacha_mips_xor_done:
/* Restore used registers */
lw $s0, 0($sp)
lw $s1, 4($sp)
@@ -307,11 +307,11 @@ chacha20_mips:
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
-.Lchacha20_mips_end:
+.Lchacha_mips_end:
addiu $sp, STACK_SIZE
jr $ra
-.Lchacha20_mips_no_full_block_aligned:
+.Lchacha_mips_no_full_block_aligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
@@ -319,7 +319,7 @@ chacha20_mips:
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
@@ -328,7 +328,7 @@ chacha20_mips:
addu T1, STATE, $at
/* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
@@ -342,31 +342,31 @@ chacha20_mips:
FOR_EACH_WORD(JMPTBL_ALIGNED)
-.Loop_chacha20_unaligned:
+.Loop_chacha_unaligned:
/* Set number rounds here to fill delayslot. */
- li $at, 20
+ lw $at, (STACK_SIZE+16)($sp)
/* BYTES > 0, it has no full block. */
- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
FOR_EACH_WORD_REV(STORE_UNALIGNED)
/* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha20_rounds
+ bgtz BYTES, .Loop_chacha_rounds
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.set noreorder
/* Fall through to byte handling */
- bgez BYTES, .Lchacha20_mips_xor_done
-.Lchacha20_mips_xor_unaligned_0_b:
-.Lchacha20_mips_xor_aligned_0_b:
+ bgez BYTES, .Lchacha_mips_xor_done
+.Lchacha_mips_xor_unaligned_0_b:
+.Lchacha_mips_xor_aligned_0_b:
/* Place this here to fill delay slot */
addiu NONCE_0, 1
.set reorder
-.Lchacha20_mips_xor_bytes:
+.Lchacha_mips_xor_bytes:
addu IN, $at
addu OUT, $at
/* First byte */
@@ -376,22 +376,22 @@ chacha20_mips:
ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
- beqz $at, .Lchacha20_mips_xor_done
+ beqz $at, .Lchacha_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
- beqz $at, .Lchacha20_mips_xor_done
+ beqz $at, .Lchacha_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
- b .Lchacha20_mips_xor_done
+ b .Lchacha_mips_xor_done
-.Lchacha20_mips_no_full_block_unaligned:
+.Lchacha_mips_no_full_block_unaligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
@@ -399,7 +399,7 @@ chacha20_mips:
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
@@ -408,7 +408,7 @@ chacha20_mips:
addu T1, STATE, $at
/* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
@@ -420,5 +420,78 @@ chacha20_mips:
/* Jump table */
FOR_EACH_WORD(JMPTBL_UNALIGNED)
-.end chacha20_mips
+.end chacha_crypt_arch
+.set at
+
+/* Input arguments
+ * STATE $a0
+ * OUT $a1
+ * NROUND $a2
+ */
+
+#undef X12
+#undef X13
+#undef X14
+#undef X15
+
+#define X12 $a3
+#define X13 $at
+#define X14 $v0
+#define X15 STATE
+
+.set noat
+.globl hchacha_block_arch
+.ent hchacha_block_arch
+hchacha_block_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ addiu $sp, -STACK_SIZE
+
+ /* Save X11(s6) */
+ sw X11, 0($sp)
+
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+ lw X12, 48(STATE)
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_hchacha_xor_rounds:
+ addiu $a2, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $a2, .Loop_hchacha_xor_rounds
+
+ /* Restore used register */
+ lw X11, 0($sp)
+
+ sw X0, 0(OUT)
+ sw X1, 4(OUT)
+ sw X2, 8(OUT)
+ sw X3, 12(OUT)
+ sw X12, 16(OUT)
+ sw X13, 20(OUT)
+ sw X14, 24(OUT)
+ sw X15, 28(OUT)
+
+ addiu $sp, STACK_SIZE
+ jr $ra
+.end hchacha_block_arch
.set at
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
new file mode 100644
index 000000000000..779e399c9bef
--- /dev/null
+++ b/arch/mips/crypto/chacha-glue.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPS accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/byteorder.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+static int chacha_mips_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_mips_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ hchacha_block(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_mips_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_mips,
+ .decrypt = chacha_mips,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-mips");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 649dc564f242..6b5e14cee475 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
XChaCha20, and XChaCha12 stream ciphers.
+config CRYPTO_CHACHA_MIPS
+ tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)"
+ depends on CPU_MIPS32_R2
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
config CRYPTO_SEED
tristate "SEED cipher algorithm"
select CRYPTO_ALGAPI
--
2.18.2
From b455a56d1c9035f7ca22428941755ec376189d14 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:18 +0100
Subject: [PATCH 012/100] crypto: chacha - unexport chacha_generic routines
commit 22cf705360707ced15f9fe5423938f313c7df536 upstream.
Now that all users of generic ChaCha code have moved to the core library,
there is no longer a need for the generic ChaCha skcpiher driver to
export parts of it implementation for reuse by other drivers. So drop
the exports, and make the symbols static.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/chacha_generic.c | 26 ++++++++------------------
include/crypto/internal/chacha.h | 10 ----------
2 files changed, 8 insertions(+), 28 deletions(-)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
index ebae6d9d9b32..c1b147318393 100644
--- a/crypto/chacha_generic.c
+++ b/crypto/chacha_generic.c
@@ -21,7 +21,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
err = skcipher_walk_virt(&walk, req, false);
- crypto_chacha_init(state, ctx, iv);
+ chacha_init_generic(state, ctx->key, iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
@@ -37,36 +37,27 @@ static int chacha_stream_xor(struct skcipher_request *req,
return err;
}
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
-{
- chacha_init_generic(state, ctx->key, iv);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha_init);
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
{
return chacha_setkey(tfm, key, keysize, 20);
}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
+static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
{
return chacha_setkey(tfm, key, keysize, 12);
}
-EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
-int crypto_chacha_crypt(struct skcipher_request *req)
+static int crypto_chacha_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
return chacha_stream_xor(req, ctx, req->iv);
}
-EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
-int crypto_xchacha_crypt(struct skcipher_request *req)
+static int crypto_xchacha_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -75,7 +66,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
u8 real_iv[16];
/* Compute the subkey given the original key and first 128 nonce bits */
- crypto_chacha_init(state, ctx, req->iv);
+ chacha_init_generic(state, ctx->key, req->iv);
hchacha_block_generic(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
@@ -86,7 +77,6 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
/* Generate the stream and XOR it with the data */
return chacha_stream_xor(req, &subctx, real_iv);
}
-EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
static struct skcipher_alg algs[] = {
{
diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
index c0e40b245431..aa5d4a16aac5 100644
--- a/include/crypto/internal/chacha.h
+++ b/include/crypto/internal/chacha.h
@@ -12,8 +12,6 @@ struct chacha_ctx {
int nrounds;
};
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
-
static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize, int nrounds)
{
@@ -42,12 +40,4 @@ static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
return chacha_setkey(tfm, key, keysize, 12);
}
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize);
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize);
-
-int crypto_chacha_crypt(struct skcipher_request *req);
-int crypto_xchacha_crypt(struct skcipher_request *req);
-
#endif /* _CRYPTO_CHACHA_H */
--
2.18.2
From 015fca1be79e9f688c936524100503689d100f8c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:19 +0100
Subject: [PATCH 013/100] crypto: poly1305 - move core routines into a separate
library
commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream.
Move the core Poly1305 routines shared between the generic Poly1305
shash driver and the Adiantum and NHPoly1305 drivers into a separate
library so that using just this pieces does not pull in the crypto
API pieces of the generic Poly1305 routine.
In a subsequent patch, we will augment this generic library with
init/update/final routines so that Poyl1305 algorithm can be used
directly without the need for using the crypto API's shash abstraction.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305_glue.c | 2 +-
crypto/Kconfig | 5 +-
crypto/adiantum.c | 5 +-
crypto/nhpoly1305.c | 3 +-
crypto/poly1305_generic.c | 195 ++---------------------------
include/crypto/internal/poly1305.h | 67 ++++++++++
include/crypto/poly1305.h | 23 ----
lib/crypto/Kconfig | 3 +
lib/crypto/Makefile | 3 +
lib/crypto/poly1305.c | 158 +++++++++++++++++++++++
10 files changed, 248 insertions(+), 216 deletions(-)
create mode 100644 include/crypto/internal/poly1305.h
create mode 100644 lib/crypto/poly1305.c
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4a1c05dce950..6ccf8eb26324 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -7,8 +7,8 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
-#include <crypto/poly1305.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 6b5e14cee475..b70b9d7c6e2f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP
config CRYPTO_NHPOLY1305
tristate
select CRYPTO_HASH
- select CRYPTO_POLY1305
+ select CRYPTO_LIB_POLY1305_GENERIC
config CRYPTO_NHPOLY1305_SSE2
tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
@@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2
config CRYPTO_ADIANTUM
tristate "Adiantum support"
select CRYPTO_CHACHA20
- select CRYPTO_POLY1305
+ select CRYPTO_LIB_POLY1305_GENERIC
select CRYPTO_NHPOLY1305
select CRYPTO_MANAGER
help
@@ -686,6 +686,7 @@ config CRYPTO_GHASH
config CRYPTO_POLY1305
tristate "Poly1305 authenticator algorithm"
select CRYPTO_HASH
+ select CRYPTO_LIB_POLY1305_GENERIC
help
Poly1305 authenticator algorithm, RFC7539.
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
index 395a3ddd3707..aded26092268 100644
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@ -33,6 +33,7 @@
#include <crypto/b128ops.h>
#include <crypto/chacha.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/internal/skcipher.h>
#include <crypto/nhpoly1305.h>
#include <crypto/scatterwalk.h>
@@ -242,11 +243,11 @@ static void adiantum_hash_header(struct skcipher_request *req)
BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
poly1305_core_blocks(&state, &tctx->header_hash_key,
- &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+ &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
- TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+ TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
poly1305_core_emit(&state, &rctx->header_hash);
}
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
index 9ab4e07cde4d..f6b6a52092b4 100644
--- a/crypto/nhpoly1305.c
+++ b/crypto/nhpoly1305.c
@@ -33,6 +33,7 @@
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/nhpoly1305.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
@@ -78,7 +79,7 @@ static void process_nh_hash_value(struct nhpoly1305_state *state,
BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
- NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+ NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
}
/*
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index adc40298c749..067f493c2504 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -13,27 +13,12 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
+#include <crypto/internal/poly1305.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/unaligned.h>
-static inline u64 mlt(u64 a, u64 b)
-{
- return a * b;
-}
-
-static inline u32 sr(u64 v, u_char n)
-{
- return v >> n;
-}
-
-static inline u32 and(u32 v, u32 mask)
-{
- return v & mask;
-}
-
int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_desc *desc)
}
EXPORT_SYMBOL_GPL(crypto_poly1305_init);
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
-{
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
-}
-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
-
-/*
- * Poly1305 requires a unique key for each tag, which implies that we can't set
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
- * expect the key as the first 32 bytes in the update() call.
- */
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (!dctx->sset) {
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_core_setkey(&dctx->r, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->rset = true;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return srclen;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
-
-static void poly1305_blocks_internal(struct poly1305_state *state,
- const struct poly1305_key *key,
- const void *src, unsigned int nblocks,
- u32 hibit)
-{
- u32 r0, r1, r2, r3, r4;
- u32 s1, s2, s3, s4;
- u32 h0, h1, h2, h3, h4;
- u64 d0, d1, d2, d3, d4;
-
- if (!nblocks)
- return;
-
- r0 = key->r[0];
- r1 = key->r[1];
- r2 = key->r[2];
- r3 = key->r[3];
- r4 = key->r[4];
-
- s1 = r1 * 5;
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- do {
- /* h += m[i] */
- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
- h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
-
- /* h *= r */
- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
- mlt(h3, s2) + mlt(h4, s1);
- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
- mlt(h3, s3) + mlt(h4, s2);
- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
- mlt(h3, s4) + mlt(h4, s3);
- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
- mlt(h3, r0) + mlt(h4, s4);
- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
- mlt(h3, r1) + mlt(h4, r0);
-
- /* (partial) h %= p */
- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
-
- src += POLY1305_BLOCK_SIZE;
- } while (--nblocks);
-
- state->h[0] = h0;
- state->h[1] = h1;
- state->h[2] = h2;
- state->h[3] = h3;
- state->h[4] = h4;
-}
-
-void poly1305_core_blocks(struct poly1305_state *state,
- const struct poly1305_key *key,
- const void *src, unsigned int nblocks)
-{
- poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
-}
-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
-
-static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen, u32 hibit)
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int srclen)
{
unsigned int datalen;
@@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
srclen = datalen;
}
- poly1305_blocks_internal(&dctx->h, &dctx->r,
- src, srclen / POLY1305_BLOCK_SIZE, hibit);
+ poly1305_core_blocks(&dctx->h, &dctx->r, src,
+ srclen / POLY1305_BLOCK_SIZE, 1);
}
int crypto_poly1305_update(struct shash_desc *desc,
@@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_desc *desc,
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
poly1305_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE, 1 << 24);
+ POLY1305_BLOCK_SIZE);
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- poly1305_blocks(dctx, src, srclen, 1 << 24);
+ poly1305_blocks(dctx, src, srclen);
src += srclen - (srclen % POLY1305_BLOCK_SIZE);
srclen %= POLY1305_BLOCK_SIZE;
}
@@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_desc *desc,
}
EXPORT_SYMBOL_GPL(crypto_poly1305_update);
-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
-{
- u32 h0, h1, h2, h3, h4;
- u32 g0, g1, g2, g3, g4;
- u32 mask;
-
- /* fully carry h */
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
-
- /* compute h + -p */
- g0 = h0 + 5;
- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
- /* select h if h < p, or h + -p if h >= p */
- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128) */
- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
-}
-EXPORT_SYMBOL_GPL(poly1305_core_emit);
-
int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0,
POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
}
poly1305_core_emit(&dctx->h, digest);
diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
new file mode 100644
index 000000000000..cb58e61f73a7
--- /dev/null
+++ b/include/crypto/internal/poly1305.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for the Poly1305 algorithm
+ */
+
+#ifndef _CRYPTO_INTERNAL_POLY1305_H
+#define _CRYPTO_INTERNAL_POLY1305_H
+
+#include <asm/unaligned.h>
+#include <linux/types.h>
+#include <crypto/poly1305.h>
+
+struct shash_desc;
+
+/*
+ * Poly1305 core functions. These implement the ε-almost-∆-universal hash
+ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
+ * ("s key") at the end. They also only support block-aligned inputs.
+ */
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
+static inline void poly1305_core_init(struct poly1305_state *state)
+{
+ *state = (struct poly1305_state){};
+}
+
+void poly1305_core_blocks(struct poly1305_state *state,
+ const struct poly1305_key *key, const void *src,
+ unsigned int nblocks, u32 hibit);
+void poly1305_core_emit(const struct poly1305_state *state, void *dst);
+
+/* Crypto API helper functions for the Poly1305 MAC */
+int crypto_poly1305_init(struct shash_desc *desc);
+
+int crypto_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen);
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
+
+/*
+ * Poly1305 requires a unique key for each tag, which implies that we can't set
+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
+ * expect the key as the first 32 bytes in the update() call.
+ */
+static inline
+unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ if (!dctx->sset) {
+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_core_setkey(&dctx->r, src);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->rset = true;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ }
+ return srclen;
+}
+
+#endif
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 34317ed2071e..f5a4319c2a1f 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -38,27 +38,4 @@ struct poly1305_desc_ctx {
bool sset;
};
-/*
- * Poly1305 core functions. These implement the ε-almost-∆-universal hash
- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
- * ("s key") at the end. They also only support block-aligned inputs.
- */
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
-static inline void poly1305_core_init(struct poly1305_state *state)
-{
- memset(state->h, 0, sizeof(state->h));
-}
-void poly1305_core_blocks(struct poly1305_state *state,
- const struct poly1305_key *key,
- const void *src, unsigned int nblocks);
-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
-
-/* Crypto API helper functions for the Poly1305 MAC */
-int crypto_poly1305_init(struct shash_desc *desc);
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
-
#endif
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 6a11931ae105..c4882d29879e 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA
config CRYPTO_LIB_DES
tristate
+config CRYPTO_LIB_POLY1305_GENERIC
+ tristate
+
config CRYPTO_LIB_SHA256
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 0ce40604e104..b58ab6843a9d 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -13,5 +13,8 @@ libarc4-y := arc4.o
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
libdes-y := des.o
+obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
+libpoly1305-y := poly1305.o
+
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
new file mode 100644
index 000000000000..f019a57dbc1b
--- /dev/null
+++ b/lib/crypto/poly1305.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ */
+
+#include <crypto/internal/poly1305.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+static inline u64 mlt(u64 a, u64 b)
+{
+ return a * b;
+}
+
+static inline u32 sr(u64 v, u_char n)
+{
+ return v >> n;
+}
+
+static inline u32 and(u32 v, u32 mask)
+{
+ return v & mask;
+}
+
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
+{
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
+}
+EXPORT_SYMBOL_GPL(poly1305_core_setkey);
+
+void poly1305_core_blocks(struct poly1305_state *state,
+ const struct poly1305_key *key, const void *src,
+ unsigned int nblocks, u32 hibit)
+{
+ u32 r0, r1, r2, r3, r4;
+ u32 s1, s2, s3, s4;
+ u32 h0, h1, h2, h3, h4;
+ u64 d0, d1, d2, d3, d4;
+
+ if (!nblocks)
+ return;
+
+ r0 = key->r[0];
+ r1 = key->r[1];
+ r2 = key->r[2];
+ r3 = key->r[3];
+ r4 = key->r[4];
+
+ s1 = r1 * 5;
+ s2 = r2 * 5;
+ s3 = r3 * 5;
+ s4 = r4 * 5;
+
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ do {
+ /* h += m[i] */
+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
+
+ /* h *= r */
+ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
+ mlt(h3, s2) + mlt(h4, s1);
+ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
+ mlt(h3, s3) + mlt(h4, s2);
+ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
+ mlt(h3, s4) + mlt(h4, s3);
+ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
+ mlt(h3, r0) + mlt(h4, s4);
+ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
+ mlt(h3, r1) + mlt(h4, r0);
+
+ /* (partial) h %= p */
+ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
+ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
+ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
+ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
+ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
+ h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
+
+ src += POLY1305_BLOCK_SIZE;
+ } while (--nblocks);
+
+ state->h[0] = h0;
+ state->h[1] = h1;
+ state->h[2] = h2;
+ state->h[3] = h3;
+ state->h[4] = h4;
+}
+EXPORT_SYMBOL_GPL(poly1305_core_blocks);
+
+void poly1305_core_emit(const struct poly1305_state *state, void *dst)
+{
+ u32 h0, h1, h2, h3, h4;
+ u32 g0, g1, g2, g3, g4;
+ u32 mask;
+
+ /* fully carry h */
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
+ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
+ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
+ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
+ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
+ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
+ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
+ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_emit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
--
2.18.2
From 67f694baa2784551f3a5b0402fa53b17feed3009 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:20 +0100
Subject: [PATCH 014/100] crypto: x86/poly1305 - unify Poly1305 state struct
with generic code
commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
In preparation of exposing a Poly1305 library interface directly from
the accelerated x86 driver, align the state descriptor of the x86 code
with the one used by the generic driver. This is needed to make the
library interface unified between all implementations.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305_glue.c | 88 ++++++++++--------------------
crypto/poly1305_generic.c | 6 +-
include/crypto/internal/poly1305.h | 4 +-
include/crypto/poly1305.h | 18 +++---
4 files changed, 43 insertions(+), 73 deletions(-)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 6ccf8eb26324..b43b93c95e79 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -14,40 +14,14 @@
#include <linux/module.h>
#include <asm/simd.h>
-struct poly1305_simd_desc_ctx {
- struct poly1305_desc_ctx base;
- /* derived key u set? */
- bool uset;
-#ifdef CONFIG_AS_AVX2
- /* derived keys r^3, r^4 set? */
- bool wset;
-#endif
- /* derived Poly1305 key r^2 */
- u32 u[5];
- /* ... silently appended r^3 and r^4 when using AVX2 */
-};
-
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-static int poly1305_simd_init(struct shash_desc *desc)
-{
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
- sctx->wset = false;
-#endif
-
- return crypto_poly1305_init(desc);
-}
+static bool poly1305_use_avx2 __ro_after_init;
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
- struct poly1305_simd_desc_ctx *sctx;
unsigned int blocks, datalen;
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
-
if (unlikely(!dctx->sset)) {
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
src += srclen - datalen;
srclen = datalen;
}
-#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
- if (unlikely(!sctx->wset)) {
- if (!sctx->uset) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ poly1305_use_avx2 &&
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
+ if (unlikely(dctx->rset < 4)) {
+ if (dctx->rset < 2) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
}
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
- sctx->wset = true;
+ dctx->r[2] = dctx->r[1];
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
+ dctx->r[3] = dctx->r[2];
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
+ dctx->rset = 4;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
+ dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
}
-#endif
+
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
- if (unlikely(!sctx->uset)) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (unlikely(dctx->rset < 2)) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
+ dctx->rset = 2;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
+ blocks, dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
srclen -= POLY1305_BLOCK_SIZE;
}
return srclen;
@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc,
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
- .init = poly1305_simd_init,
+ .init = crypto_poly1305_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
+ .descsize = sizeof(struct poly1305_desc_ctx),
.base = {
.cra_name = "poly1305",
.cra_driver_name = "poly1305-simd",
@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void)
if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV;
-#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
+ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
if (poly1305_use_avx2)
alg.descsize += 10 * sizeof(u32);
-#endif
+
return crypto_register_shash(&alg);
}
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 067f493c2504..f3fcd9578a47 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
poly1305_core_init(&dctx->h);
dctx->buflen = 0;
- dctx->rset = false;
+ dctx->rset = 0;
dctx->sset = false;
return 0;
@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
srclen = datalen;
}
- poly1305_core_blocks(&dctx->h, &dctx->r, src,
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
srclen / POLY1305_BLOCK_SIZE, 1);
}
@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0,
POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
+ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
}
poly1305_core_emit(&dctx->h, digest);
diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
index cb58e61f73a7..04fa269e5534 100644
--- a/include/crypto/internal/poly1305.h
+++ b/include/crypto/internal/poly1305.h
@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
{
if (!dctx->sset) {
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_core_setkey(&dctx->r, src);
+ poly1305_core_setkey(dctx->r, src);
src += POLY1305_BLOCK_SIZE;
srclen -= POLY1305_BLOCK_SIZE;
- dctx->rset = true;
+ dctx->rset = 1;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
dctx->s[0] = get_unaligned_le32(src + 0);
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index f5a4319c2a1f..36b5886cb50c 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -22,20 +22,20 @@ struct poly1305_state {
};
struct poly1305_desc_ctx {
- /* key */
- struct poly1305_key r;
- /* finalize key */
- u32 s[4];
- /* accumulator */
- struct poly1305_state h;
/* partial buffer */
u8 buf[POLY1305_BLOCK_SIZE];
/* bytes used in partial buffer */
unsigned int buflen;
- /* r key has been set */
- bool rset;
- /* s key has been set */
+ /* how many keys have been set in r[] */
+ unsigned short rset;
+ /* whether s[] has been set */
bool sset;
+ /* finalize key */
+ u32 s[4];
+ /* accumulator */
+ struct poly1305_state h;
+ /* key */
+ struct poly1305_key r[1];
};
#endif
--
2.18.2
From 355c32dc47fefdec66a4e6cb58ce5501d70d3a42 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:21 +0100
Subject: [PATCH 015/100] crypto: poly1305 - expose init/update/final library
interface
commit a1d93064094cc5e24d64e35cf093e7191d0c9344 upstream.
Expose the existing generic Poly1305 code via a init/update/final
library interface so that callers are not required to go through
the crypto API's shash abstraction to access it. At the same time,
make some preparations so that the library implementation can be
superseded by an accelerated arch-specific version in the future.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/poly1305_generic.c | 22 +-----------
include/crypto/poly1305.h | 38 +++++++++++++++++++-
lib/crypto/Kconfig | 26 ++++++++++++++
lib/crypto/poly1305.c | 74 +++++++++++++++++++++++++++++++++++++++
4 files changed, 138 insertions(+), 22 deletions(-)
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index f3fcd9578a47..afe9a9e576dd 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -85,31 +85,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update);
int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- __le32 digest[4];
- u64 f = 0;
if (unlikely(!dctx->sset))
return -ENOKEY;
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
- }
-
- poly1305_core_emit(&dctx->h, digest);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
- put_unaligned_le32(f, dst + 0);
- f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
- put_unaligned_le32(f, dst + 12);
-
+ poly1305_final_generic(dctx, dst);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_poly1305_final);
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 36b5886cb50c..74c6e1cd73ee 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -35,7 +35,43 @@ struct poly1305_desc_ctx {
/* accumulator */
struct poly1305_state h;
/* key */
- struct poly1305_key r[1];
+ struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
};
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key);
+
+static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_init_arch(desc, key);
+ else
+ poly1305_init_generic(desc, key);
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src,
+ unsigned int nbytes);
+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
+ unsigned int nbytes);
+
+static inline void poly1305_update(struct poly1305_desc_ctx *desc,
+ const u8 *src, unsigned int nbytes)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_update_arch(desc, src, nbytes);
+ else
+ poly1305_update_generic(desc, src, nbytes);
+}
+
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest);
+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest);
+
+static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_final_arch(desc, digest);
+ else
+ poly1305_final_generic(desc, digest);
+}
+
#endif
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index c4882d29879e..a731ea36bd5c 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -37,8 +37,34 @@ config CRYPTO_LIB_CHACHA
config CRYPTO_LIB_DES
tristate
+config CRYPTO_LIB_POLY1305_RSIZE
+ int
+ default 1
+
+config CRYPTO_ARCH_HAVE_LIB_POLY1305
+ tristate
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the Poly1305 library interface,
+ either builtin or as a module.
+
config CRYPTO_LIB_POLY1305_GENERIC
tristate
+ help
+ This symbol can be depended upon by arch implementations of the
+ Poly1305 library interface that require the generic code as a
+ fallback, e.g., for SIMD implementations. If no arch specific
+ implementation is enabled, this implementation serves the users
+ of CRYPTO_LIB_POLY1305.
+
+config CRYPTO_LIB_POLY1305
+ tristate "Poly1305 library interface"
+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
+ select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
+ help
+ Enable the Poly1305 library interface. This interface may be fulfilled
+ by either the generic implementation or an arch-specific one, if one
+ is available and enabled.
config CRYPTO_LIB_SHA256
tristate
diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
index f019a57dbc1b..32ec293c65ae 100644
--- a/lib/crypto/poly1305.c
+++ b/lib/crypto/poly1305.c
@@ -154,5 +154,79 @@ void poly1305_core_emit(const struct poly1305_state *state, void *dst)
}
EXPORT_SYMBOL_GPL(poly1305_core_emit);
+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
+{
+ poly1305_core_setkey(desc->r, key);
+ desc->s[0] = get_unaligned_le32(key + 16);
+ desc->s[1] = get_unaligned_le32(key + 20);
+ desc->s[2] = get_unaligned_le32(key + 24);
+ desc->s[3] = get_unaligned_le32(key + 28);
+ poly1305_core_init(&desc->h);
+ desc->buflen = 0;
+ desc->sset = true;
+ desc->rset = 1;
+}
+EXPORT_SYMBOL_GPL(poly1305_init_generic);
+
+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
+ unsigned int nbytes)
+{
+ unsigned int bytes;
+
+ if (unlikely(desc->buflen)) {
+ bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen);
+ memcpy(desc->buf + desc->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ desc->buflen += bytes;
+
+ if (desc->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
+ desc->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ poly1305_core_blocks(&desc->h, desc->r, src,
+ nbytes / POLY1305_BLOCK_SIZE, 1);
+ src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ desc->buflen = nbytes;
+ memcpy(desc->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL_GPL(poly1305_update_generic);
+
+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(desc->buflen)) {
+ desc->buf[desc->buflen++] = 1;
+ memset(desc->buf + desc->buflen, 0,
+ POLY1305_BLOCK_SIZE - desc->buflen);
+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
+ }
+
+ poly1305_core_emit(&desc->h, digest);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
+ put_unaligned_le32(f, dst + 0);
+ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
+ put_unaligned_le32(f, dst + 12);
+
+ *desc = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL_GPL(poly1305_final_generic);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
--
2.18.2
From 7998253646414533f2b58947258628dd83cd380b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:22 +0100
Subject: [PATCH 016/100] crypto: x86/poly1305 - depend on generic library not
generic shash
commit 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 upstream.
Remove the dependency on the generic Poly1305 driver. Instead, depend
on the generic library so that we only reuse code without pulling in
the generic skcipher implementation as well.
While at it, remove the logic that prefers the non-SIMD path for short
inputs - this is no longer necessary after recent FPU handling changes
on x86.
Since this removes the last remaining user of the routines exported
by the generic shash driver, unexport them and make them static.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++-----
crypto/Kconfig | 2 +-
crypto/poly1305_generic.c | 11 ++---
include/crypto/internal/poly1305.h | 9 ----
4 files changed, 60 insertions(+), 28 deletions(-)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index b43b93c95e79..a5b3a054604c 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
poly1305_block_sse2(a, m, b, 1);
}
+static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ unsigned int datalen;
+
+ if (unlikely(!dctx->sset)) {
+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+ src += srclen - datalen;
+ srclen = datalen;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
+ srclen / POLY1305_BLOCK_SIZE, 1);
+ srclen %= POLY1305_BLOCK_SIZE;
+ }
+ return srclen;
+}
+
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
@@ -91,12 +109,6 @@ static int poly1305_simd_update(struct shash_desc *desc,
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int bytes;
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !crypto_simd_usable())
- return crypto_poly1305_update(desc, src, srclen);
-
- kernel_fpu_begin();
-
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
memcpy(dctx->buf + dctx->buflen, src, bytes);
@@ -105,25 +117,57 @@ static int poly1305_simd_update(struct shash_desc *desc,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_simd_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
+ if (likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ poly1305_simd_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ kernel_fpu_end();
+ } else {
+ poly1305_scalar_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ }
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_simd_blocks(dctx, src, srclen);
+ if (likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ bytes = poly1305_simd_blocks(dctx, src, srclen);
+ kernel_fpu_end();
+ } else {
+ bytes = poly1305_scalar_blocks(dctx, src, srclen);
+ }
src += srclen - bytes;
srclen = bytes;
}
- kernel_fpu_end();
-
if (unlikely(srclen)) {
dctx->buflen = srclen;
memcpy(dctx->buf, src, srclen);
}
+}
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_core_init(&dctx->h);
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+ poly1305_final_generic(dctx, dst);
return 0;
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b70b9d7c6e2f..6178aa627141 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -697,7 +697,7 @@ config CRYPTO_POLY1305
config CRYPTO_POLY1305_X86_64
tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
depends on X86 && 64BIT
- select CRYPTO_POLY1305
+ select CRYPTO_LIB_POLY1305_GENERIC
help
Poly1305 authenticator algorithm, RFC7539.
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index afe9a9e576dd..21edbd8c99fb 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -19,7 +19,7 @@
#include <linux/module.h>
#include <asm/unaligned.h>
-int crypto_poly1305_init(struct shash_desc *desc)
+static int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_desc *desc)
return 0;
}
-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int srclen)
@@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
srclen / POLY1305_BLOCK_SIZE, 1);
}
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+static int crypto_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int bytes;
@@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_desc *desc,
return 0;
}
-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
poly1305_final_generic(dctx, dst);
return 0;
}
-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
static struct shash_alg poly1305_alg = {
.digestsize = POLY1305_DIGEST_SIZE,
diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
index 04fa269e5534..479b0cab2a1a 100644
--- a/include/crypto/internal/poly1305.h
+++ b/include/crypto/internal/poly1305.h
@@ -10,8 +10,6 @@
#include <linux/types.h>
#include <crypto/poly1305.h>
-struct shash_desc;
-
/*
* Poly1305 core functions. These implement the ε-almost-∆-universal hash
* function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
@@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly1305_state *state,
unsigned int nblocks, u32 hibit);
void poly1305_core_emit(const struct poly1305_state *state, void *dst);
-/* Crypto API helper functions for the Poly1305 MAC */
-int crypto_poly1305_init(struct shash_desc *desc);
-
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
-
/*
* Poly1305 requires a unique key for each tag, which implies that we can't set
* it on the tfm that gets accessed by multiple users simultaneously. Instead we
--
2.18.2
From 42b7d909e3dc65ee679c5820954fddc02a89b250 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:23 +0100
Subject: [PATCH 017/100] crypto: x86/poly1305 - expose existing driver as
poly1305 library
commit f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac upstream.
Implement the arch init/update/final Poly1305 library routines in the
accelerated SIMD driver for x86 so they are accessible to users of
the Poly1305 library interface as well.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305_glue.c | 57 ++++++++++++++++++++++++---------
crypto/Kconfig | 1 +
lib/crypto/Kconfig | 1 +
3 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index a5b3a054604c..370cd88068ec 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -10,6 +10,7 @@
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>
@@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2 __ro_after_init;
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
@@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
}
if (IS_ENABLED(CONFIG_AS_AVX2) &&
- poly1305_use_avx2 &&
+ static_branch_likely(&poly1305_use_avx2) &&
srclen >= POLY1305_BLOCK_SIZE * 4) {
if (unlikely(dctx->rset < 4)) {
if (dctx->rset < 2) {
@@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
return srclen;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
+{
+ poly1305_init_generic(desc, key);
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int srclen)
{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int bytes;
if (unlikely(dctx->buflen)) {
@@ -117,7 +124,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- if (likely(crypto_simd_usable())) {
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
kernel_fpu_begin();
poly1305_simd_blocks(dctx, dctx->buf,
POLY1305_BLOCK_SIZE);
@@ -131,7 +139,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- if (likely(crypto_simd_usable())) {
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
kernel_fpu_begin();
bytes = poly1305_simd_blocks(dctx, src, srclen);
kernel_fpu_end();
@@ -147,6 +156,13 @@ static int poly1305_simd_update(struct shash_desc *desc,
memcpy(dctx->buf, src, srclen);
}
}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
+{
+ poly1305_final_generic(desc, digest);
+}
+EXPORT_SYMBOL(poly1305_final_arch);
static int crypto_poly1305_init(struct shash_desc *desc)
{
@@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
return 0;
}
+static int poly1305_simd_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_update_arch(dctx, src, srclen);
+ return 0;
+}
+
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
.init = crypto_poly1305_init,
@@ -189,15 +214,15 @@ static struct shash_alg alg = {
static int __init poly1305_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
- poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
- boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
- if (poly1305_use_avx2)
- alg.descsize += 10 * sizeof(u32);
+ return 0;
+
+ static_branch_enable(&poly1305_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx2);
return crypto_register_shash(&alg);
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 6178aa627141..15cfb02c3e49 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -698,6 +698,7 @@ config CRYPTO_POLY1305_X86_64
tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
depends on X86 && 64BIT
select CRYPTO_LIB_POLY1305_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
help
Poly1305 authenticator algorithm, RFC7539.
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index a731ea36bd5c..181754615f73 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
+ default 4 if X86_64
default 1
config CRYPTO_ARCH_HAVE_LIB_POLY1305
--
2.18.2
From 3cd3c9e240bf2bf8b2ea277f8fa06e08017bfbce Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:24 +0100
Subject: [PATCH 018/100] crypto: arm64/poly1305 - incorporate
OpenSSL/CRYPTOGAMS NEON implementation
commit f569ca16475155013525686d0f73bc379c67e635 upstream.
This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation
for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL
project. The file 'poly1305-armv8.pl' is taken straight from this upstream
GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f,
and already contains all the changes required to build it as part of a
Linux kernel module.
[0] https://github.com/dot-asm/cryptogams
Co-developed-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm64/crypto/Kconfig | 6 +
arch/arm64/crypto/Makefile | 10 +-
arch/arm64/crypto/poly1305-armv8.pl | 913 ++++++++++++++++++++++
arch/arm64/crypto/poly1305-core.S_shipped | 835 ++++++++++++++++++++
arch/arm64/crypto/poly1305-glue.c | 237 ++++++
lib/crypto/Kconfig | 1 +
6 files changed, 2001 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/crypto/poly1305-armv8.pl
create mode 100644 arch/arm64/crypto/poly1305-core.S_shipped
create mode 100644 arch/arm64/crypto/poly1305-glue.c
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 17bada4b9dd2..30d9b24ee86e 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -106,6 +106,12 @@ config CRYPTO_CHACHA20_NEON
select CRYPTO_LIB_CHACHA_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CHACHA
+config CRYPTO_POLY1305_NEON
+ tristate "Poly1305 hash function using scalar or NEON instructions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0435f2a0610e..d0901e610df3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
+poly1305-neon-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
+
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+ $(call cmd,perlasm)
+
$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
+
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
new file mode 100644
index 000000000000..6e5576d19af8
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -0,0 +1,913 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# This module implements Poly1305 hash for ARMv8.
+#
+# June 2015
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc-4.9 NEON
+#
+# Apple A7 1.86/+5% 0.72
+# Cortex-A53 2.69/+58% 1.47
+# Cortex-A57 2.70/+7% 1.14
+# Denver 1.64/+50% 1.18(*)
+# X-Gene 2.13/+68% 2.27
+# Mongoose 1.77/+75% 1.12
+# Kryo 2.70/+55% 1.13
+# ThunderX2 1.17/+95% 1.36
+#
+# (*) estimate based on resources availability is less than 1.0,
+# i.e. measured result is worse than expected, presumably binary
+# translator is not almighty;
+
+$flavour=shift;
+$output=shift;
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
+my ($mac,$nonce)=($inp,$len);
+
+my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp $inp,xzr
+ stp xzr,xzr,[$ctx] // zero hash value
+ stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp $r0,$r1,[$inp] // load key
+ mov $s1,#0xfffffffc0fffffff
+ movk $s1,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev $r0,$r0 // flip bytes
+ rev $r1,$r1
+#endif
+ and $r0,$r0,$s1 // &=0ffffffc0fffffff
+ and $s1,$s1,#-4
+ and $r1,$r1,$s1 // &=0ffffffc0ffffffc
+ mov w#$s1,#-1
+ stp $r0,$r1,[$ctx,#32] // save key value
+ str w#$s1,[$ctx,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr $d0,.Lpoly1305_blocks
+ adr $r0,.Lpoly1305_blocks_neon
+ adr $d1,.Lpoly1305_emit
+
+ csel $d0,$d0,$r0,eq
+
+# ifdef __ILP32__
+ stp w#$d0,w#$d1,[$len]
+# else
+ stp $d0,$d1,[$len]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands $len,$len,#-16
+ b.eq .Lno_data
+
+ ldp $h0,$h1,[$ctx] // load hash value
+ ldp $h2,x17,[$ctx,#16] // [along with is_base2_26]
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+.Loop:
+ ldp $t0,$t1,[$inp],#16 // load input
+ sub $len,$len,#16
+#ifdef __AARCH64EB__
+ rev $t0,$t0
+ rev $t1,$t1
+#endif
+ adds $h0,$h0,$t0 // accumulate input
+ adcs $h1,$h1,$t1
+
+ mul $d0,$h0,$r0 // h0*r0
+ adc $h2,$h2,$padbit
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ cbnz $len,.Loop
+
+ stp $h0,$h1,[$ctx] // store hash value
+ stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp $h0,$h1,[$ctx] // load hash base 2^64
+ ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26]
+ ldp $t0,$t1,[$nonce] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp $r0,#0 // is_base2_26?
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+ adds $d0,$h0,#5 // compare to modulus
+ adcs $d1,$h1,xzr
+ adc $d2,$h2,xzr
+
+ tst $d2,#-4 // see if it's carried/borrowed
+
+ csel $h0,$h0,$d0,eq
+ csel $h1,$h1,$d1,eq
+
+#ifdef __AARCH64EB__
+ ror $t0,$t0,#32 // flip nonce words
+ ror $t1,$t1,#32
+#endif
+ adds $h0,$h0,$t0 // accumulate nonce
+ adc $h1,$h1,$t1
+#ifdef __AARCH64EB__
+ rev $h0,$h0 // flip output bytes
+ rev $h1,$h1
+#endif
+ stp $h0,$h1,[$mac] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+___
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
+my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
+my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
+my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
+my ($T0,$T1,$MASK) = map("v$_",(29..31));
+
+my ($in2,$zeros)=("x16","x17");
+my $is_base2_26 = $zeros; # borrow
+
+$code.=<<___;
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul $d0,$h0,$r0 // h0*r0
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,$h0,#26,#26
+ extr x14,$h1,$h0,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,$h1,#14,#26
+ extr x16,$h2,$h1,#40
+
+ str w12,[$ctx,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[$ctx,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[$ctx,#16*2] // s1
+ str w14,[$ctx,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[$ctx,#16*4] // s2
+ str w15,[$ctx,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[$ctx,#16*6] // s3
+ str w16,[$ctx,#16*7] // r4
+ str w15,[$ctx,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr $is_base2_26,[$ctx,#24]
+ cmp $len,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz $is_base2_26,.Lbase2_64_neon
+
+ ldp w10,w11,[$ctx] // load hash value base 2^26
+ ldp w12,w13,[$ctx,#8]
+ ldr w14,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Leven_neon
+
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr $h1,x12,#12
+ adds $h0,$h0,x12,lsl#52
+ add $h1,$h1,x13,lsl#14
+ adc $h1,$h1,xzr
+ lsr $h2,x14,#24
+ adds $h1,$h1,x14,lsl#40
+ adc $d2,$h2,xzr // can be partially reduced...
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ ldp $h0,$h1,[$ctx] // load hash value base 2^64
+ ldr $h2,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Linit_neon
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[$ctx,#48] // first table element
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+ ////////////////////////////////// initialize r^n table
+ mov $h0,$r0 // r^1
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov $h1,$r1
+ mov $h2,xzr
+ add $ctx,$ctx,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+ sub $ctx,$ctx,#48 // restore original $ctx
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+.Ldo_neon:
+ ldp x8,x12,[$inp,#32] // inp[2:3]
+ subs $len,$len,#64
+ ldp x9,x13,[$inp,#48]
+ add $in2,$inp,#96
+ adr $zeros,.Lzeros
+
+ lsl $padbit,$padbit,#24
+ add x15,$ctx,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN23_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN23_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov $IN23_2,x8
+ fmov $IN23_3,x10
+ fmov $IN23_4,x12
+
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ ldp x9,x13,[$inp],#48
+
+ ld1 {$R0,$R1,$S1,$R2},[x15],#64
+ ld1 {$S2,$R3,$S3,$R4},[x15],#64
+ ld1 {$S4},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN01_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN01_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi $MASK.2d,#-1
+ fmov $IN01_2,x8
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+ ushr $MASK.2d,$MASK.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // \___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // \___________________/ \____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs $len,$len,#64
+ umull $ACC4,$IN23_0,${R4}[2]
+ csel $in2,$zeros,$in2,lo
+ umull $ACC3,$IN23_0,${R3}[2]
+ umull $ACC2,$IN23_0,${R2}[2]
+ ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
+ umull $ACC1,$IN23_0,${R1}[2]
+ ldp x9,x13,[$in2],#48
+ umull $ACC0,$IN23_0,${R0}[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal $ACC4,$IN23_1,${R3}[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC3,$IN23_1,${R2}[2]
+ and x5,x9,#0x03ffffff
+ umlal $ACC2,$IN23_1,${R1}[2]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN23_1,${R0}[2]
+ ubfx x7,x9,#26,#26
+ umlal $ACC0,$IN23_1,${S4}[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal $ACC4,$IN23_2,${R2}[2]
+ extr x8,x12,x8,#52
+ umlal $ACC3,$IN23_2,${R1}[2]
+ extr x9,x13,x9,#52
+ umlal $ACC2,$IN23_2,${R0}[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC1,$IN23_2,${S4}[2]
+ fmov $IN23_0,x4
+ umlal $ACC0,$IN23_2,${S3}[2]
+ and x8,x8,#0x03ffffff
+
+ umlal $ACC4,$IN23_3,${R1}[2]
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN23_3,${R0}[2]
+ ubfx x10,x12,#14,#26
+ umlal $ACC2,$IN23_3,${S4}[2]
+ ubfx x11,x13,#14,#26
+ umlal $ACC1,$IN23_3,${S3}[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC0,$IN23_3,${S2}[2]
+ fmov $IN23_1,x6
+
+ add $IN01_2,$IN01_2,$H2
+ add x12,$padbit,x12,lsr#40
+ umlal $ACC4,$IN23_4,${R0}[2]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC3,$IN23_4,${S4}[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC2,$IN23_4,${S3}[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN23_4,${S2}[2]
+ fmov $IN23_2,x8
+ umlal $ACC0,$IN23_4,${S1}[2]
+ fmov $IN23_3,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ fmov $IN23_4,x12
+ umlal $ACC3,$IN01_2,${R1}[0]
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ umlal $ACC0,$IN01_2,${S3}[0]
+ ldp x9,x13,[$inp],#48
+ umlal $ACC4,$IN01_2,${R2}[0]
+ umlal $ACC1,$IN01_2,${S4}[0]
+ umlal $ACC2,$IN01_2,${R0}[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}[0]
+ umlal $ACC4,$IN01_0,${R4}[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC2,$IN01_0,${R2}[0]
+ and x5,x9,#0x03ffffff
+ umlal $ACC0,$IN01_0,${R0}[0]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN01_0,${R1}[0]
+ ubfx x7,x9,#26,#26
+
+ add $IN01_3,$IN01_3,$H3
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal $ACC3,$IN01_1,${R2}[0]
+ extr x8,x12,x8,#52
+ umlal $ACC4,$IN01_1,${R3}[0]
+ extr x9,x13,x9,#52
+ umlal $ACC0,$IN01_1,${S4}[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC2,$IN01_1,${R1}[0]
+ fmov $IN01_0,x4
+ umlal $ACC1,$IN01_1,${R0}[0]
+ and x8,x8,#0x03ffffff
+
+ add $IN01_4,$IN01_4,$H4
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN01_3,${R0}[0]
+ ubfx x10,x12,#14,#26
+ umlal $ACC0,$IN01_3,${S2}[0]
+ ubfx x11,x13,#14,#26
+ umlal $ACC4,$IN01_3,${R1}[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC1,$IN01_3,${S3}[0]
+ fmov $IN01_1,x6
+ umlal $ACC2,$IN01_3,${S4}[0]
+ add x12,$padbit,x12,lsr#40
+
+ umlal $ACC3,$IN01_4,${S4}[0]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC0,$IN01_4,${S1}[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC4,$IN01_4,${R0}[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN01_4,${S2}[0]
+ fmov $IN01_2,x8
+ umlal $ACC2,$IN01_4,${S3}[0]
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr $T0.2d,$ACC3,#26
+ xtn $H3,$ACC3
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ bic $H3,#0xfc,lsl#24 // &=0x03ffffff
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ xtn $H4,$ACC4
+ ushr $T1.2d,$ACC1,#26
+ xtn $H1,$ACC1
+ bic $H4,#0xfc,lsl#24
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ shrn $T1.2s,$ACC2,#26
+ xtn $H2,$ACC2
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ bic $H1,#0xfc,lsl#24
+ add $H3,$H3,$T1.2s // h2 -> h3
+ bic $H2,#0xfc,lsl#24
+
+ shrn $T0.2s,$ACC0,#26
+ xtn $H0,$ACC0
+ ushr $T1.2s,$H3,#26
+ bic $H3,#0xfc,lsl#24
+ bic $H0,#0xfc,lsl#24
+ add $H1,$H1,$T0.2s // h0 -> h1
+ add $H4,$H4,$T1.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup $IN23_2,${IN23_2}[0]
+ add $IN01_2,$IN01_2,$H2
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds $len,$len,#32
+ b.ne .Long_tail
+
+ dup $IN23_2,${IN01_2}[0]
+ add $IN23_0,$IN01_0,$H0
+ add $IN23_3,$IN01_3,$H3
+ add $IN23_1,$IN01_1,$H1
+ add $IN23_4,$IN01_4,$H4
+
+.Long_tail:
+ dup $IN23_0,${IN23_0}[0]
+ umull2 $ACC0,$IN23_2,${S3}
+ umull2 $ACC3,$IN23_2,${R1}
+ umull2 $ACC4,$IN23_2,${R2}
+ umull2 $ACC2,$IN23_2,${R0}
+ umull2 $ACC1,$IN23_2,${S4}
+
+ dup $IN23_1,${IN23_1}[0]
+ umlal2 $ACC0,$IN23_0,${R0}
+ umlal2 $ACC2,$IN23_0,${R2}
+ umlal2 $ACC3,$IN23_0,${R3}
+ umlal2 $ACC4,$IN23_0,${R4}
+ umlal2 $ACC1,$IN23_0,${R1}
+
+ dup $IN23_3,${IN23_3}[0]
+ umlal2 $ACC0,$IN23_1,${S4}
+ umlal2 $ACC3,$IN23_1,${R2}
+ umlal2 $ACC2,$IN23_1,${R1}
+ umlal2 $ACC4,$IN23_1,${R3}
+ umlal2 $ACC1,$IN23_1,${R0}
+
+ dup $IN23_4,${IN23_4}[0]
+ umlal2 $ACC3,$IN23_3,${R0}
+ umlal2 $ACC4,$IN23_3,${R1}
+ umlal2 $ACC0,$IN23_3,${S2}
+ umlal2 $ACC1,$IN23_3,${S3}
+ umlal2 $ACC2,$IN23_3,${S4}
+
+ umlal2 $ACC3,$IN23_4,${S4}
+ umlal2 $ACC0,$IN23_4,${S1}
+ umlal2 $ACC4,$IN23_4,${R0}
+ umlal2 $ACC1,$IN23_4,${S2}
+ umlal2 $ACC2,$IN23_4,${S3}
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ umlal $ACC3,$IN01_2,${R1}
+ umlal $ACC0,$IN01_2,${S3}
+ umlal $ACC4,$IN01_2,${R2}
+ umlal $ACC1,$IN01_2,${S4}
+ umlal $ACC2,$IN01_2,${R0}
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}
+ umlal $ACC0,$IN01_0,${R0}
+ umlal $ACC4,$IN01_0,${R4}
+ umlal $ACC1,$IN01_0,${R1}
+ umlal $ACC2,$IN01_0,${R2}
+
+ add $IN01_3,$IN01_3,$H3
+ umlal $ACC3,$IN01_1,${R2}
+ umlal $ACC0,$IN01_1,${S4}
+ umlal $ACC4,$IN01_1,${R3}
+ umlal $ACC1,$IN01_1,${R0}
+ umlal $ACC2,$IN01_1,${R1}
+
+ add $IN01_4,$IN01_4,$H4
+ umlal $ACC3,$IN01_3,${R0}
+ umlal $ACC0,$IN01_3,${S2}
+ umlal $ACC4,$IN01_3,${R1}
+ umlal $ACC1,$IN01_3,${S3}
+ umlal $ACC2,$IN01_3,${S4}
+
+ umlal $ACC3,$IN01_4,${S4}
+ umlal $ACC0,$IN01_4,${S1}
+ umlal $ACC4,$IN01_4,${R0}
+ umlal $ACC1,$IN01_4,${S2}
+ umlal $ACC2,$IN01_4,${S3}
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp $ACC3,$ACC3,$ACC3
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp $ACC0,$ACC0,$ACC0
+ ldp d10,d11,[sp,#32]
+ addp $ACC4,$ACC4,$ACC4
+ ldp d12,d13,[sp,#48]
+ addp $ACC1,$ACC1,$ACC1
+ ldp d14,d15,[sp,#64]
+ addp $ACC2,$ACC2,$ACC2
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr $T0.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ and $ACC4,$ACC4,$MASK.2d
+ ushr $T1.2d,$ACC1,#26
+ and $ACC1,$ACC1,$MASK.2d
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ ushr $T1.2d,$ACC2,#26
+ and $ACC2,$ACC2,$MASK.2d
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ add $ACC3,$ACC3,$T1.2d // h2 -> h3
+
+ ushr $T0.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ ushr $T1.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ add $ACC1,$ACC1,$T0.2d // h0 -> h1
+ add $ACC4,$ACC4,$T1.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
+ mov x4,#1
+ st1 {$ACC4}[0],[$ctx]
+ str x4,[$ctx,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+___
+
+foreach (split("\n",$code)) {
+ s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
+ s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
+ (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
+ (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
+ (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
+ (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
+ (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
+
+ s/\.[124]([sd])\[/.$1\[/;
+ s/w#x([0-9]+)/w$1/g;
+
+ print $_,"\n";
+}
+close STDOUT;
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..8d1c4e420ccd
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -0,0 +1,835 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp x1,xzr
+ stp xzr,xzr,[x0] // zero hash value
+ stp xzr,xzr,[x0,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp x7,x8,[x1] // load key
+ mov x9,#0xfffffffc0fffffff
+ movk x9,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev x7,x7 // flip bytes
+ rev x8,x8
+#endif
+ and x7,x7,x9 // &=0ffffffc0fffffff
+ and x9,x9,#-4
+ and x8,x8,x9 // &=0ffffffc0ffffffc
+ mov w9,#-1
+ stp x7,x8,[x0,#32] // save key value
+ str w9,[x0,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr x12,.Lpoly1305_blocks
+ adr x7,.Lpoly1305_blocks_neon
+ adr x13,.Lpoly1305_emit
+
+ csel x12,x12,x7,eq
+
+# ifdef __ILP32__
+ stp w12,w13,[x2]
+# else
+ stp x12,x13,[x2]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands x2,x2,#-16
+ b.eq .Lno_data
+
+ ldp x4,x5,[x0] // load hash value
+ ldp x6,x17,[x0,#16] // [along with is_base2_26]
+ ldp x7,x8,[x0,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+.Loop:
+ ldp x10,x11,[x1],#16 // load input
+ sub x2,x2,#16
+#ifdef __AARCH64EB__
+ rev x10,x10
+ rev x11,x11
+#endif
+ adds x4,x4,x10 // accumulate input
+ adcs x5,x5,x11
+
+ mul x12,x4,x7 // h0*r0
+ adc x6,x6,x3
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ cbnz x2,.Loop
+
+ stp x4,x5,[x0] // store hash value
+ stp x6,xzr,[x0,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp x4,x5,[x0] // load hash base 2^64
+ ldp x6,x7,[x0,#16] // [along with is_base2_26]
+ ldp x10,x11,[x2] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x7,#0 // is_base2_26?
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+ adds x12,x4,#5 // compare to modulus
+ adcs x13,x5,xzr
+ adc x14,x6,xzr
+
+ tst x14,#-4 // see if it's carried/borrowed
+
+ csel x4,x4,x12,eq
+ csel x5,x5,x13,eq
+
+#ifdef __AARCH64EB__
+ ror x10,x10,#32 // flip nonce words
+ ror x11,x11,#32
+#endif
+ adds x4,x4,x10 // accumulate nonce
+ adc x5,x5,x11
+#ifdef __AARCH64EB__
+ rev x4,x4 // flip output bytes
+ rev x5,x5
+#endif
+ stp x4,x5,[x1] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul x12,x4,x7 // h0*r0
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,x4,#26,#26
+ extr x14,x5,x4,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,x5,#14,#26
+ extr x16,x6,x5,#40
+
+ str w12,[x0,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[x0,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[x0,#16*2] // s1
+ str w14,[x0,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[x0,#16*4] // s2
+ str w15,[x0,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[x0,#16*6] // s3
+ str w16,[x0,#16*7] // r4
+ str w15,[x0,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr x17,[x0,#24]
+ cmp x2,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz x17,.Lbase2_64_neon
+
+ ldp w10,w11,[x0] // load hash value base 2^26
+ ldp w12,w13,[x0,#8]
+ ldr w14,[x0,#16]
+
+ tst x2,#31
+ b.eq .Leven_neon
+
+ ldp x7,x8,[x0,#32] // load key value
+
+ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr x5,x12,#12
+ adds x4,x4,x12,lsl#52
+ add x5,x5,x13,lsl#14
+ adc x5,x5,xzr
+ lsr x6,x14,#24
+ adds x5,x5,x14,lsl#40
+ adc x14,x6,xzr // can be partially reduced...
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp x7,x8,[x0,#32] // load key value
+
+ ldp x4,x5,[x0] // load hash value base 2^64
+ ldr x6,[x0,#16]
+
+ tst x2,#31
+ b.eq .Linit_neon
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[x0,#48] // first table element
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+ ////////////////////////////////// initialize r^n table
+ mov x4,x7 // r^1
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov x5,x8
+ mov x6,xzr
+ add x0,x0,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub x0,x0,#4
+ bl poly1305_splat
+ sub x0,x0,#48 // restore original x0
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+.Ldo_neon:
+ ldp x8,x12,[x1,#32] // inp[2:3]
+ subs x2,x2,#64
+ ldp x9,x13,[x1,#48]
+ add x16,x1,#96
+ adr x17,.Lzeros
+
+ lsl x3,x3,#24
+ add x15,x0,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d14,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d15,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov d16,x8
+ fmov d17,x10
+ fmov d18,x12
+
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ ldp x9,x13,[x1],#48
+
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
+ ld1 {v8.4s},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d9,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d10,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi v31.2d,#-1
+ fmov d11,x8
+ fmov d12,x10
+ fmov d13,x12
+ ushr v31.2d,v31.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // ___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // ___________________/ ____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs x2,x2,#64
+ umull v23.2d,v14.2s,v7.s[2]
+ csel x16,x17,x16,lo
+ umull v22.2d,v14.2s,v5.s[2]
+ umull v21.2d,v14.2s,v3.s[2]
+ ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
+ umull v20.2d,v14.2s,v1.s[2]
+ ldp x9,x13,[x16],#48
+ umull v19.2d,v14.2s,v0.s[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal v23.2d,v15.2s,v5.s[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v22.2d,v15.2s,v3.s[2]
+ and x5,x9,#0x03ffffff
+ umlal v21.2d,v15.2s,v1.s[2]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v15.2s,v0.s[2]
+ ubfx x7,x9,#26,#26
+ umlal v19.2d,v15.2s,v8.s[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal v23.2d,v16.2s,v3.s[2]
+ extr x8,x12,x8,#52
+ umlal v22.2d,v16.2s,v1.s[2]
+ extr x9,x13,x9,#52
+ umlal v21.2d,v16.2s,v0.s[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v20.2d,v16.2s,v8.s[2]
+ fmov d14,x4
+ umlal v19.2d,v16.2s,v6.s[2]
+ and x8,x8,#0x03ffffff
+
+ umlal v23.2d,v17.2s,v1.s[2]
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v17.2s,v0.s[2]
+ ubfx x10,x12,#14,#26
+ umlal v21.2d,v17.2s,v8.s[2]
+ ubfx x11,x13,#14,#26
+ umlal v20.2d,v17.2s,v6.s[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v19.2d,v17.2s,v4.s[2]
+ fmov d15,x6
+
+ add v11.2s,v11.2s,v26.2s
+ add x12,x3,x12,lsr#40
+ umlal v23.2d,v18.2s,v0.s[2]
+ add x13,x3,x13,lsr#40
+ umlal v22.2d,v18.2s,v8.s[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v21.2d,v18.2s,v6.s[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v18.2s,v4.s[2]
+ fmov d16,x8
+ umlal v19.2d,v18.2s,v2.s[2]
+ fmov d17,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ fmov d18,x12
+ umlal v22.2d,v11.2s,v1.s[0]
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ umlal v19.2d,v11.2s,v6.s[0]
+ ldp x9,x13,[x1],#48
+ umlal v23.2d,v11.2s,v3.s[0]
+ umlal v20.2d,v11.2s,v8.s[0]
+ umlal v21.2d,v11.2s,v0.s[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.s[0]
+ umlal v23.2d,v9.2s,v7.s[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v21.2d,v9.2s,v3.s[0]
+ and x5,x9,#0x03ffffff
+ umlal v19.2d,v9.2s,v0.s[0]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v9.2s,v1.s[0]
+ ubfx x7,x9,#26,#26
+
+ add v12.2s,v12.2s,v27.2s
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal v22.2d,v10.2s,v3.s[0]
+ extr x8,x12,x8,#52
+ umlal v23.2d,v10.2s,v5.s[0]
+ extr x9,x13,x9,#52
+ umlal v19.2d,v10.2s,v8.s[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v21.2d,v10.2s,v1.s[0]
+ fmov d9,x4
+ umlal v20.2d,v10.2s,v0.s[0]
+ and x8,x8,#0x03ffffff
+
+ add v13.2s,v13.2s,v28.2s
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v12.2s,v0.s[0]
+ ubfx x10,x12,#14,#26
+ umlal v19.2d,v12.2s,v4.s[0]
+ ubfx x11,x13,#14,#26
+ umlal v23.2d,v12.2s,v1.s[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v20.2d,v12.2s,v6.s[0]
+ fmov d10,x6
+ umlal v21.2d,v12.2s,v8.s[0]
+ add x12,x3,x12,lsr#40
+
+ umlal v22.2d,v13.2s,v8.s[0]
+ add x13,x3,x13,lsr#40
+ umlal v19.2d,v13.2s,v2.s[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v23.2d,v13.2s,v0.s[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v13.2s,v4.s[0]
+ fmov d11,x8
+ umlal v21.2d,v13.2s,v6.s[0]
+ fmov d12,x10
+ fmov d13,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr v29.2d,v22.2d,#26
+ xtn v27.2s,v22.2d
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ xtn v28.2s,v23.2d
+ ushr v30.2d,v20.2d,#26
+ xtn v25.2s,v20.2d
+ bic v28.2s,#0xfc,lsl#24
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ shrn v30.2s,v21.2d,#26
+ xtn v26.2s,v21.2d
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ bic v25.2s,#0xfc,lsl#24
+ add v27.2s,v27.2s,v30.2s // h2 -> h3
+ bic v26.2s,#0xfc,lsl#24
+
+ shrn v29.2s,v19.2d,#26
+ xtn v24.2s,v19.2d
+ ushr v30.2s,v27.2s,#26
+ bic v27.2s,#0xfc,lsl#24
+ bic v24.2s,#0xfc,lsl#24
+ add v25.2s,v25.2s,v29.2s // h0 -> h1
+ add v28.2s,v28.2s,v30.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup v16.2d,v16.d[0]
+ add v11.2s,v11.2s,v26.2s
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds x2,x2,#32
+ b.ne .Long_tail
+
+ dup v16.2d,v11.d[0]
+ add v14.2s,v9.2s,v24.2s
+ add v17.2s,v12.2s,v27.2s
+ add v15.2s,v10.2s,v25.2s
+ add v18.2s,v13.2s,v28.2s
+
+.Long_tail:
+ dup v14.2d,v14.d[0]
+ umull2 v19.2d,v16.4s,v6.4s
+ umull2 v22.2d,v16.4s,v1.4s
+ umull2 v23.2d,v16.4s,v3.4s
+ umull2 v21.2d,v16.4s,v0.4s
+ umull2 v20.2d,v16.4s,v8.4s
+
+ dup v15.2d,v15.d[0]
+ umlal2 v19.2d,v14.4s,v0.4s
+ umlal2 v21.2d,v14.4s,v3.4s
+ umlal2 v22.2d,v14.4s,v5.4s
+ umlal2 v23.2d,v14.4s,v7.4s
+ umlal2 v20.2d,v14.4s,v1.4s
+
+ dup v17.2d,v17.d[0]
+ umlal2 v19.2d,v15.4s,v8.4s
+ umlal2 v22.2d,v15.4s,v3.4s
+ umlal2 v21.2d,v15.4s,v1.4s
+ umlal2 v23.2d,v15.4s,v5.4s
+ umlal2 v20.2d,v15.4s,v0.4s
+
+ dup v18.2d,v18.d[0]
+ umlal2 v22.2d,v17.4s,v0.4s
+ umlal2 v23.2d,v17.4s,v1.4s
+ umlal2 v19.2d,v17.4s,v4.4s
+ umlal2 v20.2d,v17.4s,v6.4s
+ umlal2 v21.2d,v17.4s,v8.4s
+
+ umlal2 v22.2d,v18.4s,v8.4s
+ umlal2 v19.2d,v18.4s,v2.4s
+ umlal2 v23.2d,v18.4s,v0.4s
+ umlal2 v20.2d,v18.4s,v4.4s
+ umlal2 v21.2d,v18.4s,v6.4s
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ umlal v22.2d,v11.2s,v1.2s
+ umlal v19.2d,v11.2s,v6.2s
+ umlal v23.2d,v11.2s,v3.2s
+ umlal v20.2d,v11.2s,v8.2s
+ umlal v21.2d,v11.2s,v0.2s
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.2s
+ umlal v19.2d,v9.2s,v0.2s
+ umlal v23.2d,v9.2s,v7.2s
+ umlal v20.2d,v9.2s,v1.2s
+ umlal v21.2d,v9.2s,v3.2s
+
+ add v12.2s,v12.2s,v27.2s
+ umlal v22.2d,v10.2s,v3.2s
+ umlal v19.2d,v10.2s,v8.2s
+ umlal v23.2d,v10.2s,v5.2s
+ umlal v20.2d,v10.2s,v0.2s
+ umlal v21.2d,v10.2s,v1.2s
+
+ add v13.2s,v13.2s,v28.2s
+ umlal v22.2d,v12.2s,v0.2s
+ umlal v19.2d,v12.2s,v4.2s
+ umlal v23.2d,v12.2s,v1.2s
+ umlal v20.2d,v12.2s,v6.2s
+ umlal v21.2d,v12.2s,v8.2s
+
+ umlal v22.2d,v13.2s,v8.2s
+ umlal v19.2d,v13.2s,v2.2s
+ umlal v23.2d,v13.2s,v0.2s
+ umlal v20.2d,v13.2s,v4.2s
+ umlal v21.2d,v13.2s,v6.2s
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp v22.2d,v22.2d,v22.2d
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp v19.2d,v19.2d,v19.2d
+ ldp d10,d11,[sp,#32]
+ addp v23.2d,v23.2d,v23.2d
+ ldp d12,d13,[sp,#48]
+ addp v20.2d,v20.2d,v20.2d
+ ldp d14,d15,[sp,#64]
+ addp v21.2d,v21.2d,v21.2d
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr v29.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ and v23.16b,v23.16b,v31.16b
+ ushr v30.2d,v20.2d,#26
+ and v20.16b,v20.16b,v31.16b
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ ushr v30.2d,v21.2d,#26
+ and v21.16b,v21.16b,v31.16b
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ add v22.2d,v22.2d,v30.2d // h2 -> h3
+
+ ushr v29.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ ushr v30.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ add v20.2d,v20.2d,v29.2d // h0 -> h1
+ add v23.2d,v23.2d,v30.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
+ mov x4,#1
+ st1 {v23.s}[0],[x0]
+ str x4,[x0,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..dd843d0ee83a
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm64(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int neon_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arch(dctx, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ neon_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ neon_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int neon_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ neon_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg neon_poly1305_alg = {
+ .init = neon_poly1305_init,
+ .update = neon_poly1305_update,
+ .final = neon_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init neon_poly1305_mod_init(void)
+{
+ if (!cpu_have_named_feature(ASIMD))
+ return 0;
+
+ static_branch_enable(&have_neon);
+
+ return crypto_register_shash(&neon_poly1305_alg);
+}
+
+static void __exit neon_poly1305_mod_exit(void)
+{
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_shash(&neon_poly1305_alg);
+}
+
+module_init(neon_poly1305_mod_init);
+module_exit(neon_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 181754615f73..9923445e8225 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -40,6 +40,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
default 4 if X86_64
+ default 9 if ARM64
default 1
config CRYPTO_ARCH_HAVE_LIB_POLY1305
--
2.18.2
From 37bfaa5edd51c66438932ea842ac026313fa7104 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:25 +0100
Subject: [PATCH 019/100] crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS
NEON implementation
commit a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc upstream.
This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation
for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL
project. The file 'poly1305-armv4.pl' is taken straight from this upstream
GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f,
and already contains all the changes required to build it as part of a
Linux kernel module.
[0] https://github.com/dot-asm/cryptogams
Co-developed-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/Kconfig | 5 +
arch/arm/crypto/Makefile | 12 +-
arch/arm/crypto/poly1305-armv4.pl | 1236 +++++++++++++++++++++++
arch/arm/crypto/poly1305-core.S_shipped | 1158 +++++++++++++++++++++
arch/arm/crypto/poly1305-glue.c | 276 +++++
lib/crypto/Kconfig | 2 +-
6 files changed, 2687 insertions(+), 2 deletions(-)
create mode 100644 arch/arm/crypto/poly1305-armv4.pl
create mode 100644 arch/arm/crypto/poly1305-core.S_shipped
create mode 100644 arch/arm/crypto/poly1305-glue.c
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b25ffec04417..2e8a9289bded 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -131,6 +131,11 @@ config CRYPTO_CHACHA20_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_ARCH_HAVE_LIB_CHACHA
+config CRYPTO_POLY1305_ARM
+ tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6b97dffcf90f..4f6a8a81dabc 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
@@ -55,12 +56,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
chacha-neon-y := chacha-scalar-core.o chacha-glue.o
chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+ $(call cmd,perl)
+
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
@@ -68,4 +73,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
+
+# massage the perlasm code a bit so we only get the NEON routine if we need it
+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl
new file mode 100644
index 000000000000..6d79498d3115
--- /dev/null
+++ b/arch/arm/crypto/poly1305-armv4.pl
@@ -0,0 +1,1236 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# IALU(*)/gcc-4.4 NEON
+#
+# ARM11xx(ARMv6) 7.78/+100% -
+# Cortex-A5 6.35/+130% 3.00
+# Cortex-A8 6.25/+115% 2.36
+# Cortex-A9 5.10/+95% 2.55
+# Cortex-A15 3.85/+85% 1.25(**)
+# Snapdragon S4 5.70/+100% 1.48(**)
+#
+# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data;
+# (**) these are trade-off results, they can be improved by ~8% but at
+# the cost of 15/12% regression on Cortex-A5/A7, it's even possible
+# to improve Cortex-A9 result, but then A5/A7 loose more than 20%;
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+($ctx,$inp,$len,$padbit)=map("r$_",(0..3));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp $inp,#0
+ str r3,[$ctx,#0] @ zero hash value
+ str r3,[$ctx,#4]
+ str r3,[$ctx,#8]
+ str r3,[$ctx,#12]
+ str r3,[$ctx,#16]
+ str r3,[$ctx,#36] @ clear is_base2_26
+ add $ctx,$ctx,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[$ctx,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[$inp,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[$inp,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[$inp,#2]
+ ldrb r7,[$inp,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[$inp,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[$inp,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[$inp,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[$inp,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[$inp,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[$inp,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[$inp,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[$inp,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[$inp,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[$inp,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[$inp,#14]
+ and r6,r6,r3
+
+ ldrb r10,[$inp,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[$ctx,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[$ctx,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[$ctx,#8]
+ and r7,r7,r3
+ str r7,[$ctx,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
+my ($s1,$s2,$s3)=($r1,$r2,$r3);
+
+$code.=<<___;
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands $len,$len,#-16
+ beq .Lno_data
+
+ add $len,$len,$inp @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia $ctx,{$h0-$r3} @ load context
+ add $ctx,$ctx,#20
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+#else
+ ldr lr,[$ctx,#36] @ is_base2_26
+ ldmia $ctx!,{$h0-$h4} @ load hash value
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+
+ adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $r1,$h1,lsr#6
+ adcs $r1,$r1,$h2,lsl#20
+ mov $r2,$h2,lsr#12
+ adcs $r2,$r2,$h3,lsl#14
+ mov $r3,$h3,lsr#18
+ adcs $r3,$r3,$h4,lsl#8
+ mov $len,#0
+ teq lr,#0
+ str $len,[$ctx,#16] @ clear is_base2_26
+ adc $len,$len,$h4,lsr#24
+
+ itttt ne
+ movne $h0,$r0 @ choose between radixes
+ movne $h1,$r1
+ movne $h2,$r2
+ movne $h3,$r3
+ ldmia $ctx,{$r0-$r3} @ load key
+ it ne
+ movne $h4,$len
+#endif
+
+ mov lr,$inp
+ cmp $padbit,#0
+ str $r1,[sp,#20]
+ str $r2,[sp,#24]
+ str $r3,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi $h4,$h4,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds $h0,$h0,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs $h1,$h1,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs $h2,$h2,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add $s1,$r1,$r1,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi $h4,$h4,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds $h0,$h0,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs $h1,$h1,r1
+ add $s1,$r1,$r1,lsr#2
+ adcs $h2,$h2,r2
+#endif
+ add $s2,$r2,$r2,lsr#2
+ adcs $h3,$h3,r3
+ add $s3,$r3,$r3,lsr#2
+
+ umull r2,r3,$h1,$r0
+ adc $h4,$h4,#0
+ umull r0,r1,$h0,$r0
+ umlal r2,r3,$h4,$s1
+ umlal r0,r1,$h3,$s1
+ ldr $r1,[sp,#20] @ reload $r1
+ umlal r2,r3,$h2,$s3
+ umlal r0,r1,$h1,$s3
+ umlal r2,r3,$h3,$s2
+ umlal r0,r1,$h2,$s2
+ umlal r2,r3,$h0,$r1
+ str r0,[sp,#0] @ future $h0
+ mul r0,$s2,$h4
+ ldr $r2,[sp,#24] @ reload $r2
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future $h2
+ str r2,[sp,#4] @ future $h1
+
+ mul r2,$s3,$h4
+ eor r3,r3,r3
+ umlal r0,r1,$h3,$s3
+ ldr $r3,[sp,#28] @ reload $r3
+ umlal r2,r3,$h3,$r0
+ umlal r0,r1,$h2,$r0
+ umlal r2,r3,$h2,$r1
+ umlal r0,r1,$h1,$r1
+ umlal r2,r3,$h1,$r2
+ umlal r0,r1,$h0,$r2
+ umlal r2,r3,$h0,$r3
+ ldr $h0,[sp,#0]
+ mul $h4,$r0,$h4
+ ldr $h1,[sp,#4]
+
+ adds $h2,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds $h3,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add $h4,$h4,r3 @ h4+=d3>>32
+
+ and r1,$h4,#-4
+ and $h4,$h4,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds $h0,$h0,r1
+ adcs $h1,$h1,#0
+ adcs $h2,$h2,#0
+ adcs $h3,$h3,#0
+ adc $h4,$h4,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr $ctx,[sp,#12]
+ add sp,sp,#32
+ stmdb $ctx,{$h0-$h4} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce)=map("r$_",(0..2));
+my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
+my $g4=$ctx;
+
+$code.=<<___;
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia $ctx,{$h0-$h4}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $g1,$h1,lsr#6
+ adcs $g1,$g1,$h2,lsl#20
+ mov $g2,$h2,lsr#12
+ adcs $g2,$g2,$h3,lsl#14
+ mov $g3,$h3,lsr#18
+ adcs $g3,$g3,$h4,lsl#8
+ mov $g4,#0
+ adc $g4,$g4,$h4,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne $h0,$g0
+ movne $h1,$g1
+ movne $h2,$g2
+ movne $h3,$g3
+ it ne
+ movne $h4,$g4
+#endif
+
+ adds $g0,$h0,#5 @ compare to modulus
+ adcs $g1,$h1,#0
+ adcs $g2,$h2,#0
+ adcs $g3,$h3,#0
+ adc $g4,$h4,#0
+ tst $g4,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h0,$g0
+ ldr $g0,[$nonce,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h1,$g1
+ ldr $g1,[$nonce,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h2,$g2
+ ldr $g2,[$nonce,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h3,$g3
+ ldr $g3,[$nonce,#12]
+
+ adds $h0,$h0,$g0
+ adcs $h1,$h1,$g1
+ adcs $h2,$h2,$g2
+ adc $h3,$h3,$g3
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev $h0,$h0
+ rev $h1,$h1
+ rev $h2,$h2
+ rev $h3,$h3
+# endif
+ str $h0,[$mac,#0]
+ str $h1,[$mac,#4]
+ str $h2,[$mac,#8]
+ str $h3,[$mac,#12]
+#else
+ strb $h0,[$mac,#0]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#4]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#8]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#12]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#1]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#5]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#9]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#13]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#2]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#6]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#10]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#14]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#3]
+ strb $h1,[$mac,#7]
+ strb $h2,[$mac,#11]
+ strb $h3,[$mac,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+___
+{
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
+my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
+my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
+
+my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[$ctx,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[$ctx,#20] @ load key base 2^32
+ ldr r5,[$ctx,#24]
+ ldr r6,[$ctx,#28]
+ ldr r7,[$ctx,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 $R0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 $R1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 $S1,r2
+ vdup.32 $R2,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 $S2,r3
+ vdup.32 $R3,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 $S3,r4
+ vdup.32 $R4,r6
+ vdup.32 $S4,r5
+
+ mov $zeros,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 $D0,$R0,${R0}[1]
+ vmull.u32 $D1,$R1,${R0}[1]
+ vmull.u32 $D2,$R2,${R0}[1]
+ vmull.u32 $D3,$R3,${R0}[1]
+ vmull.u32 $D4,$R4,${R0}[1]
+
+ vmlal.u32 $D0,$R4,${S1}[1]
+ vmlal.u32 $D1,$R0,${R1}[1]
+ vmlal.u32 $D2,$R1,${R1}[1]
+ vmlal.u32 $D3,$R2,${R1}[1]
+ vmlal.u32 $D4,$R3,${R1}[1]
+
+ vmlal.u32 $D0,$R3,${S2}[1]
+ vmlal.u32 $D1,$R4,${S2}[1]
+ vmlal.u32 $D3,$R1,${R2}[1]
+ vmlal.u32 $D2,$R0,${R2}[1]
+ vmlal.u32 $D4,$R2,${R2}[1]
+
+ vmlal.u32 $D0,$R2,${S3}[1]
+ vmlal.u32 $D3,$R0,${R3}[1]
+ vmlal.u32 $D1,$R3,${S3}[1]
+ vmlal.u32 $D2,$R4,${S3}[1]
+ vmlal.u32 $D4,$R1,${R3}[1]
+
+ vmlal.u32 $D3,$R4,${S4}[1]
+ vmlal.u32 $D0,$R1,${S4}[1]
+ vmlal.u32 $D1,$R2,${S4}[1]
+ vmlal.u32 $D2,$R3,${S4}[1]
+ vmlal.u32 $D4,$R0,${R4}[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vbic.i32 $D4#lo,#0xfc000000
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vbic.i32 $D2#lo,#0xfc000000
+
+ vshr.u32 $T0#lo,$D0#lo,#26
+ vbic.i32 $D0#lo,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+
+ subs $zeros,$zeros,#1
+ beq .Lsquare_break_neon
+
+ add $tbl0,$ctx,#(48+0*9*4)
+ add $tbl1,$ctx,#(48+1*9*4)
+
+ vtrn.32 $R0,$D0#lo @ r^2:r^1
+ vtrn.32 $R2,$D2#lo
+ vtrn.32 $R3,$D3#lo
+ vtrn.32 $R1,$D1#lo
+ vtrn.32 $R4,$D4#lo
+
+ vshl.u32 $S2,$R2,#2 @ *5
+ vshl.u32 $S3,$R3,#2
+ vshl.u32 $S1,$R1,#2
+ vshl.u32 $S4,$R4,#2
+ vadd.i32 $S2,$S2,$R2
+ vadd.i32 $S1,$S1,$R1
+ vadd.i32 $S3,$S3,$R3
+ vadd.i32 $S4,$S4,$R4
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0,:32]
+ vst1.32 {${S4}[1]},[$tbl1,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add $tbl0,$ctx,#(48+2*4*9)
+ add $tbl1,$ctx,#(48+3*4*9)
+
+ vmov $R0,$D0#lo @ r^4:r^3
+ vshl.u32 $S1,$D1#lo,#2 @ *5
+ vmov $R1,$D1#lo
+ vshl.u32 $S2,$D2#lo,#2
+ vmov $R2,$D2#lo
+ vshl.u32 $S3,$D3#lo,#2
+ vmov $R3,$D3#lo
+ vshl.u32 $S4,$D4#lo,#2
+ vmov $R4,$D4#lo
+ vadd.i32 $S1,$S1,$D1#lo
+ vadd.i32 $S2,$S2,$D2#lo
+ vadd.i32 $S3,$S3,$D3#lo
+ vadd.i32 $S4,$S4,$D4#lo
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0]
+ vst1.32 {${S4}[1]},[$tbl1]
+
+.Lno_init_neon:
+ ret @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ cmp $len,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[$ctx,#0] @ load hash value base 2^32
+ ldr r5,[$ctx,#4]
+ ldr r6,[$ctx,#8]
+ ldr r7,[$ctx,#12]
+ ldr ip,[$ctx,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor $D0#lo,$D0#lo,$D0#lo
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor $D1#lo,$D1#lo,$D1#lo
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor $D2#lo,$D2#lo,$D2#lo
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor $D3#lo,$D3#lo,$D3#lo
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor $D4#lo,$D4#lo,$D4#lo
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[$ctx,#36] @ set is_base2_26
+
+ vmov.32 $D0#lo[0],r2
+ vmov.32 $D1#lo[0],r3
+ vmov.32 $D2#lo[0],r4
+ vmov.32 $D3#lo[0],r5
+ vmov.32 $D4#lo[0],r6
+ adr $zeros,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor $D0#lo,$D0#lo,$D0#lo
+ veor $D1#lo,$D1#lo,$D1#lo
+ veor $D2#lo,$D2#lo,$D2#lo
+ veor $D3#lo,$D3#lo,$D3#lo
+ veor $D4#lo,$D4#lo,$D4#lo
+ vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ adr $zeros,.Lzeros
+ vld1.32 {$D4#lo[0]},[$ctx]
+ sub $ctx,$ctx,#16 @ rewind
+
+.Lhash_loaded:
+ add $in2,$inp,#32
+ mov $padbit,$padbit,lsl#24
+ tst $len,#31
+ beq .Leven
+
+ vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
+ vmov.32 $H4#lo[0],$padbit
+ sub $len,$len,#16
+ add $in2,$inp,#32
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3#lo,$H3#lo,#18
+
+ vsri.u32 $H3#lo,$H2#lo,#14
+ vshl.u32 $H2#lo,$H2#lo,#12
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi
+
+ vbic.i32 $H3#lo,#0xfc000000
+ vsri.u32 $H2#lo,$H1#lo,#20
+ vshl.u32 $H1#lo,$H1#lo,#6
+
+ vbic.i32 $H2#lo,#0xfc000000
+ vsri.u32 $H1#lo,$H0#lo,#26
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+
+ vbic.i32 $H0#lo,#0xfc000000
+ vbic.i32 $H1#lo,#0xfc000000
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo
+
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+
+ mov $tbl1,$zeros
+ add $tbl0,$ctx,#48
+
+ cmp $len,$len
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs $len,$len,#64
+ it lo
+ movlo $in2,$zeros
+
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+ itt hi
+ addhi $tbl1,$ctx,#(48+1*9*4)
+ addhi $tbl0,$ctx,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3,$H3,#18
+
+ vsri.u32 $H3,$H2,#14
+ vshl.u32 $H2,$H2,#12
+
+ vbic.i32 $H3,#0xfc000000
+ vsri.u32 $H2,$H1,#20
+ vshl.u32 $H1,$H1,#6
+
+ vbic.i32 $H2,#0xfc000000
+ vsri.u32 $H1,$H0,#26
+
+ vbic.i32 $H0,#0xfc000000
+ vbic.i32 $H1,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ \___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ \___________________/ \____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1]
+ vmull.u32 $D2,$H2#hi,${R0}[1]
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,${R0}[1]
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,${R0}[1]
+ vmlal.u32 $D2,$H1#hi,${R1}[1]
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,${R0}[1]
+
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,${R0}[1]
+ subs $len,$len,#64
+ vmlal.u32 $D0,$H4#hi,${S1}[1]
+ it lo
+ movlo $in2,$zeros
+ vmlal.u32 $D3,$H2#hi,${R1}[1]
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D1,$H0#hi,${R1}[1]
+ vmlal.u32 $D4,$H3#hi,${R1}[1]
+
+ vmlal.u32 $D0,$H3#hi,${S2}[1]
+ vmlal.u32 $D3,$H1#hi,${R2}[1]
+ vmlal.u32 $D4,$H2#hi,${R2}[1]
+ vmlal.u32 $D1,$H4#hi,${S2}[1]
+ vmlal.u32 $D2,$H0#hi,${R2}[1]
+
+ vmlal.u32 $D3,$H0#hi,${R3}[1]
+ vmlal.u32 $D0,$H2#hi,${S3}[1]
+ vmlal.u32 $D4,$H1#hi,${R3}[1]
+ vmlal.u32 $D1,$H3#hi,${S3}[1]
+ vmlal.u32 $D2,$H4#hi,${S3}[1]
+
+ vmlal.u32 $D3,$H4#hi,${S4}[1]
+ vmlal.u32 $D0,$H1#hi,${S4}[1]
+ vmlal.u32 $D4,$H0#hi,${R4}[1]
+ vmlal.u32 $D1,$H2#hi,${S4}[1]
+ vmlal.u32 $D2,$H3#hi,${S4}[1]
+
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 $D3,$H3#lo,${R0}[0]
+ vmlal.u32 $D0,$H0#lo,${R0}[0]
+ vmlal.u32 $D4,$H4#lo,${R0}[0]
+ vmlal.u32 $D1,$H1#lo,${R0}[0]
+ vmlal.u32 $D2,$H2#lo,${R0}[0]
+ vld1.32 ${S4}[0],[$tbl0,:32]
+
+ vmlal.u32 $D3,$H2#lo,${R1}[0]
+ vmlal.u32 $D0,$H4#lo,${S1}[0]
+ vmlal.u32 $D4,$H3#lo,${R1}[0]
+ vmlal.u32 $D1,$H0#lo,${R1}[0]
+ vmlal.u32 $D2,$H1#lo,${R1}[0]
+
+ vmlal.u32 $D3,$H1#lo,${R2}[0]
+ vmlal.u32 $D0,$H3#lo,${S2}[0]
+ vmlal.u32 $D4,$H2#lo,${R2}[0]
+ vmlal.u32 $D1,$H4#lo,${S2}[0]
+ vmlal.u32 $D2,$H0#lo,${R2}[0]
+
+ vmlal.u32 $D3,$H0#lo,${R3}[0]
+ vmlal.u32 $D0,$H2#lo,${S3}[0]
+ vmlal.u32 $D4,$H1#lo,${R3}[0]
+ vmlal.u32 $D1,$H3#lo,${S3}[0]
+ vmlal.u32 $D3,$H4#lo,${S4}[0]
+
+ vmlal.u32 $D2,$H4#lo,${S3}[0]
+ vmlal.u32 $D0,$H1#lo,${S4}[0]
+ vmlal.u32 $D4,$H0#lo,${R4}[0]
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vmlal.u32 $D1,$H2#lo,${S4}[0]
+ vmlal.u32 $D2,$H3#lo,${S4}[0]
+
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+ vrev32.8 $H3,$H3
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vshl.u32 $H3,$H3,#18
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vsri.u32 $H3,$H2,#14
+ vbic.i32 $D4#lo,#0xfc000000
+ vshl.u32 $H2,$H2,#12
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vbic.i32 $H3,#0xfc000000
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec]
+ vsri.u32 $H2,$H1,#20
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vshl.u32 $H1,$H1,#6
+ vbic.i32 $D2#lo,#0xfc000000
+ vbic.i32 $H2,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D0,#26 @ re-narrow
+ vmovn.i64 $D0#lo,$D0
+ vsri.u32 $H1,$H0,#26
+ vbic.i32 $H0,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vbic.i32 $D0#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+ vbic.i32 $H1,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add $tbl1,$ctx,#(48+0*9*4)
+ add $tbl0,$ctx,#(48+1*9*4)
+ adds $len,$len,#32
+ it ne
+ movne $len,#0
+ bne .Long_tail
+
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo
+
+.Long_tail:
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant
+ vmull.u32 $D2,$H2#hi,$R0
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,$R0
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,$R0
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,$R0
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,$R0
+
+ vmlal.u32 $D0,$H4#hi,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#hi,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#hi,$R1
+ vmlal.u32 $D4,$H3#hi,$R1
+ vmlal.u32 $D2,$H1#hi,$R1
+
+ vmlal.u32 $D3,$H1#hi,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#hi,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#hi,$R2
+ vmlal.u32 $D1,$H4#hi,$S2
+ vmlal.u32 $D2,$H0#hi,$R2
+
+ vmlal.u32 $D3,$H0#hi,$R3
+ it ne
+ addne $tbl1,$ctx,#(48+2*9*4)
+ vmlal.u32 $D0,$H2#hi,$S3
+ it ne
+ addne $tbl0,$ctx,#(48+3*9*4)
+ vmlal.u32 $D4,$H1#hi,$R3
+ vmlal.u32 $D1,$H3#hi,$S3
+ vmlal.u32 $D2,$H4#hi,$S3
+
+ vmlal.u32 $D3,$H4#hi,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones, can be redundant
+ vmlal.u32 $D0,$H1#hi,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#hi,$R4
+ vmlal.u32 $D1,$H2#hi,$S4
+ vmlal.u32 $D2,$H3#hi,$S4
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+
+ vmlal.u32 $D2,$H2#lo,$R0
+ vmlal.u32 $D0,$H0#lo,$R0
+ vmlal.u32 $D3,$H3#lo,$R0
+ vmlal.u32 $D1,$H1#lo,$R0
+ vmlal.u32 $D4,$H4#lo,$R0
+
+ vmlal.u32 $D0,$H4#lo,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#lo,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#lo,$R1
+ vmlal.u32 $D4,$H3#lo,$R1
+ vmlal.u32 $D2,$H1#lo,$R1
+
+ vmlal.u32 $D3,$H1#lo,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#lo,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#lo,$R2
+ vmlal.u32 $D1,$H4#lo,$S2
+ vmlal.u32 $D2,$H0#lo,$R2
+
+ vmlal.u32 $D3,$H0#lo,$R3
+ vmlal.u32 $D0,$H2#lo,$S3
+ vmlal.u32 $D4,$H1#lo,$R3
+ vmlal.u32 $D1,$H3#lo,$S3
+ vmlal.u32 $D2,$H4#lo,$S3
+
+ vmlal.u32 $D3,$H4#lo,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones
+ vmlal.u32 $D0,$H1#lo,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#lo,$R4
+ vmlal.u32 $D1,$H2#lo,$S4
+ vmlal.u32 $D2,$H3#lo,$S4
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 $D3#lo,$D3#lo,$D3#hi
+ vadd.i64 $D0#lo,$D0#lo,$D0#hi
+ vadd.i64 $D4#lo,$D4#lo,$D4#hi
+ vadd.i64 $D1#lo,$D1#lo,$D1#hi
+ vadd.i64 $D2#lo,$D2#lo,$D2#hi
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 $T0,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vshr.u64 $T1,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+
+ vshr.u64 $T0,$D4,#26
+ vand.i64 $D4,$D4,$MASK
+ vshr.u64 $T1,$D1,#26
+ vand.i64 $D1,$D1,$MASK
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+
+ vadd.i64 $D0,$D0,$T0
+ vshl.u64 $T0,$T0,#2
+ vshr.u64 $T1,$D2,#26
+ vand.i64 $D2,$D2,$MASK
+ vadd.i64 $D0,$D0,$T0 @ h4 -> h0
+ vadd.i64 $D3,$D3,$T1 @ h2 -> h3
+
+ vshr.u64 $T0,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vshr.u64 $T1,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vadd.i64 $D1,$D1,$T0 @ h0 -> h1
+ vadd.i64 $D4,$D4,$T1 @ h3 -> h4
+
+ cmp $len,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ vst1.32 {$D4#lo[0]},[$ctx]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ ret @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+___
+} }
+$code.=<<___;
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..37b71d990293
--- /dev/null
+++ b/arch/arm/crypto/poly1305-core.S_shipped
@@ -0,0 +1,1158 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp r1,#0
+ str r3,[r0,#0] @ zero hash value
+ str r3,[r0,#4]
+ str r3,[r0,#8]
+ str r3,[r0,#12]
+ str r3,[r0,#16]
+ str r3,[r0,#36] @ clear is_base2_26
+ add r0,r0,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[r0,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[r1,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[r1,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[r1,#2]
+ ldrb r7,[r1,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[r1,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[r1,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[r1,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[r1,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[r1,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[r1,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[r1,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[r1,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[r1,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[r1,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[r1,#14]
+ and r6,r6,r3
+
+ ldrb r10,[r1,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[r0,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[r0,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[r0,#8]
+ and r7,r7,r3
+ str r7,[r0,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands r2,r2,#-16
+ beq .Lno_data
+
+ add r2,r2,r1 @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia r0,{r4-r12} @ load context
+ add r0,r0,#20
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+#else
+ ldr lr,[r0,#36] @ is_base2_26
+ ldmia r0!,{r4-r8} @ load hash value
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+
+ adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32
+ mov r10,r5,lsr#6
+ adcs r10,r10,r6,lsl#20
+ mov r11,r6,lsr#12
+ adcs r11,r11,r7,lsl#14
+ mov r12,r7,lsr#18
+ adcs r12,r12,r8,lsl#8
+ mov r2,#0
+ teq lr,#0
+ str r2,[r0,#16] @ clear is_base2_26
+ adc r2,r2,r8,lsr#24
+
+ itttt ne
+ movne r4,r9 @ choose between radixes
+ movne r5,r10
+ movne r6,r11
+ movne r7,r12
+ ldmia r0,{r9-r12} @ load key
+ it ne
+ movne r8,r2
+#endif
+
+ mov lr,r1
+ cmp r3,#0
+ str r10,[sp,#20]
+ str r11,[sp,#24]
+ str r12,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi r8,r8,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds r4,r4,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs r5,r5,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs r6,r6,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add r10,r10,r10,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi r8,r8,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds r4,r4,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs r5,r5,r1
+ add r10,r10,r10,lsr#2
+ adcs r6,r6,r2
+#endif
+ add r11,r11,r11,lsr#2
+ adcs r7,r7,r3
+ add r12,r12,r12,lsr#2
+
+ umull r2,r3,r5,r9
+ adc r8,r8,#0
+ umull r0,r1,r4,r9
+ umlal r2,r3,r8,r10
+ umlal r0,r1,r7,r10
+ ldr r10,[sp,#20] @ reload r10
+ umlal r2,r3,r6,r12
+ umlal r0,r1,r5,r12
+ umlal r2,r3,r7,r11
+ umlal r0,r1,r6,r11
+ umlal r2,r3,r4,r10
+ str r0,[sp,#0] @ future r4
+ mul r0,r11,r8
+ ldr r11,[sp,#24] @ reload r11
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future r6
+ str r2,[sp,#4] @ future r5
+
+ mul r2,r12,r8
+ eor r3,r3,r3
+ umlal r0,r1,r7,r12
+ ldr r12,[sp,#28] @ reload r12
+ umlal r2,r3,r7,r9
+ umlal r0,r1,r6,r9
+ umlal r2,r3,r6,r10
+ umlal r0,r1,r5,r10
+ umlal r2,r3,r5,r11
+ umlal r0,r1,r4,r11
+ umlal r2,r3,r4,r12
+ ldr r4,[sp,#0]
+ mul r8,r9,r8
+ ldr r5,[sp,#4]
+
+ adds r6,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds r7,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add r8,r8,r3 @ h4+=d3>>32
+
+ and r1,r8,#-4
+ and r8,r8,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds r4,r4,r1
+ adcs r5,r5,#0
+ adcs r6,r6,#0
+ adcs r7,r7,#0
+ adc r8,r8,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr r0,[sp,#12]
+ add sp,sp,#32
+ stmdb r0,{r4-r8} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia r0,{r3-r7}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[r0,#36] @ is_base2_26
+
+ adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32
+ mov r9,r4,lsr#6
+ adcs r9,r9,r5,lsl#20
+ mov r10,r5,lsr#12
+ adcs r10,r10,r6,lsl#14
+ mov r11,r6,lsr#18
+ adcs r11,r11,r7,lsl#8
+ mov r0,#0
+ adc r0,r0,r7,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne r3,r8
+ movne r4,r9
+ movne r5,r10
+ movne r6,r11
+ it ne
+ movne r7,r0
+#endif
+
+ adds r8,r3,#5 @ compare to modulus
+ adcs r9,r4,#0
+ adcs r10,r5,#0
+ adcs r11,r6,#0
+ adc r0,r7,#0
+ tst r0,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r3,r8
+ ldr r8,[r2,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r4,r9
+ ldr r9,[r2,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r5,r10
+ ldr r10,[r2,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r6,r11
+ ldr r11,[r2,#12]
+
+ adds r3,r3,r8
+ adcs r4,r4,r9
+ adcs r5,r5,r10
+ adc r6,r6,r11
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev r3,r3
+ rev r4,r4
+ rev r5,r5
+ rev r6,r6
+# endif
+ str r3,[r1,#0]
+ str r4,[r1,#4]
+ str r5,[r1,#8]
+ str r6,[r1,#12]
+#else
+ strb r3,[r1,#0]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#4]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#8]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#12]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#1]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#5]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#9]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#13]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#2]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#6]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#10]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#14]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#3]
+ strb r4,[r1,#7]
+ strb r5,[r1,#11]
+ strb r6,[r1,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[r0,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[r0,#20] @ load key base 2^32
+ ldr r5,[r0,#24]
+ ldr r6,[r0,#28]
+ ldr r7,[r0,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 d0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 d1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 d2,r2
+ vdup.32 d3,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 d4,r3
+ vdup.32 d5,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 d6,r4
+ vdup.32 d7,r6
+ vdup.32 d8,r5
+
+ mov r5,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 q5,d0,d0[1]
+ vmull.u32 q6,d1,d0[1]
+ vmull.u32 q7,d3,d0[1]
+ vmull.u32 q8,d5,d0[1]
+ vmull.u32 q9,d7,d0[1]
+
+ vmlal.u32 q5,d7,d2[1]
+ vmlal.u32 q6,d0,d1[1]
+ vmlal.u32 q7,d1,d1[1]
+ vmlal.u32 q8,d3,d1[1]
+ vmlal.u32 q9,d5,d1[1]
+
+ vmlal.u32 q5,d5,d4[1]
+ vmlal.u32 q6,d7,d4[1]
+ vmlal.u32 q8,d1,d3[1]
+ vmlal.u32 q7,d0,d3[1]
+ vmlal.u32 q9,d3,d3[1]
+
+ vmlal.u32 q5,d3,d6[1]
+ vmlal.u32 q8,d0,d5[1]
+ vmlal.u32 q6,d5,d6[1]
+ vmlal.u32 q7,d7,d6[1]
+ vmlal.u32 q9,d1,d5[1]
+
+ vmlal.u32 q8,d7,d8[1]
+ vmlal.u32 q5,d1,d8[1]
+ vmlal.u32 q6,d3,d8[1]
+ vmlal.u32 q7,d5,d8[1]
+ vmlal.u32 q9,d0,d7[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vbic.i32 d18,#0xfc000000
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vadd.i32 d10,d10,d30 @ h4 -> h0
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vbic.i32 d14,#0xfc000000
+
+ vshr.u32 d30,d10,#26
+ vbic.i32 d10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+
+ subs r5,r5,#1
+ beq .Lsquare_break_neon
+
+ add r6,r0,#(48+0*9*4)
+ add r7,r0,#(48+1*9*4)
+
+ vtrn.32 d0,d10 @ r^2:r^1
+ vtrn.32 d3,d14
+ vtrn.32 d5,d16
+ vtrn.32 d1,d12
+ vtrn.32 d7,d18
+
+ vshl.u32 d4,d3,#2 @ *5
+ vshl.u32 d6,d5,#2
+ vshl.u32 d2,d1,#2
+ vshl.u32 d8,d7,#2
+ vadd.i32 d4,d4,d3
+ vadd.i32 d2,d2,d1
+ vadd.i32 d6,d6,d5
+ vadd.i32 d8,d8,d7
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6,:32]
+ vst1.32 {d8[1]},[r7,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add r6,r0,#(48+2*4*9)
+ add r7,r0,#(48+3*4*9)
+
+ vmov d0,d10 @ r^4:r^3
+ vshl.u32 d2,d12,#2 @ *5
+ vmov d1,d12
+ vshl.u32 d4,d14,#2
+ vmov d3,d14
+ vshl.u32 d6,d16,#2
+ vmov d5,d16
+ vshl.u32 d8,d18,#2
+ vmov d7,d18
+ vadd.i32 d2,d2,d12
+ vadd.i32 d4,d4,d14
+ vadd.i32 d6,d6,d16
+ vadd.i32 d8,d8,d18
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6]
+ vst1.32 {d8[1]},[r7]
+
+.Lno_init_neon:
+ bx lr @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[r0,#36] @ is_base2_26
+
+ cmp r2,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[r0,#0] @ load hash value base 2^32
+ ldr r5,[r0,#4]
+ ldr r6,[r0,#8]
+ ldr r7,[r0,#12]
+ ldr ip,[r0,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor d10,d10,d10
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor d12,d12,d12
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor d14,d14,d14
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor d16,d16,d16
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor d18,d18,d18
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[r0,#36] @ set is_base2_26
+
+ vmov.32 d10[0],r2
+ vmov.32 d12[0],r3
+ vmov.32 d14[0],r4
+ vmov.32 d16[0],r5
+ vmov.32 d18[0],r6
+ adr r5,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor d10,d10,d10
+ veor d12,d12,d12
+ veor d14,d14,d14
+ veor d16,d16,d16
+ veor d18,d18,d18
+ vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ adr r5,.Lzeros
+ vld1.32 {d18[0]},[r0]
+ sub r0,r0,#16 @ rewind
+
+.Lhash_loaded:
+ add r4,r1,#32
+ mov r3,r3,lsl#24
+ tst r2,#31
+ beq .Leven
+
+ vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]!
+ vmov.32 d28[0],r3
+ sub r2,r2,#16
+ add r4,r1,#32
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26
+ vshl.u32 d26,d26,#18
+
+ vsri.u32 d26,d24,#14
+ vshl.u32 d24,d24,#12
+ vadd.i32 d29,d28,d18 @ add hash value and move to #hi
+
+ vbic.i32 d26,#0xfc000000
+ vsri.u32 d24,d22,#20
+ vshl.u32 d22,d22,#6
+
+ vbic.i32 d24,#0xfc000000
+ vsri.u32 d22,d20,#26
+ vadd.i32 d27,d26,d16
+
+ vbic.i32 d20,#0xfc000000
+ vbic.i32 d22,#0xfc000000
+ vadd.i32 d25,d24,d14
+
+ vadd.i32 d21,d20,d10
+ vadd.i32 d23,d22,d12
+
+ mov r7,r5
+ add r6,r0,#48
+
+ cmp r2,r2
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs r2,r2,#64
+ it lo
+ movlo r4,r5
+
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+ itt hi
+ addhi r7,r0,#(48+1*9*4)
+ addhi r6,r0,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vshl.u32 q13,q13,#18
+
+ vsri.u32 q13,q12,#14
+ vshl.u32 q12,q12,#12
+
+ vbic.i32 q13,#0xfc000000
+ vsri.u32 q12,q11,#20
+ vshl.u32 q11,q11,#6
+
+ vbic.i32 q12,#0xfc000000
+ vsri.u32 q11,q10,#26
+
+ vbic.i32 q10,#0xfc000000
+ vbic.i32 q11,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ ___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ ___________________/ ____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 d24,d24,d14 @ accumulate inp[0:1]
+ vmull.u32 q7,d25,d0[1]
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0[1]
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0[1]
+ vmlal.u32 q7,d23,d1[1]
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0[1]
+
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0[1]
+ subs r2,r2,#64
+ vmlal.u32 q5,d29,d2[1]
+ it lo
+ movlo r4,r5
+ vmlal.u32 q8,d25,d1[1]
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q6,d21,d1[1]
+ vmlal.u32 q9,d27,d1[1]
+
+ vmlal.u32 q5,d27,d4[1]
+ vmlal.u32 q8,d23,d3[1]
+ vmlal.u32 q9,d25,d3[1]
+ vmlal.u32 q6,d29,d4[1]
+ vmlal.u32 q7,d21,d3[1]
+
+ vmlal.u32 q8,d21,d5[1]
+ vmlal.u32 q5,d25,d6[1]
+ vmlal.u32 q9,d23,d5[1]
+ vmlal.u32 q6,d27,d6[1]
+ vmlal.u32 q7,d29,d6[1]
+
+ vmlal.u32 q8,d29,d8[1]
+ vmlal.u32 q5,d23,d8[1]
+ vmlal.u32 q9,d21,d7[1]
+ vmlal.u32 q6,d25,d8[1]
+ vmlal.u32 q7,d27,d8[1]
+
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 q8,d26,d0[0]
+ vmlal.u32 q5,d20,d0[0]
+ vmlal.u32 q9,d28,d0[0]
+ vmlal.u32 q6,d22,d0[0]
+ vmlal.u32 q7,d24,d0[0]
+ vld1.32 d8[0],[r6,:32]
+
+ vmlal.u32 q8,d24,d1[0]
+ vmlal.u32 q5,d28,d2[0]
+ vmlal.u32 q9,d26,d1[0]
+ vmlal.u32 q6,d20,d1[0]
+ vmlal.u32 q7,d22,d1[0]
+
+ vmlal.u32 q8,d22,d3[0]
+ vmlal.u32 q5,d26,d4[0]
+ vmlal.u32 q9,d24,d3[0]
+ vmlal.u32 q6,d28,d4[0]
+ vmlal.u32 q7,d20,d3[0]
+
+ vmlal.u32 q8,d20,d5[0]
+ vmlal.u32 q5,d24,d6[0]
+ vmlal.u32 q9,d22,d5[0]
+ vmlal.u32 q6,d26,d6[0]
+ vmlal.u32 q8,d28,d8[0]
+
+ vmlal.u32 q7,d28,d6[0]
+ vmlal.u32 q5,d22,d8[0]
+ vmlal.u32 q9,d20,d7[0]
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vmlal.u32 q6,d24,d8[0]
+ vmlal.u32 q7,d26,d8[0]
+
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+ vrev32.8 q13,q13
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vshl.u32 q13,q13,#18
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vsri.u32 q13,q12,#14
+ vbic.i32 d18,#0xfc000000
+ vshl.u32 q12,q12,#12
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vbic.i32 q13,#0xfc000000
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec]
+ vsri.u32 q12,q11,#20
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vshl.u32 q11,q11,#6
+ vbic.i32 d14,#0xfc000000
+ vbic.i32 q12,#0xfc000000
+
+ vshrn.u64 d30,q5,#26 @ re-narrow
+ vmovn.i64 d10,q5
+ vsri.u32 q11,q10,#26
+ vbic.i32 q10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vbic.i32 d10,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+ vbic.i32 q11,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add r7,r0,#(48+0*9*4)
+ add r6,r0,#(48+1*9*4)
+ adds r2,r2,#32
+ it ne
+ movne r2,#0
+ bne .Long_tail
+
+ vadd.i32 d25,d24,d14 @ add hash value and move to #hi
+ vadd.i32 d21,d20,d10
+ vadd.i32 d27,d26,d16
+ vadd.i32 d23,d22,d12
+ vadd.i32 d29,d28,d18
+
+.Long_tail:
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2
+
+ vadd.i32 d24,d24,d14 @ can be redundant
+ vmull.u32 q7,d25,d0
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0
+
+ vmlal.u32 q5,d29,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d25,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d21,d1
+ vmlal.u32 q9,d27,d1
+ vmlal.u32 q7,d23,d1
+
+ vmlal.u32 q8,d23,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d27,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d25,d3
+ vmlal.u32 q6,d29,d4
+ vmlal.u32 q7,d21,d3
+
+ vmlal.u32 q8,d21,d5
+ it ne
+ addne r7,r0,#(48+2*9*4)
+ vmlal.u32 q5,d25,d6
+ it ne
+ addne r6,r0,#(48+3*9*4)
+ vmlal.u32 q9,d23,d5
+ vmlal.u32 q6,d27,d6
+ vmlal.u32 q7,d29,d6
+
+ vmlal.u32 q8,d29,d8
+ vorn q0,q0,q0 @ all-ones, can be redundant
+ vmlal.u32 q5,d23,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d21,d7
+ vmlal.u32 q6,d25,d8
+ vmlal.u32 q7,d27,d8
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+
+ vmlal.u32 q7,d24,d0
+ vmlal.u32 q5,d20,d0
+ vmlal.u32 q8,d26,d0
+ vmlal.u32 q6,d22,d0
+ vmlal.u32 q9,d28,d0
+
+ vmlal.u32 q5,d28,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d24,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d20,d1
+ vmlal.u32 q9,d26,d1
+ vmlal.u32 q7,d22,d1
+
+ vmlal.u32 q8,d22,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d26,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d24,d3
+ vmlal.u32 q6,d28,d4
+ vmlal.u32 q7,d20,d3
+
+ vmlal.u32 q8,d20,d5
+ vmlal.u32 q5,d24,d6
+ vmlal.u32 q9,d22,d5
+ vmlal.u32 q6,d26,d6
+ vmlal.u32 q7,d28,d6
+
+ vmlal.u32 q8,d28,d8
+ vorn q0,q0,q0 @ all-ones
+ vmlal.u32 q5,d22,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d20,d7
+ vmlal.u32 q6,d24,d8
+ vmlal.u32 q7,d26,d8
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 d16,d16,d17
+ vadd.i64 d10,d10,d11
+ vadd.i64 d18,d18,d19
+ vadd.i64 d12,d12,d13
+ vadd.i64 d14,d14,d15
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 q15,q8,#26
+ vand.i64 q8,q8,q0
+ vshr.u64 q4,q5,#26
+ vand.i64 q5,q5,q0
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+
+ vshr.u64 q15,q9,#26
+ vand.i64 q9,q9,q0
+ vshr.u64 q4,q6,#26
+ vand.i64 q6,q6,q0
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+
+ vadd.i64 q5,q5,q15
+ vshl.u64 q15,q15,#2
+ vshr.u64 q4,q7,#26
+ vand.i64 q7,q7,q0
+ vadd.i64 q5,q5,q15 @ h4 -> h0
+ vadd.i64 q8,q8,q4 @ h2 -> h3
+
+ vshr.u64 q15,q5,#26
+ vand.i64 q5,q5,q0
+ vshr.u64 q4,q8,#26
+ vand.i64 q8,q8,q0
+ vadd.i64 q6,q6,q15 @ h0 -> h1
+ vadd.i64 q9,q9,q4 @ h3 -> h4
+
+ cmp r2,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ vst1.32 {d18[0]},[r0]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ bx lr @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
+.align 2
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..74a725ac89c9
--- /dev/null
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+void poly1305_init_arm(void *state, const u8 *key);
+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
+void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
+
+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int arm_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arm(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks_arm(&dctx->h, src, len, hibit);
+}
+
+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ arm_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ arm_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int arm_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ arm_poly1305_do_update(dctx, src, srclen, false);
+ return 0;
+}
+
+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
+ const u8 *src,
+ unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ arm_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ crypto_simd_usable();
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_arm(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && do_neon) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks_arm(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_arm(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg arm_poly1305_algs[] = {{
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-arm",
+ .base.cra_priority = 150,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#ifdef CONFIG_KERNEL_MODE_NEON
+}, {
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update_neon,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#endif
+}};
+
+static int __init arm_poly1305_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ (elf_hwcap & HWCAP_NEON))
+ static_branch_enable(&have_neon);
+ else
+ /* register only the first entry */
+ return crypto_register_shash(&arm_poly1305_algs[0]);
+
+ return crypto_register_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+static void __exit arm_poly1305_mod_exit(void)
+{
+ if (!static_branch_likely(&have_neon)) {
+ crypto_unregister_shash(&arm_poly1305_algs[0]);
+ return;
+ }
+ crypto_unregister_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+module_init(arm_poly1305_mod_init);
+module_exit(arm_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-arm");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 9923445e8225..9bd15b227e78 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -40,7 +40,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
default 4 if X86_64
- default 9 if ARM64
+ default 9 if ARM || ARM64
default 1
config CRYPTO_ARCH_HAVE_LIB_POLY1305
--
2.18.2
From d6a1701488249634f8dd62a3757dfe6119d2acf0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:26 +0100
Subject: [PATCH 020/100] crypto: mips/poly1305 - incorporate
OpenSSL/CRYPTOGAMS optimized implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit a11d055e7a64ac34a5e99b6fe731299449cbcd58 upstream.
This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for
MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been
contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken
straight from this upstream GitHub repository [0] at commit
d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes
required to build it as part of a Linux kernel module.
[0] https://github.com/dot-asm/cryptogams
Co-developed-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Andy Polyakov <appro@cryptogams.org>
Co-developed-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/mips/crypto/Makefile | 14 +
arch/mips/crypto/poly1305-glue.c | 203 +++++
arch/mips/crypto/poly1305-mips.pl | 1273 +++++++++++++++++++++++++++++
crypto/Kconfig | 5 +
lib/crypto/Kconfig | 1 +
5 files changed, 1496 insertions(+)
create mode 100644 arch/mips/crypto/poly1305-glue.c
create mode 100644 arch/mips/crypto/poly1305-mips.pl
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index b528b9d300f1..8e1deaf00e0c 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
chacha-mips-y := chacha-core.o chacha-glue.o
AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
+
+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
+poly1305-mips-y := poly1305-core.o poly1305-glue.o
+
+perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
+perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
+
+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
+ $(call if_changed,perlasm)
+
+targets += poly1305-core.S
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..b759b6ccc361
--- /dev/null
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_mips(void *state, const u8 *key);
+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_mips(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int mips_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_mips(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ poly1305_blocks_mips(&dctx->h, src, len, hibit);
+}
+
+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ mips_poly1305_blocks(dctx, src, len, 1);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_mips(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ poly1305_blocks_mips(&dctx->h, src, len, 1);
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_mips(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg mips_poly1305_alg = {
+ .init = mips_poly1305_init,
+ .update = mips_poly1305_update,
+ .final = mips_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init mips_poly1305_mod_init(void)
+{
+ return crypto_register_shash(&mips_poly1305_alg);
+}
+
+static void __exit mips_poly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&mips_poly1305_alg);
+}
+
+module_init(mips_poly1305_mod_init);
+module_exit(mips_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-mips");
diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl
new file mode 100644
index 000000000000..b05bab884ed2
--- /dev/null
+++ b/arch/mips/crypto/poly1305-mips.pl
@@ -0,0 +1,1273 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
+# project.
+# ====================================================================
+
+# Poly1305 hash for MIPS.
+#
+# May 2016
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc
+# R1x000 ~5.5/+130% (big-endian)
+# Octeon II 2.50/+70% (little-endian)
+#
+# March 2019
+#
+# Add 32-bit code path.
+#
+# October 2019
+#
+# Modulo-scheduling reduction allows to omit dependency chain at the
+# end of inner loop and improve performance. Also optimize MIPS32R2
+# code path for MIPS 1004K core. Per René von Dorst's suggestions.
+#
+# IALU/gcc
+# R1x000 ~9.8/? (big-endian)
+# Octeon II 3.65/+140% (little-endian)
+# MT7621/1004K 4.75/? (little-endian)
+#
+######################################################################
+# There is a number of MIPS ABI in use, O32 and N32/64 are most
+# widely used. Then there is a new contender: NUBI. It appears that if
+# one picks the latter, it's possible to arrange code in ABI neutral
+# manner. Therefore let's stick to NUBI register layout:
+#
+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
+#
+# The return value is placed in $a0. Following coding rules facilitate
+# interoperability:
+#
+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
+# excluded from the rule, because it's specified volatile];
+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
+# old code];
+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
+#
+# For reference here is register layout for N32/64 MIPS ABIs:
+#
+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
+#
+# <appro@openssl.org>
+#
+######################################################################
+
+$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
+
+$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
+
+if ($flavour =~ /64|n32/i) {{{
+######################################################################
+# 64-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
+ defined(_MIPS_ARCH_MIPS64R6)) \\
+ && !defined(_MIPS_ARCH_MIPS64R2)
+# define _MIPS_ARCH_MIPS64R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define dmultu(rs,rt)
+# define mflo(rd,rs,rt) dmulu rd,rs,rt
+# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
+#else
+# define dmultu(rs,rt) dmultu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 7
+#else
+# define MSB 7
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sd $zero,0($ctx)
+ sd $zero,8($ctx)
+ sd $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $tmp0,$inp,7 # $inp % 8
+ dsubu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ ld $in0,0($inp)
+ ld $in1,8($inp)
+ beqz $tmp0,.Laligned_key
+ ld $tmp2,16($inp)
+
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ dsllv $in0,$in0,$tmp0
+ dsrlv $tmp3,$in1,$tmp1
+ dsllv $in1,$in1,$tmp0
+ dsrlv $tmp2,$tmp2,$tmp1
+# else
+ dsrlv $in0,$in0,$tmp0
+ dsllv $tmp3,$in1,$tmp1
+ dsrlv $in1,$in1,$tmp0
+ dsllv $tmp2,$tmp2,$tmp1
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_key:
+#else
+ ldl $in0,0+MSB($inp)
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ li $tmp0,1
+ dsll $tmp0,32 # 0x0000000100000000
+ daddiu $tmp0,-63 # 0x00000000ffffffc1
+ dsll $tmp0,28 # 0x0ffffffc10000000
+ daddiu $tmp0,-1 # 0x0ffffffc0fffffff
+
+ and $in0,$tmp0
+ daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
+ and $in1,$tmp0
+
+ sd $in0,24($ctx)
+ dsrl $tmp0,$in1,2
+ sd $in1,32($ctx)
+ daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
+ sd $tmp0,40($ctx)
+
+.Lno_key:
+ li $v0,0 # return 0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
+
+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
+ ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
+my ($shr,$shl) = ($s6,$s7); # used on R6
+
+$code.=<<___;
+.align 5
+.globl poly1305_blocks
+.ent poly1305_blocks
+poly1305_blocks:
+ .set noreorder
+ dsrl $len,4 # number of complete blocks
+ bnez $len,poly1305_blocks_internal
+ nop
+ jr $ra
+ nop
+.end poly1305_blocks
+
+.align 5
+.ent poly1305_blocks_internal
+poly1305_blocks_internal:
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ .frame $sp,8*8,$ra
+ .mask $SAVED_REGS_MASK|0x000c0000,-8
+ dsubu $sp,8*8
+ sd $s7,56($sp)
+ sd $s6,48($sp)
+#else
+ .frame $sp,6*8,$ra
+ .mask $SAVED_REGS_MASK,-8
+ dsubu $sp,6*8
+#endif
+ sd $s5,40($sp)
+ sd $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sd $s3,24($sp)
+ sd $s2,16($sp)
+ sd $s1,8($sp)
+ sd $s0,0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $shr,$inp,7
+ dsubu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+ subu $shl,$zero,$shr
+#endif
+
+ ld $h0,0($ctx) # load hash value
+ ld $h1,8($ctx)
+ ld $h2,16($ctx)
+
+ ld $r0,24($ctx) # load key
+ ld $r1,32($ctx)
+ ld $rs1,40($ctx)
+
+ dsll $len,4
+ daddu $len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $in0,0($inp) # load input
+ ld $in1,8($inp)
+ beqz $shr,.Laligned_inp
+
+ ld $tmp2,16($inp)
+# ifdef MIPSEB
+ dsllv $in0,$in0,$shr
+ dsrlv $tmp3,$in1,$shl
+ dsllv $in1,$in1,$shr
+ dsrlv $tmp2,$tmp2,$shl
+# else
+ dsrlv $in0,$in0,$shr
+ dsllv $tmp3,$in1,$shl
+ dsrlv $in1,$in1,$shr
+ dsllv $tmp2,$tmp2,$shl
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_inp:
+#else
+ ldl $in0,0+MSB($inp) # load input
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+ daddiu $inp,16
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ dsrl $tmp1,$h2,2 # modulo-scheduled reduction
+ andi $h2,$h2,3
+ dsll $tmp0,$tmp1,2
+
+ daddu $d0,$h0,$in0 # accumulate input
+ daddu $tmp1,$tmp0
+ sltu $tmp0,$d0,$h0
+ daddu $d0,$d0,$tmp1 # ... and residue
+ sltu $tmp1,$d0,$tmp1
+ daddu $d1,$h1,$in1
+ daddu $tmp0,$tmp1
+ sltu $tmp1,$d1,$h1
+ daddu $d1,$tmp0
+
+ dmultu ($r0,$d0) # h0*r0
+ daddu $d2,$h2,$padbit
+ sltu $tmp0,$d1,$tmp0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ dmultu ($rs1,$d1) # h1*5*r1
+ daddu $d2,$tmp1
+ daddu $d2,$tmp0
+ mflo ($tmp0,$rs1,$d1)
+ mfhi ($tmp1,$rs1,$d1)
+
+ dmultu ($r1,$d0) # h0*r1
+ mflo ($tmp2,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ daddu $h0,$tmp0
+ daddu $h1,$tmp1
+ sltu $tmp0,$h0,$tmp0
+
+ dmultu ($r0,$d1) # h1*r0
+ daddu $h1,$tmp0
+ daddu $h1,$tmp2
+ mflo ($tmp0,$r0,$d1)
+ mfhi ($tmp1,$r0,$d1)
+
+ dmultu ($rs1,$d2) # h2*5*r1
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ mflo ($tmp2,$rs1,$d2)
+
+ dmultu ($r0,$d2) # h2*r0
+ daddu $h1,$tmp0
+ daddu $h2,$tmp1
+ mflo ($tmp3,$r0,$d2)
+ sltu $tmp0,$h1,$tmp0
+ daddu $h2,$tmp0
+
+ daddu $h1,$tmp2
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ daddu $h2,$tmp3
+
+ bne $inp,$len,.Loop
+
+ sd $h0,0($ctx) # store hash value
+ sd $h1,8($ctx)
+ sd $h2,16($ctx)
+
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $s7,56($sp)
+ ld $s6,48($sp)
+#endif
+ ld $s5,40($sp) # epilogue
+ ld $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
+ ld $s3,24($sp)
+ ld $s2,16($sp)
+ ld $s1,8($sp)
+ ld $s0,0($sp)
+___
+$code.=<<___;
+ jr $ra
+#if defined(_MIPS_ARCH_MIPS64R6)
+ daddu $sp,8*8
+#else
+ daddu $sp,6*8
+#endif
+.end poly1305_blocks_internal
+___
+}
+{
+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ ld $tmp2,16($ctx)
+ ld $tmp0,0($ctx)
+ ld $tmp1,8($ctx)
+
+ li $in0,-4 # final reduction
+ dsrl $in1,$tmp2,2
+ and $in0,$tmp2
+ andi $tmp2,$tmp2,3
+ daddu $in0,$in1
+
+ daddu $tmp0,$tmp0,$in0
+ sltu $in1,$tmp0,$in0
+ daddiu $in0,$tmp0,5 # compare to modulus
+ daddu $tmp1,$tmp1,$in1
+ sltiu $tmp3,$in0,5
+ sltu $tmp4,$tmp1,$in1
+ daddu $in1,$tmp1,$tmp3
+ daddu $tmp2,$tmp2,$tmp4
+ sltu $tmp3,$in1,$tmp3
+ daddu $tmp2,$tmp2,$tmp3
+
+ dsrl $tmp2,2 # see if it carried/borrowed
+ dsubu $tmp2,$zero,$tmp2
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ and $in0,$tmp2
+ and $in1,$tmp2
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+
+ lwu $tmp0,0($nonce) # load nonce
+ lwu $tmp1,4($nonce)
+ lwu $tmp2,8($nonce)
+ lwu $tmp3,12($nonce)
+ dsll $tmp1,32
+ dsll $tmp3,32
+ or $tmp0,$tmp1
+ or $tmp2,$tmp3
+
+ daddu $in0,$tmp0 # accumulate nonce
+ daddu $in1,$tmp2
+ sltu $tmp0,$in0,$tmp0
+ daddu $in1,$tmp0
+
+ dsrl $tmp0,$in0,8 # write mac value
+ dsrl $tmp1,$in0,16
+ dsrl $tmp2,$in0,24
+ sb $in0,0($mac)
+ dsrl $tmp3,$in0,32
+ sb $tmp0,1($mac)
+ dsrl $tmp0,$in0,40
+ sb $tmp1,2($mac)
+ dsrl $tmp1,$in0,48
+ sb $tmp2,3($mac)
+ dsrl $tmp2,$in0,56
+ sb $tmp3,4($mac)
+ dsrl $tmp3,$in1,8
+ sb $tmp0,5($mac)
+ dsrl $tmp0,$in1,16
+ sb $tmp1,6($mac)
+ dsrl $tmp1,$in1,24
+ sb $tmp2,7($mac)
+
+ sb $in1,8($mac)
+ dsrl $tmp2,$in1,32
+ sb $tmp3,9($mac)
+ dsrl $tmp3,$in1,40
+ sb $tmp0,10($mac)
+ dsrl $tmp0,$in1,48
+ sb $tmp1,11($mac)
+ dsrl $tmp1,$in1,56
+ sb $tmp2,12($mac)
+ sb $tmp3,13($mac)
+ sb $tmp0,14($mac)
+ sb $tmp1,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}} else {{{
+######################################################################
+# 32-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
+ ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
+ defined(_MIPS_ARCH_MIPS32R6)) \\
+ && !defined(_MIPS_ARCH_MIPS32R2)
+# define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+# define multu(rs,rt)
+# define mflo(rd,rs,rt) mulu rd,rs,rt
+# define mfhi(rd,rs,rt) muhu rd,rs,rt
+#else
+# define multu(rs,rt) multu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 3
+#else
+# define MSB 3
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sw $zero,0($ctx)
+ sw $zero,4($ctx)
+ sw $zero,8($ctx)
+ sw $zero,12($ctx)
+ sw $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $tmp0,$inp,3 # $inp % 4
+ subu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ lw $in0,0($inp)
+ lw $in1,4($inp)
+ lw $in2,8($inp)
+ lw $in3,12($inp)
+ beqz $tmp0,.Laligned_key
+
+ lw $tmp2,16($inp)
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ sllv $in0,$in0,$tmp0
+ srlv $tmp3,$in1,$tmp1
+ sllv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ srlv $tmp3,$in2,$tmp1
+ sllv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ srlv $tmp3,$in3,$tmp1
+ sllv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ srlv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# else
+ srlv $in0,$in0,$tmp0
+ sllv $tmp3,$in1,$tmp1
+ srlv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ sllv $tmp3,$in2,$tmp1
+ srlv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ sllv $tmp3,$in3,$tmp1
+ srlv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ sllv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# endif
+.Laligned_key:
+#else
+ lwl $in0,0+MSB($inp)
+ lwl $in1,4+MSB($inp)
+ lwl $in2,8+MSB($inp)
+ lwl $in3,12+MSB($inp)
+ lwr $in0,0+LSB($inp)
+ lwr $in1,4+LSB($inp)
+ lwr $in2,8+LSB($inp)
+ lwr $in3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $in0,$in0 # byte swap
+ wsbh $in1,$in1
+ wsbh $in2,$in2
+ wsbh $in3,$in3
+ rotr $in0,$in0,16
+ rotr $in1,$in1,16
+ rotr $in2,$in2,16
+ rotr $in3,$in3,16
+# else
+ srl $tmp0,$in0,24 # byte swap
+ srl $tmp1,$in0,8
+ andi $tmp2,$in0,0xFF00
+ sll $in0,$in0,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in0,$tmp0
+ srl $tmp0,$in1,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in1,8
+ or $in0,$tmp1
+ andi $tmp1,$in1,0xFF00
+ sll $in1,$in1,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in1,$tmp0
+ srl $tmp0,$in2,24
+ or $tmp2,$tmp1
+ srl $tmp1,$in2,8
+ or $in1,$tmp2
+ andi $tmp2,$in2,0xFF00
+ sll $in2,$in2,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in2,$tmp0
+ srl $tmp0,$in3,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in3,8
+ or $in2,$tmp1
+ andi $tmp1,$in3,0xFF00
+ sll $in3,$in3,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in3,$tmp0
+ or $tmp2,$tmp1
+ or $in3,$tmp2
+# endif
+#endif
+ lui $tmp0,0x0fff
+ ori $tmp0,0xffff # 0x0fffffff
+ and $in0,$in0,$tmp0
+ subu $tmp0,3 # 0x0ffffffc
+ and $in1,$in1,$tmp0
+ and $in2,$in2,$tmp0
+ and $in3,$in3,$tmp0
+
+ sw $in0,20($ctx)
+ sw $in1,24($ctx)
+ sw $in2,28($ctx)
+ sw $in3,32($ctx)
+
+ srl $tmp1,$in1,2
+ srl $tmp2,$in2,2
+ srl $tmp3,$in3,2
+ addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
+ addu $in2,$in2,$tmp2
+ addu $in3,$in3,$tmp3
+ sw $in1,36($ctx)
+ sw $in2,40($ctx)
+ sw $in3,44($ctx)
+.Lno_key:
+ li $v0,0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
+
+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
+ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
+my ($d0,$d1,$d2,$d3) =
+ ($a4,$a5,$a6,$a7);
+my $shr = $t2; # used on R6
+my $one = $t2; # used on R2
+
+$code.=<<___;
+.globl poly1305_blocks
+.align 5
+.ent poly1305_blocks
+poly1305_blocks:
+ .frame $sp,16*4,$ra
+ .mask $SAVED_REGS_MASK,-4
+ .set noreorder
+ subu $sp, $sp,4*12
+ sw $s11,4*11($sp)
+ sw $s10,4*10($sp)
+ sw $s9, 4*9($sp)
+ sw $s8, 4*8($sp)
+ sw $s7, 4*7($sp)
+ sw $s6, 4*6($sp)
+ sw $s5, 4*5($sp)
+ sw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sw $s3, 4*3($sp)
+ sw $s2, 4*2($sp)
+ sw $s1, 4*1($sp)
+ sw $s0, 4*0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+ srl $len,4 # number of complete blocks
+ li $one,1
+ beqz $len,.Labort
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $shr,$inp,3
+ subu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+#endif
+
+ lw $h0,0($ctx) # load hash value
+ lw $h1,4($ctx)
+ lw $h2,8($ctx)
+ lw $h3,12($ctx)
+ lw $h4,16($ctx)
+
+ lw $r0,20($ctx) # load key
+ lw $r1,24($ctx)
+ lw $r2,28($ctx)
+ lw $r3,32($ctx)
+ lw $rs1,36($ctx)
+ lw $rs2,40($ctx)
+ lw $rs3,44($ctx)
+
+ sll $len,4
+ addu $len,$len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lw $d0,0($inp) # load input
+ lw $d1,4($inp)
+ lw $d2,8($inp)
+ lw $d3,12($inp)
+ beqz $shr,.Laligned_inp
+
+ lw $t0,16($inp)
+ subu $t1,$zero,$shr
+# ifdef MIPSEB
+ sllv $d0,$d0,$shr
+ srlv $at,$d1,$t1
+ sllv $d1,$d1,$shr
+ or $d0,$d0,$at
+ srlv $at,$d2,$t1
+ sllv $d2,$d2,$shr
+ or $d1,$d1,$at
+ srlv $at,$d3,$t1
+ sllv $d3,$d3,$shr
+ or $d2,$d2,$at
+ srlv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# else
+ srlv $d0,$d0,$shr
+ sllv $at,$d1,$t1
+ srlv $d1,$d1,$shr
+ or $d0,$d0,$at
+ sllv $at,$d2,$t1
+ srlv $d2,$d2,$shr
+ or $d1,$d1,$at
+ sllv $at,$d3,$t1
+ srlv $d3,$d3,$shr
+ or $d2,$d2,$at
+ sllv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# endif
+.Laligned_inp:
+#else
+ lwl $d0,0+MSB($inp) # load input
+ lwl $d1,4+MSB($inp)
+ lwl $d2,8+MSB($inp)
+ lwl $d3,12+MSB($inp)
+ lwr $d0,0+LSB($inp)
+ lwr $d1,4+LSB($inp)
+ lwr $d2,8+LSB($inp)
+ lwr $d3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $d0,$d0 # byte swap
+ wsbh $d1,$d1
+ wsbh $d2,$d2
+ wsbh $d3,$d3
+ rotr $d0,$d0,16
+ rotr $d1,$d1,16
+ rotr $d2,$d2,16
+ rotr $d3,$d3,16
+# else
+ srl $at,$d0,24 # byte swap
+ srl $t0,$d0,8
+ andi $t1,$d0,0xFF00
+ sll $d0,$d0,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d0,$at
+ srl $at,$d1,24
+ or $t0,$t1
+ srl $t1,$d1,8
+ or $d0,$t0
+ andi $t0,$d1,0xFF00
+ sll $d1,$d1,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d1,$at
+ srl $at,$d2,24
+ or $t1,$t0
+ srl $t0,$d2,8
+ or $d1,$t1
+ andi $t1,$d2,0xFF00
+ sll $d2,$d2,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d2,$at
+ srl $at,$d3,24
+ or $t0,$t1
+ srl $t1,$d3,8
+ or $d2,$t0
+ andi $t0,$d3,0xFF00
+ sll $d3,$d3,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d3,$at
+ or $t1,$t0
+ or $d3,$t1
+# endif
+#endif
+ srl $t0,$h4,2 # modulo-scheduled reduction
+ andi $h4,$h4,3
+ sll $at,$t0,2
+
+ addu $d0,$d0,$h0 # accumulate input
+ addu $t0,$t0,$at
+ sltu $h0,$d0,$h0
+ addu $d0,$d0,$t0 # ... and residue
+ sltu $at,$d0,$t0
+
+ addu $d1,$d1,$h1
+ addu $h0,$h0,$at # carry
+ sltu $h1,$d1,$h1
+ addu $d1,$d1,$h0
+ sltu $h0,$d1,$h0
+
+ addu $d2,$d2,$h2
+ addu $h1,$h1,$h0 # carry
+ sltu $h2,$d2,$h2
+ addu $d2,$d2,$h1
+ sltu $h1,$d2,$h1
+
+ addu $d3,$d3,$h3
+ addu $h2,$h2,$h1 # carry
+ sltu $h3,$d3,$h3
+ addu $d3,$d3,$h2
+
+#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
+ multu $r0,$d0 # d0*r0
+ sltu $h2,$d3,$h2
+ maddu $rs3,$d1 # d1*s3
+ addu $h3,$h3,$h2 # carry
+ maddu $rs2,$d2 # d2*s2
+ addu $h4,$h4,$padbit
+ maddu $rs1,$d3 # d3*s1
+ addu $h4,$h4,$h3
+ mfhi $at
+ mflo $h0
+
+ multu $r1,$d0 # d0*r1
+ maddu $r0,$d1 # d1*r0
+ maddu $rs3,$d2 # d2*s3
+ maddu $rs2,$d3 # d3*s2
+ maddu $rs1,$h4 # h4*s1
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h1
+
+ multu $r2,$d0 # d0*r2
+ maddu $r1,$d1 # d1*r1
+ maddu $r0,$d2 # d2*r0
+ maddu $rs3,$d3 # d3*s3
+ maddu $rs2,$h4 # h4*s2
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h2
+
+ mul $t0,$r0,$h4 # h4*r0
+
+ multu $r3,$d0 # d0*r3
+ maddu $r2,$d1 # d1*r2
+ maddu $r1,$d2 # d2*r1
+ maddu $r0,$d3 # d3*r0
+ maddu $rs3,$h4 # h4*s3
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h3
+
+ addiu $inp,$inp,16
+
+ addu $h4,$t0,$at
+#else
+ multu ($r0,$d0) # d0*r0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ sltu $h2,$d3,$h2
+ addu $h3,$h3,$h2 # carry
+
+ multu ($rs3,$d1) # d1*s3
+ mflo ($at,$rs3,$d1)
+ mfhi ($t0,$rs3,$d1)
+
+ addu $h4,$h4,$padbit
+ addiu $inp,$inp,16
+ addu $h4,$h4,$h3
+
+ multu ($rs2,$d2) # d2*s2
+ mflo ($a3,$rs2,$d2)
+ mfhi ($t1,$rs2,$d2)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($rs1,$d3) # d3*s1
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$rs1,$d3)
+ mfhi ($t0,$rs1,$d3)
+ addu $h0,$h0,$a3
+ addu $h1,$h1,$t1
+ multu ($r1,$d0) # d0*r1
+ sltu $a3,$h0,$a3
+ addu $h1,$h1,$a3
+
+
+ mflo ($a3,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($r0,$d1) # d1*r0
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$r0,$d1)
+ mfhi ($t0,$r0,$d1)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($rs3,$d2) # d2*s3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs3,$d2)
+ mfhi ($t1,$rs3,$d2)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($rs2,$d3) # d3*s2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+ mflo ($at,$rs2,$d3)
+ mfhi ($t0,$rs2,$d3)
+ addu $h1,$h1,$a3
+ addu $h2,$h2,$t1
+ multu ($rs1,$h4) # h4*s1
+ sltu $a3,$h1,$a3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs1,$h4)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($r2,$d0) # d0*r2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+
+ mflo ($at,$r2,$d0)
+ mfhi ($h3,$r2,$d0)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($r1,$d1) # d1*r1
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$r1,$d1)
+ mfhi ($t1,$r1,$d1)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r0,$d2) # d2*r0
+ addu $h3,$h3,$at
+
+ mflo ($at,$r0,$d2)
+ mfhi ($t0,$r0,$d2)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($rs3,$d3) # d3*s3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+ mflo ($a3,$rs3,$d3)
+ mfhi ($t1,$rs3,$d3)
+ addu $h2,$h2,$at
+ addu $h3,$h3,$t0
+ multu ($rs2,$h4) # h4*s2
+ sltu $at,$h2,$at
+ addu $h3,$h3,$at
+
+ mflo ($at,$rs2,$h4)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($r3,$d0) # d0*r3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+
+ mflo ($a3,$r3,$d0)
+ mfhi ($t1,$r3,$d0)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r2,$d1) # d1*r2
+ addu $h3,$h3,$at
+
+ mflo ($at,$r2,$d1)
+ mfhi ($t0,$r2,$d1)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ multu ($r0,$d3) # d3*r0
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$r0,$d3)
+ mfhi ($d3,$r0,$d3)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r1,$d2) # d2*r1
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+ mflo ($at,$r1,$d2)
+ mfhi ($t0,$r1,$d2)
+ addu $h3,$h3,$a3
+ addu $t1,$t1,$d3
+ multu ($rs3,$h4) # h4*s3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$rs3,$h4)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r0,$h4) # h4*r0
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+
+ mflo ($h4,$r0,$h4)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+ addu $h4,$h4,$t1
+
+ li $padbit,1 # if we loop, padbit is 1
+#endif
+ bne $inp,$len,.Loop
+
+ sw $h0,0($ctx) # store hash value
+ sw $h1,4($ctx)
+ sw $h2,8($ctx)
+ sw $h3,12($ctx)
+ sw $h4,16($ctx)
+
+ .set noreorder
+.Labort:
+ lw $s11,4*11($sp)
+ lw $s10,4*10($sp)
+ lw $s9, 4*9($sp)
+ lw $s8, 4*8($sp)
+ lw $s7, 4*7($sp)
+ lw $s6, 4*6($sp)
+ lw $s5, 4*5($sp)
+ lw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ lw $s3, 4*3($sp)
+ lw $s2, 4*2($sp)
+ lw $s1, 4*1($sp)
+ lw $s0, 4*0($sp)
+___
+$code.=<<___;
+ jr $ra
+ addu $sp,$sp,4*12
+.end poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ lw $tmp4,16($ctx)
+ lw $tmp0,0($ctx)
+ lw $tmp1,4($ctx)
+ lw $tmp2,8($ctx)
+ lw $tmp3,12($ctx)
+
+ li $in0,-4 # final reduction
+ srl $ctx,$tmp4,2
+ and $in0,$in0,$tmp4
+ andi $tmp4,$tmp4,3
+ addu $ctx,$ctx,$in0
+
+ addu $tmp0,$tmp0,$ctx
+ sltu $ctx,$tmp0,$ctx
+ addiu $in0,$tmp0,5 # compare to modulus
+ addu $tmp1,$tmp1,$ctx
+ sltiu $in1,$in0,5
+ sltu $ctx,$tmp1,$ctx
+ addu $in1,$in1,$tmp1
+ addu $tmp2,$tmp2,$ctx
+ sltu $in2,$in1,$tmp1
+ sltu $ctx,$tmp2,$ctx
+ addu $in2,$in2,$tmp2
+ addu $tmp3,$tmp3,$ctx
+ sltu $in3,$in2,$tmp2
+ sltu $ctx,$tmp3,$ctx
+ addu $in3,$in3,$tmp3
+ addu $tmp4,$tmp4,$ctx
+ sltu $ctx,$in3,$tmp3
+ addu $ctx,$tmp4
+
+ srl $ctx,2 # see if it carried/borrowed
+ subu $ctx,$zero,$ctx
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+ and $in0,$ctx
+ and $in1,$ctx
+ and $in2,$ctx
+ and $in3,$ctx
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+
+ lw $tmp0,0($nonce) # load nonce
+ lw $tmp1,4($nonce)
+ lw $tmp2,8($nonce)
+ lw $tmp3,12($nonce)
+
+ addu $in0,$tmp0 # accumulate nonce
+ sltu $ctx,$in0,$tmp0
+
+ addu $in1,$tmp1
+ sltu $tmp1,$in1,$tmp1
+ addu $in1,$ctx
+ sltu $ctx,$in1,$ctx
+ addu $ctx,$tmp1
+
+ addu $in2,$tmp2
+ sltu $tmp2,$in2,$tmp2
+ addu $in2,$ctx
+ sltu $ctx,$in2,$ctx
+ addu $ctx,$tmp2
+
+ addu $in3,$tmp3
+ addu $in3,$ctx
+
+ srl $tmp0,$in0,8 # write mac value
+ srl $tmp1,$in0,16
+ srl $tmp2,$in0,24
+ sb $in0, 0($mac)
+ sb $tmp0,1($mac)
+ srl $tmp0,$in1,8
+ sb $tmp1,2($mac)
+ srl $tmp1,$in1,16
+ sb $tmp2,3($mac)
+ srl $tmp2,$in1,24
+ sb $in1, 4($mac)
+ sb $tmp0,5($mac)
+ srl $tmp0,$in2,8
+ sb $tmp1,6($mac)
+ srl $tmp1,$in2,16
+ sb $tmp2,7($mac)
+ srl $tmp2,$in2,24
+ sb $in2, 8($mac)
+ sb $tmp0,9($mac)
+ srl $tmp0,$in3,8
+ sb $tmp1,10($mac)
+ srl $tmp1,$in3,16
+ sb $tmp2,11($mac)
+ srl $tmp2,$in3,24
+ sb $in3, 12($mac)
+ sb $tmp0,13($mac)
+ sb $tmp1,14($mac)
+ sb $tmp2,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}}
+
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 15cfb02c3e49..0be5b4092f18 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -707,6 +707,11 @@ config CRYPTO_POLY1305_X86_64
in IETF protocols. This is the x86_64 assembler implementation using SIMD
instructions.
+config CRYPTO_POLY1305_MIPS
+ tristate "Poly1305 authenticator algorithm (MIPS optimized)"
+ depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
config CRYPTO_MD4
tristate "MD4 digest algorithm"
select CRYPTO_HASH
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 9bd15b227e78..d15ec5382986 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
+ default 2 if MIPS
default 4 if X86_64
default 9 if ARM || ARM64
default 1
--
2.18.2
From 8f7e17a4015605a94c6c26c1bec0f22b8490a8e3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:28 +0100
Subject: [PATCH 021/100] crypto: blake2s - generic C library implementation
and selftest
commit 66d7fb94e4ffe5acc589e0b2b4710aecc1f07a28 upstream.
The C implementation was originally based on Samuel Neves' public
domain reference implementation but has since been heavily modified
for the kernel. We're able to do compile-time optimizations by moving
some scaffolding around the final function into the header file.
Information: https://blake2.net/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
[ardb: - move from lib/zinc to lib/crypto
- remove simd handling
- rewrote selftest for better coverage
- use fixed digest length for blake2s_hmac() and rename to
blake2s256_hmac() ]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/crypto/blake2s.h | 106 +++++
include/crypto/internal/blake2s.h | 19 +
lib/crypto/Kconfig | 25 ++
lib/crypto/Makefile | 10 +
lib/crypto/blake2s-generic.c | 111 ++++++
lib/crypto/blake2s-selftest.c | 622 ++++++++++++++++++++++++++++++
lib/crypto/blake2s.c | 126 ++++++
7 files changed, 1019 insertions(+)
create mode 100644 include/crypto/blake2s.h
create mode 100644 include/crypto/internal/blake2s.h
create mode 100644 lib/crypto/blake2s-generic.c
create mode 100644 lib/crypto/blake2s-selftest.c
create mode 100644 lib/crypto/blake2s.c
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
new file mode 100644
index 000000000000..b471deac28ff
--- /dev/null
+++ b/include/crypto/blake2s.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef BLAKE2S_H
+#define BLAKE2S_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/bug.h>
+
+enum blake2s_lengths {
+ BLAKE2S_BLOCK_SIZE = 64,
+ BLAKE2S_HASH_SIZE = 32,
+ BLAKE2S_KEY_SIZE = 32,
+
+ BLAKE2S_128_HASH_SIZE = 16,
+ BLAKE2S_160_HASH_SIZE = 20,
+ BLAKE2S_224_HASH_SIZE = 28,
+ BLAKE2S_256_HASH_SIZE = 32,
+};
+
+struct blake2s_state {
+ u32 h[8];
+ u32 t[2];
+ u32 f[2];
+ u8 buf[BLAKE2S_BLOCK_SIZE];
+ unsigned int buflen;
+ unsigned int outlen;
+};
+
+enum blake2s_iv {
+ BLAKE2S_IV0 = 0x6A09E667UL,
+ BLAKE2S_IV1 = 0xBB67AE85UL,
+ BLAKE2S_IV2 = 0x3C6EF372UL,
+ BLAKE2S_IV3 = 0xA54FF53AUL,
+ BLAKE2S_IV4 = 0x510E527FUL,
+ BLAKE2S_IV5 = 0x9B05688CUL,
+ BLAKE2S_IV6 = 0x1F83D9ABUL,
+ BLAKE2S_IV7 = 0x5BE0CD19UL,
+};
+
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
+void blake2s_final(struct blake2s_state *state, u8 *out);
+
+static inline void blake2s_init_param(struct blake2s_state *state,
+ const u32 param)
+{
+ *state = (struct blake2s_state){{
+ BLAKE2S_IV0 ^ param,
+ BLAKE2S_IV1,
+ BLAKE2S_IV2,
+ BLAKE2S_IV3,
+ BLAKE2S_IV4,
+ BLAKE2S_IV5,
+ BLAKE2S_IV6,
+ BLAKE2S_IV7,
+ }};
+}
+
+static inline void blake2s_init(struct blake2s_state *state,
+ const size_t outlen)
+{
+ blake2s_init_param(state, 0x01010000 | outlen);
+ state->outlen = outlen;
+}
+
+static inline void blake2s_init_key(struct blake2s_state *state,
+ const size_t outlen, const void *key,
+ const size_t keylen)
+{
+ WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
+ !key || !keylen || keylen > BLAKE2S_KEY_SIZE));
+
+ blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
+ memcpy(state->buf, key, keylen);
+ state->buflen = BLAKE2S_BLOCK_SIZE;
+ state->outlen = outlen;
+}
+
+static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
+ const size_t outlen, const size_t inlen,
+ const size_t keylen)
+{
+ struct blake2s_state state;
+
+ WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
+ outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
+ (!key && keylen)));
+
+ if (keylen)
+ blake2s_init_key(&state, outlen, key, keylen);
+ else
+ blake2s_init(&state, outlen);
+
+ blake2s_update(&state, in, inlen);
+ blake2s_final(&state, out);
+}
+
+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
+ const size_t keylen);
+
+#endif /* BLAKE2S_H */
diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
new file mode 100644
index 000000000000..941693effc7d
--- /dev/null
+++ b/include/crypto/internal/blake2s.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#ifndef BLAKE2S_INTERNAL_H
+#define BLAKE2S_INTERNAL_H
+
+#include <crypto/blake2s.h>
+
+void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
+ size_t nblocks, const u32 inc);
+
+void blake2s_compress_arch(struct blake2s_state *state,const u8 *block,
+ size_t nblocks, const u32 inc);
+
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+ state->f[0] = -1;
+}
+
+#endif /* BLAKE2S_INTERNAL_H */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index d15ec5382986..7ad98b624e55 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -8,6 +8,31 @@ config CRYPTO_LIB_AES
config CRYPTO_LIB_ARC4
tristate
+config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ tristate
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the Blake2s library interface,
+ either builtin or as a module.
+
+config CRYPTO_LIB_BLAKE2S_GENERIC
+ tristate
+ help
+ This symbol can be depended upon by arch implementations of the
+ Blake2s library interface that require the generic code as a
+ fallback, e.g., for SIMD implementations. If no arch specific
+ implementation is enabled, this implementation serves the users
+ of CRYPTO_LIB_BLAKE2S.
+
+config CRYPTO_LIB_BLAKE2S
+ tristate "BLAKE2s hash function library"
+ depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n
+ help
+ Enable the Blake2s library interface. This interface may be fulfilled
+ by either the generic implementation or an arch-specific one, if one
+ is available and enabled.
+
config CRYPTO_ARCH_HAVE_LIB_CHACHA
tristate
help
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index b58ab6843a9d..8ca66b5f9807 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -10,6 +10,12 @@ libaes-y := aes.o
obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
libarc4-y := arc4.o
+obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o
+libblake2s-generic-y += blake2s-generic.o
+
+obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
+libblake2s-y += blake2s.o
+
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
libdes-y := des.o
@@ -18,3 +24,7 @@ libpoly1305-y := poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
+
+ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
+libblake2s-y += blake2s-selftest.o
+endif
diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c
new file mode 100644
index 000000000000..04ff8df24513
--- /dev/null
+++ b/lib/crypto/blake2s-generic.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the BLAKE2s hash and PRF functions.
+ *
+ * Information: https://blake2.net/
+ *
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+
+static const u8 blake2s_sigma[10][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+};
+
+static inline void blake2s_increment_counter(struct blake2s_state *state,
+ const u32 inc)
+{
+ state->t[0] += inc;
+ state->t[1] += (state->t[0] < inc);
+}
+
+void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
+ size_t nblocks, const u32 inc)
+{
+ u32 m[16];
+ u32 v[16];
+ int i;
+
+ WARN_ON(IS_ENABLED(DEBUG) &&
+ (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
+
+ while (nblocks > 0) {
+ blake2s_increment_counter(state, inc);
+ memcpy(m, block, BLAKE2S_BLOCK_SIZE);
+ le32_to_cpu_array(m, ARRAY_SIZE(m));
+ memcpy(v, state->h, 32);
+ v[ 8] = BLAKE2S_IV0;
+ v[ 9] = BLAKE2S_IV1;
+ v[10] = BLAKE2S_IV2;
+ v[11] = BLAKE2S_IV3;
+ v[12] = BLAKE2S_IV4 ^ state->t[0];
+ v[13] = BLAKE2S_IV5 ^ state->t[1];
+ v[14] = BLAKE2S_IV6 ^ state->f[0];
+ v[15] = BLAKE2S_IV7 ^ state->f[1];
+
+#define G(r, i, a, b, c, d) do { \
+ a += b + m[blake2s_sigma[r][2 * i + 0]]; \
+ d = ror32(d ^ a, 16); \
+ c += d; \
+ b = ror32(b ^ c, 12); \
+ a += b + m[blake2s_sigma[r][2 * i + 1]]; \
+ d = ror32(d ^ a, 8); \
+ c += d; \
+ b = ror32(b ^ c, 7); \
+} while (0)
+
+#define ROUND(r) do { \
+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+ G(r, 2, v[2], v[ 6], v[10], v[14]); \
+ G(r, 3, v[3], v[ 7], v[11], v[15]); \
+ G(r, 4, v[0], v[ 5], v[10], v[15]); \
+ G(r, 5, v[1], v[ 6], v[11], v[12]); \
+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+
+#undef G
+#undef ROUND
+
+ for (i = 0; i < 8; ++i)
+ state->h[i] ^= v[i] ^ v[i + 8];
+
+ block += BLAKE2S_BLOCK_SIZE;
+ --nblocks;
+ }
+}
+
+EXPORT_SYMBOL(blake2s_compress_generic);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("BLAKE2s hash function");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
new file mode 100644
index 000000000000..79ef404a990d
--- /dev/null
+++ b/lib/crypto/blake2s-selftest.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/blake2s.h>
+#include <linux/string.h>
+
+/*
+ * blake2s_testvecs[] generated with the program below (using libb2-dev and
+ * libssl-dev [OpenSSL])
+ *
+ * #include <blake2.h>
+ * #include <stdint.h>
+ * #include <stdio.h>
+ *
+ * #include <openssl/evp.h>
+ * #include <openssl/hmac.h>
+ *
+ * #define BLAKE2S_TESTVEC_COUNT 256
+ *
+ * static void print_vec(const uint8_t vec[], int len)
+ * {
+ * int i;
+ *
+ * printf(" { ");
+ * for (i = 0; i < len; i++) {
+ * if (i && (i % 12) == 0)
+ * printf("\n ");
+ * printf("0x%02x, ", vec[i]);
+ * }
+ * printf("},\n");
+ * }
+ *
+ * int main(void)
+ * {
+ * uint8_t key[BLAKE2S_KEYBYTES];
+ * uint8_t buf[BLAKE2S_TESTVEC_COUNT];
+ * uint8_t hash[BLAKE2S_OUTBYTES];
+ * int i, j;
+ *
+ * key[0] = key[1] = 1;
+ * for (i = 2; i < BLAKE2S_KEYBYTES; ++i)
+ * key[i] = key[i - 2] + key[i - 1];
+ *
+ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i)
+ * buf[i] = (uint8_t)i;
+ *
+ * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
+ *
+ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) {
+ * int outlen = 1 + i % BLAKE2S_OUTBYTES;
+ * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1);
+ *
+ * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i,
+ * keylen);
+ * print_vec(hash, outlen);
+ * }
+ * printf("};\n\n");
+ *
+ * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
+ *
+ * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL);
+ * print_vec(hash, BLAKE2S_OUTBYTES);
+ *
+ * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL);
+ * print_vec(hash, BLAKE2S_OUTBYTES);
+ *
+ * printf("};\n");
+ *
+ * return 0;
+ *}
+ */
+static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
+ { 0xa1, },
+ { 0x7c, 0x89, },
+ { 0x74, 0x0e, 0xd4, },
+ { 0x47, 0x0c, 0x21, 0x15, },
+ { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, },
+ { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, },
+ { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, },
+ { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, },
+ { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, },
+ { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, },
+ { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, },
+ { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, },
+ { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda,
+ 0xb7, },
+ { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25,
+ 0x52, 0x3e, },
+ { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc,
+ 0x57, 0xe2, 0x27, },
+ { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6,
+ 0x47, 0x2a, 0xc7, 0xf5, },
+ { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43,
+ 0xe7, 0x72, 0x08, 0xec, 0xbe, },
+ { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96,
+ 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, },
+ { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e,
+ 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, },
+ { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d,
+ 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, },
+ { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86,
+ 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, },
+ { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41,
+ 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, },
+ { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22,
+ 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, },
+ { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00,
+ 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, },
+ { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9,
+ 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13,
+ 0xd1, },
+ { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05,
+ 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07,
+ 0x01, 0x3e, },
+ { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74,
+ 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b,
+ 0xec, 0x13, 0xed, },
+ { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7,
+ 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37,
+ 0x72, 0x67, 0x09, 0xfc, },
+ { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38,
+ 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c,
+ 0x95, 0x29, 0x19, 0xf2, 0x4b, },
+ { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22,
+ 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30,
+ 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, },
+ { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e,
+ 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd,
+ 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, },
+ { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe,
+ 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61,
+ 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, },
+ { 0x0a, },
+ { 0x6e, 0xd4, },
+ { 0x64, 0xe9, 0xd1, },
+ { 0x30, 0xdd, 0x71, 0xef, },
+ { 0x11, 0xb5, 0x0c, 0x87, 0xc9, },
+ { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, },
+ { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, },
+ { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, },
+ { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, },
+ { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, },
+ { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, },
+ { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, },
+ { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf,
+ 0xe2, },
+ { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64,
+ 0x7e, 0xb0, },
+ { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51,
+ 0x98, 0x0a, 0x8d, },
+ { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04,
+ 0xf1, 0xc3, 0x94, 0xd8, },
+ { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7,
+ 0x2c, 0xe8, 0x8f, 0xc7, 0x34, },
+ { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41,
+ 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, },
+ { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81,
+ 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, },
+ { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4,
+ 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, },
+ { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a,
+ 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, },
+ { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb,
+ 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, },
+ { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9,
+ 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, },
+ { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9,
+ 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, },
+ { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e,
+ 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42,
+ 0xe3, },
+ { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9,
+ 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e,
+ 0xe3, 0x90, },
+ { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3,
+ 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10,
+ 0x58, 0x06, 0x9a, },
+ { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5,
+ 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d,
+ 0x53, 0x03, 0x1a, 0x39, },
+ { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0,
+ 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5,
+ 0xd4, 0x36, 0xb1, 0xac, 0x73, },
+ { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59,
+ 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90,
+ 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, },
+ { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28,
+ 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75,
+ 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, },
+ { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6,
+ 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77,
+ 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, },
+ { 0x9d, },
+ { 0x9d, 0x7d, },
+ { 0xfd, 0xc3, 0xda, },
+ { 0xe8, 0x82, 0xcd, 0x21, },
+ { 0xc3, 0x1d, 0x42, 0x4c, 0x74, },
+ { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, },
+ { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, },
+ { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, },
+ { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, },
+ { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, },
+ { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, },
+ { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, },
+ { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3,
+ 0x63, },
+ { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f,
+ 0xe6, 0xa9, },
+ { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a,
+ 0xf6, 0x0e, 0x20, },
+ { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39,
+ 0x18, 0x3e, 0xc7, 0xc3, },
+ { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c,
+ 0x92, 0xfc, 0x7f, 0x34, 0x41, },
+ { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb,
+ 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, },
+ { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96,
+ 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, },
+ { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6,
+ 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, },
+ { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba,
+ 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, },
+ { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0,
+ 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, },
+ { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d,
+ 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, },
+ { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59,
+ 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, },
+ { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89,
+ 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e,
+ 0xaf, },
+ { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc,
+ 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7,
+ 0xb1, 0x1d, },
+ { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9,
+ 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf,
+ 0xee, 0x6a, 0xbe, },
+ { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8,
+ 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd,
+ 0x5c, 0xef, 0xa3, 0x25, },
+ { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6,
+ 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8,
+ 0x4b, 0x0e, 0xe3, 0x94, 0x9f, },
+ { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc,
+ 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf,
+ 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, },
+ { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76,
+ 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46,
+ 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, },
+ { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02,
+ 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3,
+ 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, },
+ { 0x02, },
+ { 0x52, 0xa8, },
+ { 0x38, 0x25, 0x0d, },
+ { 0xe3, 0x04, 0xd4, 0x92, },
+ { 0x97, 0xdb, 0xf7, 0x81, 0xca, },
+ { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, },
+ { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, },
+ { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, },
+ { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, },
+ { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, },
+ { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, },
+ { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, },
+ { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa,
+ 0xd3, },
+ { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c,
+ 0xb7, 0x9a, },
+ { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31,
+ 0x3d, 0x47, 0x3d, },
+ { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0,
+ 0x24, 0xf0, 0x17, 0xc0, },
+ { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76,
+ 0xd2, 0xfd, 0x0c, 0x47, 0x40, },
+ { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae,
+ 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, },
+ { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee,
+ 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, },
+ { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d,
+ 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, },
+ { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61,
+ 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, },
+ { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd,
+ 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, },
+ { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5,
+ 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, },
+ { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17,
+ 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, },
+ { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c,
+ 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c,
+ 0xae, },
+ { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5,
+ 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d,
+ 0x59, 0x00, },
+ { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e,
+ 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c,
+ 0x5a, 0x2c, 0x3b, },
+ { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda,
+ 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c,
+ 0x10, 0x07, 0x2a, 0xc4, },
+ { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14,
+ 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf,
+ 0xee, 0xa1, 0x74, 0xd6, 0x71, },
+ { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33,
+ 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff,
+ 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, },
+ { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c,
+ 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44,
+ 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, },
+ { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff,
+ 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23,
+ 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, },
+ { 0xbe, },
+ { 0x17, 0x6c, },
+ { 0x1a, 0x42, 0x4f, },
+ { 0xba, 0xaf, 0xb7, 0x65, },
+ { 0xc2, 0x63, 0x43, 0x6a, 0xea, },
+ { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, },
+ { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, },
+ { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, },
+ { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, },
+ { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, },
+ { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, },
+ { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, },
+ { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92,
+ 0xe9, },
+ { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d,
+ 0xc4, 0xb3, },
+ { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60,
+ 0xd7, 0xd3, 0x5e, },
+ { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16,
+ 0xaf, 0x39, 0xbd, 0x92, },
+ { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10,
+ 0xd9, 0xa7, 0x66, 0x27, 0xcf, },
+ { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02,
+ 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, },
+ { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd,
+ 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, },
+ { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3,
+ 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, },
+ { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f,
+ 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, },
+ { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51,
+ 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, },
+ { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3,
+ 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, },
+ { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda,
+ 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, },
+ { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6,
+ 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a,
+ 0x26, },
+ { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24,
+ 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35,
+ 0xf4, 0x1d, },
+ { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9,
+ 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46,
+ 0x4e, 0x29, 0x26, },
+ { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09,
+ 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98,
+ 0x1a, 0x5b, 0xff, 0xc9, },
+ { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2,
+ 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69,
+ 0xbf, 0xf6, 0xbe, 0x3c, 0x40, },
+ { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31,
+ 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20,
+ 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, },
+ { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4,
+ 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30,
+ 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, },
+ { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0,
+ 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5,
+ 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, },
+ { 0x1f, },
+ { 0x82, 0x60, },
+ { 0xcc, 0xe3, 0x08, },
+ { 0x56, 0x17, 0xe4, 0x59, },
+ { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, },
+ { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, },
+ { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, },
+ { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, },
+ { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, },
+ { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, },
+ { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, },
+ { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, },
+ { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc,
+ 0x5b, },
+ { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0,
+ 0x90, 0x48, },
+ { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60,
+ 0x04, 0xd9, 0xd5, },
+ { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47,
+ 0xe5, 0x31, 0x06, 0x70, },
+ { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c,
+ 0x70, 0x25, 0xdb, 0x33, 0x27, },
+ { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a,
+ 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, },
+ { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3,
+ 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, },
+ { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7,
+ 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, },
+ { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac,
+ 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, },
+ { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98,
+ 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, },
+ { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11,
+ 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, },
+ { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a,
+ 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, },
+ { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41,
+ 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70,
+ 0x88, },
+ { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4,
+ 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3,
+ 0xc6, 0xbb, },
+ { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55,
+ 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5,
+ 0x55, 0xfd, 0x4f, },
+ { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e,
+ 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2,
+ 0x42, 0x64, 0x3b, 0x1a, },
+ { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95,
+ 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5,
+ 0x1d, 0x60, 0x8f, 0x8a, 0x1d, },
+ { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4,
+ 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d,
+ 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, },
+ { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b,
+ 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3,
+ 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, },
+ { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf,
+ 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f,
+ 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, },
+ { 0x60, },
+ { 0x24, 0x26, },
+ { 0x47, 0xeb, 0xc9, },
+ { 0x4a, 0xd0, 0xbc, 0xf0, },
+ { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, },
+ { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, },
+ { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, },
+ { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, },
+ { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, },
+ { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, },
+ { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, },
+ { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, },
+ { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91,
+ 0x8d, },
+ { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33,
+ 0xbf, 0xa0, },
+ { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c,
+ 0xd5, 0x4b, 0xed, },
+ { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44,
+ 0xc8, 0x24, 0x0a, 0xb7, },
+ { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75,
+ 0xb2, 0x15, 0xd1, 0xb1, 0x10, },
+ { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35,
+ 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, },
+ { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9,
+ 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, },
+ { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47,
+ 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, },
+ { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35,
+ 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, },
+ { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1,
+ 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, },
+ { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21,
+ 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, },
+ { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4,
+ 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, },
+ { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7,
+ 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7,
+ 0xd3, },
+ { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb,
+ 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16,
+ 0xa6, 0xd6, },
+ { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80,
+ 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88,
+ 0x55, 0xd1, 0xa9, },
+ { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54,
+ 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46,
+ 0xef, 0xb0, 0x00, 0x8d, },
+ { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8,
+ 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94,
+ 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, },
+ { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4,
+ 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67,
+ 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, },
+ { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02,
+ 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04,
+ 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, },
+ { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28,
+ 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca,
+ 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, },
+ { 0x7e, },
+ { 0x1e, 0x21, },
+ { 0x26, 0xd3, 0xdd, },
+ { 0x2c, 0xd4, 0xb3, 0x3d, },
+ { 0x86, 0x7b, 0x76, 0x3c, 0xf0, },
+ { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, },
+ { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, },
+ { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, },
+ { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, },
+ { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, },
+ { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, },
+ { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, },
+ { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77,
+ 0x66, },
+ { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31,
+ 0x55, 0x66, },
+ { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12,
+ 0x43, 0xbf, 0xa1, },
+ { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e,
+ 0x95, 0x8a, 0xc5, 0xed, },
+ { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef,
+ 0xf6, 0x2b, 0x3a, 0xba, 0x60, },
+ { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2,
+ 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, },
+ { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b,
+ 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, },
+ { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94,
+ 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, },
+ { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b,
+ 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, },
+ { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf,
+ 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, },
+ { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8,
+ 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, },
+ { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea,
+ 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, },
+ { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2,
+ 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0,
+ 0xd7, },
+ { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b,
+ 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd,
+ 0xb6, 0xef, },
+ { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6,
+ 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57,
+ 0xef, 0xf8, 0x53, },
+ { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46,
+ 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89,
+ 0x89, 0xfd, 0xf7, 0x99, },
+ { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03,
+ 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a,
+ 0xa9, 0x8f, 0x2b, 0x59, 0xfb, },
+ { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb,
+ 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d,
+ 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, },
+ { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0,
+ 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d,
+ 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, },
+ { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c,
+ 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92,
+ 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
+};
+
+static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
+ { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70,
+ 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79,
+ 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, },
+ { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9,
+ 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f,
+ 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, },
+};
+
+bool __init blake2s_selftest(void)
+{
+ u8 key[BLAKE2S_KEY_SIZE];
+ u8 buf[ARRAY_SIZE(blake2s_testvecs)];
+ u8 hash[BLAKE2S_HASH_SIZE];
+ struct blake2s_state state;
+ bool success = true;
+ int i, l;
+
+ key[0] = key[1] = 1;
+ for (i = 2; i < sizeof(key); ++i)
+ key[i] = key[i - 2] + key[i - 1];
+
+ for (i = 0; i < sizeof(buf); ++i)
+ buf[i] = (u8)i;
+
+ for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) {
+ int outlen = 1 + i % BLAKE2S_HASH_SIZE;
+ int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1);
+
+ blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i,
+ keylen);
+ if (memcmp(hash, blake2s_testvecs[i], outlen)) {
+ pr_err("blake2s self-test %d: FAIL\n", i + 1);
+ success = false;
+ }
+
+ if (!keylen)
+ blake2s_init(&state, outlen);
+ else
+ blake2s_init_key(&state, outlen,
+ key + BLAKE2S_KEY_SIZE - keylen,
+ keylen);
+
+ blake2s_update(&state, buf, l);
+ blake2s_update(&state, buf + l, i - l);
+ blake2s_final(&state, hash);
+ if (memcmp(hash, blake2s_testvecs[i], outlen)) {
+ pr_err("blake2s init/update/final self-test %d: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+
+ if (success) {
+ blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key));
+ success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE);
+
+ blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf));
+ success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE);
+
+ if (!success)
+ pr_err("blake2s256_hmac self-test: FAIL\n");
+ }
+
+ return success;
+}
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
new file mode 100644
index 000000000000..41025a30c524
--- /dev/null
+++ b/lib/crypto/blake2s.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the BLAKE2s hash and PRF functions.
+ *
+ * Information: https://blake2.net/
+ *
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+
+bool blake2s_selftest(void);
+
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+{
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+ if (unlikely(!inlen))
+ return;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
+ blake2s_compress_arch(state, state->buf, 1,
+ BLAKE2S_BLOCK_SIZE);
+ else
+ blake2s_compress_generic(state, state->buf, 1,
+ BLAKE2S_BLOCK_SIZE);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+ /* Hash one less (full) block than strictly possible */
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
+ blake2s_compress_arch(state, in, nblocks - 1,
+ BLAKE2S_BLOCK_SIZE);
+ else
+ blake2s_compress_generic(state, in, nblocks - 1,
+ BLAKE2S_BLOCK_SIZE);
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+}
+EXPORT_SYMBOL(blake2s_update);
+
+void blake2s_final(struct blake2s_state *state, u8 *out)
+{
+ WARN_ON(IS_ENABLED(DEBUG) && !out);
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
+ else
+ blake2s_compress_generic(state, state->buf, 1, state->buflen);
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+}
+EXPORT_SYMBOL(blake2s_final);
+
+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
+ const size_t keylen)
+{
+ struct blake2s_state state;
+ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
+ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
+ int i;
+
+ if (keylen > BLAKE2S_BLOCK_SIZE) {
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, key, keylen);
+ blake2s_final(&state, x_key);
+ } else
+ memcpy(x_key, key, keylen);
+
+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+ x_key[i] ^= 0x36;
+
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&state, in, inlen);
+ blake2s_final(&state, i_hash);
+
+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+ x_key[i] ^= 0x5c ^ 0x36;
+
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
+ blake2s_final(&state, i_hash);
+
+ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
+ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
+ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
+}
+EXPORT_SYMBOL(blake2s256_hmac);
+
+static int __init mod_init(void)
+{
+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ WARN_ON(!blake2s_selftest()))
+ return -ENODEV;
+ return 0;
+}
+
+static void __exit mod_exit(void)
+{
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("BLAKE2s hash function");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
--
2.18.2
From 9d6b3f8f7dd184396b6dff0fb80d53f631b81dfe Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:29 +0100
Subject: [PATCH 022/100] crypto: testmgr - add test cases for Blake2s
commit 17e1df67023a5c9ccaeb5de8bf5b88f63127ecf7 upstream.
As suggested by Eric for the Blake2b implementation contributed by
David, introduce a set of test vectors for Blake2s covering different
digest and key sizes.
blake2s-128 blake2s-160 blake2s-224 blake2s-256
---------------------------------------------------
len=0 | klen=0 klen=1 klen=16 klen=32
len=1 | klen=16 klen=32 klen=0 klen=1
len=7 | klen=32 klen=0 klen=1 klen=16
len=15 | klen=1 klen=16 klen=32 klen=0
len=64 | klen=0 klen=1 klen=16 klen=32
len=247 | klen=16 klen=32 klen=0 klen=1
len=256 | klen=32 klen=0 klen=1 klen=16
Cc: David Sterba <dsterba@suse.com>
Cc: Eric Biggers <ebiggers@google.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/testmgr.c | 24 +++++
crypto/testmgr.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 275 insertions(+)
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 7473c5bc06b1..711390861f71 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4034,6 +4034,30 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "authenc(hmac(sha512),rfc3686(ctr(aes)))",
.test = alg_test_null,
.fips_allowed = 1,
+ }, {
+ .alg = "blake2s-128",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(blakes2s_128_tv_template)
+ }
+ }, {
+ .alg = "blake2s-160",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(blakes2s_160_tv_template)
+ }
+ }, {
+ .alg = "blake2s-224",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(blakes2s_224_tv_template)
+ }
+ }, {
+ .alg = "blake2s-256",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(blakes2s_256_tv_template)
+ }
}, {
.alg = "cbc(aes)",
.test = alg_test_skcipher,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index ef7d21f39d4a..102fcad54966 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -31567,4 +31567,255 @@ static const struct aead_testvec essiv_hmac_sha256_aes_cbc_tv_temp[] = {
},
};
+static const char blake2_ordered_sequence[] =
+ "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x21\x22\x23\x24\x25\x26\x27"
+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+ "\x40\x41\x42\x43\x44\x45\x46\x47"
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+ "\x50\x51\x52\x53\x54\x55\x56\x57"
+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+ "\x60\x61\x62\x63\x64\x65\x66\x67"
+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+ "\x70\x71\x72\x73\x74\x75\x76\x77"
+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+ "\x80\x81\x82\x83\x84\x85\x86\x87"
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+ "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
+
+static const struct hash_testvec blakes2s_128_tv_template[] = {{
+ .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
+ 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
+}, {
+ .plaintext = blake2_ordered_sequence,
+ .psize = 64,
+ .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
+ 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 1,
+ .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
+ 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 7,
+ .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
+ 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 15,
+ .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
+ 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 247,
+ .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
+ 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 256,
+ .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
+ 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
+}};
+
+static const struct hash_testvec blakes2s_160_tv_template[] = {{
+ .plaintext = blake2_ordered_sequence,
+ .psize = 7,
+ .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
+ 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
+ 0xe3, 0xf2, 0x84, 0xff, },
+}, {
+ .plaintext = blake2_ordered_sequence,
+ .psize = 256,
+ .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
+ 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
+ 0x9b, 0x2d, 0x35, 0x05, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
+ 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
+ 0x79, 0x65, 0x32, 0x93, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 1,
+ .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
+ 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
+ 0xa2, 0x3a, 0x56, 0x9c, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 15,
+ .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
+ 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
+ 0x83, 0x39, 0x0f, 0x30, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 64,
+ .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
+ 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
+ 0xac, 0xa6, 0x81, 0x63, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 247,
+ .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
+ 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
+ 0x0a, 0xf6, 0x73, 0xe8, },
+}};
+
+static const struct hash_testvec blakes2s_224_tv_template[] = {{
+ .plaintext = blake2_ordered_sequence,
+ .psize = 1,
+ .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
+ 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
+ 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
+ 0x48, 0x21, 0x97, 0xbb, },
+}, {
+ .plaintext = blake2_ordered_sequence,
+ .psize = 247,
+ .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
+ 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
+ 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
+ 0x2b, 0xa4, 0xd5, 0xf6, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
+ 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
+ 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
+ 0xa7, 0x19, 0xfc, 0xb8, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 7,
+ .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
+ 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
+ 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
+ 0x7b, 0x45, 0xfe, 0x05, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 15,
+ .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
+ 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
+ 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
+ 0x25, 0xab, 0xc5, 0x02, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 64,
+ .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
+ 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
+ 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
+ 0x6a, 0x31, 0x83, 0xb5, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 256,
+ .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
+ 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
+ 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
+ 0xb3, 0xd7, 0xec, 0xcc, },
+}};
+
+static const struct hash_testvec blakes2s_256_tv_template[] = {{
+ .plaintext = blake2_ordered_sequence,
+ .psize = 15,
+ .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+ 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+ 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+ 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+ 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+ 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+ 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 1,
+ .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
+ 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
+ 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
+ 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 7,
+ .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
+ 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
+ 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
+ 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
+}, {
+ .ksize = 32,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 64,
+ .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+ 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+ 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+ 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
+}, {
+ .ksize = 1,
+ .key = "B",
+ .plaintext = blake2_ordered_sequence,
+ .psize = 247,
+ .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
+ 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
+ 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
+ 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
+}, {
+ .ksize = 16,
+ .key = blake2_ordered_sequence,
+ .plaintext = blake2_ordered_sequence,
+ .psize = 256,
+ .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
+ 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
+ 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
+ 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
+}};
+
#endif /* _CRYPTO_TESTMGR_H */
--
2.18.2
From ae1e1578754f22af47a32fde440803ad6f4c96d2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:30 +0100
Subject: [PATCH 023/100] crypto: blake2s - implement generic shash driver
commit 7f9b0880925f1f9d7d59504ea0892d2ae9cfc233 upstream.
Wire up our newly added Blake2s implementation via the shash API.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/Kconfig | 18 ++++
crypto/Makefile | 1 +
crypto/blake2s_generic.c | 171 ++++++++++++++++++++++++++++++
include/crypto/internal/blake2s.h | 5 +
4 files changed, 195 insertions(+)
create mode 100644 crypto/blake2s_generic.c
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 0be5b4092f18..81c8a4059afc 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -639,6 +639,24 @@ config CRYPTO_XXHASH
xxHash non-cryptographic hash algorithm. Extremely fast, working at
speeds close to RAM limits.
+config CRYPTO_BLAKE2S
+ tristate "BLAKE2s digest algorithm"
+ select CRYPTO_LIB_BLAKE2S_GENERIC
+ select CRYPTO_HASH
+ help
+ Implementation of cryptographic hash function BLAKE2s
+ optimized for 8-32bit platforms and can produce digests of any size
+ between 1 to 32. The keyed hash is also implemented.
+
+ This module provides the following algorithms:
+
+ - blake2s-128
+ - blake2s-160
+ - blake2s-224
+ - blake2s-256
+
+ See https://blake2.net for further information.
+
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF algorithm"
select CRYPTO_HASH
diff --git a/crypto/Makefile b/crypto/Makefile
index aa740c8492b9..fd27edea7c8e 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
+obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
new file mode 100644
index 000000000000..ed0c74640470
--- /dev/null
+++ b/crypto/blake2s_generic.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(tctx->key, key, keylen);
+ tctx->keylen = keylen;
+
+ return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const int outlen = crypto_shash_digestsize(desc->tfm);
+
+ if (tctx->keylen)
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+ else
+ blake2s_init(state, outlen);
+
+ return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+ if (unlikely(!inlen))
+ return 0;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+ /* Hash one less (full) block than strictly possible */
+ blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+
+ return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+ blake2s_compress_generic(state, state->buf, 1, state->buflen);
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+
+ return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+ .base.cra_name = "blake2s-128",
+ .base.cra_driver_name = "blake2s-128-generic",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_128_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-160",
+ .base.cra_driver_name = "blake2s-160-generic",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_160_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-224",
+ .base.cra_driver_name = "blake2s-224-generic",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_224_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-256",
+ .base.cra_driver_name = "blake2s-256-generic",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_256_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+subsys_initcall(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-generic");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-generic");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-generic");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-generic");
+MODULE_LICENSE("GPL v2");
diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
index 941693effc7d..74ff77032e52 100644
--- a/include/crypto/internal/blake2s.h
+++ b/include/crypto/internal/blake2s.h
@@ -5,6 +5,11 @@
#include <crypto/blake2s.h>
+struct blake2s_tfm_ctx {
+ u8 key[BLAKE2S_KEY_SIZE];
+ unsigned int keylen;
+};
+
void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
size_t nblocks, const u32 inc);
--
2.18.2
From 5d9e7e09d015f1cc5d3ae6d1e4553f11ef79dca1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:31 +0100
Subject: [PATCH 024/100] crypto: blake2s - x86_64 SIMD implementation
commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream.
These implementations from Samuel Neves support AVX and AVX-512VL.
Originally this used AVX-512F, but Skylake thermal throttling made
AVX-512VL more attractive and possible to do with negligable difference.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
[ardb: move to arch/x86/crypto, wire into lib/crypto framework]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/Makefile | 2 +
arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++
arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++
crypto/Kconfig | 6 +
4 files changed, 499 insertions(+)
create mode 100644 arch/x86/crypto/blake2s-core.S
create mode 100644 arch/x86/crypto/blake2s-glue.c
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 759b1a927826..922c8ecfa00f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
endif
# These modules require assembler to support AVX2.
@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
new file mode 100644
index 000000000000..8591938eee26
--- /dev/null
+++ b/arch/x86/crypto/blake2s-core.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
+.align 32
+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
+.align 16
+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
+.align 64
+SIGMA:
+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
+#ifdef CONFIG_AS_AVX512
+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
+.align 64
+SIGMA2:
+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
+#endif /* CONFIG_AS_AVX512 */
+
+.text
+#ifdef CONFIG_AS_SSSE3
+ENTRY(blake2s_compress_ssse3)
+ testq %rdx,%rdx
+ je .Lendofloop
+ movdqu (%rdi),%xmm0
+ movdqu 0x10(%rdi),%xmm1
+ movdqa ROT16(%rip),%xmm12
+ movdqa ROR328(%rip),%xmm13
+ movdqu 0x20(%rdi),%xmm14
+ movq %rcx,%xmm15
+ leaq SIGMA+0xa0(%rip),%r8
+ jmp .Lbeginofloop
+ .align 32
+.Lbeginofloop:
+ movdqa %xmm0,%xmm10
+ movdqa %xmm1,%xmm11
+ paddq %xmm15,%xmm14
+ movdqa IV(%rip),%xmm2
+ movdqa %xmm14,%xmm3
+ pxor IV+0x10(%rip),%xmm3
+ leaq SIGMA(%rip),%rcx
+.Lroundloop:
+ movzbl (%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0x1(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x2(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x3(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ punpckldq %xmm5,%xmm4
+ punpckldq %xmm7,%xmm6
+ punpcklqdq %xmm6,%xmm4
+ paddd %xmm4,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0x4(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x5(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x6(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0x7(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ punpckldq %xmm6,%xmm5
+ punpckldq %xmm4,%xmm7
+ punpcklqdq %xmm7,%xmm5
+ paddd %xmm5,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x93,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ movzbl 0x8(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x9(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xa(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xb(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ punpckldq %xmm7,%xmm6
+ punpckldq %xmm5,%xmm4
+ punpcklqdq %xmm4,%xmm6
+ paddd %xmm6,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0xc(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xd(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xe(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0xf(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ punpckldq %xmm4,%xmm7
+ punpckldq %xmm6,%xmm5
+ punpcklqdq %xmm5,%xmm7
+ paddd %xmm7,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x93,%xmm2,%xmm2
+ addq $0x10,%rcx
+ cmpq %r8,%rcx
+ jnz .Lroundloop
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm1
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm1
+ addq $0x40,%rsi
+ decq %rdx
+ jnz .Lbeginofloop
+ movdqu %xmm0,(%rdi)
+ movdqu %xmm1,0x10(%rdi)
+ movdqu %xmm14,0x20(%rdi)
+.Lendofloop:
+ ret
+ENDPROC(blake2s_compress_ssse3)
+#endif /* CONFIG_AS_SSSE3 */
+
+#ifdef CONFIG_AS_AVX512
+ENTRY(blake2s_compress_avx512)
+ vmovdqu (%rdi),%xmm0
+ vmovdqu 0x10(%rdi),%xmm1
+ vmovdqu 0x20(%rdi),%xmm4
+ vmovq %rcx,%xmm5
+ vmovdqa IV(%rip),%xmm14
+ vmovdqa IV+16(%rip),%xmm15
+ jmp .Lblake2s_compress_avx512_mainloop
+.align 32
+.Lblake2s_compress_avx512_mainloop:
+ vmovdqa %xmm0,%xmm10
+ vmovdqa %xmm1,%xmm11
+ vpaddq %xmm5,%xmm4,%xmm4
+ vmovdqa %xmm14,%xmm2
+ vpxor %xmm15,%xmm4,%xmm3
+ vmovdqu (%rsi),%ymm6
+ vmovdqu 0x20(%rsi),%ymm7
+ addq $0x40,%rsi
+ leaq SIGMA2(%rip),%rax
+ movb $0xa,%cl
+.Lblake2s_compress_avx512_roundloop:
+ addq $0x40,%rax
+ vmovdqa -0x40(%rax),%ymm8
+ vmovdqa -0x20(%rax),%ymm9
+ vpermi2d %ymm7,%ymm6,%ymm8
+ vpermi2d %ymm7,%ymm6,%ymm9
+ vmovdqa %ymm8,%ymm6
+ vmovdqa %ymm9,%ymm7
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm8,%xmm8
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x93,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x39,%xmm2,%xmm2
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm9,%xmm9
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x39,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x93,%xmm2,%xmm2
+ decb %cl
+ jne .Lblake2s_compress_avx512_roundloop
+ vpxor %xmm10,%xmm0,%xmm0
+ vpxor %xmm11,%xmm1,%xmm1
+ vpxor %xmm2,%xmm0,%xmm0
+ vpxor %xmm3,%xmm1,%xmm1
+ decq %rdx
+ jne .Lblake2s_compress_avx512_mainloop
+ vmovdqu %xmm0,(%rdi)
+ vmovdqu %xmm1,0x10(%rdi)
+ vmovdqu %xmm4,0x20(%rdi)
+ vzeroupper
+ retq
+ENDPROC(blake2s_compress_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
new file mode 100644
index 000000000000..4a37ba7cdbe5
--- /dev/null
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
+#include <asm/processor.h>
+#include <asm/simd.h>
+
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+void blake2s_compress_arch(struct blake2s_state *state,
+ const u8 *block, size_t nblocks,
+ const u32 inc)
+{
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+
+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
+ blake2s_compress_generic(state, block, nblocks, inc);
+ return;
+ }
+
+ for (;;) {
+ const size_t blocks = min_t(size_t, nblocks,
+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+
+ kernel_fpu_begin();
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&blake2s_use_avx512))
+ blake2s_compress_avx512(state, block, blocks, inc);
+ else
+ blake2s_compress_ssse3(state, block, blocks, inc);
+ kernel_fpu_end();
+
+ nblocks -= blocks;
+ if (!nblocks)
+ break;
+ block += blocks * BLAKE2S_BLOCK_SIZE;
+ }
+}
+EXPORT_SYMBOL(blake2s_compress_arch);
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(tctx->key, key, keylen);
+ tctx->keylen = keylen;
+
+ return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const int outlen = crypto_shash_digestsize(desc->tfm);
+
+ if (tctx->keylen)
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+ else
+ blake2s_init(state, outlen);
+
+ return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+ if (unlikely(!inlen))
+ return 0;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+ /* Hash one less (full) block than strictly possible */
+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+
+ return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+
+ return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+ .base.cra_name = "blake2s-128",
+ .base.cra_driver_name = "blake2s-128-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_128_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-160",
+ .base.cra_driver_name = "blake2s-160-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_160_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-224",
+ .base.cra_driver_name = "blake2s-224-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_224_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-256",
+ .base.cra_driver_name = "blake2s-256-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_256_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+ return 0;
+
+ static_branch_enable(&blake2s_use_ssse3);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, NULL))
+ static_branch_enable(&blake2s_use_avx512);
+
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 81c8a4059afc..8fd3954bf64c 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S
See https://blake2.net for further information.
+config CRYPTO_BLAKE2S_X86
+ tristate "BLAKE2s digest algorithm (x86 accelerated version)"
+ depends on X86 && 64BIT
+ select CRYPTO_LIB_BLAKE2S_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF algorithm"
select CRYPTO_HASH
--
2.18.2
From 854d7c4e760cb5841345ca2b90a451dedf784deb Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:32 +0100
Subject: [PATCH 025/100] crypto: curve25519 - generic C library
implementations
commit 0ed42a6f431e930b2e8fae21955406e09fe75d70 upstream.
This contains two formally verified C implementations of the Curve25519
scalar multiplication function, one for 32-bit systems, and one for
64-bit systems whose compiler supports efficient 128-bit integer types.
Not only are these implementations formally verified, but they are also
the fastest available C implementations. They have been modified to be
friendly to kernel space and to be generally less horrendous looking,
but still an effort has been made to retain their formally verified
characteristic, and so the C might look slightly unidiomatic.
The 64-bit version comes from HACL*: https://github.com/project-everest/hacl-star
The 32-bit version comes from Fiat: https://github.com/mit-plv/fiat-crypto
Information: https://cr.yp.to/ecdh.html
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
[ardb: - move from lib/zinc to lib/crypto
- replace .c #includes with Kconfig based object selection
- drop simd handling and simplify support for per-arch versions ]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/crypto/curve25519.h | 71 +++
lib/crypto/Kconfig | 25 +
lib/crypto/Makefile | 5 +
lib/crypto/curve25519-fiat32.c | 864 +++++++++++++++++++++++++++++++++
lib/crypto/curve25519-hacl64.c | 788 ++++++++++++++++++++++++++++++
lib/crypto/curve25519.c | 25 +
6 files changed, 1778 insertions(+)
create mode 100644 include/crypto/curve25519.h
create mode 100644 lib/crypto/curve25519-fiat32.c
create mode 100644 lib/crypto/curve25519-hacl64.c
create mode 100644 lib/crypto/curve25519.c
diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
new file mode 100644
index 000000000000..4e6dc840b159
--- /dev/null
+++ b/include/crypto/curve25519.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef CURVE25519_H
+#define CURVE25519_H
+
+#include <crypto/algapi.h> // For crypto_memneq.
+#include <linux/types.h>
+#include <linux/random.h>
+
+enum curve25519_lengths {
+ CURVE25519_KEY_SIZE = 32
+};
+
+extern const u8 curve25519_null_point[];
+extern const u8 curve25519_base_point[];
+
+void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE]);
+
+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE]);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE]);
+
+static inline
+bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ curve25519_arch(mypublic, secret, basepoint);
+ else
+ curve25519_generic(mypublic, secret, basepoint);
+ return crypto_memneq(mypublic, curve25519_null_point,
+ CURVE25519_KEY_SIZE);
+}
+
+static inline bool
+__must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ if (unlikely(!crypto_memneq(secret, curve25519_null_point,
+ CURVE25519_KEY_SIZE)))
+ return false;
+
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ curve25519_base_arch(pub, secret);
+ else
+ curve25519_generic(pub, secret, curve25519_base_point);
+ return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE);
+}
+
+static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
+{
+ secret[0] &= 248;
+ secret[31] = (secret[31] & 127) | 64;
+}
+
+static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
+{
+ get_random_bytes_wait(secret, CURVE25519_KEY_SIZE);
+ curve25519_clamp_secret(secret);
+}
+
+#endif /* CURVE25519_H */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 7ad98b624e55..b1d830dc1c9e 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -59,6 +59,31 @@ config CRYPTO_LIB_CHACHA
by either the generic implementation or an arch-specific one, if one
is available and enabled.
+config CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ tristate
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the Curve25519 library interface,
+ either builtin or as a module.
+
+config CRYPTO_LIB_CURVE25519_GENERIC
+ tristate
+ help
+ This symbol can be depended upon by arch implementations of the
+ Curve25519 library interface that require the generic code as a
+ fallback, e.g., for SIMD implementations. If no arch specific
+ implementation is enabled, this implementation serves the users
+ of CRYPTO_LIB_CURVE25519.
+
+config CRYPTO_LIB_CURVE25519
+ tristate "Curve25519 scalar multiplication library"
+ depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
+ help
+ Enable the Curve25519 library interface. This interface may be
+ fulfilled by either the generic implementation or an arch-specific
+ one, if one is available and enabled.
+
config CRYPTO_LIB_DES
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 8ca66b5f9807..273c55d5e147 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -16,6 +16,11 @@ libblake2s-generic-y += blake2s-generic.o
obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
libblake2s-y += blake2s.o
+obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
+libcurve25519-y := curve25519-fiat32.o
+libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
+libcurve25519-y += curve25519.o
+
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
libdes-y := des.o
diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c
new file mode 100644
index 000000000000..1c455207341d
--- /dev/null
+++ b/lib/crypto/curve25519-fiat32.c
@@ -0,0 +1,864 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2016 The fiat-crypto Authors.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is a machine-generated formally verified implementation of Curve25519
+ * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
+ * machine generated, it has been tweaked to be suitable for use in the kernel.
+ * It is optimized for 32-bit machines and machines that cannot work efficiently
+ * with 128-bit integer types.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/curve25519.h>
+#include <linux/string.h>
+
+/* fe means field element. Here the field is \Z/(2^255-19). An element t,
+ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+ * t[3]+2^102 t[4]+...+2^230 t[9].
+ * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
+ * Multiplication and carrying produce fe from fe_loose.
+ */
+typedef struct fe { u32 v[10]; } fe;
+
+/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
+ * Addition and subtraction produce fe_loose from (fe, fe).
+ */
+typedef struct fe_loose { u32 v[10]; } fe_loose;
+
+static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
+{
+ /* Ignores top bit of s. */
+ u32 a0 = get_unaligned_le32(s);
+ u32 a1 = get_unaligned_le32(s+4);
+ u32 a2 = get_unaligned_le32(s+8);
+ u32 a3 = get_unaligned_le32(s+12);
+ u32 a4 = get_unaligned_le32(s+16);
+ u32 a5 = get_unaligned_le32(s+20);
+ u32 a6 = get_unaligned_le32(s+24);
+ u32 a7 = get_unaligned_le32(s+28);
+ h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
+ h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
+ h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
+ h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
+ h[4] = (a3>> 6); /* (32- 6) = 26 */
+ h[5] = a4&((1<<25)-1); /* 25 */
+ h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
+ h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
+ h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
+ h[9] = (a7>> 6)&((1<<25)-1); /* 25 */
+}
+
+static __always_inline void fe_frombytes(fe *h, const u8 *s)
+{
+ fe_frombytes_impl(h->v, s);
+}
+
+static __always_inline u8 /*bool*/
+addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
+{
+ /* This function extracts 25 bits of result and 1 bit of carry
+ * (26 total), so a 32-bit intermediate is sufficient.
+ */
+ u32 x = a + b + c;
+ *low = x & ((1 << 25) - 1);
+ return (x >> 25) & 1;
+}
+
+static __always_inline u8 /*bool*/
+addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
+{
+ /* This function extracts 26 bits of result and 1 bit of carry
+ * (27 total), so a 32-bit intermediate is sufficient.
+ */
+ u32 x = a + b + c;
+ *low = x & ((1 << 26) - 1);
+ return (x >> 26) & 1;
+}
+
+static __always_inline u8 /*bool*/
+subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
+{
+ /* This function extracts 25 bits of result and 1 bit of borrow
+ * (26 total), so a 32-bit intermediate is sufficient.
+ */
+ u32 x = a - b - c;
+ *low = x & ((1 << 25) - 1);
+ return x >> 31;
+}
+
+static __always_inline u8 /*bool*/
+subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
+{
+ /* This function extracts 26 bits of result and 1 bit of borrow
+ *(27 total), so a 32-bit intermediate is sufficient.
+ */
+ u32 x = a - b - c;
+ *low = x & ((1 << 26) - 1);
+ return x >> 31;
+}
+
+static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
+{
+ t = -!!t; /* all set if nonzero, 0 if 0 */
+ return (t&nz) | ((~t)&z);
+}
+
+static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
+{
+ { const u32 x17 = in1[9];
+ { const u32 x18 = in1[8];
+ { const u32 x16 = in1[7];
+ { const u32 x14 = in1[6];
+ { const u32 x12 = in1[5];
+ { const u32 x10 = in1[4];
+ { const u32 x8 = in1[3];
+ { const u32 x6 = in1[2];
+ { const u32 x4 = in1[1];
+ { const u32 x2 = in1[0];
+ { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
+ { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
+ { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
+ { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
+ { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
+ { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
+ { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
+ { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
+ { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
+ { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
+ { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
+ { u32 x50 = (x49 & 0x3ffffed);
+ { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
+ { u32 x54 = (x49 & 0x1ffffff);
+ { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
+ { u32 x58 = (x49 & 0x3ffffff);
+ { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
+ { u32 x62 = (x49 & 0x1ffffff);
+ { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
+ { u32 x66 = (x49 & 0x3ffffff);
+ { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
+ { u32 x70 = (x49 & 0x1ffffff);
+ { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
+ { u32 x74 = (x49 & 0x3ffffff);
+ { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
+ { u32 x78 = (x49 & 0x1ffffff);
+ { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
+ { u32 x82 = (x49 & 0x3ffffff);
+ { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
+ { u32 x86 = (x49 & 0x1ffffff);
+ { u32 x88; addcarryx_u25(x85, x47, x86, &x88);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
+
+static __always_inline void fe_tobytes(u8 s[32], const fe *f)
+{
+ u32 h[10];
+ fe_freeze(h, f->v);
+ s[0] = h[0] >> 0;
+ s[1] = h[0] >> 8;
+ s[2] = h[0] >> 16;
+ s[3] = (h[0] >> 24) | (h[1] << 2);
+ s[4] = h[1] >> 6;
+ s[5] = h[1] >> 14;
+ s[6] = (h[1] >> 22) | (h[2] << 3);
+ s[7] = h[2] >> 5;
+ s[8] = h[2] >> 13;
+ s[9] = (h[2] >> 21) | (h[3] << 5);
+ s[10] = h[3] >> 3;
+ s[11] = h[3] >> 11;
+ s[12] = (h[3] >> 19) | (h[4] << 6);
+ s[13] = h[4] >> 2;
+ s[14] = h[4] >> 10;
+ s[15] = h[4] >> 18;
+ s[16] = h[5] >> 0;
+ s[17] = h[5] >> 8;
+ s[18] = h[5] >> 16;
+ s[19] = (h[5] >> 24) | (h[6] << 1);
+ s[20] = h[6] >> 7;
+ s[21] = h[6] >> 15;
+ s[22] = (h[6] >> 23) | (h[7] << 3);
+ s[23] = h[7] >> 5;
+ s[24] = h[7] >> 13;
+ s[25] = (h[7] >> 21) | (h[8] << 4);
+ s[26] = h[8] >> 4;
+ s[27] = h[8] >> 12;
+ s[28] = (h[8] >> 20) | (h[9] << 6);
+ s[29] = h[9] >> 2;
+ s[30] = h[9] >> 10;
+ s[31] = h[9] >> 18;
+}
+
+/* h = f */
+static __always_inline void fe_copy(fe *h, const fe *f)
+{
+ memmove(h, f, sizeof(u32) * 10);
+}
+
+static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
+{
+ memmove(h, f, sizeof(u32) * 10);
+}
+
+/* h = 0 */
+static __always_inline void fe_0(fe *h)
+{
+ memset(h, 0, sizeof(u32) * 10);
+}
+
+/* h = 1 */
+static __always_inline void fe_1(fe *h)
+{
+ memset(h, 0, sizeof(u32) * 10);
+ h->v[0] = 1;
+}
+
+static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+{
+ { const u32 x20 = in1[9];
+ { const u32 x21 = in1[8];
+ { const u32 x19 = in1[7];
+ { const u32 x17 = in1[6];
+ { const u32 x15 = in1[5];
+ { const u32 x13 = in1[4];
+ { const u32 x11 = in1[3];
+ { const u32 x9 = in1[2];
+ { const u32 x7 = in1[1];
+ { const u32 x5 = in1[0];
+ { const u32 x38 = in2[9];
+ { const u32 x39 = in2[8];
+ { const u32 x37 = in2[7];
+ { const u32 x35 = in2[6];
+ { const u32 x33 = in2[5];
+ { const u32 x31 = in2[4];
+ { const u32 x29 = in2[3];
+ { const u32 x27 = in2[2];
+ { const u32 x25 = in2[1];
+ { const u32 x23 = in2[0];
+ out[0] = (x5 + x23);
+ out[1] = (x7 + x25);
+ out[2] = (x9 + x27);
+ out[3] = (x11 + x29);
+ out[4] = (x13 + x31);
+ out[5] = (x15 + x33);
+ out[6] = (x17 + x35);
+ out[7] = (x19 + x37);
+ out[8] = (x21 + x39);
+ out[9] = (x20 + x38);
+ }}}}}}}}}}}}}}}}}}}}
+}
+
+/* h = f + g
+ * Can overlap h with f or g.
+ */
+static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
+{
+ fe_add_impl(h->v, f->v, g->v);
+}
+
+static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+{
+ { const u32 x20 = in1[9];
+ { const u32 x21 = in1[8];
+ { const u32 x19 = in1[7];
+ { const u32 x17 = in1[6];
+ { const u32 x15 = in1[5];
+ { const u32 x13 = in1[4];
+ { const u32 x11 = in1[3];
+ { const u32 x9 = in1[2];
+ { const u32 x7 = in1[1];
+ { const u32 x5 = in1[0];
+ { const u32 x38 = in2[9];
+ { const u32 x39 = in2[8];
+ { const u32 x37 = in2[7];
+ { const u32 x35 = in2[6];
+ { const u32 x33 = in2[5];
+ { const u32 x31 = in2[4];
+ { const u32 x29 = in2[3];
+ { const u32 x27 = in2[2];
+ { const u32 x25 = in2[1];
+ { const u32 x23 = in2[0];
+ out[0] = ((0x7ffffda + x5) - x23);
+ out[1] = ((0x3fffffe + x7) - x25);
+ out[2] = ((0x7fffffe + x9) - x27);
+ out[3] = ((0x3fffffe + x11) - x29);
+ out[4] = ((0x7fffffe + x13) - x31);
+ out[5] = ((0x3fffffe + x15) - x33);
+ out[6] = ((0x7fffffe + x17) - x35);
+ out[7] = ((0x3fffffe + x19) - x37);
+ out[8] = ((0x7fffffe + x21) - x39);
+ out[9] = ((0x3fffffe + x20) - x38);
+ }}}}}}}}}}}}}}}}}}}}
+}
+
+/* h = f - g
+ * Can overlap h with f or g.
+ */
+static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
+{
+ fe_sub_impl(h->v, f->v, g->v);
+}
+
+static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+{
+ { const u32 x20 = in1[9];
+ { const u32 x21 = in1[8];
+ { const u32 x19 = in1[7];
+ { const u32 x17 = in1[6];
+ { const u32 x15 = in1[5];
+ { const u32 x13 = in1[4];
+ { const u32 x11 = in1[3];
+ { const u32 x9 = in1[2];
+ { const u32 x7 = in1[1];
+ { const u32 x5 = in1[0];
+ { const u32 x38 = in2[9];
+ { const u32 x39 = in2[8];
+ { const u32 x37 = in2[7];
+ { const u32 x35 = in2[6];
+ { const u32 x33 = in2[5];
+ { const u32 x31 = in2[4];
+ { const u32 x29 = in2[3];
+ { const u32 x27 = in2[2];
+ { const u32 x25 = in2[1];
+ { const u32 x23 = in2[0];
+ { u64 x40 = ((u64)x23 * x5);
+ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
+ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
+ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
+ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
+ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
+ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
+ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
+ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
+ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
+ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
+ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
+ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
+ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
+ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
+ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
+ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
+ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
+ { u64 x58 = ((u64)(0x2 * x38) * x20);
+ { u64 x59 = (x48 + (x58 << 0x4));
+ { u64 x60 = (x59 + (x58 << 0x1));
+ { u64 x61 = (x60 + x58);
+ { u64 x62 = (x47 + (x57 << 0x4));
+ { u64 x63 = (x62 + (x57 << 0x1));
+ { u64 x64 = (x63 + x57);
+ { u64 x65 = (x46 + (x56 << 0x4));
+ { u64 x66 = (x65 + (x56 << 0x1));
+ { u64 x67 = (x66 + x56);
+ { u64 x68 = (x45 + (x55 << 0x4));
+ { u64 x69 = (x68 + (x55 << 0x1));
+ { u64 x70 = (x69 + x55);
+ { u64 x71 = (x44 + (x54 << 0x4));
+ { u64 x72 = (x71 + (x54 << 0x1));
+ { u64 x73 = (x72 + x54);
+ { u64 x74 = (x43 + (x53 << 0x4));
+ { u64 x75 = (x74 + (x53 << 0x1));
+ { u64 x76 = (x75 + x53);
+ { u64 x77 = (x42 + (x52 << 0x4));
+ { u64 x78 = (x77 + (x52 << 0x1));
+ { u64 x79 = (x78 + x52);
+ { u64 x80 = (x41 + (x51 << 0x4));
+ { u64 x81 = (x80 + (x51 << 0x1));
+ { u64 x82 = (x81 + x51);
+ { u64 x83 = (x40 + (x50 << 0x4));
+ { u64 x84 = (x83 + (x50 << 0x1));
+ { u64 x85 = (x84 + x50);
+ { u64 x86 = (x85 >> 0x1a);
+ { u32 x87 = ((u32)x85 & 0x3ffffff);
+ { u64 x88 = (x86 + x82);
+ { u64 x89 = (x88 >> 0x19);
+ { u32 x90 = ((u32)x88 & 0x1ffffff);
+ { u64 x91 = (x89 + x79);
+ { u64 x92 = (x91 >> 0x1a);
+ { u32 x93 = ((u32)x91 & 0x3ffffff);
+ { u64 x94 = (x92 + x76);
+ { u64 x95 = (x94 >> 0x19);
+ { u32 x96 = ((u32)x94 & 0x1ffffff);
+ { u64 x97 = (x95 + x73);
+ { u64 x98 = (x97 >> 0x1a);
+ { u32 x99 = ((u32)x97 & 0x3ffffff);
+ { u64 x100 = (x98 + x70);
+ { u64 x101 = (x100 >> 0x19);
+ { u32 x102 = ((u32)x100 & 0x1ffffff);
+ { u64 x103 = (x101 + x67);
+ { u64 x104 = (x103 >> 0x1a);
+ { u32 x105 = ((u32)x103 & 0x3ffffff);
+ { u64 x106 = (x104 + x64);
+ { u64 x107 = (x106 >> 0x19);
+ { u32 x108 = ((u32)x106 & 0x1ffffff);
+ { u64 x109 = (x107 + x61);
+ { u64 x110 = (x109 >> 0x1a);
+ { u32 x111 = ((u32)x109 & 0x3ffffff);
+ { u64 x112 = (x110 + x49);
+ { u64 x113 = (x112 >> 0x19);
+ { u32 x114 = ((u32)x112 & 0x1ffffff);
+ { u64 x115 = (x87 + (0x13 * x113));
+ { u32 x116 = (u32) (x115 >> 0x1a);
+ { u32 x117 = ((u32)x115 & 0x3ffffff);
+ { u32 x118 = (x116 + x90);
+ { u32 x119 = (x118 >> 0x19);
+ { u32 x120 = (x118 & 0x1ffffff);
+ out[0] = x117;
+ out[1] = x120;
+ out[2] = (x119 + x93);
+ out[3] = x96;
+ out[4] = x99;
+ out[5] = x102;
+ out[6] = x105;
+ out[7] = x108;
+ out[8] = x111;
+ out[9] = x114;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
+
+static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
+{
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
+{
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static __always_inline void
+fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
+{
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_sqr_impl(u32 out[10], const u32 in1[10])
+{
+ { const u32 x17 = in1[9];
+ { const u32 x18 = in1[8];
+ { const u32 x16 = in1[7];
+ { const u32 x14 = in1[6];
+ { const u32 x12 = in1[5];
+ { const u32 x10 = in1[4];
+ { const u32 x8 = in1[3];
+ { const u32 x6 = in1[2];
+ { const u32 x4 = in1[1];
+ { const u32 x2 = in1[0];
+ { u64 x19 = ((u64)x2 * x2);
+ { u64 x20 = ((u64)(0x2 * x2) * x4);
+ { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
+ { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
+ { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
+ { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
+ { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
+ { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
+ { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
+ { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
+ { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
+ { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
+ { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
+ { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
+ { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
+ { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
+ { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
+ { u64 x36 = ((u64)(0x2 * x18) * x17);
+ { u64 x37 = ((u64)(0x2 * x17) * x17);
+ { u64 x38 = (x27 + (x37 << 0x4));
+ { u64 x39 = (x38 + (x37 << 0x1));
+ { u64 x40 = (x39 + x37);
+ { u64 x41 = (x26 + (x36 << 0x4));
+ { u64 x42 = (x41 + (x36 << 0x1));
+ { u64 x43 = (x42 + x36);
+ { u64 x44 = (x25 + (x35 << 0x4));
+ { u64 x45 = (x44 + (x35 << 0x1));
+ { u64 x46 = (x45 + x35);
+ { u64 x47 = (x24 + (x34 << 0x4));
+ { u64 x48 = (x47 + (x34 << 0x1));
+ { u64 x49 = (x48 + x34);
+ { u64 x50 = (x23 + (x33 << 0x4));
+ { u64 x51 = (x50 + (x33 << 0x1));
+ { u64 x52 = (x51 + x33);
+ { u64 x53 = (x22 + (x32 << 0x4));
+ { u64 x54 = (x53 + (x32 << 0x1));
+ { u64 x55 = (x54 + x32);
+ { u64 x56 = (x21 + (x31 << 0x4));
+ { u64 x57 = (x56 + (x31 << 0x1));
+ { u64 x58 = (x57 + x31);
+ { u64 x59 = (x20 + (x30 << 0x4));
+ { u64 x60 = (x59 + (x30 << 0x1));
+ { u64 x61 = (x60 + x30);
+ { u64 x62 = (x19 + (x29 << 0x4));
+ { u64 x63 = (x62 + (x29 << 0x1));
+ { u64 x64 = (x63 + x29);
+ { u64 x65 = (x64 >> 0x1a);
+ { u32 x66 = ((u32)x64 & 0x3ffffff);
+ { u64 x67 = (x65 + x61);
+ { u64 x68 = (x67 >> 0x19);
+ { u32 x69 = ((u32)x67 & 0x1ffffff);
+ { u64 x70 = (x68 + x58);
+ { u64 x71 = (x70 >> 0x1a);
+ { u32 x72 = ((u32)x70 & 0x3ffffff);
+ { u64 x73 = (x71 + x55);
+ { u64 x74 = (x73 >> 0x19);
+ { u32 x75 = ((u32)x73 & 0x1ffffff);
+ { u64 x76 = (x74 + x52);
+ { u64 x77 = (x76 >> 0x1a);
+ { u32 x78 = ((u32)x76 & 0x3ffffff);
+ { u64 x79 = (x77 + x49);
+ { u64 x80 = (x79 >> 0x19);
+ { u32 x81 = ((u32)x79 & 0x1ffffff);
+ { u64 x82 = (x80 + x46);
+ { u64 x83 = (x82 >> 0x1a);
+ { u32 x84 = ((u32)x82 & 0x3ffffff);
+ { u64 x85 = (x83 + x43);
+ { u64 x86 = (x85 >> 0x19);
+ { u32 x87 = ((u32)x85 & 0x1ffffff);
+ { u64 x88 = (x86 + x40);
+ { u64 x89 = (x88 >> 0x1a);
+ { u32 x90 = ((u32)x88 & 0x3ffffff);
+ { u64 x91 = (x89 + x28);
+ { u64 x92 = (x91 >> 0x19);
+ { u32 x93 = ((u32)x91 & 0x1ffffff);
+ { u64 x94 = (x66 + (0x13 * x92));
+ { u32 x95 = (u32) (x94 >> 0x1a);
+ { u32 x96 = ((u32)x94 & 0x3ffffff);
+ { u32 x97 = (x95 + x69);
+ { u32 x98 = (x97 >> 0x19);
+ { u32 x99 = (x97 & 0x1ffffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x72);
+ out[3] = x75;
+ out[4] = x78;
+ out[5] = x81;
+ out[6] = x84;
+ out[7] = x87;
+ out[8] = x90;
+ out[9] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
+
+static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
+{
+ fe_sqr_impl(h->v, f->v);
+}
+
+static __always_inline void fe_sq_tt(fe *h, const fe *f)
+{
+ fe_sqr_impl(h->v, f->v);
+}
+
+static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
+{
+ fe t0;
+ fe t1;
+ fe t2;
+ fe t3;
+ int i;
+
+ fe_sq_tl(&t0, z);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 2; ++i)
+ fe_sq_tt(&t1, &t1);
+ fe_mul_tlt(&t1, z, &t1);
+ fe_mul_ttt(&t0, &t0, &t1);
+ fe_sq_tt(&t2, &t0);
+ fe_mul_ttt(&t1, &t1, &t2);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 5; ++i)
+ fe_sq_tt(&t2, &t2);
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 10; ++i)
+ fe_sq_tt(&t2, &t2);
+ fe_mul_ttt(&t2, &t2, &t1);
+ fe_sq_tt(&t3, &t2);
+ for (i = 1; i < 20; ++i)
+ fe_sq_tt(&t3, &t3);
+ fe_mul_ttt(&t2, &t3, &t2);
+ fe_sq_tt(&t2, &t2);
+ for (i = 1; i < 10; ++i)
+ fe_sq_tt(&t2, &t2);
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 50; ++i)
+ fe_sq_tt(&t2, &t2);
+ fe_mul_ttt(&t2, &t2, &t1);
+ fe_sq_tt(&t3, &t2);
+ for (i = 1; i < 100; ++i)
+ fe_sq_tt(&t3, &t3);
+ fe_mul_ttt(&t2, &t3, &t2);
+ fe_sq_tt(&t2, &t2);
+ for (i = 1; i < 50; ++i)
+ fe_sq_tt(&t2, &t2);
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t1, &t1);
+ for (i = 1; i < 5; ++i)
+ fe_sq_tt(&t1, &t1);
+ fe_mul_ttt(out, &t1, &t0);
+}
+
+static __always_inline void fe_invert(fe *out, const fe *z)
+{
+ fe_loose l;
+ fe_copy_lt(&l, z);
+ fe_loose_invert(out, &l);
+}
+
+/* Replace (f,g) with (g,f) if b == 1;
+ * replace (f,g) with (f,g) if b == 0.
+ *
+ * Preconditions: b in {0,1}
+ */
+static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
+{
+ unsigned i;
+ b = 0 - b;
+ for (i = 0; i < 10; i++) {
+ u32 x = f->v[i] ^ g->v[i];
+ x &= b;
+ f->v[i] ^= x;
+ g->v[i] ^= x;
+ }
+}
+
+/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
+static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
+{
+ { const u32 x20 = in1[9];
+ { const u32 x21 = in1[8];
+ { const u32 x19 = in1[7];
+ { const u32 x17 = in1[6];
+ { const u32 x15 = in1[5];
+ { const u32 x13 = in1[4];
+ { const u32 x11 = in1[3];
+ { const u32 x9 = in1[2];
+ { const u32 x7 = in1[1];
+ { const u32 x5 = in1[0];
+ { const u32 x38 = 0;
+ { const u32 x39 = 0;
+ { const u32 x37 = 0;
+ { const u32 x35 = 0;
+ { const u32 x33 = 0;
+ { const u32 x31 = 0;
+ { const u32 x29 = 0;
+ { const u32 x27 = 0;
+ { const u32 x25 = 0;
+ { const u32 x23 = 121666;
+ { u64 x40 = ((u64)x23 * x5);
+ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
+ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
+ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
+ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
+ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
+ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
+ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
+ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
+ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
+ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
+ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
+ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
+ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
+ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
+ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
+ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
+ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
+ { u64 x58 = ((u64)(0x2 * x38) * x20);
+ { u64 x59 = (x48 + (x58 << 0x4));
+ { u64 x60 = (x59 + (x58 << 0x1));
+ { u64 x61 = (x60 + x58);
+ { u64 x62 = (x47 + (x57 << 0x4));
+ { u64 x63 = (x62 + (x57 << 0x1));
+ { u64 x64 = (x63 + x57);
+ { u64 x65 = (x46 + (x56 << 0x4));
+ { u64 x66 = (x65 + (x56 << 0x1));
+ { u64 x67 = (x66 + x56);
+ { u64 x68 = (x45 + (x55 << 0x4));
+ { u64 x69 = (x68 + (x55 << 0x1));
+ { u64 x70 = (x69 + x55);
+ { u64 x71 = (x44 + (x54 << 0x4));
+ { u64 x72 = (x71 + (x54 << 0x1));
+ { u64 x73 = (x72 + x54);
+ { u64 x74 = (x43 + (x53 << 0x4));
+ { u64 x75 = (x74 + (x53 << 0x1));
+ { u64 x76 = (x75 + x53);
+ { u64 x77 = (x42 + (x52 << 0x4));
+ { u64 x78 = (x77 + (x52 << 0x1));
+ { u64 x79 = (x78 + x52);
+ { u64 x80 = (x41 + (x51 << 0x4));
+ { u64 x81 = (x80 + (x51 << 0x1));
+ { u64 x82 = (x81 + x51);
+ { u64 x83 = (x40 + (x50 << 0x4));
+ { u64 x84 = (x83 + (x50 << 0x1));
+ { u64 x85 = (x84 + x50);
+ { u64 x86 = (x85 >> 0x1a);
+ { u32 x87 = ((u32)x85 & 0x3ffffff);
+ { u64 x88 = (x86 + x82);
+ { u64 x89 = (x88 >> 0x19);
+ { u32 x90 = ((u32)x88 & 0x1ffffff);
+ { u64 x91 = (x89 + x79);
+ { u64 x92 = (x91 >> 0x1a);
+ { u32 x93 = ((u32)x91 & 0x3ffffff);
+ { u64 x94 = (x92 + x76);
+ { u64 x95 = (x94 >> 0x19);
+ { u32 x96 = ((u32)x94 & 0x1ffffff);
+ { u64 x97 = (x95 + x73);
+ { u64 x98 = (x97 >> 0x1a);
+ { u32 x99 = ((u32)x97 & 0x3ffffff);
+ { u64 x100 = (x98 + x70);
+ { u64 x101 = (x100 >> 0x19);
+ { u32 x102 = ((u32)x100 & 0x1ffffff);
+ { u64 x103 = (x101 + x67);
+ { u64 x104 = (x103 >> 0x1a);
+ { u32 x105 = ((u32)x103 & 0x3ffffff);
+ { u64 x106 = (x104 + x64);
+ { u64 x107 = (x106 >> 0x19);
+ { u32 x108 = ((u32)x106 & 0x1ffffff);
+ { u64 x109 = (x107 + x61);
+ { u64 x110 = (x109 >> 0x1a);
+ { u32 x111 = ((u32)x109 & 0x3ffffff);
+ { u64 x112 = (x110 + x49);
+ { u64 x113 = (x112 >> 0x19);
+ { u32 x114 = ((u32)x112 & 0x1ffffff);
+ { u64 x115 = (x87 + (0x13 * x113));
+ { u32 x116 = (u32) (x115 >> 0x1a);
+ { u32 x117 = ((u32)x115 & 0x3ffffff);
+ { u32 x118 = (x116 + x90);
+ { u32 x119 = (x118 >> 0x19);
+ { u32 x120 = (x118 & 0x1ffffff);
+ out[0] = x117;
+ out[1] = x120;
+ out[2] = (x119 + x93);
+ out[3] = x96;
+ out[4] = x99;
+ out[5] = x102;
+ out[6] = x105;
+ out[7] = x108;
+ out[8] = x111;
+ out[9] = x114;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
+
+static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
+{
+ fe_mul_121666_impl(h->v, f->v);
+}
+
+void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE])
+{
+ fe x1, x2, z2, x3, z3;
+ fe_loose x2l, z2l, x3l;
+ unsigned swap = 0;
+ int pos;
+ u8 e[32];
+
+ memcpy(e, scalar, 32);
+ curve25519_clamp_secret(e);
+
+ /* The following implementation was transcribed to Coq and proven to
+ * correspond to unary scalar multiplication in affine coordinates given
+ * that x1 != 0 is the x coordinate of some point on the curve. It was
+ * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
+ * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
+ * quantified over the underlying field, so it applies to Curve25519
+ * itself and the quadratic twist of Curve25519. It was not proven in
+ * Coq that prime-field arithmetic correctly simulates extension-field
+ * arithmetic on prime-field values. The decoding of the byte array
+ * representation of e was not considered.
+ *
+ * Specification of Montgomery curves in affine coordinates:
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
+ *
+ * Proof that these form a group that is isomorphic to a Weierstrass
+ * curve:
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
+ *
+ * Coq transcription and correctness proof of the loop
+ * (where scalarbits=255):
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
+ * preconditions: 0 <= e < 2^255 (not necessarily e < order),
+ * fe_invert(0) = 0
+ */
+ fe_frombytes(&x1, point);
+ fe_1(&x2);
+ fe_0(&z2);
+ fe_copy(&x3, &x1);
+ fe_1(&z3);
+
+ for (pos = 254; pos >= 0; --pos) {
+ fe tmp0, tmp1;
+ fe_loose tmp0l, tmp1l;
+ /* loop invariant as of right before the test, for the case
+ * where x1 != 0:
+ * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
+ * is nonzero
+ * let r := e >> (pos+1) in the following equalities of
+ * projective points:
+ * to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
+ * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
+ * x1 is the nonzero x coordinate of the nonzero
+ * point (r*P-(r+1)*P)
+ */
+ unsigned b = 1 & (e[pos / 8] >> (pos & 7));
+ swap ^= b;
+ fe_cswap(&x2, &x3, swap);
+ fe_cswap(&z2, &z3, swap);
+ swap = b;
+ /* Coq transcription of ladderstep formula (called from
+ * transcribed loop):
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
+ * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
+ * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
+ */
+ fe_sub(&tmp0l, &x3, &z3);
+ fe_sub(&tmp1l, &x2, &z2);
+ fe_add(&x2l, &x2, &z2);
+ fe_add(&z2l, &x3, &z3);
+ fe_mul_tll(&z3, &tmp0l, &x2l);
+ fe_mul_tll(&z2, &z2l, &tmp1l);
+ fe_sq_tl(&tmp0, &tmp1l);
+ fe_sq_tl(&tmp1, &x2l);
+ fe_add(&x3l, &z3, &z2);
+ fe_sub(&z2l, &z3, &z2);
+ fe_mul_ttt(&x2, &tmp1, &tmp0);
+ fe_sub(&tmp1l, &tmp1, &tmp0);
+ fe_sq_tl(&z2, &z2l);
+ fe_mul121666(&z3, &tmp1l);
+ fe_sq_tl(&x3, &x3l);
+ fe_add(&tmp0l, &tmp0, &z3);
+ fe_mul_ttt(&z3, &x1, &z2);
+ fe_mul_tll(&z2, &tmp1l, &tmp0l);
+ }
+ /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
+ * else (x2, z2)
+ */
+ fe_cswap(&x2, &x3, swap);
+ fe_cswap(&z2, &z3, swap);
+
+ fe_invert(&z2, &z2);
+ fe_mul_ttt(&x2, &x2, &z2);
+ fe_tobytes(out, &x2);
+
+ memzero_explicit(&x1, sizeof(x1));
+ memzero_explicit(&x2, sizeof(x2));
+ memzero_explicit(&z2, sizeof(z2));
+ memzero_explicit(&x3, sizeof(x3));
+ memzero_explicit(&z3, sizeof(z3));
+ memzero_explicit(&x2l, sizeof(x2l));
+ memzero_explicit(&z2l, sizeof(z2l));
+ memzero_explicit(&x3l, sizeof(x3l));
+ memzero_explicit(&e, sizeof(e));
+}
diff --git a/lib/crypto/curve25519-hacl64.c b/lib/crypto/curve25519-hacl64.c
new file mode 100644
index 000000000000..771d82dc5f14
--- /dev/null
+++ b/lib/crypto/curve25519-hacl64.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2016-2017 INRIA and Microsoft Corporation.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is a machine-generated formally verified implementation of Curve25519
+ * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine
+ * generated, it has been tweaked to be suitable for use in the kernel. It is
+ * optimized for 64-bit machines that can efficiently work with 128-bit
+ * integer types.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/curve25519.h>
+#include <linux/string.h>
+
+typedef __uint128_t u128;
+
+static __always_inline u64 u64_eq_mask(u64 a, u64 b)
+{
+ u64 x = a ^ b;
+ u64 minus_x = ~x + (u64)1U;
+ u64 x_or_minus_x = x | minus_x;
+ u64 xnx = x_or_minus_x >> (u32)63U;
+ u64 c = xnx - (u64)1U;
+ return c;
+}
+
+static __always_inline u64 u64_gte_mask(u64 a, u64 b)
+{
+ u64 x = a;
+ u64 y = b;
+ u64 x_xor_y = x ^ y;
+ u64 x_sub_y = x - y;
+ u64 x_sub_y_xor_y = x_sub_y ^ y;
+ u64 q = x_xor_y | x_sub_y_xor_y;
+ u64 x_xor_q = x ^ q;
+ u64 x_xor_q_ = x_xor_q >> (u32)63U;
+ u64 c = x_xor_q_ - (u64)1U;
+ return c;
+}
+
+static __always_inline void modulo_carry_top(u64 *b)
+{
+ u64 b4 = b[4];
+ u64 b0 = b[0];
+ u64 b4_ = b4 & 0x7ffffffffffffLLU;
+ u64 b0_ = b0 + 19 * (b4 >> 51);
+ b[4] = b4_;
+ b[0] = b0_;
+}
+
+static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
+{
+ {
+ u128 xi = input[0];
+ output[0] = ((u64)(xi));
+ }
+ {
+ u128 xi = input[1];
+ output[1] = ((u64)(xi));
+ }
+ {
+ u128 xi = input[2];
+ output[2] = ((u64)(xi));
+ }
+ {
+ u128 xi = input[3];
+ output[3] = ((u64)(xi));
+ }
+ {
+ u128 xi = input[4];
+ output[4] = ((u64)(xi));
+ }
+}
+
+static __always_inline void
+fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
+{
+ output[0] += (u128)input[0] * s;
+ output[1] += (u128)input[1] * s;
+ output[2] += (u128)input[2] * s;
+ output[3] += (u128)input[3] * s;
+ output[4] += (u128)input[4] * s;
+}
+
+static __always_inline void fproduct_carry_wide_(u128 *tmp)
+{
+ {
+ u32 ctr = 0;
+ u128 tctr = tmp[ctr];
+ u128 tctrp1 = tmp[ctr + 1];
+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
+ u128 c = ((tctr) >> (51));
+ tmp[ctr] = ((u128)(r0));
+ tmp[ctr + 1] = ((tctrp1) + (c));
+ }
+ {
+ u32 ctr = 1;
+ u128 tctr = tmp[ctr];
+ u128 tctrp1 = tmp[ctr + 1];
+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
+ u128 c = ((tctr) >> (51));
+ tmp[ctr] = ((u128)(r0));
+ tmp[ctr + 1] = ((tctrp1) + (c));
+ }
+
+ {
+ u32 ctr = 2;
+ u128 tctr = tmp[ctr];
+ u128 tctrp1 = tmp[ctr + 1];
+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
+ u128 c = ((tctr) >> (51));
+ tmp[ctr] = ((u128)(r0));
+ tmp[ctr + 1] = ((tctrp1) + (c));
+ }
+ {
+ u32 ctr = 3;
+ u128 tctr = tmp[ctr];
+ u128 tctrp1 = tmp[ctr + 1];
+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
+ u128 c = ((tctr) >> (51));
+ tmp[ctr] = ((u128)(r0));
+ tmp[ctr + 1] = ((tctrp1) + (c));
+ }
+}
+
+static __always_inline void fmul_shift_reduce(u64 *output)
+{
+ u64 tmp = output[4];
+ u64 b0;
+ {
+ u32 ctr = 5 - 0 - 1;
+ u64 z = output[ctr - 1];
+ output[ctr] = z;
+ }
+ {
+ u32 ctr = 5 - 1 - 1;
+ u64 z = output[ctr - 1];
+ output[ctr] = z;
+ }
+ {
+ u32 ctr = 5 - 2 - 1;
+ u64 z = output[ctr - 1];
+ output[ctr] = z;
+ }
+ {
+ u32 ctr = 5 - 3 - 1;
+ u64 z = output[ctr - 1];
+ output[ctr] = z;
+ }
+ output[0] = tmp;
+ b0 = output[0];
+ output[0] = 19 * b0;
+}
+
+static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
+ u64 *input21)
+{
+ u32 i;
+ u64 input2i;
+ {
+ u64 input2i = input21[0];
+ fproduct_sum_scalar_multiplication_(output, input, input2i);
+ fmul_shift_reduce(input);
+ }
+ {
+ u64 input2i = input21[1];
+ fproduct_sum_scalar_multiplication_(output, input, input2i);
+ fmul_shift_reduce(input);
+ }
+ {
+ u64 input2i = input21[2];
+ fproduct_sum_scalar_multiplication_(output, input, input2i);
+ fmul_shift_reduce(input);
+ }
+ {
+ u64 input2i = input21[3];
+ fproduct_sum_scalar_multiplication_(output, input, input2i);
+ fmul_shift_reduce(input);
+ }
+ i = 4;
+ input2i = input21[i];
+ fproduct_sum_scalar_multiplication_(output, input, input2i);
+}
+
+static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21)
+{
+ u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] };
+ {
+ u128 b4;
+ u128 b0;
+ u128 b4_;
+ u128 b0_;
+ u64 i0;
+ u64 i1;
+ u64 i0_;
+ u64 i1_;
+ u128 t[5] = { 0 };
+ fmul_mul_shift_reduce_(t, tmp, input21);
+ fproduct_carry_wide_(t);
+ b4 = t[4];
+ b0 = t[0];
+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ t[4] = b4_;
+ t[0] = b0_;
+ fproduct_copy_from_wide_(output, t);
+ i0 = output[0];
+ i1 = output[1];
+ i0_ = i0 & 0x7ffffffffffffLLU;
+ i1_ = i1 + (i0 >> 51);
+ output[0] = i0_;
+ output[1] = i1_;
+ }
+}
+
+static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
+{
+ u64 r0 = output[0];
+ u64 r1 = output[1];
+ u64 r2 = output[2];
+ u64 r3 = output[3];
+ u64 r4 = output[4];
+ u64 d0 = r0 * 2;
+ u64 d1 = r1 * 2;
+ u64 d2 = r2 * 2 * 19;
+ u64 d419 = r4 * 19;
+ u64 d4 = d419 * 2;
+ u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
+ (((u128)(d2) * (r3))));
+ u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
+ (((u128)(r3 * 19) * (r3))));
+ u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
+ (((u128)(d4) * (r3))));
+ u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
+ (((u128)(r4) * (d419))));
+ u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
+ (((u128)(r2) * (r2))));
+ tmp[0] = s0;
+ tmp[1] = s1;
+ tmp[2] = s2;
+ tmp[3] = s3;
+ tmp[4] = s4;
+}
+
+static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
+{
+ u128 b4;
+ u128 b0;
+ u128 b4_;
+ u128 b0_;
+ u64 i0;
+ u64 i1;
+ u64 i0_;
+ u64 i1_;
+ fsquare_fsquare__(tmp, output);
+ fproduct_carry_wide_(tmp);
+ b4 = tmp[4];
+ b0 = tmp[0];
+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ tmp[4] = b4_;
+ tmp[0] = b0_;
+ fproduct_copy_from_wide_(output, tmp);
+ i0 = output[0];
+ i1 = output[1];
+ i0_ = i0 & 0x7ffffffffffffLLU;
+ i1_ = i1 + (i0 >> 51);
+ output[0] = i0_;
+ output[1] = i1_;
+}
+
+static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
+ u32 count1)
+{
+ u32 i;
+ fsquare_fsquare_(tmp, output);
+ for (i = 1; i < count1; ++i)
+ fsquare_fsquare_(tmp, output);
+}
+
+static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
+ u32 count1)
+{
+ u128 t[5];
+ memcpy(output, input, 5 * sizeof(*input));
+ fsquare_fsquare_times_(output, t, count1);
+}
+
+static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
+ u32 count1)
+{
+ u128 t[5];
+ fsquare_fsquare_times_(output, t, count1);
+}
+
+static __always_inline void crecip_crecip(u64 *out, u64 *z)
+{
+ u64 buf[20] = { 0 };
+ u64 *a0 = buf;
+ u64 *t00 = buf + 5;
+ u64 *b0 = buf + 10;
+ u64 *t01;
+ u64 *b1;
+ u64 *c0;
+ u64 *a;
+ u64 *t0;
+ u64 *b;
+ u64 *c;
+ fsquare_fsquare_times(a0, z, 1);
+ fsquare_fsquare_times(t00, a0, 2);
+ fmul_fmul(b0, t00, z);
+ fmul_fmul(a0, b0, a0);
+ fsquare_fsquare_times(t00, a0, 1);
+ fmul_fmul(b0, t00, b0);
+ fsquare_fsquare_times(t00, b0, 5);
+ t01 = buf + 5;
+ b1 = buf + 10;
+ c0 = buf + 15;
+ fmul_fmul(b1, t01, b1);
+ fsquare_fsquare_times(t01, b1, 10);
+ fmul_fmul(c0, t01, b1);
+ fsquare_fsquare_times(t01, c0, 20);
+ fmul_fmul(t01, t01, c0);
+ fsquare_fsquare_times_inplace(t01, 10);
+ fmul_fmul(b1, t01, b1);
+ fsquare_fsquare_times(t01, b1, 50);
+ a = buf;
+ t0 = buf + 5;
+ b = buf + 10;
+ c = buf + 15;
+ fmul_fmul(c, t0, b);
+ fsquare_fsquare_times(t0, c, 100);
+ fmul_fmul(t0, t0, c);
+ fsquare_fsquare_times_inplace(t0, 50);
+ fmul_fmul(t0, t0, b);
+ fsquare_fsquare_times_inplace(t0, 5);
+ fmul_fmul(out, t0, a);
+}
+
+static __always_inline void fsum(u64 *a, u64 *b)
+{
+ a[0] += b[0];
+ a[1] += b[1];
+ a[2] += b[2];
+ a[3] += b[3];
+ a[4] += b[4];
+}
+
+static __always_inline void fdifference(u64 *a, u64 *b)
+{
+ u64 tmp[5] = { 0 };
+ u64 b0;
+ u64 b1;
+ u64 b2;
+ u64 b3;
+ u64 b4;
+ memcpy(tmp, b, 5 * sizeof(*b));
+ b0 = tmp[0];
+ b1 = tmp[1];
+ b2 = tmp[2];
+ b3 = tmp[3];
+ b4 = tmp[4];
+ tmp[0] = b0 + 0x3fffffffffff68LLU;
+ tmp[1] = b1 + 0x3ffffffffffff8LLU;
+ tmp[2] = b2 + 0x3ffffffffffff8LLU;
+ tmp[3] = b3 + 0x3ffffffffffff8LLU;
+ tmp[4] = b4 + 0x3ffffffffffff8LLU;
+ {
+ u64 xi = a[0];
+ u64 yi = tmp[0];
+ a[0] = yi - xi;
+ }
+ {
+ u64 xi = a[1];
+ u64 yi = tmp[1];
+ a[1] = yi - xi;
+ }
+ {
+ u64 xi = a[2];
+ u64 yi = tmp[2];
+ a[2] = yi - xi;
+ }
+ {
+ u64 xi = a[3];
+ u64 yi = tmp[3];
+ a[3] = yi - xi;
+ }
+ {
+ u64 xi = a[4];
+ u64 yi = tmp[4];
+ a[4] = yi - xi;
+ }
+}
+
+static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
+{
+ u128 tmp[5];
+ u128 b4;
+ u128 b0;
+ u128 b4_;
+ u128 b0_;
+ {
+ u64 xi = b[0];
+ tmp[0] = ((u128)(xi) * (s));
+ }
+ {
+ u64 xi = b[1];
+ tmp[1] = ((u128)(xi) * (s));
+ }
+ {
+ u64 xi = b[2];
+ tmp[2] = ((u128)(xi) * (s));
+ }
+ {
+ u64 xi = b[3];
+ tmp[3] = ((u128)(xi) * (s));
+ }
+ {
+ u64 xi = b[4];
+ tmp[4] = ((u128)(xi) * (s));
+ }
+ fproduct_carry_wide_(tmp);
+ b4 = tmp[4];
+ b0 = tmp[0];
+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ tmp[4] = b4_;
+ tmp[0] = b0_;
+ fproduct_copy_from_wide_(output, tmp);
+}
+
+static __always_inline void fmul(u64 *output, u64 *a, u64 *b)
+{
+ fmul_fmul(output, a, b);
+}
+
+static __always_inline void crecip(u64 *output, u64 *input)
+{
+ crecip_crecip(output, input);
+}
+
+static __always_inline void point_swap_conditional_step(u64 *a, u64 *b,
+ u64 swap1, u32 ctr)
+{
+ u32 i = ctr - 1;
+ u64 ai = a[i];
+ u64 bi = b[i];
+ u64 x = swap1 & (ai ^ bi);
+ u64 ai1 = ai ^ x;
+ u64 bi1 = bi ^ x;
+ a[i] = ai1;
+ b[i] = bi1;
+}
+
+static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1)
+{
+ point_swap_conditional_step(a, b, swap1, 5);
+ point_swap_conditional_step(a, b, swap1, 4);
+ point_swap_conditional_step(a, b, swap1, 3);
+ point_swap_conditional_step(a, b, swap1, 2);
+ point_swap_conditional_step(a, b, swap1, 1);
+}
+
+static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap)
+{
+ u64 swap1 = 0 - iswap;
+ point_swap_conditional5(a, b, swap1);
+ point_swap_conditional5(a + 5, b + 5, swap1);
+}
+
+static __always_inline void point_copy(u64 *output, u64 *input)
+{
+ memcpy(output, input, 5 * sizeof(*input));
+ memcpy(output + 5, input + 5, 5 * sizeof(*input));
+}
+
+static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p,
+ u64 *pq, u64 *qmqp)
+{
+ u64 *qx = qmqp;
+ u64 *x2 = pp;
+ u64 *z2 = pp + 5;
+ u64 *x3 = ppq;
+ u64 *z3 = ppq + 5;
+ u64 *x = p;
+ u64 *z = p + 5;
+ u64 *xprime = pq;
+ u64 *zprime = pq + 5;
+ u64 buf[40] = { 0 };
+ u64 *origx = buf;
+ u64 *origxprime0 = buf + 5;
+ u64 *xxprime0;
+ u64 *zzprime0;
+ u64 *origxprime;
+ xxprime0 = buf + 25;
+ zzprime0 = buf + 30;
+ memcpy(origx, x, 5 * sizeof(*x));
+ fsum(x, z);
+ fdifference(z, origx);
+ memcpy(origxprime0, xprime, 5 * sizeof(*xprime));
+ fsum(xprime, zprime);
+ fdifference(zprime, origxprime0);
+ fmul(xxprime0, xprime, z);
+ fmul(zzprime0, x, zprime);
+ origxprime = buf + 5;
+ {
+ u64 *xx0;
+ u64 *zz0;
+ u64 *xxprime;
+ u64 *zzprime;
+ u64 *zzzprime;
+ xx0 = buf + 15;
+ zz0 = buf + 20;
+ xxprime = buf + 25;
+ zzprime = buf + 30;
+ zzzprime = buf + 35;
+ memcpy(origxprime, xxprime, 5 * sizeof(*xxprime));
+ fsum(xxprime, zzprime);
+ fdifference(zzprime, origxprime);
+ fsquare_fsquare_times(x3, xxprime, 1);
+ fsquare_fsquare_times(zzzprime, zzprime, 1);
+ fmul(z3, zzzprime, qx);
+ fsquare_fsquare_times(xx0, x, 1);
+ fsquare_fsquare_times(zz0, z, 1);
+ {
+ u64 *zzz;
+ u64 *xx;
+ u64 *zz;
+ u64 scalar;
+ zzz = buf + 10;
+ xx = buf + 15;
+ zz = buf + 20;
+ fmul(x2, xx, zz);
+ fdifference(zz, xx);
+ scalar = 121665;
+ fscalar(zzz, zz, scalar);
+ fsum(zzz, xx);
+ fmul(z2, zzz, zz);
+ }
+ }
+}
+
+static __always_inline void
+ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
+ u64 *q, u8 byt)
+{
+ u64 bit0 = (u64)(byt >> 7);
+ u64 bit;
+ point_swap_conditional(nq, nqpq, bit0);
+ addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q);
+ bit = (u64)(byt >> 7);
+ point_swap_conditional(nq2, nqpq2, bit);
+}
+
+static __always_inline void
+ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2,
+ u64 *nqpq2, u64 *q, u8 byt)
+{
+ u8 byt1;
+ ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
+ byt1 = byt << 1;
+ ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
+}
+
+static __always_inline void
+ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
+ u64 *q, u8 byt, u32 i)
+{
+ while (i--) {
+ ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2,
+ nqpq2, q, byt);
+ byt <<= 2;
+ }
+}
+
+static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq,
+ u64 *nqpq, u64 *nq2,
+ u64 *nqpq2, u64 *q,
+ u32 i)
+{
+ while (i--) {
+ u8 byte = n1[i];
+ ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q,
+ byte, 4);
+ }
+}
+
+static void ladder_cmult(u64 *result, u8 *n1, u64 *q)
+{
+ u64 point_buf[40] = { 0 };
+ u64 *nq = point_buf;
+ u64 *nqpq = point_buf + 10;
+ u64 *nq2 = point_buf + 20;
+ u64 *nqpq2 = point_buf + 30;
+ point_copy(nqpq, q);
+ nq[0] = 1;
+ ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32);
+ point_copy(result, nq);
+}
+
+static __always_inline void format_fexpand(u64 *output, const u8 *input)
+{
+ const u8 *x00 = input + 6;
+ const u8 *x01 = input + 12;
+ const u8 *x02 = input + 19;
+ const u8 *x0 = input + 24;
+ u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
+ i0 = get_unaligned_le64(input);
+ i1 = get_unaligned_le64(x00);
+ i2 = get_unaligned_le64(x01);
+ i3 = get_unaligned_le64(x02);
+ i4 = get_unaligned_le64(x0);
+ output0 = i0 & 0x7ffffffffffffLLU;
+ output1 = i1 >> 3 & 0x7ffffffffffffLLU;
+ output2 = i2 >> 6 & 0x7ffffffffffffLLU;
+ output3 = i3 >> 1 & 0x7ffffffffffffLLU;
+ output4 = i4 >> 12 & 0x7ffffffffffffLLU;
+ output[0] = output0;
+ output[1] = output1;
+ output[2] = output2;
+ output[3] = output3;
+ output[4] = output4;
+}
+
+static __always_inline void format_fcontract_first_carry_pass(u64 *input)
+{
+ u64 t0 = input[0];
+ u64 t1 = input[1];
+ u64 t2 = input[2];
+ u64 t3 = input[3];
+ u64 t4 = input[4];
+ u64 t1_ = t1 + (t0 >> 51);
+ u64 t0_ = t0 & 0x7ffffffffffffLLU;
+ u64 t2_ = t2 + (t1_ >> 51);
+ u64 t1__ = t1_ & 0x7ffffffffffffLLU;
+ u64 t3_ = t3 + (t2_ >> 51);
+ u64 t2__ = t2_ & 0x7ffffffffffffLLU;
+ u64 t4_ = t4 + (t3_ >> 51);
+ u64 t3__ = t3_ & 0x7ffffffffffffLLU;
+ input[0] = t0_;
+ input[1] = t1__;
+ input[2] = t2__;
+ input[3] = t3__;
+ input[4] = t4_;
+}
+
+static __always_inline void format_fcontract_first_carry_full(u64 *input)
+{
+ format_fcontract_first_carry_pass(input);
+ modulo_carry_top(input);
+}
+
+static __always_inline void format_fcontract_second_carry_pass(u64 *input)
+{
+ u64 t0 = input[0];
+ u64 t1 = input[1];
+ u64 t2 = input[2];
+ u64 t3 = input[3];
+ u64 t4 = input[4];
+ u64 t1_ = t1 + (t0 >> 51);
+ u64 t0_ = t0 & 0x7ffffffffffffLLU;
+ u64 t2_ = t2 + (t1_ >> 51);
+ u64 t1__ = t1_ & 0x7ffffffffffffLLU;
+ u64 t3_ = t3 + (t2_ >> 51);
+ u64 t2__ = t2_ & 0x7ffffffffffffLLU;
+ u64 t4_ = t4 + (t3_ >> 51);
+ u64 t3__ = t3_ & 0x7ffffffffffffLLU;
+ input[0] = t0_;
+ input[1] = t1__;
+ input[2] = t2__;
+ input[3] = t3__;
+ input[4] = t4_;
+}
+
+static __always_inline void format_fcontract_second_carry_full(u64 *input)
+{
+ u64 i0;
+ u64 i1;
+ u64 i0_;
+ u64 i1_;
+ format_fcontract_second_carry_pass(input);
+ modulo_carry_top(input);
+ i0 = input[0];
+ i1 = input[1];
+ i0_ = i0 & 0x7ffffffffffffLLU;
+ i1_ = i1 + (i0 >> 51);
+ input[0] = i0_;
+ input[1] = i1_;
+}
+
+static __always_inline void format_fcontract_trim(u64 *input)
+{
+ u64 a0 = input[0];
+ u64 a1 = input[1];
+ u64 a2 = input[2];
+ u64 a3 = input[3];
+ u64 a4 = input[4];
+ u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU);
+ u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU);
+ u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU);
+ u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU);
+ u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU);
+ u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
+ u64 a0_ = a0 - (0x7ffffffffffedLLU & mask);
+ u64 a1_ = a1 - (0x7ffffffffffffLLU & mask);
+ u64 a2_ = a2 - (0x7ffffffffffffLLU & mask);
+ u64 a3_ = a3 - (0x7ffffffffffffLLU & mask);
+ u64 a4_ = a4 - (0x7ffffffffffffLLU & mask);
+ input[0] = a0_;
+ input[1] = a1_;
+ input[2] = a2_;
+ input[3] = a3_;
+ input[4] = a4_;
+}
+
+static __always_inline void format_fcontract_store(u8 *output, u64 *input)
+{
+ u64 t0 = input[0];
+ u64 t1 = input[1];
+ u64 t2 = input[2];
+ u64 t3 = input[3];
+ u64 t4 = input[4];
+ u64 o0 = t1 << 51 | t0;
+ u64 o1 = t2 << 38 | t1 >> 13;
+ u64 o2 = t3 << 25 | t2 >> 26;
+ u64 o3 = t4 << 12 | t3 >> 39;
+ u8 *b0 = output;
+ u8 *b1 = output + 8;
+ u8 *b2 = output + 16;
+ u8 *b3 = output + 24;
+ put_unaligned_le64(o0, b0);
+ put_unaligned_le64(o1, b1);
+ put_unaligned_le64(o2, b2);
+ put_unaligned_le64(o3, b3);
+}
+
+static __always_inline void format_fcontract(u8 *output, u64 *input)
+{
+ format_fcontract_first_carry_full(input);
+ format_fcontract_second_carry_full(input);
+ format_fcontract_trim(input);
+ format_fcontract_store(output, input);
+}
+
+static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point)
+{
+ u64 *x = point;
+ u64 *z = point + 5;
+ u64 buf[10] __aligned(32) = { 0 };
+ u64 *zmone = buf;
+ u64 *sc = buf + 5;
+ crecip(zmone, z);
+ fmul(sc, x, zmone);
+ format_fcontract(scalar, sc);
+}
+
+void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+ u64 buf0[10] __aligned(32) = { 0 };
+ u64 *x0 = buf0;
+ u64 *z = buf0 + 5;
+ u64 *q;
+ format_fexpand(x0, basepoint);
+ z[0] = 1;
+ q = buf0;
+ {
+ u8 e[32] __aligned(32) = { 0 };
+ u8 *scalar;
+ memcpy(e, secret, 32);
+ curve25519_clamp_secret(e);
+ scalar = e;
+ {
+ u64 buf[15] = { 0 };
+ u64 *nq = buf;
+ u64 *x = nq;
+ x[0] = 1;
+ ladder_cmult(nq, scalar, q);
+ format_scalar_of_point(mypublic, nq);
+ memzero_explicit(buf, sizeof(buf));
+ }
+ memzero_explicit(e, sizeof(e));
+ }
+ memzero_explicit(buf0, sizeof(buf0));
+}
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
new file mode 100644
index 000000000000..0106bebe6900
--- /dev/null
+++ b/lib/crypto/curve25519.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the Curve25519 ECDH algorithm, using either
+ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
+ * depending on what is supported by the target compiler.
+ *
+ * Information: https://cr.yp.to/ecdh.html
+ */
+
+#include <crypto/curve25519.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
+const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
+
+EXPORT_SYMBOL(curve25519_null_point);
+EXPORT_SYMBOL(curve25519_base_point);
+EXPORT_SYMBOL(curve25519_generic);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Curve25519 scalar multiplication");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
--
2.18.2
From b85ad2b88fc06d997f8f142222d9f8159cd5d2a2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:33 +0100
Subject: [PATCH 026/100] crypto: curve25519 - add kpp selftest
commit f613457a7af085728297bef71233c37faf3c01b1 upstream.
In preparation of introducing KPP implementations of Curve25519, import
the set of test cases proposed by the Zinc patch set, but converted to
the KPP format.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/testmgr.c | 6 +
crypto/testmgr.h | 1225 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1231 insertions(+)
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 711390861f71..57ab993b9ad2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4295,6 +4295,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "cts(cbc(paes))",
.test = alg_test_null,
.fips_allowed = 1,
+ }, {
+ .alg = "curve25519",
+ .test = alg_test_kpp,
+ .suite = {
+ .kpp = __VECS(curve25519_tv_template)
+ }
}, {
.alg = "deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 102fcad54966..5d132ae996b4 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1030,6 +1030,1231 @@ static const struct kpp_testvec dh_tv_template[] = {
}
};
+static const struct kpp_testvec curve25519_tv_template[] = {
+{
+ .secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
+ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
+ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
+ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
+ .b_public = (u8[32]){ 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
+ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
+ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
+ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
+ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
+ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
+ 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
+ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
+ .b_public = (u8[32]){ 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
+ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
+ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
+ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
+ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 1 },
+ .b_public = (u8[32]){ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
+ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
+ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
+ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 1 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
+ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
+ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
+ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
+ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
+ .expected_ss = (u8[32]){ 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
+ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
+ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
+ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+{
+ .secret = (u8[32]){ 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
+ .expected_ss = (u8[32]){ 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
+ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
+ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
+ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - normal case */
+{
+ .secret = (u8[32]){ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
+ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
+ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
+ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
+ .b_public = (u8[32]){ 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
+ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
+ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
+ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
+ .expected_ss = (u8[32]){ 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
+ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
+ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
+ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+ .secret = (u8[32]){ 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
+ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
+ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
+ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
+ .b_public = (u8[32]){ 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
+ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
+ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
+ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
+ .expected_ss = (u8[32]){ 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
+ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
+ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
+ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+ .secret = (u8[32]){ 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
+ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
+ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
+ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
+ .b_public = (u8[32]){ 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
+ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
+ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
+ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
+ .expected_ss = (u8[32]){ 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
+ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
+ 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
+ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+ .secret = (u8[32]){ 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
+ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
+ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
+ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
+ .b_public = (u8[32]){ 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
+ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
+ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
+ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
+ .expected_ss = (u8[32]){ 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
+ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
+ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
+ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+ .secret = (u8[32]){ 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
+ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
+ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
+ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
+ .b_public = (u8[32]){ 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
+ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
+ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
+ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
+ .expected_ss = (u8[32]){ 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
+ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
+ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
+ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+ .secret = (u8[32]){ 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
+ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
+ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
+ 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
+ .b_public = (u8[32]){ 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
+ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
+ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
+ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
+ .expected_ss = (u8[32]){ 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
+ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
+ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
+ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
+ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
+ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
+ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
+ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
+ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
+ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
+ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
+ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
+ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
+ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
+ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
+ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
+ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
+ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
+ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
+ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
+ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
+ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
+ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
+ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
+ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
+ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
+ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
+ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
+ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
+ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
+ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
+ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
+ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
+ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
+ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
+ 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
+ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
+ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
+ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
+ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+ .secret = (u8[32]){ 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
+ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
+ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
+ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
+ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
+ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
+ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
+ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
+ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
+ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
+ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
+ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
+ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
+ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
+ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
+ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
+ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
+ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
+ .expected_ss = (u8[32]){ 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
+ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
+ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
+ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
+ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
+ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
+ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+ .expected_ss = (u8[32]){ 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
+ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
+ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
+ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
+ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
+ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
+ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
+ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
+ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
+ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
+ .expected_ss = (u8[32]){ 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
+ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
+ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
+ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
+ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
+ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
+ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .expected_ss = (u8[32]){ 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
+ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
+ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
+ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
+ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
+ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
+ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
+ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
+ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
+ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+ .secret = (u8[32]){ 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
+ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
+ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
+ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
+ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
+ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
+ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
+ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
+ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
+ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
+ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
+ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
+ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
+ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
+ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
+ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
+ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
+ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
+ .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
+ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
+ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
+ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
+ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
+ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
+ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
+ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
+ 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
+ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
+ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
+ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
+ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
+ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .expected_ss = (u8[32]){ 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
+ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
+ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
+ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
+ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
+ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
+ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
+ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .expected_ss = (u8[32]){ 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
+ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
+ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
+ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
+ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
+ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
+ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
+ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .expected_ss = (u8[32]){ 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
+ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
+ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
+ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
+ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
+ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
+ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
+ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .expected_ss = (u8[32]){ 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
+ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
+ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
+ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
+ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
+ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
+ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
+ .b_public = (u8[32]){ 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
+ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
+ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
+ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
+ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
+ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
+ 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
+ .b_public = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
+ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
+ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
+ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
+ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
+ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
+ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
+ .b_public = (u8[32]){ 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
+ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
+ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
+ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
+ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
+ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
+ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
+ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
+ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
+ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
+ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
+ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
+ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
+ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
+ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
+ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
+ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
+ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
+ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
+ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
+ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
+ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
+ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
+ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
+ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
+ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
+ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
+ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
+ .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
+ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
+ 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
+ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
+ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
+ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
+ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
+ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
+ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
+ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
+ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+ .secret = (u8[32]){ 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
+ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
+ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
+ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .expected_ss = (u8[32]){ 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
+ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
+ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
+ 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - RFC 7748 */
+{
+ .secret = (u8[32]){ 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
+ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - RFC 7748 */
+{
+ .secret = (u8[32]){ 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
+ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
+ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
+ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
+ .b_public = (u8[32]){ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
+ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
+ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
+ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
+ .expected_ss = (u8[32]){ 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
+ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
+ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
+ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
+ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
+ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
+ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
+ .expected_ss = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
+ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
+ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
+ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
+ .expected_ss = (u8[32]){ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
+ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
+ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
+ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
+ .expected_ss = (u8[32]){ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
+ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
+ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
+ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
+ .expected_ss = (u8[32]){ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
+ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
+ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
+ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
+ .expected_ss = (u8[32]){ 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
+ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
+ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
+ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
+ .expected_ss = (u8[32]){ 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
+ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
+ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
+ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
+ .expected_ss = (u8[32]){ 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
+ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
+ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
+ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
+ .expected_ss = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
+ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
+ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
+ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
+ .expected_ss = (u8[32]){ 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
+ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
+ 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
+ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
+ .expected_ss = (u8[32]){ 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
+ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
+ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
+ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
+ .expected_ss = (u8[32]){ 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
+ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
+ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
+ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
+ .expected_ss = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
+ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
+ 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
+ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
+ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .b_public = (u8[32]){ 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
+ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
+ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
+ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
+ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .b_public = (u8[32]){ 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
+ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
+ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
+ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
+ .expected_ss = (u8[32]){ 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
+ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
+ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
+ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .b_public = (u8[32]){ 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
+ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
+ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
+ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
+ .expected_ss = (u8[32]){ 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
+ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
+ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
+ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .b_public = (u8[32]){ 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
+ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
+ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
+ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
+ .expected_ss = (u8[32]){ 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
+ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
+ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
+ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .b_public = (u8[32]){ 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
+ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
+ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
+ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
+ .expected_ss = (u8[32]){ 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
+ 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
+ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
+ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .b_public = (u8[32]){ 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
+ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
+ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
+ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
+ .expected_ss = (u8[32]){ 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
+ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
+ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
+ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - private key == -1 (mod order) */
+{
+ .secret = (u8[32]){ 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
+ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
+ .b_public = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+ .expected_ss = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+},
+/* wycheproof - private key == 1 (mod order) on twist */
+{
+ .secret = (u8[32]){ 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
+ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
+ .b_public = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+ .expected_ss = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+ .secret_size = 32,
+ .b_public_size = 32,
+ .expected_ss_size = 32,
+
+}
+};
+
static const struct kpp_testvec ecdh_tv_template[] = {
{
#ifndef CONFIG_CRYPTO_FIPS
--
2.18.2
From 19e6bba9e31ff0202097c57d184ba73eebf01980 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:34 +0100
Subject: [PATCH 027/100] crypto: curve25519 - implement generic KPP driver
commit ee772cb641135739c1530647391d5a04c39db192 upstream.
Expose the generic Curve25519 library via the crypto API KPP interface.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/Kconfig | 5 +++
crypto/Makefile | 1 +
crypto/curve25519-generic.c | 90 +++++++++++++++++++++++++++++++++++++
3 files changed, 96 insertions(+)
create mode 100644 crypto/curve25519-generic.c
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 8fd3954bf64c..a3fc859830c1 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -264,6 +264,11 @@ config CRYPTO_ECRDSA
standard algorithms (called GOST algorithms). Only signature verification
is implemented.
+config CRYPTO_CURVE25519
+ tristate "Curve25519 algorithm"
+ select CRYPTO_KPP
+ select CRYPTO_LIB_CURVE25519_GENERIC
+
comment "Authenticated Encryption with Associated Data"
config CRYPTO_CCM
diff --git a/crypto/Makefile b/crypto/Makefile
index fd27edea7c8e..4e7a0a8f7e35 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -167,6 +167,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
obj-$(CONFIG_CRYPTO_OFB) += ofb.o
obj-$(CONFIG_CRYPTO_ECC) += ecc.o
obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o
+obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o
ecdh_generic-y += ecdh.o
ecdh_generic-y += ecdh_helper.o
diff --git a/crypto/curve25519-generic.c b/crypto/curve25519-generic.c
new file mode 100644
index 000000000000..bd88fd571393
--- /dev/null
+++ b/crypto/curve25519-generic.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/kpp.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+ u8 const *bp;
+
+ if (req->src) {
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+ bp = public_key;
+ } else {
+ bp = curve25519_base_point;
+ }
+
+ curve25519_generic(buf, secret, bp);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-generic",
+ .base.cra_priority = 100,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_compute_value,
+ .compute_shared_secret = curve25519_compute_value,
+ .max_size = curve25519_max_size,
+};
+
+static int curve25519_init(void)
+{
+ return crypto_register_kpp(&curve25519_alg);
+}
+
+static void curve25519_exit(void)
+{
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+subsys_initcall(curve25519_init);
+module_exit(curve25519_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-generic");
+MODULE_LICENSE("GPL");
--
2.18.2
From 9564b9cc289e700a1cbdf08d1ab3f81ae442db9f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:35 +0100
Subject: [PATCH 028/100] crypto: lib/curve25519 - work around Clang stack
spilling issue
commit 660bb8e1f833ea63185fe80fde847e3e42f18e3b upstream.
Arnd reports that the 32-bit generic library code for Curve25119 ends
up using an excessive amount of stack space when built with Clang:
lib/crypto/curve25519-fiat32.c:756:6: error: stack frame size
of 1384 bytes in function 'curve25519_generic'
[-Werror,-Wframe-larger-than=]
Let's give some hints to the compiler regarding which routines should
not be inlined, to prevent it from running out of registers and spilling
to the stack. The resulting code performs identically under both GCC
and Clang, and makes the warning go away.
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/curve25519-fiat32.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c
index 1c455207341d..2fde0ec33dbd 100644
--- a/lib/crypto/curve25519-fiat32.c
+++ b/lib/crypto/curve25519-fiat32.c
@@ -223,7 +223,7 @@ static __always_inline void fe_1(fe *h)
h->v[0] = 1;
}
-static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -266,7 +266,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
fe_add_impl(h->v, f->v, g->v);
}
-static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -309,7 +309,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
fe_sub_impl(h->v, f->v, g->v);
}
-static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -441,7 +441,7 @@ fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
fe_mul_impl(h->v, f->v, g->v);
}
-static void fe_sqr_impl(u32 out[10], const u32 in1[10])
+static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];
@@ -619,7 +619,7 @@ static __always_inline void fe_invert(fe *out, const fe *z)
*
* Preconditions: b in {0,1}
*/
-static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
+static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
{
unsigned i;
b = 0 - b;
--
2.18.2
From 02e323f7bc2dc013b79e8cb4d742c8f827598824 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:36 +0100
Subject: [PATCH 029/100] crypto: curve25519 - x86_64 library and KPP
implementations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit bb611bdfd6be34d9f822c73305fcc83720499d38 upstream.
This implementation is the fastest available x86_64 implementation, and
unlike Sandy2x, it doesn't requie use of the floating point registers at
all. Instead it makes use of BMI2 and ADX, available on recent
microarchitectures. The implementation was written by Armando
Faz-Hernández with contributions (upstream) from Samuel Neves and me,
in addition to further changes in the kernel implementation from us.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
[ardb: - move to arch/x86/crypto
- wire into lib/crypto framework
- implement crypto API KPP hooks ]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/Makefile | 1 +
arch/x86/crypto/curve25519-x86_64.c | 2475 +++++++++++++++++++++++++++
crypto/Kconfig | 6 +
3 files changed, 2482 insertions(+)
create mode 100644 arch/x86/crypto/curve25519-x86_64.c
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 922c8ecfa00f..958440eae27e 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
new file mode 100644
index 000000000000..a52a3fb15727
--- /dev/null
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -0,0 +1,2475 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
+
+enum { NUM_WORDS_ELTFP25519 = 4 };
+typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
+typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
+
+#define mul_eltfp25519_1w_adx(c, a, b) do { \
+ mul_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_1w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
+ mul_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_1w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_adx(a) do { \
+ sqr_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_1w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_bmi2(a) do { \
+ sqr_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_1w_bmi2(a, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_adx(c, a, b) do { \
+ mul2_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_2w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
+ mul2_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_2w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_adx(a) do { \
+ sqr2_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_2w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_bmi2(a) do { \
+ sqr2_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_2w_bmi2(a, m.buffer); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_adx(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_adx(a); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_bmi2(a); \
+} while (0)
+
+#define copy_eltfp25519_1w(C, A) do { \
+ (C)[0] = (A)[0]; \
+ (C)[1] = (A)[1]; \
+ (C)[2] = (A)[2]; \
+ (C)[3] = (A)[3]; \
+} while (0)
+
+#define setzero_eltfp25519_1w(C) do { \
+ (C)[0] = 0; \
+ (C)[1] = 0; \
+ (C)[2] = 0; \
+ (C)[3] = 0; \
+} while (0)
+
+__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
+ /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
+ 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
+ /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
+ 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
+ /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
+ 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
+ /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
+ 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
+ /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
+ 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
+ /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
+ 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
+ /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
+ 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
+ /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
+ 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
+ /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
+ 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
+ /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
+ 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
+ /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
+ 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
+ /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
+ 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
+ /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
+ 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
+ /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
+ 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
+ /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
+ 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
+ /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
+ 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
+ /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
+ 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
+ /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
+ 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
+ /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
+ 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
+ /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
+ 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
+ /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
+ 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
+ /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
+ 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
+ /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
+ 0x23758739f630a257UL, 0x295a407a01a78580UL,
+ /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
+ 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
+ /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
+ 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
+ /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
+ 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
+ /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
+ 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
+ /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
+ 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
+ /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
+ 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
+ /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
+ 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
+ /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
+ 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
+ /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
+ 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
+ /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
+ 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
+ /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
+ 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
+ /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
+ 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
+ /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
+ 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
+ /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
+ 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
+ /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
+ 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
+ /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
+ 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
+ /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
+ 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
+ /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
+ 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
+ /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
+ 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
+ /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
+ 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
+ /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
+ 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
+ /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
+ 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
+ /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
+ 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
+ /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
+ 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
+ /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
+ 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
+ /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
+ 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
+ /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
+ 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
+ /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
+ 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
+ /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
+ 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
+ /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
+ 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
+ /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
+ 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
+ /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
+ 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
+ /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
+ 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
+ /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
+ 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
+ /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
+ 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
+ /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
+ 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
+ /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
+ 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
+ /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
+ 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
+ /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
+ 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
+ /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
+ 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
+ /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
+ 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
+ /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
+ 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
+ /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
+ 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
+ /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
+ 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
+ /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
+ 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
+ /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
+ 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
+ /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
+ 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
+ /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
+ 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
+ /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
+ 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
+ /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
+ 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
+ /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
+ 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
+ /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
+ 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
+ /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
+ 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
+ /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
+ 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
+ /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
+ 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
+ /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
+ 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
+ /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
+ 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
+ /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
+ 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
+ /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
+ 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
+ /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
+ 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
+ /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
+ 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
+ /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
+ 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
+ /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
+ 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
+ /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
+ 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
+ /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
+ 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
+ /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
+ 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
+ /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
+ 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
+ /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
+ 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
+ /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
+ 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
+ /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
+ 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
+ /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
+ 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
+ /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
+ 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
+ /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
+ 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
+ /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
+ 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
+ /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
+ 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
+ /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
+ 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
+ /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
+ 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
+ /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
+ 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
+ /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
+ 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
+ /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
+ 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
+ /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
+ 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
+ /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
+ 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
+ /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
+ 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
+ /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
+ 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
+ /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
+ 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
+ /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
+ 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
+ /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
+ 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
+ /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
+ 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
+ /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
+ 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
+ /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
+ 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
+ /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
+ 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
+ /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
+ 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
+ /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
+ 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
+ /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
+ 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
+ /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
+ 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
+ /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
+ 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
+ /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
+ 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
+ /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
+ 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
+ /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
+ 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
+ /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
+ 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
+ /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
+ 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
+ /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
+ 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
+ /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
+ 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
+ /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
+ 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
+ /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
+ 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
+ /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
+ 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
+ /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
+ 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
+ /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
+ 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
+ /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
+ 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
+ /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
+ 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
+ /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
+ 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
+ /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
+ 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
+ /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
+ 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
+ /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
+ 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
+ /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
+ 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
+ /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
+ 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
+ /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
+ 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
+ /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
+ 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
+ /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
+ 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
+ /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
+ 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
+ /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
+ 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
+ /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
+ 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
+ /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
+ 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
+ /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
+ 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
+ /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
+ 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
+ /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
+ 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
+ /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
+ 0x52d17436309d4253UL, 0x356f97e13efae576UL,
+ /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
+ 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
+ /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
+ 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
+ /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
+ 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
+ /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
+ 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
+ /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
+ 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
+ /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
+ 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
+ /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
+ 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
+ /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
+ 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
+ /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
+ 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
+ /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
+ 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
+ /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
+ 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
+ /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
+ 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
+ /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
+ 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
+ /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
+ 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
+ /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
+ 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
+ /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
+ 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
+ /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
+ 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
+ /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
+ 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
+ /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
+ 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
+ /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
+ 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
+ /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
+ 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
+ /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
+ 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
+ /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
+ 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
+ /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
+ 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
+ /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
+ 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
+ /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
+ 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
+ /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
+ 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
+ /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
+ 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
+ /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
+ 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
+ /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
+ 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
+ /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
+ 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
+ /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
+ 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
+ /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
+ 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
+ /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
+ 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
+ /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
+ 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
+ /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
+ 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
+ /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
+ 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
+ /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
+ 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
+ /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
+ 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
+ /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
+ 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
+ /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
+ 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
+ /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
+ 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
+ /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
+ 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
+ /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
+ 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
+ /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
+ 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
+ /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
+ 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
+ /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
+ 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
+ /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
+ 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
+ /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
+ 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
+ /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
+ 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
+ /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
+ 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
+ /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
+ 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
+ /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
+ 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
+ /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
+ 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
+ /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
+ 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
+ /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
+ 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
+ /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
+ 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
+ /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
+ 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
+ /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
+ 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
+ /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
+ 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
+ /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
+ 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
+ /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
+ 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
+ /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
+ 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
+ /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
+ 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
+ /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
+ 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
+ /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
+ 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
+ /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
+ 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
+ /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
+ 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
+ /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
+ 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
+ /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
+ 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
+ /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
+ 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
+ /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
+ 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
+ /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
+ 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
+ /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
+ 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
+ /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
+ 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
+ /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
+ 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
+ /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
+ 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
+ /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
+ 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
+ /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
+ 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
+ /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
+ 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
+ /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
+ 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
+ /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
+ 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
+ /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
+ 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
+ /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
+ 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
+ /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
+ 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
+ /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
+ 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
+ /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
+ 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
+ /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
+ 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
+ /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
+ 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
+ /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
+ 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
+ /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
+ 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
+ /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
+ 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
+ /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
+ 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
+ /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
+ 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
+ /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
+ 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
+ /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
+ 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
+ /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
+ 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
+ /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
+ 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
+ /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
+ 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
+ /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
+ 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
+ /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
+ 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
+ /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
+ 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
+};
+
+/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
+ * a is two 256-bit integers: a0[0:3] and a1[4:7]
+ * b is two 256-bit integers: b0[0:3] and b1[4:7]
+ */
+static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "xorl %%r14d, %%r14d ;"
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, 64(%0);"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 72(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 80(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "movq %%r8, 64(%0) ;"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 80(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 56(%1), %%rdx ;" /* B[3] */
+ "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
+ "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
+ "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
+
+ "movq 48(%1), %%rdx ;" /* B[2] */
+ "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
+ "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox 64(%1), %%r8 ;"
+ "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 72(%1), %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 80(%1), %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 88(%1), %%r11 ;"
+ /****************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /****************************************/
+ "adcq $0, %%rcx ;"
+ "addq 64(%1), %%r8 ;"
+ "adcq 72(%1), %%r9 ;"
+ "adcq 80(%1), %%r10 ;"
+ "adcq 88(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "movq %%r10, 8(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 8(%0), %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 16(%0), %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 24(%0), %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 32(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq %%r14, 48(%0) ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+ "movq %%rax, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
+ "%r13", "%r14", "%r15");
+}
+
+static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "xorl %%ecx, %%ecx ;"
+ "movq (%2), %%r8 ;"
+ "adcx (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcx 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcx 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcx 24(%1), %%r11 ;"
+ "cmovc %%eax, %%ecx ;"
+ "xorl %%eax, %%eax ;"
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rax, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rax, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rax, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $38, %%ecx ;"
+ "cmovc %%ecx, %%eax ;"
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%2), %%r8 ;"
+ "addq (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcq 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcq 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcq 24(%1), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%1), %%r8 ;"
+ "subq (%2), %%r8 ;"
+ "movq 8(%1), %%r9 ;"
+ "sbbq 8(%2), %%r9 ;"
+ "movq 16(%1), %%r10 ;"
+ "sbbq 16(%2), %%r10 ;"
+ "movq 24(%1), %%r11 ;"
+ "sbbq 24(%2), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "sbbq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "sbbq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "sbbq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
+static __always_inline void
+mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+{
+ const u64 a24 = 121666;
+ asm volatile(
+ "movq %2, %%rdx ;"
+ "mulx (%1), %%r8, %%r10 ;"
+ "mulx 8(%1), %%r9, %%r11 ;"
+ "addq %%r10, %%r9 ;"
+ "mulx 16(%1), %%r10, %%rax ;"
+ "adcq %%r11, %%r10 ;"
+ "mulx 24(%1), %%r11, %%rcx ;"
+ "adcq %%rax, %%r11 ;"
+ /**************************/
+ "adcq $0, %%rcx ;"
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
+ "imul %%rdx, %%rcx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(a24)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[4];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_adx(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 2);
+ mul_eltfp25519_1w_adx(T[0], a, T[2]);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 1);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 10);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 20);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 10);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_adx(T[0], 50);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 100);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 50);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[5];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_bmi2(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 2);
+ mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 1);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 10);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 20);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 10);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[0], 50);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 100);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 50);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
+ * with a number such that 0 <= C < 2**255-19.
+ */
+static __always_inline void fred_eltfp25519_1w(u64 *const c)
+{
+ u64 tmp0 = 38, tmp1 = 19;
+ asm volatile(
+ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
+ "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
+
+ /* Add either 19 or 38 to c */
+ "addq %4, %0 ;"
+ "adcq $0, %1 ;"
+ "adcq $0, %2 ;"
+ "adcq $0, %3 ;"
+
+ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
+ "movl $0, %k4 ;"
+ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
+ "btrq $63, %3 ;" /* Clear bit 255 */
+
+ /* Subtract 19 if necessary */
+ "subq %4, %0 ;"
+ "sbbq $0, %1 ;"
+ "sbbq $0, %2 ;"
+ "sbbq $0, %3 ;"
+
+ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
+ "+r"(tmp1)
+ :
+ : "memory", "cc");
+}
+
+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+{
+ u64 temp;
+ asm volatile(
+ "test %9, %9 ;"
+ "movq %0, %8 ;"
+ "cmovnzq %4, %0 ;"
+ "cmovnzq %8, %4 ;"
+ "movq %1, %8 ;"
+ "cmovnzq %5, %1 ;"
+ "cmovnzq %8, %5 ;"
+ "movq %2, %8 ;"
+ "cmovnzq %6, %2 ;"
+ "cmovnzq %8, %6 ;"
+ "movq %3, %8 ;"
+ "cmovnzq %7, %3 ;"
+ "cmovnzq %8, %7 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
+ "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
+ "=r"(temp)
+ : "r"(bit)
+ : "cc"
+ );
+}
+
+static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+{
+ asm volatile(
+ "test %4, %4 ;"
+ "cmovnzq %5, %0 ;"
+ "cmovnzq %6, %1 ;"
+ "cmovnzq %7, %2 ;"
+ "cmovnzq %8, %3 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
+ : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
+ : "cc"
+ );
+}
+
+static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_adx(A, Qz);
+ mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_adx(A, Zr1);
+ mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_bmi2(A, Qz);
+ mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_bmi2(A, Zr1);
+ mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx(mypublic, secret, basepoint);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2(mypublic, secret, basepoint);
+ else
+ curve25519_generic(mypublic, secret, basepoint);
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx_base(pub, secret);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2_base(pub, secret);
+ else
+ curve25519_generic(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_generate_public_key(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (req->src)
+ return -EINVAL;
+
+ curve25519_base_arch(buf, secret);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_shared_secret(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (!req->src)
+ return -EINVAL;
+
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+
+ curve25519_arch(buf, secret, public_key);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-x86",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_generate_public_key,
+ .compute_shared_secret = curve25519_compute_shared_secret,
+ .max_size = curve25519_max_size,
+};
+
+static int __init curve25519_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2))
+ static_branch_enable(&curve25519_use_bmi2);
+ else if (boot_cpu_has(X86_FEATURE_ADX))
+ static_branch_enable(&curve25519_use_adx);
+ else
+ return 0;
+ return crypto_register_kpp(&curve25519_alg);
+}
+
+static void __exit curve25519_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2) ||
+ boot_cpu_has(X86_FEATURE_ADX))
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(curve25519_mod_init);
+module_exit(curve25519_mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a3fc859830c1..b8b738bcc312 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -269,6 +269,12 @@ config CRYPTO_CURVE25519
select CRYPTO_KPP
select CRYPTO_LIB_CURVE25519_GENERIC
+config CRYPTO_CURVE25519_X86
+ tristate "x86_64 accelerated Curve25519 scalar multiplication library"
+ depends on X86 && 64BIT
+ select CRYPTO_LIB_CURVE25519_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
comment "Authenticated Encryption with Associated Data"
config CRYPTO_CCM
--
2.18.2
From ef199be33340e0c41e7ae3bae7e90821fdff04dc Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:37 +0100
Subject: [PATCH 030/100] crypto: arm/curve25519 - import Bernstein and
Schwabe's Curve25519 ARM implementation
commit f0fb006b604f98e2309a30f34ef455ac734f7c1c upstream.
This comes from Dan Bernstein and Peter Schwabe's public domain NEON
code, and is included here in raw form so that subsequent commits that
fix these up for the kernel can see how it has changed. This code does
have some entirely cosmetic formatting differences, adding indentation
and so forth, so that when we actually port it for use in the kernel in
the subsequent commit, it's obvious what's changed in the process.
This code originates from SUPERCOP 20180818, available at
<https://bench.cr.yp.to/supercop.html>.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/curve25519-core.S | 2105 +++++++++++++++++++++++++++++
1 file changed, 2105 insertions(+)
create mode 100644 arch/arm/crypto/curve25519-core.S
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
new file mode 100644
index 000000000000..f33b85fef382
--- /dev/null
+++ b/arch/arm/crypto/curve25519-core.S
@@ -0,0 +1,2105 @@
+/*
+ * Public domain code from Daniel J. Bernstein and Peter Schwabe, from
+ * SUPERCOP's curve25519/neon2/scalarmult.s.
+ */
+
+.fpu neon
+.text
+.align 4
+.global _crypto_scalarmult_curve25519_neon2
+.global crypto_scalarmult_curve25519_neon2
+.type _crypto_scalarmult_curve25519_neon2 STT_FUNC
+.type crypto_scalarmult_curve25519_neon2 STT_FUNC
+ _crypto_scalarmult_curve25519_neon2:
+ crypto_scalarmult_curve25519_neon2:
+ vpush {q4, q5, q6, q7}
+ mov r12, sp
+ sub sp, sp, #736
+ and sp, sp, #0xffffffe0
+ strd r4, [sp, #0]
+ strd r6, [sp, #8]
+ strd r8, [sp, #16]
+ strd r10, [sp, #24]
+ str r12, [sp, #480]
+ str r14, [sp, #484]
+ mov r0, r0
+ mov r1, r1
+ mov r2, r2
+ add r3, sp, #32
+ ldr r4, =0
+ ldr r5, =254
+ vmov.i32 q0, #1
+ vshr.u64 q1, q0, #7
+ vshr.u64 q0, q0, #8
+ vmov.i32 d4, #19
+ vmov.i32 d5, #38
+ add r6, sp, #512
+ vst1.8 {d2-d3}, [r6, : 128]
+ add r6, sp, #528
+ vst1.8 {d0-d1}, [r6, : 128]
+ add r6, sp, #544
+ vst1.8 {d4-d5}, [r6, : 128]
+ add r6, r3, #0
+ vmov.i32 q2, #0
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+ add r6, r3, #0
+ ldr r7, =960
+ sub r7, r7, #2
+ neg r7, r7
+ sub r7, r7, r7, LSL #7
+ str r7, [r6]
+ add r6, sp, #704
+ vld1.8 {d4-d5}, [r1]!
+ vld1.8 {d6-d7}, [r1]
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d6-d7}, [r6, : 128]
+ sub r1, r6, #16
+ ldrb r6, [r1]
+ and r6, r6, #248
+ strb r6, [r1]
+ ldrb r6, [r1, #31]
+ and r6, r6, #127
+ orr r6, r6, #64
+ strb r6, [r1, #31]
+ vmov.i64 q2, #0xffffffff
+ vshr.u64 q3, q2, #7
+ vshr.u64 q2, q2, #6
+ vld1.8 {d8}, [r2]
+ vld1.8 {d10}, [r2]
+ add r2, r2, #6
+ vld1.8 {d12}, [r2]
+ vld1.8 {d14}, [r2]
+ add r2, r2, #6
+ vld1.8 {d16}, [r2]
+ add r2, r2, #4
+ vld1.8 {d18}, [r2]
+ vld1.8 {d20}, [r2]
+ add r2, r2, #6
+ vld1.8 {d22}, [r2]
+ add r2, r2, #2
+ vld1.8 {d24}, [r2]
+ vld1.8 {d26}, [r2]
+ vshr.u64 q5, q5, #26
+ vshr.u64 q6, q6, #3
+ vshr.u64 q7, q7, #29
+ vshr.u64 q8, q8, #6
+ vshr.u64 q10, q10, #25
+ vshr.u64 q11, q11, #3
+ vshr.u64 q12, q12, #12
+ vshr.u64 q13, q13, #38
+ vand q4, q4, q2
+ vand q6, q6, q2
+ vand q8, q8, q2
+ vand q10, q10, q2
+ vand q2, q12, q2
+ vand q5, q5, q3
+ vand q7, q7, q3
+ vand q9, q9, q3
+ vand q11, q11, q3
+ vand q3, q13, q3
+ add r2, r3, #48
+ vadd.i64 q12, q4, q1
+ vadd.i64 q13, q10, q1
+ vshr.s64 q12, q12, #26
+ vshr.s64 q13, q13, #26
+ vadd.i64 q5, q5, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q14, q5, q0
+ vadd.i64 q11, q11, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q11, q0
+ vsub.i64 q4, q4, q12
+ vshr.s64 q12, q14, #25
+ vsub.i64 q10, q10, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q14, q6, q1
+ vadd.i64 q2, q2, q13
+ vsub.i64 q5, q5, q12
+ vshr.s64 q12, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q1
+ vadd.i64 q7, q7, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q15, q7, q0
+ vsub.i64 q11, q11, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q12
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q3, q0
+ vadd.i64 q8, q8, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q15, q8, q1
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q7, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q9, q9, q12
+ vtrn.32 d12, d14
+ vshl.i64 q12, q12, #26
+ vtrn.32 d13, d15
+ vadd.i64 q0, q9, q0
+ vadd.i64 q4, q4, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q6, q13, #4
+ vsub.i64 q7, q8, q12
+ vshr.s64 q0, q0, #25
+ vadd.i64 q4, q4, q6
+ vadd.i64 q6, q10, q0
+ vshl.i64 q0, q0, #25
+ vadd.i64 q8, q6, q1
+ vadd.i64 q4, q4, q13
+ vshl.i64 q10, q13, #25
+ vadd.i64 q1, q4, q1
+ vsub.i64 q0, q9, q0
+ vshr.s64 q8, q8, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d14, d0
+ vshr.s64 q1, q1, #26
+ vtrn.32 d15, d1
+ vadd.i64 q0, q11, q8
+ vst1.8 d14, [r2, : 64]
+ vshl.i64 q7, q8, #26
+ vadd.i64 q5, q5, q1
+ vtrn.32 d4, d6
+ vshl.i64 q1, q1, #26
+ vtrn.32 d5, d7
+ vsub.i64 q3, q6, q7
+ add r2, r2, #16
+ vsub.i64 q1, q4, q1
+ vst1.8 d4, [r2, : 64]
+ vtrn.32 d6, d0
+ vtrn.32 d7, d1
+ sub r2, r2, #8
+ vtrn.32 d2, d10
+ vtrn.32 d3, d11
+ vst1.8 d6, [r2, : 64]
+ sub r2, r2, #24
+ vst1.8 d2, [r2, : 64]
+ add r2, r3, #96
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #144
+ vmov.i32 q0, #0
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #240
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #48
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+._mainloop:
+ mov r2, r5, LSR #3
+ and r6, r5, #7
+ ldrb r2, [r1, r2]
+ mov r2, r2, LSR r6
+ and r2, r2, #1
+ str r5, [sp, #488]
+ eor r4, r4, r2
+ str r2, [sp, #492]
+ neg r2, r4
+ add r4, r3, #96
+ add r5, r3, #192
+ add r6, r3, #144
+ vld1.8 {d8-d9}, [r4, : 128]!
+ add r7, r3, #240
+ vld1.8 {d10-d11}, [r5, : 128]!
+ veor q6, q4, q5
+ vld1.8 {d14-d15}, [r6, : 128]!
+ vdup.i32 q8, r2
+ vld1.8 {d18-d19}, [r7, : 128]!
+ veor q10, q7, q9
+ vld1.8 {d22-d23}, [r4, : 128]!
+ vand q6, q6, q8
+ vld1.8 {d24-d25}, [r5, : 128]!
+ vand q10, q10, q8
+ vld1.8 {d26-d27}, [r6, : 128]!
+ veor q4, q4, q6
+ vld1.8 {d28-d29}, [r7, : 128]!
+ veor q5, q5, q6
+ vld1.8 {d0}, [r4, : 64]
+ veor q6, q7, q10
+ vld1.8 {d2}, [r5, : 64]
+ veor q7, q9, q10
+ vld1.8 {d4}, [r6, : 64]
+ veor q9, q11, q12
+ vld1.8 {d6}, [r7, : 64]
+ veor q10, q0, q1
+ sub r2, r4, #32
+ vand q9, q9, q8
+ sub r4, r5, #32
+ vand q10, q10, q8
+ sub r5, r6, #32
+ veor q11, q11, q9
+ sub r6, r7, #32
+ veor q0, q0, q10
+ veor q9, q12, q9
+ veor q1, q1, q10
+ veor q10, q13, q14
+ veor q12, q2, q3
+ vand q10, q10, q8
+ vand q8, q12, q8
+ veor q12, q13, q10
+ veor q2, q2, q8
+ veor q10, q14, q10
+ veor q3, q3, q8
+ vadd.i32 q8, q4, q6
+ vsub.i32 q4, q4, q6
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vadd.i32 q6, q11, q12
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q4, q11, q12
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vadd.i32 q6, q0, q2
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q0, q0, q2
+ vst1.8 d12, [r2, : 64]
+ vadd.i32 q2, q5, q7
+ vst1.8 d0, [r5, : 64]
+ vsub.i32 q0, q5, q7
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q9, q10
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q9, q10
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q1, q3
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q1, q3
+ vst1.8 d4, [r4, : 64]
+ vst1.8 d0, [r6, : 64]
+ add r2, sp, #544
+ add r4, r3, #96
+ add r5, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #512
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #288
+ vshl.i64 q12, q12, #25
+ add r4, r3, #336
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #240
+ add r4, r3, #96
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #144
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #192
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #560
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q6, q13, #1
+ add r2, sp, #576
+ vst1.8 {d20-d21}, [r2, : 128]
+ vshl.i32 q10, q14, #1
+ add r2, sp, #592
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ add r2, sp, #608
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ add r2, sp, #624
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ add r2, sp, #640
+ vst1.8 {d20-d21}, [r2, : 128]
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ add r2, sp, #656
+ vst1.8 {d10-d11}, [r2, : 128]
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ add r2, sp, #672
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #576
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ add r2, sp, #576
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #656
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #624
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #608
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ add r2, sp, #608
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #560
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #640
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #592
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #512
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #528
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #576
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #672
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #608
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #144
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, r3, #288
+ add r4, r3, #336
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vsub.i32 q1, q1, q2
+ add r5, r3, #240
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vsub.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #144
+ add r4, r3, #96
+ add r5, r3, #144
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q2, q0, q1
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vsub.i32 q4, q1, q3
+ vadd.i32 q1, q1, q3
+ vld1.8 {d6}, [r2, : 64]
+ vld1.8 {d10}, [r4, : 64]
+ vsub.i32 q6, q3, q5
+ vadd.i32 q3, q3, q5
+ vst1.8 {d4-d5}, [r5, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d12, [r5, : 64]
+ vst1.8 d6, [r6, : 64]
+ add r2, r3, #0
+ add r4, r3, #240
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #336
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #288
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #560
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q6, q13, #1
+ add r2, sp, #576
+ vst1.8 {d20-d21}, [r2, : 128]
+ vshl.i32 q10, q14, #1
+ add r2, sp, #592
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ add r2, sp, #608
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ add r2, sp, #624
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ add r2, sp, #640
+ vst1.8 {d20-d21}, [r2, : 128]
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ add r2, sp, #656
+ vst1.8 {d10-d11}, [r2, : 128]
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ add r2, sp, #672
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #576
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ add r2, sp, #576
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #656
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #624
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #608
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ add r2, sp, #608
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #560
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #640
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #592
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #512
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #528
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #576
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #672
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #608
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #288
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, sp, #544
+ add r4, r3, #144
+ add r5, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #512
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #144
+ vshl.i64 q12, q12, #25
+ add r4, r3, #192
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #336
+ add r4, r3, #288
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q1, q1, q2
+ add r5, r3, #288
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vadd.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #48
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #288
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #240
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #560
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q6, q13, #1
+ add r2, sp, #576
+ vst1.8 {d20-d21}, [r2, : 128]
+ vshl.i32 q10, q14, #1
+ add r2, sp, #592
+ vst1.8 {d12-d13}, [r2, : 128]
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ add r2, sp, #608
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ add r2, sp, #624
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ add r2, sp, #640
+ vst1.8 {d20-d21}, [r2, : 128]
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ add r2, sp, #656
+ vst1.8 {d10-d11}, [r2, : 128]
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ add r2, sp, #672
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #576
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ add r2, sp, #576
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #656
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #624
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #608
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ add r2, sp, #608
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #560
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #640
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #592
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #512
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #528
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #576
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #672
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #608
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #240
+ vshl.i64 q7, q7, #25
+ add r4, r3, #144
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ ldr r2, [sp, #488]
+ ldr r4, [sp, #492]
+ subs r5, r2, #1
+ bge ._mainloop
+ add r1, r3, #144
+ add r2, r3, #336
+ vld1.8 {d0-d1}, [r1, : 128]!
+ vld1.8 {d2-d3}, [r1, : 128]!
+ vld1.8 {d4}, [r1, : 64]
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 d4, [r2, : 64]
+ ldr r1, =0
+._invertloop:
+ add r2, r3, #144
+ ldr r4, =0
+ ldr r5, =2
+ cmp r1, #1
+ ldreq r5, =1
+ addeq r2, r3, #336
+ addeq r4, r3, #48
+ cmp r1, #2
+ ldreq r5, =1
+ addeq r2, r3, #48
+ cmp r1, #3
+ ldreq r5, =5
+ addeq r4, r3, #336
+ cmp r1, #4
+ ldreq r5, =10
+ cmp r1, #5
+ ldreq r5, =20
+ cmp r1, #6
+ ldreq r5, =10
+ addeq r2, r3, #336
+ addeq r4, r3, #336
+ cmp r1, #7
+ ldreq r5, =50
+ cmp r1, #8
+ ldreq r5, =100
+ cmp r1, #9
+ ldreq r5, =50
+ addeq r2, r3, #336
+ cmp r1, #10
+ ldreq r5, =5
+ addeq r2, r3, #48
+ cmp r1, #11
+ ldreq r5, =0
+ addeq r2, r3, #96
+ add r6, r3, #144
+ add r7, r3, #288
+ vld1.8 {d0-d1}, [r6, : 128]!
+ vld1.8 {d2-d3}, [r6, : 128]!
+ vld1.8 {d4}, [r6, : 64]
+ vst1.8 {d0-d1}, [r7, : 128]!
+ vst1.8 {d2-d3}, [r7, : 128]!
+ vst1.8 d4, [r7, : 64]
+ cmp r5, #0
+ beq ._skipsquaringloop
+._squaringloop:
+ add r6, r3, #288
+ add r7, r3, #288
+ add r8, r3, #288
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r7, : 128]!
+ vld1.8 {d6-d7}, [r7, : 128]!
+ vld1.8 {d9}, [r7, : 64]
+ vld1.8 {d10-d11}, [r6, : 128]!
+ add r7, sp, #416
+ vld1.8 {d12-d13}, [r6, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r6, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r7, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r7, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r7, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r7, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r7, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r6, r7, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r6, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r6, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r6, r6, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r6, : 64]
+ sub r6, r6, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r7, sp, #512
+ vld1.8 {d14-d15}, [r7, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r6, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r6, : 64]!
+ add r6, sp, #528
+ vld1.8 {d20-d21}, [r6, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r6, r8, #8
+ vst1.8 d0, [r6, : 64]
+ vsub.i64 d19, d19, d9
+ add r6, r6, #16
+ vst1.8 d16, [r6, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r6, r6, #8
+ vst1.8 d6, [r6, : 64]
+ add r6, r6, #16
+ vst1.8 d18, [r6, : 64]
+ vzip.i32 q2, q5
+ sub r6, r6, #32
+ vst1.8 d4, [r6, : 64]
+ subs r5, r5, #1
+ bhi ._squaringloop
+._skipsquaringloop:
+ mov r2, r2
+ add r5, r3, #288
+ add r6, r3, #144
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vld1.8 {d6-d7}, [r5, : 128]!
+ vld1.8 {d9}, [r5, : 64]
+ vld1.8 {d10-d11}, [r2, : 128]!
+ add r5, sp, #416
+ vld1.8 {d12-d13}, [r2, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r2, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r5, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r5, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r5, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r5, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r5, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r2, r5, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r2, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r2, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r2, r2, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r2, : 64]
+ sub r2, r2, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r5, sp, #512
+ vld1.8 {d14-d15}, [r5, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r2, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r2, : 64]!
+ add r2, sp, #528
+ vld1.8 {d20-d21}, [r2, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r2, r6, #8
+ vst1.8 d0, [r2, : 64]
+ vsub.i64 d19, d19, d9
+ add r2, r2, #16
+ vst1.8 d16, [r2, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r2, r2, #8
+ vst1.8 d6, [r2, : 64]
+ add r2, r2, #16
+ vst1.8 d18, [r2, : 64]
+ vzip.i32 q2, q5
+ sub r2, r2, #32
+ vst1.8 d4, [r2, : 64]
+ cmp r4, #0
+ beq ._skippostcopy
+ add r2, r3, #144
+ mov r4, r4
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+._skippostcopy:
+ cmp r1, #1
+ bne ._skipfinalcopy
+ add r2, r3, #288
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+._skipfinalcopy:
+ add r1, r1, #1
+ cmp r1, #12
+ blo ._invertloop
+ add r1, r3, #144
+ ldr r2, [r1], #4
+ ldr r3, [r1], #4
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ ldr r6, [r1], #4
+ ldr r7, [r1], #4
+ ldr r8, [r1], #4
+ ldr r9, [r1], #4
+ ldr r10, [r1], #4
+ ldr r1, [r1]
+ add r11, r1, r1, LSL #4
+ add r11, r11, r1, LSL #1
+ add r11, r11, #16777216
+ mov r11, r11, ASR #25
+ add r11, r11, r2
+ mov r11, r11, ASR #26
+ add r11, r11, r3
+ mov r11, r11, ASR #25
+ add r11, r11, r4
+ mov r11, r11, ASR #26
+ add r11, r11, r5
+ mov r11, r11, ASR #25
+ add r11, r11, r6
+ mov r11, r11, ASR #26
+ add r11, r11, r7
+ mov r11, r11, ASR #25
+ add r11, r11, r8
+ mov r11, r11, ASR #26
+ add r11, r11, r9
+ mov r11, r11, ASR #25
+ add r11, r11, r10
+ mov r11, r11, ASR #26
+ add r11, r11, r1
+ mov r11, r11, ASR #25
+ add r2, r2, r11
+ add r2, r2, r11, LSL #1
+ add r2, r2, r11, LSL #4
+ mov r11, r2, ASR #26
+ add r3, r3, r11
+ sub r2, r2, r11, LSL #26
+ mov r11, r3, ASR #25
+ add r4, r4, r11
+ sub r3, r3, r11, LSL #25
+ mov r11, r4, ASR #26
+ add r5, r5, r11
+ sub r4, r4, r11, LSL #26
+ mov r11, r5, ASR #25
+ add r6, r6, r11
+ sub r5, r5, r11, LSL #25
+ mov r11, r6, ASR #26
+ add r7, r7, r11
+ sub r6, r6, r11, LSL #26
+ mov r11, r7, ASR #25
+ add r8, r8, r11
+ sub r7, r7, r11, LSL #25
+ mov r11, r8, ASR #26
+ add r9, r9, r11
+ sub r8, r8, r11, LSL #26
+ mov r11, r9, ASR #25
+ add r10, r10, r11
+ sub r9, r9, r11, LSL #25
+ mov r11, r10, ASR #26
+ add r1, r1, r11
+ sub r10, r10, r11, LSL #26
+ mov r11, r1, ASR #25
+ sub r1, r1, r11, LSL #25
+ add r2, r2, r3, LSL #26
+ mov r3, r3, LSR #6
+ add r3, r3, r4, LSL #19
+ mov r4, r4, LSR #13
+ add r4, r4, r5, LSL #13
+ mov r5, r5, LSR #19
+ add r5, r5, r6, LSL #6
+ add r6, r7, r8, LSL #25
+ mov r7, r8, LSR #7
+ add r7, r7, r9, LSL #19
+ mov r8, r9, LSR #13
+ add r8, r8, r10, LSL #12
+ mov r9, r10, LSR #20
+ add r1, r9, r1, LSL #6
+ str r2, [r0], #4
+ str r3, [r0], #4
+ str r4, [r0], #4
+ str r5, [r0], #4
+ str r6, [r0], #4
+ str r7, [r0], #4
+ str r8, [r0], #4
+ str r1, [r0]
+ ldrd r4, [sp, #0]
+ ldrd r6, [sp, #8]
+ ldrd r8, [sp, #16]
+ ldrd r10, [sp, #24]
+ ldr r12, [sp, #480]
+ ldr r14, [sp, #484]
+ ldr r0, =0
+ mov sp, r12
+ vpop {q4, q5, q6, q7}
+ bx lr
--
2.18.2
From 69668393c502f10dbd39d1c89f312b3bdce30763 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 8 Nov 2019 13:22:38 +0100
Subject: [PATCH 031/100] crypto: arm/curve25519 - wire up NEON implementation
commit d8f1308a025fc7e00414194ed742d5f05a21e13c upstream.
This ports the SUPERCOP implementation for usage in kernel space. In
addition to the usual header, macro, and style changes required for
kernel space, it makes a few small changes to the code:
- The stack alignment is relaxed to 16 bytes.
- Superfluous mov statements have been removed.
- ldr for constants has been replaced with movw.
- ldreq has been replaced with moveq.
- The str epilogue has been made more idiomatic.
- SIMD registers are not pushed and popped at the beginning and end.
- The prologue and epilogue have been made idiomatic.
- A hole has been removed from the stack, saving 32 bytes.
- We write-back the base register whenever possible for vld1.8.
- Some multiplications have been reordered for better A7 performance.
There are more opportunities for cleanup, since this code is from qhasm,
which doesn't always do the most opportune thing. But even prior to
extensive hand optimizations, this code delivers significant performance
improvements (given in get_cycles() per call):
----------- -------------
| generic C | this commit |
------------ ----------- -------------
| Cortex-A7 | 49136 | 22395 |
------------ ----------- -------------
| Cortex-A17 | 17326 | 4983 |
------------ ----------- -------------
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
[ardb: - move to arch/arm/crypto
- wire into lib/crypto framework
- implement crypto API KPP hooks ]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/Kconfig | 6 +
arch/arm/crypto/Makefile | 2 +
arch/arm/crypto/curve25519-core.S | 347 +++++++++++++-----------------
arch/arm/crypto/curve25519-glue.c | 127 +++++++++++
4 files changed, 287 insertions(+), 195 deletions(-)
create mode 100644 arch/arm/crypto/curve25519-glue.c
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 2e8a9289bded..61fa7e4aa8f9 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -141,4 +141,10 @@ config CRYPTO_NHPOLY1305_NEON
depends on KERNEL_MODE_NEON
select CRYPTO_NHPOLY1305
+config CRYPTO_CURVE25519_NEON
+ tristate "NEON accelerated Curve25519 scalar multiplication library"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_LIB_CURVE25519_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4f6a8a81dabc..7700385cec9f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
+obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -58,6 +59,7 @@ chacha-neon-y := chacha-scalar-core.o chacha-glue.o
chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+curve25519-neon-y := curve25519-core.o curve25519-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
index f33b85fef382..be18af52e7dc 100644
--- a/arch/arm/crypto/curve25519-core.S
+++ b/arch/arm/crypto/curve25519-core.S
@@ -1,43 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Public domain code from Daniel J. Bernstein and Peter Schwabe, from
- * SUPERCOP's curve25519/neon2/scalarmult.s.
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
*/
-.fpu neon
+#include <linux/linkage.h>
+
.text
+.fpu neon
+.arch armv7-a
.align 4
-.global _crypto_scalarmult_curve25519_neon2
-.global crypto_scalarmult_curve25519_neon2
-.type _crypto_scalarmult_curve25519_neon2 STT_FUNC
-.type crypto_scalarmult_curve25519_neon2 STT_FUNC
- _crypto_scalarmult_curve25519_neon2:
- crypto_scalarmult_curve25519_neon2:
- vpush {q4, q5, q6, q7}
- mov r12, sp
- sub sp, sp, #736
- and sp, sp, #0xffffffe0
- strd r4, [sp, #0]
- strd r6, [sp, #8]
- strd r8, [sp, #16]
- strd r10, [sp, #24]
- str r12, [sp, #480]
- str r14, [sp, #484]
- mov r0, r0
- mov r1, r1
- mov r2, r2
- add r3, sp, #32
- ldr r4, =0
- ldr r5, =254
+
+ENTRY(curve25519_neon)
+ push {r4-r11, lr}
+ mov ip, sp
+ sub r3, sp, #704
+ and r3, r3, #0xfffffff0
+ mov sp, r3
+ movw r4, #0
+ movw r5, #254
vmov.i32 q0, #1
vshr.u64 q1, q0, #7
vshr.u64 q0, q0, #8
vmov.i32 d4, #19
vmov.i32 d5, #38
- add r6, sp, #512
- vst1.8 {d2-d3}, [r6, : 128]
- add r6, sp, #528
- vst1.8 {d0-d1}, [r6, : 128]
- add r6, sp, #544
+ add r6, sp, #480
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
vst1.8 {d4-d5}, [r6, : 128]
add r6, r3, #0
vmov.i32 q2, #0
@@ -45,12 +37,12 @@
vst1.8 {d4-d5}, [r6, : 128]!
vst1.8 d4, [r6, : 64]
add r6, r3, #0
- ldr r7, =960
+ movw r7, #960
sub r7, r7, #2
neg r7, r7
sub r7, r7, r7, LSL #7
str r7, [r6]
- add r6, sp, #704
+ add r6, sp, #672
vld1.8 {d4-d5}, [r1]!
vld1.8 {d6-d7}, [r1]
vst1.8 {d4-d5}, [r6, : 128]!
@@ -212,15 +204,15 @@
vst1.8 {d0-d1}, [r6, : 128]!
vst1.8 {d2-d3}, [r6, : 128]!
vst1.8 d4, [r6, : 64]
-._mainloop:
+.Lmainloop:
mov r2, r5, LSR #3
and r6, r5, #7
ldrb r2, [r1, r2]
mov r2, r2, LSR r6
and r2, r2, #1
- str r5, [sp, #488]
+ str r5, [sp, #456]
eor r4, r4, r2
- str r2, [sp, #492]
+ str r2, [sp, #460]
neg r2, r4
add r4, r3, #96
add r5, r3, #192
@@ -291,7 +283,7 @@
vsub.i32 q0, q1, q3
vst1.8 d4, [r4, : 64]
vst1.8 d0, [r6, : 64]
- add r2, sp, #544
+ add r2, sp, #512
add r4, r3, #96
add r5, r3, #144
vld1.8 {d0-d1}, [r2, : 128]
@@ -361,14 +353,13 @@
vmlal.s32 q0, d12, d8
vmlal.s32 q0, d13, d17
vmlal.s32 q0, d6, d6
- add r2, sp, #512
- vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
vmull.s32 q3, d16, d7
vmlal.s32 q3, d10, d15
vmlal.s32 q3, d11, d14
vmlal.s32 q3, d12, d9
vmlal.s32 q3, d13, d8
- add r2, sp, #528
vld1.8 {d8-d9}, [r2, : 128]
vadd.i64 q5, q12, q9
vadd.i64 q6, q15, q9
@@ -502,22 +493,19 @@
vadd.i32 q5, q5, q0
vtrn.32 q11, q14
vadd.i32 q6, q6, q3
- add r2, sp, #560
+ add r2, sp, #528
vadd.i32 q10, q10, q2
vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q6, q13, #1
- add r2, sp, #576
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vshl.i32 q10, q14, #1
- add r2, sp, #592
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q15, q12, #1
vadd.i32 q8, q8, q4
vext.32 d10, d31, d30, #0
vadd.i32 q7, q7, q1
- add r2, sp, #608
- vst1.8 {d16-d17}, [r2, : 128]
+ vst1.8 {d16-d17}, [r2, : 128]!
vmull.s32 q8, d18, d5
vmlal.s32 q8, d26, d4
vmlal.s32 q8, d19, d9
@@ -528,8 +516,7 @@
vmlal.s32 q8, d29, d1
vmlal.s32 q8, d24, d6
vmlal.s32 q8, d25, d0
- add r2, sp, #624
- vst1.8 {d14-d15}, [r2, : 128]
+ vst1.8 {d14-d15}, [r2, : 128]!
vmull.s32 q2, d18, d4
vmlal.s32 q2, d12, d9
vmlal.s32 q2, d13, d8
@@ -537,8 +524,7 @@
vmlal.s32 q2, d22, d2
vmlal.s32 q2, d23, d1
vmlal.s32 q2, d24, d0
- add r2, sp, #640
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vmull.s32 q7, d18, d9
vmlal.s32 q7, d26, d3
vmlal.s32 q7, d19, d8
@@ -547,14 +533,12 @@
vmlal.s32 q7, d28, d1
vmlal.s32 q7, d23, d6
vmlal.s32 q7, d29, d0
- add r2, sp, #656
- vst1.8 {d10-d11}, [r2, : 128]
+ vst1.8 {d10-d11}, [r2, : 128]!
vmull.s32 q5, d18, d3
vmlal.s32 q5, d19, d2
vmlal.s32 q5, d22, d1
vmlal.s32 q5, d23, d0
vmlal.s32 q5, d12, d8
- add r2, sp, #672
vst1.8 {d16-d17}, [r2, : 128]
vmull.s32 q4, d18, d8
vmlal.s32 q4, d26, d2
@@ -566,7 +550,7 @@
vmlal.s32 q8, d26, d1
vmlal.s32 q8, d19, d6
vmlal.s32 q8, d27, d0
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q7, d24, d21
vmlal.s32 q7, d25, d20
@@ -575,32 +559,30 @@
vmlal.s32 q8, d22, d21
vmlal.s32 q8, d28, d20
vmlal.s32 q5, d24, d20
- add r2, sp, #576
vst1.8 {d14-d15}, [r2, : 128]
vmull.s32 q7, d18, d6
vmlal.s32 q7, d26, d0
- add r2, sp, #656
+ add r2, sp, #624
vld1.8 {d30-d31}, [r2, : 128]
vmlal.s32 q2, d30, d21
vmlal.s32 q7, d19, d21
vmlal.s32 q7, d27, d20
- add r2, sp, #624
+ add r2, sp, #592
vld1.8 {d26-d27}, [r2, : 128]
vmlal.s32 q4, d25, d27
vmlal.s32 q8, d29, d27
vmlal.s32 q8, d25, d26
vmlal.s32 q7, d28, d27
vmlal.s32 q7, d29, d26
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d28-d29}, [r2, : 128]
vmlal.s32 q4, d24, d29
vmlal.s32 q8, d23, d29
vmlal.s32 q8, d24, d28
vmlal.s32 q7, d22, d29
vmlal.s32 q7, d23, d28
- add r2, sp, #608
vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #560
+ add r2, sp, #528
vld1.8 {d8-d9}, [r2, : 128]
vmlal.s32 q7, d24, d9
vmlal.s32 q7, d25, d31
@@ -621,36 +603,36 @@
vmlal.s32 q0, d23, d26
vmlal.s32 q0, d24, d31
vmlal.s32 q0, d19, d20
- add r2, sp, #640
+ add r2, sp, #608
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q2, d18, d7
- vmlal.s32 q2, d19, d6
vmlal.s32 q5, d18, d6
- vmlal.s32 q5, d19, d21
vmlal.s32 q1, d18, d21
- vmlal.s32 q1, d19, d29
vmlal.s32 q0, d18, d28
- vmlal.s32 q0, d19, d9
vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
vmlal.s32 q6, d19, d28
- add r2, sp, #592
+ add r2, sp, #560
vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #512
+ add r2, sp, #480
vld1.8 {d22-d23}, [r2, : 128]
vmlal.s32 q5, d19, d7
vmlal.s32 q0, d18, d21
vmlal.s32 q0, d19, d29
vmlal.s32 q6, d18, d6
- add r2, sp, #528
+ add r2, sp, #496
vld1.8 {d6-d7}, [r2, : 128]
vmlal.s32 q6, d19, d21
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q0, d30, d8
- add r2, sp, #672
+ add r2, sp, #640
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q5, d30, d29
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d24-d25}, [r2, : 128]
vmlal.s32 q1, d30, d28
vadd.i64 q13, q0, q11
@@ -823,22 +805,19 @@
vadd.i32 q5, q5, q0
vtrn.32 q11, q14
vadd.i32 q6, q6, q3
- add r2, sp, #560
+ add r2, sp, #528
vadd.i32 q10, q10, q2
vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q6, q13, #1
- add r2, sp, #576
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vshl.i32 q10, q14, #1
- add r2, sp, #592
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q15, q12, #1
vadd.i32 q8, q8, q4
vext.32 d10, d31, d30, #0
vadd.i32 q7, q7, q1
- add r2, sp, #608
- vst1.8 {d16-d17}, [r2, : 128]
+ vst1.8 {d16-d17}, [r2, : 128]!
vmull.s32 q8, d18, d5
vmlal.s32 q8, d26, d4
vmlal.s32 q8, d19, d9
@@ -849,8 +828,7 @@
vmlal.s32 q8, d29, d1
vmlal.s32 q8, d24, d6
vmlal.s32 q8, d25, d0
- add r2, sp, #624
- vst1.8 {d14-d15}, [r2, : 128]
+ vst1.8 {d14-d15}, [r2, : 128]!
vmull.s32 q2, d18, d4
vmlal.s32 q2, d12, d9
vmlal.s32 q2, d13, d8
@@ -858,8 +836,7 @@
vmlal.s32 q2, d22, d2
vmlal.s32 q2, d23, d1
vmlal.s32 q2, d24, d0
- add r2, sp, #640
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vmull.s32 q7, d18, d9
vmlal.s32 q7, d26, d3
vmlal.s32 q7, d19, d8
@@ -868,15 +845,13 @@
vmlal.s32 q7, d28, d1
vmlal.s32 q7, d23, d6
vmlal.s32 q7, d29, d0
- add r2, sp, #656
- vst1.8 {d10-d11}, [r2, : 128]
+ vst1.8 {d10-d11}, [r2, : 128]!
vmull.s32 q5, d18, d3
vmlal.s32 q5, d19, d2
vmlal.s32 q5, d22, d1
vmlal.s32 q5, d23, d0
vmlal.s32 q5, d12, d8
- add r2, sp, #672
- vst1.8 {d16-d17}, [r2, : 128]
+ vst1.8 {d16-d17}, [r2, : 128]!
vmull.s32 q4, d18, d8
vmlal.s32 q4, d26, d2
vmlal.s32 q4, d19, d7
@@ -887,7 +862,7 @@
vmlal.s32 q8, d26, d1
vmlal.s32 q8, d19, d6
vmlal.s32 q8, d27, d0
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q7, d24, d21
vmlal.s32 q7, d25, d20
@@ -896,32 +871,30 @@
vmlal.s32 q8, d22, d21
vmlal.s32 q8, d28, d20
vmlal.s32 q5, d24, d20
- add r2, sp, #576
vst1.8 {d14-d15}, [r2, : 128]
vmull.s32 q7, d18, d6
vmlal.s32 q7, d26, d0
- add r2, sp, #656
+ add r2, sp, #624
vld1.8 {d30-d31}, [r2, : 128]
vmlal.s32 q2, d30, d21
vmlal.s32 q7, d19, d21
vmlal.s32 q7, d27, d20
- add r2, sp, #624
+ add r2, sp, #592
vld1.8 {d26-d27}, [r2, : 128]
vmlal.s32 q4, d25, d27
vmlal.s32 q8, d29, d27
vmlal.s32 q8, d25, d26
vmlal.s32 q7, d28, d27
vmlal.s32 q7, d29, d26
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d28-d29}, [r2, : 128]
vmlal.s32 q4, d24, d29
vmlal.s32 q8, d23, d29
vmlal.s32 q8, d24, d28
vmlal.s32 q7, d22, d29
vmlal.s32 q7, d23, d28
- add r2, sp, #608
vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #560
+ add r2, sp, #528
vld1.8 {d8-d9}, [r2, : 128]
vmlal.s32 q7, d24, d9
vmlal.s32 q7, d25, d31
@@ -942,36 +915,36 @@
vmlal.s32 q0, d23, d26
vmlal.s32 q0, d24, d31
vmlal.s32 q0, d19, d20
- add r2, sp, #640
+ add r2, sp, #608
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q2, d18, d7
- vmlal.s32 q2, d19, d6
vmlal.s32 q5, d18, d6
- vmlal.s32 q5, d19, d21
vmlal.s32 q1, d18, d21
- vmlal.s32 q1, d19, d29
vmlal.s32 q0, d18, d28
- vmlal.s32 q0, d19, d9
vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
vmlal.s32 q6, d19, d28
- add r2, sp, #592
+ add r2, sp, #560
vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #512
+ add r2, sp, #480
vld1.8 {d22-d23}, [r2, : 128]
vmlal.s32 q5, d19, d7
vmlal.s32 q0, d18, d21
vmlal.s32 q0, d19, d29
vmlal.s32 q6, d18, d6
- add r2, sp, #528
+ add r2, sp, #496
vld1.8 {d6-d7}, [r2, : 128]
vmlal.s32 q6, d19, d21
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q0, d30, d8
- add r2, sp, #672
+ add r2, sp, #640
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q5, d30, d29
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d24-d25}, [r2, : 128]
vmlal.s32 q1, d30, d28
vadd.i64 q13, q0, q11
@@ -1069,7 +1042,7 @@
sub r4, r4, #24
vst1.8 d0, [r2, : 64]
vst1.8 d1, [r4, : 64]
- add r2, sp, #544
+ add r2, sp, #512
add r4, r3, #144
add r5, r3, #192
vld1.8 {d0-d1}, [r2, : 128]
@@ -1139,14 +1112,13 @@
vmlal.s32 q0, d12, d8
vmlal.s32 q0, d13, d17
vmlal.s32 q0, d6, d6
- add r2, sp, #512
- vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
vmull.s32 q3, d16, d7
vmlal.s32 q3, d10, d15
vmlal.s32 q3, d11, d14
vmlal.s32 q3, d12, d9
vmlal.s32 q3, d13, d8
- add r2, sp, #528
vld1.8 {d8-d9}, [r2, : 128]
vadd.i64 q5, q12, q9
vadd.i64 q6, q15, q9
@@ -1295,22 +1267,19 @@
vadd.i32 q5, q5, q0
vtrn.32 q11, q14
vadd.i32 q6, q6, q3
- add r2, sp, #560
+ add r2, sp, #528
vadd.i32 q10, q10, q2
vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q6, q13, #1
- add r2, sp, #576
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vshl.i32 q10, q14, #1
- add r2, sp, #592
- vst1.8 {d12-d13}, [r2, : 128]
+ vst1.8 {d12-d13}, [r2, : 128]!
vshl.i32 q15, q12, #1
vadd.i32 q8, q8, q4
vext.32 d10, d31, d30, #0
vadd.i32 q7, q7, q1
- add r2, sp, #608
- vst1.8 {d16-d17}, [r2, : 128]
+ vst1.8 {d16-d17}, [r2, : 128]!
vmull.s32 q8, d18, d5
vmlal.s32 q8, d26, d4
vmlal.s32 q8, d19, d9
@@ -1321,8 +1290,7 @@
vmlal.s32 q8, d29, d1
vmlal.s32 q8, d24, d6
vmlal.s32 q8, d25, d0
- add r2, sp, #624
- vst1.8 {d14-d15}, [r2, : 128]
+ vst1.8 {d14-d15}, [r2, : 128]!
vmull.s32 q2, d18, d4
vmlal.s32 q2, d12, d9
vmlal.s32 q2, d13, d8
@@ -1330,8 +1298,7 @@
vmlal.s32 q2, d22, d2
vmlal.s32 q2, d23, d1
vmlal.s32 q2, d24, d0
- add r2, sp, #640
- vst1.8 {d20-d21}, [r2, : 128]
+ vst1.8 {d20-d21}, [r2, : 128]!
vmull.s32 q7, d18, d9
vmlal.s32 q7, d26, d3
vmlal.s32 q7, d19, d8
@@ -1340,15 +1307,13 @@
vmlal.s32 q7, d28, d1
vmlal.s32 q7, d23, d6
vmlal.s32 q7, d29, d0
- add r2, sp, #656
- vst1.8 {d10-d11}, [r2, : 128]
+ vst1.8 {d10-d11}, [r2, : 128]!
vmull.s32 q5, d18, d3
vmlal.s32 q5, d19, d2
vmlal.s32 q5, d22, d1
vmlal.s32 q5, d23, d0
vmlal.s32 q5, d12, d8
- add r2, sp, #672
- vst1.8 {d16-d17}, [r2, : 128]
+ vst1.8 {d16-d17}, [r2, : 128]!
vmull.s32 q4, d18, d8
vmlal.s32 q4, d26, d2
vmlal.s32 q4, d19, d7
@@ -1359,7 +1324,7 @@
vmlal.s32 q8, d26, d1
vmlal.s32 q8, d19, d6
vmlal.s32 q8, d27, d0
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q7, d24, d21
vmlal.s32 q7, d25, d20
@@ -1368,32 +1333,30 @@
vmlal.s32 q8, d22, d21
vmlal.s32 q8, d28, d20
vmlal.s32 q5, d24, d20
- add r2, sp, #576
vst1.8 {d14-d15}, [r2, : 128]
vmull.s32 q7, d18, d6
vmlal.s32 q7, d26, d0
- add r2, sp, #656
+ add r2, sp, #624
vld1.8 {d30-d31}, [r2, : 128]
vmlal.s32 q2, d30, d21
vmlal.s32 q7, d19, d21
vmlal.s32 q7, d27, d20
- add r2, sp, #624
+ add r2, sp, #592
vld1.8 {d26-d27}, [r2, : 128]
vmlal.s32 q4, d25, d27
vmlal.s32 q8, d29, d27
vmlal.s32 q8, d25, d26
vmlal.s32 q7, d28, d27
vmlal.s32 q7, d29, d26
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d28-d29}, [r2, : 128]
vmlal.s32 q4, d24, d29
vmlal.s32 q8, d23, d29
vmlal.s32 q8, d24, d28
vmlal.s32 q7, d22, d29
vmlal.s32 q7, d23, d28
- add r2, sp, #608
vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #560
+ add r2, sp, #528
vld1.8 {d8-d9}, [r2, : 128]
vmlal.s32 q7, d24, d9
vmlal.s32 q7, d25, d31
@@ -1414,36 +1377,36 @@
vmlal.s32 q0, d23, d26
vmlal.s32 q0, d24, d31
vmlal.s32 q0, d19, d20
- add r2, sp, #640
+ add r2, sp, #608
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q2, d18, d7
- vmlal.s32 q2, d19, d6
vmlal.s32 q5, d18, d6
- vmlal.s32 q5, d19, d21
vmlal.s32 q1, d18, d21
- vmlal.s32 q1, d19, d29
vmlal.s32 q0, d18, d28
- vmlal.s32 q0, d19, d9
vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
vmlal.s32 q6, d19, d28
- add r2, sp, #592
+ add r2, sp, #560
vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #512
+ add r2, sp, #480
vld1.8 {d22-d23}, [r2, : 128]
vmlal.s32 q5, d19, d7
vmlal.s32 q0, d18, d21
vmlal.s32 q0, d19, d29
vmlal.s32 q6, d18, d6
- add r2, sp, #528
+ add r2, sp, #496
vld1.8 {d6-d7}, [r2, : 128]
vmlal.s32 q6, d19, d21
- add r2, sp, #576
+ add r2, sp, #544
vld1.8 {d18-d19}, [r2, : 128]
vmlal.s32 q0, d30, d8
- add r2, sp, #672
+ add r2, sp, #640
vld1.8 {d20-d21}, [r2, : 128]
vmlal.s32 q5, d30, d29
- add r2, sp, #608
+ add r2, sp, #576
vld1.8 {d24-d25}, [r2, : 128]
vmlal.s32 q1, d30, d28
vadd.i64 q13, q0, q11
@@ -1541,10 +1504,10 @@
sub r4, r4, #24
vst1.8 d0, [r2, : 64]
vst1.8 d1, [r4, : 64]
- ldr r2, [sp, #488]
- ldr r4, [sp, #492]
+ ldr r2, [sp, #456]
+ ldr r4, [sp, #460]
subs r5, r2, #1
- bge ._mainloop
+ bge .Lmainloop
add r1, r3, #144
add r2, r3, #336
vld1.8 {d0-d1}, [r1, : 128]!
@@ -1553,41 +1516,41 @@
vst1.8 {d0-d1}, [r2, : 128]!
vst1.8 {d2-d3}, [r2, : 128]!
vst1.8 d4, [r2, : 64]
- ldr r1, =0
-._invertloop:
+ movw r1, #0
+.Linvertloop:
add r2, r3, #144
- ldr r4, =0
- ldr r5, =2
+ movw r4, #0
+ movw r5, #2
cmp r1, #1
- ldreq r5, =1
+ moveq r5, #1
addeq r2, r3, #336
addeq r4, r3, #48
cmp r1, #2
- ldreq r5, =1
+ moveq r5, #1
addeq r2, r3, #48
cmp r1, #3
- ldreq r5, =5
+ moveq r5, #5
addeq r4, r3, #336
cmp r1, #4
- ldreq r5, =10
+ moveq r5, #10
cmp r1, #5
- ldreq r5, =20
+ moveq r5, #20
cmp r1, #6
- ldreq r5, =10
+ moveq r5, #10
addeq r2, r3, #336
addeq r4, r3, #336
cmp r1, #7
- ldreq r5, =50
+ moveq r5, #50
cmp r1, #8
- ldreq r5, =100
+ moveq r5, #100
cmp r1, #9
- ldreq r5, =50
+ moveq r5, #50
addeq r2, r3, #336
cmp r1, #10
- ldreq r5, =5
+ moveq r5, #5
addeq r2, r3, #48
cmp r1, #11
- ldreq r5, =0
+ moveq r5, #0
addeq r2, r3, #96
add r6, r3, #144
add r7, r3, #288
@@ -1598,8 +1561,8 @@
vst1.8 {d2-d3}, [r7, : 128]!
vst1.8 d4, [r7, : 64]
cmp r5, #0
- beq ._skipsquaringloop
-._squaringloop:
+ beq .Lskipsquaringloop
+.Lsquaringloop:
add r6, r3, #288
add r7, r3, #288
add r8, r3, #288
@@ -1611,7 +1574,7 @@
vld1.8 {d6-d7}, [r7, : 128]!
vld1.8 {d9}, [r7, : 64]
vld1.8 {d10-d11}, [r6, : 128]!
- add r7, sp, #416
+ add r7, sp, #384
vld1.8 {d12-d13}, [r6, : 128]!
vmul.i32 q7, q2, q0
vld1.8 {d8}, [r6, : 64]
@@ -1726,7 +1689,7 @@
vext.32 d10, d6, d6, #0
vmov.i32 q1, #0xffffffff
vshl.i64 q4, q1, #25
- add r7, sp, #512
+ add r7, sp, #480
vld1.8 {d14-d15}, [r7, : 128]
vadd.i64 q9, q2, q7
vshl.i64 q1, q1, #26
@@ -1735,7 +1698,7 @@
vadd.i64 q5, q5, q10
vand q9, q9, q1
vld1.8 {d16}, [r6, : 64]!
- add r6, sp, #528
+ add r6, sp, #496
vld1.8 {d20-d21}, [r6, : 128]
vadd.i64 q11, q5, q10
vsub.i64 q2, q2, q9
@@ -1789,8 +1752,8 @@
sub r6, r6, #32
vst1.8 d4, [r6, : 64]
subs r5, r5, #1
- bhi ._squaringloop
-._skipsquaringloop:
+ bhi .Lsquaringloop
+.Lskipsquaringloop:
mov r2, r2
add r5, r3, #288
add r6, r3, #144
@@ -1802,7 +1765,7 @@
vld1.8 {d6-d7}, [r5, : 128]!
vld1.8 {d9}, [r5, : 64]
vld1.8 {d10-d11}, [r2, : 128]!
- add r5, sp, #416
+ add r5, sp, #384
vld1.8 {d12-d13}, [r2, : 128]!
vmul.i32 q7, q2, q0
vld1.8 {d8}, [r2, : 64]
@@ -1917,7 +1880,7 @@
vext.32 d10, d6, d6, #0
vmov.i32 q1, #0xffffffff
vshl.i64 q4, q1, #25
- add r5, sp, #512
+ add r5, sp, #480
vld1.8 {d14-d15}, [r5, : 128]
vadd.i64 q9, q2, q7
vshl.i64 q1, q1, #26
@@ -1926,7 +1889,7 @@
vadd.i64 q5, q5, q10
vand q9, q9, q1
vld1.8 {d16}, [r2, : 64]!
- add r2, sp, #528
+ add r2, sp, #496
vld1.8 {d20-d21}, [r2, : 128]
vadd.i64 q11, q5, q10
vsub.i64 q2, q2, q9
@@ -1980,7 +1943,7 @@
sub r2, r2, #32
vst1.8 d4, [r2, : 64]
cmp r4, #0
- beq ._skippostcopy
+ beq .Lskippostcopy
add r2, r3, #144
mov r4, r4
vld1.8 {d0-d1}, [r2, : 128]!
@@ -1989,9 +1952,9 @@
vst1.8 {d0-d1}, [r4, : 128]!
vst1.8 {d2-d3}, [r4, : 128]!
vst1.8 d4, [r4, : 64]
-._skippostcopy:
+.Lskippostcopy:
cmp r1, #1
- bne ._skipfinalcopy
+ bne .Lskipfinalcopy
add r2, r3, #288
add r4, r3, #144
vld1.8 {d0-d1}, [r2, : 128]!
@@ -2000,10 +1963,10 @@
vst1.8 {d0-d1}, [r4, : 128]!
vst1.8 {d2-d3}, [r4, : 128]!
vst1.8 d4, [r4, : 64]
-._skipfinalcopy:
+.Lskipfinalcopy:
add r1, r1, #1
cmp r1, #12
- blo ._invertloop
+ blo .Linvertloop
add r1, r3, #144
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -2085,21 +2048,15 @@
add r8, r8, r10, LSL #12
mov r9, r10, LSR #20
add r1, r9, r1, LSL #6
- str r2, [r0], #4
- str r3, [r0], #4
- str r4, [r0], #4
- str r5, [r0], #4
- str r6, [r0], #4
- str r7, [r0], #4
- str r8, [r0], #4
- str r1, [r0]
- ldrd r4, [sp, #0]
- ldrd r6, [sp, #8]
- ldrd r8, [sp, #16]
- ldrd r10, [sp, #24]
- ldr r12, [sp, #480]
- ldr r14, [sp, #484]
- ldr r0, =0
- mov sp, r12
- vpop {q4, q5, q6, q7}
- bx lr
+ str r2, [r0]
+ str r3, [r0, #4]
+ str r4, [r0, #8]
+ str r5, [r0, #12]
+ str r6, [r0, #16]
+ str r7, [r0, #20]
+ str r8, [r0, #24]
+ str r1, [r0, #28]
+ movw r0, #0
+ mov sp, ip
+ pop {r4-r11, pc}
+ENDPROC(curve25519_neon)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
new file mode 100644
index 000000000000..2e9e12d2f642
--- /dev/null
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/simd.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <crypto/curve25519.h>
+
+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE]);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ curve25519_neon(out, scalar, point);
+ kernel_neon_end();
+ } else {
+ curve25519_generic(out, scalar, point);
+ }
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+ u8 const *bp;
+
+ if (req->src) {
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+ bp = public_key;
+ } else {
+ bp = curve25519_base_point;
+ }
+
+ curve25519_arch(buf, secret, bp);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-neon",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_compute_value,
+ .compute_shared_secret = curve25519_compute_value,
+ .max_size = curve25519_max_size,
+};
+
+static int __init mod_init(void)
+{
+ if (elf_hwcap & HWCAP_NEON) {
+ static_branch_enable(&have_neon);
+ return crypto_register_kpp(&curve25519_alg);
+ }
+ return 0;
+}
+
+static void __exit mod_exit(void)
+{
+ if (elf_hwcap & HWCAP_NEON)
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-neon");
+MODULE_LICENSE("GPL v2");
--
2.18.2
From 3c53ca73117d858f90c1a41d8e78a6b5d569d2e6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:39 +0100
Subject: [PATCH 032/100] crypto: chacha20poly1305 - import construction and
selftest from Zinc
commit ed20078b7e3331e82828be357147af6a3282e4ce upstream.
This incorporates the chacha20poly1305 from the Zinc library, retaining
the library interface, but replacing the implementation with calls into
the code that already existed in the kernel's crypto API.
Note that this library API does not implement RFC7539 fully, given that
it is limited to 64-bit nonces. (The 96-bit nonce version that was part
of the selftest only has been removed, along with the 96-bit nonce test
vectors that only tested the selftest but not the actual library itself)
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/crypto/chacha20poly1305.h | 37 +
lib/crypto/Kconfig | 7 +
lib/crypto/Makefile | 4 +
lib/crypto/chacha20poly1305-selftest.c | 7348 ++++++++++++++++++++++++
lib/crypto/chacha20poly1305.c | 219 +
5 files changed, 7615 insertions(+)
create mode 100644 include/crypto/chacha20poly1305.h
create mode 100644 lib/crypto/chacha20poly1305-selftest.c
create mode 100644 lib/crypto/chacha20poly1305.c
diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
new file mode 100644
index 000000000000..ad3b1de58df8
--- /dev/null
+++ b/include/crypto/chacha20poly1305.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef __CHACHA20POLY1305_H
+#define __CHACHA20POLY1305_H
+
+#include <linux/types.h>
+
+enum chacha20poly1305_lengths {
+ XCHACHA20POLY1305_NONCE_SIZE = 24,
+ CHACHA20POLY1305_KEY_SIZE = 32,
+ CHACHA20POLY1305_AUTHTAG_SIZE = 16
+};
+
+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool __must_check
+chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len, const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool __must_check xchacha20poly1305_decrypt(
+ u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
+ const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+#endif /* __CHACHA20POLY1305_H */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index b1d830dc1c9e..0b2c4fce26d9 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -119,5 +119,12 @@ config CRYPTO_LIB_POLY1305
by either the generic implementation or an arch-specific one, if one
is available and enabled.
+config CRYPTO_LIB_CHACHA20POLY1305
+ tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)"
+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
+ select CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_POLY1305
+
config CRYPTO_LIB_SHA256
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 273c55d5e147..34a701ab8b92 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -16,6 +16,9 @@ libblake2s-generic-y += blake2s-generic.o
obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
libblake2s-y += blake2s.o
+obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
+libchacha20poly1305-y += chacha20poly1305.o
+
obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
libcurve25519-y := curve25519-fiat32.o
libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
@@ -32,4 +35,5 @@ libsha256-y := sha256.o
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
+libchacha20poly1305-y += chacha20poly1305-selftest.o
endif
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
new file mode 100644
index 000000000000..d1ed0f27cfdb
--- /dev/null
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -0,0 +1,7348 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/chacha20poly1305.h>
+#include <crypto/poly1305.h>
+
+#include <asm/unaligned.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+struct chacha20poly1305_testvec {
+ const u8 *input, *output, *assoc, *nonce, *key;
+ size_t ilen, alen, nlen;
+ bool failure;
+};
+
+/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539
+ * 2.8.2. After they are generated by reference implementations. And the final
+ * marked ones are taken from wycheproof, but we only do these for the encrypt
+ * side, because mostly we're stressing the primitives rather than the actual
+ * chapoly construction.
+ */
+
+static const u8 enc_input001[] __initconst = {
+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
+ 0x9d
+};
+static const u8 enc_output001[] __initconst = {
+ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
+ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
+ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
+ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
+ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
+ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
+ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
+ 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
+ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
+ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
+ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
+ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
+ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
+ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
+ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
+ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
+ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
+ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
+ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
+ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
+ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
+ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
+ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
+ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
+ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
+ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
+ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
+ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
+ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
+ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
+ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
+ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
+ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
+ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
+ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
+ 0x38
+};
+static const u8 enc_assoc001[] __initconst = {
+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x4e, 0x91
+};
+static const u8 enc_nonce001[] __initconst = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+};
+static const u8 enc_key001[] __initconst = {
+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
+};
+
+static const u8 enc_input002[] __initconst = { };
+static const u8 enc_output002[] __initconst = {
+ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
+ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
+};
+static const u8 enc_assoc002[] __initconst = { };
+static const u8 enc_nonce002[] __initconst = {
+ 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
+};
+static const u8 enc_key002[] __initconst = {
+ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
+ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
+ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
+ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
+};
+
+static const u8 enc_input003[] __initconst = { };
+static const u8 enc_output003[] __initconst = {
+ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
+ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
+};
+static const u8 enc_assoc003[] __initconst = {
+ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
+};
+static const u8 enc_nonce003[] __initconst = {
+ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
+};
+static const u8 enc_key003[] __initconst = {
+ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
+ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
+ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
+ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
+};
+
+static const u8 enc_input004[] __initconst = {
+ 0xa4
+};
+static const u8 enc_output004[] __initconst = {
+ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
+ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
+ 0x89
+};
+static const u8 enc_assoc004[] __initconst = {
+ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
+};
+static const u8 enc_nonce004[] __initconst = {
+ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
+};
+static const u8 enc_key004[] __initconst = {
+ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
+ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
+ 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
+ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
+};
+
+static const u8 enc_input005[] __initconst = {
+ 0x2d
+};
+static const u8 enc_output005[] __initconst = {
+ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
+ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
+ 0xac
+};
+static const u8 enc_assoc005[] __initconst = { };
+static const u8 enc_nonce005[] __initconst = {
+ 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
+};
+static const u8 enc_key005[] __initconst = {
+ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
+ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
+ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
+ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
+};
+
+static const u8 enc_input006[] __initconst = {
+ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
+ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
+ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
+ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
+ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
+ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
+ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
+ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
+ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
+ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
+ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
+ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
+ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
+ 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
+ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
+ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
+ 0x8f
+};
+static const u8 enc_output006[] __initconst = {
+ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
+ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
+ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
+ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
+ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
+ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
+ 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
+ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
+ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
+ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
+ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
+ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
+ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
+ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
+ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
+ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
+ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
+ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
+ 0xeb
+};
+static const u8 enc_assoc006[] __initconst = {
+ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
+};
+static const u8 enc_nonce006[] __initconst = {
+ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
+};
+static const u8 enc_key006[] __initconst = {
+ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
+ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
+ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
+ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
+};
+
+static const u8 enc_input007[] __initconst = {
+ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
+ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
+ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
+ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
+ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
+ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
+ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
+ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
+ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
+ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
+ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
+ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
+ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
+ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
+ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
+ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
+ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
+ 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
+ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
+ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
+ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
+ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
+ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
+ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
+ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
+ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
+ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
+ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
+ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
+ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
+ 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
+ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
+};
+static const u8 enc_output007[] __initconst = {
+ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
+ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
+ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
+ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
+ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
+ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
+ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
+ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
+ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
+ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
+ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
+ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
+ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
+ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
+ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
+ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
+ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
+ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
+ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
+ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
+ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
+ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
+ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
+ 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
+ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
+ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
+ 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
+ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
+ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
+ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
+ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
+ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
+ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
+ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
+};
+static const u8 enc_assoc007[] __initconst = { };
+static const u8 enc_nonce007[] __initconst = {
+ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
+};
+static const u8 enc_key007[] __initconst = {
+ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
+ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
+ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
+ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
+};
+
+static const u8 enc_input008[] __initconst = {
+ 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
+ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
+ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
+ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
+ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
+ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
+ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
+ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
+ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
+ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
+ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
+ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
+ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
+ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
+ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
+ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
+ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
+ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
+ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
+ 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
+ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
+ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
+ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
+ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
+ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
+ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
+ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
+ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
+ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
+ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
+ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
+ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
+ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
+ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
+ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
+ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
+ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
+ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
+ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
+ 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
+ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
+ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
+ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
+ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
+ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
+ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
+ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
+ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
+ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
+ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
+ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
+ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
+ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
+ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
+ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
+ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
+ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
+ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
+ 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
+ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
+ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
+ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
+ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
+ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
+};
+static const u8 enc_output008[] __initconst = {
+ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
+ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
+ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
+ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
+ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
+ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
+ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
+ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
+ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
+ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
+ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
+ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
+ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
+ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
+ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
+ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
+ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
+ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
+ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
+ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
+ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
+ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
+ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
+ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
+ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
+ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
+ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
+ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
+ 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
+ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
+ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
+ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
+ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
+ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
+ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
+ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
+ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
+ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
+ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
+ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
+ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
+ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
+ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
+ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
+ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
+ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
+ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
+ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
+ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
+ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
+ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
+ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
+ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
+ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
+ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
+ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
+ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
+ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
+ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
+ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
+ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
+ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
+ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
+ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
+ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
+ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
+};
+static const u8 enc_assoc008[] __initconst = { };
+static const u8 enc_nonce008[] __initconst = {
+ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
+};
+static const u8 enc_key008[] __initconst = {
+ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
+ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
+ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
+ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
+};
+
+static const u8 enc_input009[] __initconst = {
+ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
+ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
+ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
+ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
+ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
+ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
+ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
+ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
+ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
+ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
+ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
+ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
+ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
+ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
+ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
+ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
+ 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
+ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
+ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
+ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
+ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
+ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
+ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
+ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
+ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
+ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
+ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
+ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
+ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
+ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
+ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
+ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
+ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
+ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
+ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
+ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
+ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
+ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
+ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
+ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
+ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
+ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
+ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
+ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
+ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
+ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
+ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
+ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
+ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
+ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
+ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
+ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
+ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
+ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
+ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
+ 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
+ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
+ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
+ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
+ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
+ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
+ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
+ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
+ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
+ 0x65
+};
+static const u8 enc_output009[] __initconst = {
+ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
+ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
+ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
+ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
+ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
+ 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
+ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
+ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
+ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
+ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
+ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
+ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
+ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
+ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
+ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
+ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
+ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
+ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
+ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
+ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
+ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
+ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
+ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
+ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
+ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
+ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
+ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
+ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
+ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
+ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
+ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
+ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
+ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
+ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
+ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
+ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
+ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
+ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
+ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
+ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
+ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
+ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
+ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
+ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
+ 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
+ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
+ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
+ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
+ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
+ 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
+ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
+ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
+ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
+ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
+ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
+ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
+ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
+ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
+ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
+ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
+ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
+ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
+ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
+ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
+ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
+ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
+ 0xae
+};
+static const u8 enc_assoc009[] __initconst = {
+ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
+ 0xef
+};
+static const u8 enc_nonce009[] __initconst = {
+ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
+};
+static const u8 enc_key009[] __initconst = {
+ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
+ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
+ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
+ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
+};
+
+static const u8 enc_input010[] __initconst = {
+ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
+ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
+ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
+ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
+ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
+ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
+ 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
+ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
+ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
+ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
+ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
+ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
+ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
+ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
+ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
+ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
+ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
+ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
+ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
+ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
+ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
+ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
+ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
+ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
+ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
+ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
+ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
+ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
+ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
+ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
+ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
+ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
+ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
+ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
+ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
+ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
+ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
+ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
+ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
+ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
+ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
+ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
+ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
+ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
+ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
+ 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
+ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
+ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
+ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
+ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
+ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
+ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
+ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
+ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
+ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
+ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
+ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
+ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
+ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
+ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
+ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
+ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
+ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
+ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
+ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
+ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
+ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
+ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
+ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
+ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
+ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
+ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
+ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
+ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
+ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
+ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
+ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
+ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
+ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
+ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
+ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
+ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
+ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
+ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
+ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
+ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
+ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
+ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
+ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
+ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
+ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
+ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
+ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
+ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
+ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
+ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
+ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
+ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
+ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
+ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
+ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
+ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
+ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
+ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
+ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
+ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
+ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
+ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
+ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
+ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
+ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
+ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
+ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
+ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
+ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
+ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
+ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
+ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
+ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
+ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
+ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
+ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
+ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
+ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
+ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
+ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
+ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
+ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
+};
+static const u8 enc_output010[] __initconst = {
+ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
+ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
+ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
+ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
+ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
+ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
+ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
+ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
+ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
+ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
+ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
+ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
+ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
+ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
+ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
+ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
+ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
+ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
+ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
+ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
+ 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
+ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
+ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
+ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
+ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
+ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
+ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
+ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
+ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
+ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
+ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
+ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
+ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
+ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
+ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
+ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
+ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
+ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
+ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
+ 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
+ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
+ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
+ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
+ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
+ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
+ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
+ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
+ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
+ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
+ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
+ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
+ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
+ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
+ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
+ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
+ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
+ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
+ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
+ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
+ 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
+ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
+ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
+ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
+ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
+ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
+ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
+ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
+ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
+ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
+ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
+ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
+ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
+ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
+ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
+ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
+ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
+ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
+ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
+ 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
+ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
+ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
+ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
+ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
+ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
+ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
+ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
+ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
+ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
+ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
+ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
+ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
+ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
+ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
+ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
+ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
+ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
+ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
+ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
+ 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
+ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
+ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
+ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
+ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
+ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
+ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
+ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
+ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
+ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
+ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
+ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
+ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
+ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
+ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
+ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
+ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
+ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
+ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
+ 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
+ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
+ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
+ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
+ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
+ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
+ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
+ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
+ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
+ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
+ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
+ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
+ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
+};
+static const u8 enc_assoc010[] __initconst = {
+ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
+ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
+};
+static const u8 enc_nonce010[] __initconst = {
+ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
+};
+static const u8 enc_key010[] __initconst = {
+ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
+ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
+ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
+ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
+};
+
+static const u8 enc_input011[] __initconst = {
+ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
+ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
+ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
+ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
+ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
+ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
+ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
+ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
+ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
+ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
+ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
+ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
+ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
+ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
+ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
+ 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
+ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
+ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
+ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
+ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
+ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
+ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
+ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
+ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
+ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
+ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
+ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
+ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
+ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
+ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
+ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
+ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
+ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
+ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
+ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
+ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
+ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
+ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
+ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
+ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
+ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
+ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
+ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
+ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
+ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
+ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
+ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
+ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
+ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
+ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
+ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
+ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
+ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
+ 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
+ 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
+ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
+ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
+ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
+ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
+ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
+ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
+ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
+ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
+ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
+ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
+ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
+ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
+ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
+ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
+ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
+ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
+ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
+ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
+ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
+ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
+ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
+ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
+ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
+ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
+ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
+ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
+ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
+ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
+ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
+ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
+ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
+ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
+ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
+ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
+ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
+ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
+ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
+ 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
+ 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
+ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
+ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
+ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
+ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
+ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
+ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
+ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
+ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
+ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
+ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
+ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
+ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
+ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
+ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
+ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
+ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
+ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
+ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
+ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
+ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
+ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
+ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
+ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
+ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
+ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
+ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
+ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
+ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
+ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
+ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
+ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
+ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
+ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
+ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
+ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
+ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
+ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
+ 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
+ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
+ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
+ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
+ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
+ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
+ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
+ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
+ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
+ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
+ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
+ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
+ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
+ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
+ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
+ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
+ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
+ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
+ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
+ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
+ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
+ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
+ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
+ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
+ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
+ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
+ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
+ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
+ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
+ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
+ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
+ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
+ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
+ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
+ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
+ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
+ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
+ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
+ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
+ 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
+ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
+ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
+ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
+ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
+ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
+ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
+ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
+ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
+ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
+ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
+ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
+ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
+ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
+ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
+ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
+ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
+ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
+ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
+ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
+ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
+ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
+ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
+ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
+ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
+ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
+ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
+ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
+ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
+ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
+ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
+ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
+ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
+ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
+ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
+ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
+ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
+ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
+ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
+ 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
+ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
+ 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
+ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
+ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
+ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
+ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
+ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
+ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
+ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
+ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
+ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
+ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
+ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
+ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
+ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
+ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
+ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
+ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
+ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
+ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
+ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
+ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
+ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
+ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
+ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
+ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
+ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
+ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
+ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
+ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
+ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
+ 0x10, 0x1e, 0xbf, 0xec, 0xa8
+};
+static const u8 enc_output011[] __initconst = {
+ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
+ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
+ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
+ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
+ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
+ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
+ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
+ 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
+ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
+ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
+ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
+ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
+ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
+ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
+ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
+ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
+ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
+ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
+ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
+ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
+ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
+ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
+ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
+ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
+ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
+ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
+ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
+ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
+ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
+ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
+ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
+ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
+ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
+ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
+ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
+ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
+ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
+ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
+ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
+ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
+ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
+ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
+ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
+ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
+ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
+ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
+ 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
+ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
+ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
+ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
+ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
+ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
+ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
+ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
+ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
+ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
+ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
+ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
+ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
+ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
+ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
+ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
+ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
+ 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
+ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
+ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
+ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
+ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
+ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
+ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
+ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
+ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
+ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
+ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
+ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
+ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
+ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
+ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
+ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
+ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
+ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
+ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
+ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
+ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
+ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
+ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
+ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
+ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
+ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
+ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
+ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
+ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
+ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
+ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
+ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
+ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
+ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
+ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
+ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
+ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
+ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
+ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
+ 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
+ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
+ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
+ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
+ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
+ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
+ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
+ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
+ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
+ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
+ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
+ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
+ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
+ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
+ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
+ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
+ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
+ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
+ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
+ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
+ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
+ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
+ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
+ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
+ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
+ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
+ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
+ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
+ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
+ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
+ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
+ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
+ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
+ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
+ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
+ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
+ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
+ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
+ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
+ 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
+ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
+ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
+ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
+ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
+ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
+ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
+ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
+ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
+ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
+ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
+ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
+ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
+ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
+ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
+ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
+ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
+ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
+ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
+ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
+ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
+ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
+ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
+ 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
+ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
+ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
+ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
+ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
+ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
+ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
+ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
+ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
+ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
+ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
+ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
+ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
+ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
+ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
+ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
+ 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
+ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
+ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
+ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
+ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
+ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
+ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
+ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
+ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
+ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
+ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
+ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
+ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
+ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
+ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
+ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
+ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
+ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
+ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
+ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
+ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
+ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
+ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
+ 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
+ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
+ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
+ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
+ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
+ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
+ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
+ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
+ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
+ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
+ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
+ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
+ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
+ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
+ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
+ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
+ 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
+ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
+ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
+ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
+ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
+ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
+ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
+ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
+ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
+ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
+ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
+ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
+ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
+ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
+ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
+ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
+ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
+ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
+ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
+ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
+ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
+ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
+ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
+ 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
+ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c
+};
+static const u8 enc_assoc011[] __initconst = {
+ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
+};
+static const u8 enc_nonce011[] __initconst = {
+ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
+};
+static const u8 enc_key011[] __initconst = {
+ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
+ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
+ 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
+ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
+};
+
+static const u8 enc_input012[] __initconst = {
+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
+ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
+ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
+ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
+ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
+ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
+ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
+ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
+ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
+ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
+ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
+ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
+ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
+ 0x78, 0xec, 0x00
+};
+static const u8 enc_output012[] __initconst = {
+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
+ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
+ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
+ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
+ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
+ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
+ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
+ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
+ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
+ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
+ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
+ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
+ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
+ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
+ 0x70, 0xcf, 0xd6
+};
+static const u8 enc_assoc012[] __initconst = {
+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
+};
+static const u8 enc_nonce012[] __initconst = {
+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
+};
+static const u8 enc_key012[] __initconst = {
+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
+};
+
+/* wycheproof - misc */
+static const u8 enc_input053[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe
+};
+static const u8 enc_output053[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7,
+ 0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07
+};
+static const u8 enc_assoc053[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce053[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key053[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input054[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
+ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
+ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
+ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
+ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd
+};
+static const u8 enc_output054[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2,
+ 0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce
+};
+static const u8 enc_assoc054[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce054[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key054[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input055[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
+ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
+ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
+ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
+ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd,
+ 0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa,
+ 0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02,
+ 0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80,
+ 0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4,
+ 0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7,
+ 0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed,
+ 0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38,
+ 0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7
+};
+static const u8 enc_output055[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3,
+ 0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e
+};
+static const u8 enc_assoc055[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce055[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key055[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input056[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41
+};
+static const u8 enc_output056[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27,
+ 0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6
+};
+static const u8 enc_assoc056[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce056[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key056[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input057[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
+ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
+ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
+ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
+ 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42
+};
+static const u8 enc_output057[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b,
+ 0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68
+};
+static const u8 enc_assoc057[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce057[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key057[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input058[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
+ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
+ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
+ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
+ 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42,
+ 0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05,
+ 0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd,
+ 0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f,
+ 0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b,
+ 0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38,
+ 0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12,
+ 0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7,
+ 0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 0x38
+};
+static const u8 enc_output058[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91,
+ 0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62
+};
+static const u8 enc_assoc058[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce058[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key058[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input059[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e
+};
+static const u8 enc_output059[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75,
+ 0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4
+};
+static const u8 enc_assoc059[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
+};
+static const u8 enc_nonce059[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key059[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input060[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
+ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
+ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
+ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
+ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d
+};
+static const u8 enc_output060[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba,
+ 0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a
+};
+static const u8 enc_assoc060[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
+};
+static const u8 enc_nonce060[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key060[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input061[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
+ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
+ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
+ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
+ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d,
+ 0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a,
+ 0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82,
+ 0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00,
+ 0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44,
+ 0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47,
+ 0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d,
+ 0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8,
+ 0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47
+};
+static const u8 enc_output061[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0xfe, 0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f,
+ 0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a
+};
+static const u8 enc_assoc061[] __initconst = {
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
+};
+static const u8 enc_nonce061[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key061[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input062[] __initconst = {
+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1
+};
+static const u8 enc_output062[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59,
+ 0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19
+};
+static const u8 enc_assoc062[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
+};
+static const u8 enc_nonce062[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key062[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input063[] __initconst = {
+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
+ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
+ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
+ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
+ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2
+};
+static const u8 enc_output063[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2,
+ 0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c
+};
+static const u8 enc_assoc063[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
+};
+static const u8 enc_nonce063[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key063[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input064[] __initconst = {
+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
+ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
+ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
+ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
+ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2,
+ 0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85,
+ 0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d,
+ 0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff,
+ 0x67, 0xe5, 0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb,
+ 0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8,
+ 0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92,
+ 0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47,
+ 0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8
+};
+static const u8 enc_output064[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5,
+ 0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06
+};
+static const u8 enc_assoc064[] __initconst = {
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
+};
+static const u8 enc_nonce064[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key064[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input065[] __initconst = {
+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41
+};
+static const u8 enc_output065[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf,
+ 0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67
+};
+static const u8 enc_assoc065[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce065[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key065[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input066[] __initconst = {
+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
+ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
+ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
+ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
+ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42
+};
+static const u8 enc_output066[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89,
+ 0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90
+};
+static const u8 enc_assoc066[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce066[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key066[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input067[] __initconst = {
+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
+ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
+ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
+ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
+ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42,
+ 0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05,
+ 0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd,
+ 0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f,
+ 0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b,
+ 0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38,
+ 0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12,
+ 0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7,
+ 0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38
+};
+static const u8 enc_output067[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94,
+ 0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22
+};
+static const u8 enc_assoc067[] __initconst = {
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce067[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key067[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input068[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41
+};
+static const u8 enc_output068[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9,
+ 0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d
+};
+static const u8 enc_assoc068[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce068[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key068[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input069[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
+ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
+ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
+ 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
+ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42
+};
+static const u8 enc_output069[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde,
+ 0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82
+};
+static const u8 enc_assoc069[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce069[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key069[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input070[] __initconst = {
+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
+ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
+ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
+ 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
+ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42,
+ 0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05,
+ 0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd,
+ 0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f,
+ 0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b,
+ 0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38,
+ 0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12,
+ 0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7,
+ 0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38
+};
+static const u8 enc_output070[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3,
+ 0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71
+};
+static const u8 enc_assoc070[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce070[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key070[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input071[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe
+};
+static const u8 enc_output071[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5,
+ 0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20
+};
+static const u8 enc_assoc071[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce071[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key071[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input072[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
+ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
+ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
+ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
+ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd
+};
+static const u8 enc_output072[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e,
+ 0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4
+};
+static const u8 enc_assoc072[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce072[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key072[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input073[] __initconst = {
+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
+ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
+ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
+ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
+ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd,
+ 0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa,
+ 0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02,
+ 0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80,
+ 0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4,
+ 0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7,
+ 0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed,
+ 0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38,
+ 0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7
+};
+static const u8 enc_output073[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51,
+ 0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f
+};
+static const u8 enc_assoc073[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_nonce073[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
+};
+static const u8 enc_key073[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input076[] __initconst = {
+ 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85,
+ 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02,
+ 0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09,
+ 0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8,
+ 0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd,
+ 0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85,
+ 0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39,
+ 0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae,
+ 0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed,
+ 0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4,
+ 0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c,
+ 0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33,
+ 0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19,
+ 0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11,
+ 0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9,
+ 0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5
+};
+static const u8 enc_output076[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d,
+ 0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63
+};
+static const u8 enc_assoc076[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce076[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0
+};
+static const u8 enc_key076[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input077[] __initconst = {
+ 0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae,
+ 0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16,
+ 0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7,
+ 0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61,
+ 0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2,
+ 0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf,
+ 0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf,
+ 0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48,
+ 0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8,
+ 0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa,
+ 0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87,
+ 0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32,
+ 0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1,
+ 0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b,
+ 0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c,
+ 0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f
+};
+static const u8 enc_output077[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4,
+ 0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa
+};
+static const u8 enc_assoc077[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce077[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66
+};
+static const u8 enc_key077[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input078[] __initconst = {
+ 0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef,
+ 0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5,
+ 0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c,
+ 0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d,
+ 0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf,
+ 0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6,
+ 0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe,
+ 0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5,
+ 0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62,
+ 0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c,
+ 0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f,
+ 0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2,
+ 0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33,
+ 0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e,
+ 0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9,
+ 0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62
+};
+static const u8 enc_output078[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7,
+ 0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d
+};
+static const u8 enc_assoc078[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce078[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90
+};
+static const u8 enc_key078[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input079[] __initconst = {
+ 0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9,
+ 0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6,
+ 0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c,
+ 0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82,
+ 0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21,
+ 0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12,
+ 0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c,
+ 0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d,
+ 0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32,
+ 0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84,
+ 0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3,
+ 0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24,
+ 0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94,
+ 0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53,
+ 0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71,
+ 0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd
+};
+static const u8 enc_output079[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2,
+ 0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e
+};
+static const u8 enc_assoc079[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce079[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a
+};
+static const u8 enc_key079[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input080[] __initconst = {
+ 0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5,
+ 0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9,
+ 0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b,
+ 0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc,
+ 0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38,
+ 0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27,
+ 0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83,
+ 0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b,
+ 0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda,
+ 0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4,
+ 0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc,
+ 0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b,
+ 0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4,
+ 0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00,
+ 0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d,
+ 0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1
+};
+static const u8 enc_output080[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a,
+ 0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02
+};
+static const u8 enc_assoc080[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce080[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40
+};
+static const u8 enc_key080[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input081[] __initconst = {
+ 0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92,
+ 0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c,
+ 0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9,
+ 0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06,
+ 0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a,
+ 0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa,
+ 0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07,
+ 0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6,
+ 0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06,
+ 0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8,
+ 0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5,
+ 0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5,
+ 0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f,
+ 0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86,
+ 0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23,
+ 0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1
+};
+static const u8 enc_output081[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba,
+ 0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0
+};
+static const u8 enc_assoc081[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce081[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35
+};
+static const u8 enc_key081[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input082[] __initconst = {
+ 0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb,
+ 0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49,
+ 0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16,
+ 0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d,
+ 0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9,
+ 0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e,
+ 0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d,
+ 0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc,
+ 0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6,
+ 0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca,
+ 0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83,
+ 0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08,
+ 0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95,
+ 0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66,
+ 0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83,
+ 0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07
+};
+static const u8 enc_output082[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42,
+ 0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae
+};
+static const u8 enc_assoc082[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce082[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5
+};
+static const u8 enc_key082[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input083[] __initconst = {
+ 0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58,
+ 0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84,
+ 0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5,
+ 0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf,
+ 0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39,
+ 0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03,
+ 0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f,
+ 0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa,
+ 0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08,
+ 0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59,
+ 0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56,
+ 0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9,
+ 0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43,
+ 0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa,
+ 0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba,
+ 0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a
+};
+static const u8 enc_output083[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b,
+ 0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19
+};
+static const u8 enc_assoc083[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce083[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4
+};
+static const u8 enc_key083[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input084[] __initconst = {
+ 0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18,
+ 0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b,
+ 0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99,
+ 0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c,
+ 0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde,
+ 0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2,
+ 0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1,
+ 0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8,
+ 0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b,
+ 0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f,
+ 0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77,
+ 0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8,
+ 0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c,
+ 0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf,
+ 0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97,
+ 0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee
+};
+static const u8 enc_output084[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab,
+ 0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87
+};
+static const u8 enc_assoc084[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce084[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8
+};
+static const u8 enc_key084[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input085[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6,
+ 0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed,
+ 0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7,
+ 0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37,
+ 0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a,
+ 0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b
+};
+static const u8 enc_output085[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68,
+ 0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43
+};
+static const u8 enc_assoc085[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce085[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key085[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input093[] __initconst = {
+ 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d,
+ 0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44,
+ 0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3,
+ 0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b,
+ 0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb,
+ 0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c
+};
+static const u8 enc_output093[] __initconst = {
+ 0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14,
+ 0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7,
+ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
+ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
+ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
+ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
+ 0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77,
+ 0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a
+};
+static const u8 enc_assoc093[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce093[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key093[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input094[] __initconst = {
+ 0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90,
+ 0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45,
+ 0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef,
+ 0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09,
+ 0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20,
+ 0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10
+};
+static const u8 enc_output094[] __initconst = {
+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66,
+ 0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02,
+ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
+ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
+ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
+ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
+ 0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6,
+ 0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc
+};
+static const u8 enc_assoc094[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce094[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key094[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input095[] __initconst = {
+ 0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b,
+ 0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46,
+ 0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6,
+ 0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a,
+ 0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79,
+ 0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13
+};
+static const u8 enc_output095[] __initconst = {
+ 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad,
+ 0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01,
+ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
+ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
+ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
+ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
+ 0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02,
+ 0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0
+};
+static const u8 enc_assoc095[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce095[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key095[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input096[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60,
+ 0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c,
+ 0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67,
+ 0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94,
+ 0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94,
+ 0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04
+};
+static const u8 enc_output096[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96,
+ 0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b,
+ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
+ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
+ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
+ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
+ 0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73,
+ 0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8
+};
+static const u8 enc_assoc096[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce096[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key096[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input097[] __initconst = {
+ 0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6,
+ 0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47,
+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01,
+ 0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96,
+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce,
+ 0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f
+};
+static const u8 enc_output097[] __initconst = {
+ 0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00,
+ 0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
+ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
+ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
+ 0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b,
+ 0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26
+};
+static const u8 enc_assoc097[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce097[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key097[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input098[] __initconst = {
+ 0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62,
+ 0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45,
+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0,
+ 0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8,
+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 0x65, 0x6f,
+ 0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1
+};
+static const u8 enc_output098[] __initconst = {
+ 0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94,
+ 0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
+ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
+ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
+ 0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e,
+ 0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b
+};
+static const u8 enc_assoc098[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce098[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key098[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input099[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12,
+ 0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98,
+ 0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95,
+ 0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48,
+ 0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66,
+ 0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8
+};
+static const u8 enc_output099[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 0xe4,
+ 0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf,
+ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
+ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
+ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
+ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
+ 0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56,
+ 0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1
+};
+static const u8 enc_assoc099[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce099[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key099[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input100[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70,
+ 0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd,
+ 0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77,
+ 0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75,
+ 0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84,
+ 0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5
+};
+static const u8 enc_output100[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86,
+ 0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa,
+ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
+ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
+ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
+ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
+ 0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59,
+ 0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6
+};
+static const u8 enc_assoc100[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce100[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key100[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input101[] __initconst = {
+ 0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c,
+ 0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde,
+ 0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92,
+ 0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f,
+ 0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea,
+ 0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88
+};
+static const u8 enc_output101[] __initconst = {
+ 0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5,
+ 0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d,
+ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
+ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
+ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
+ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
+ 0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65,
+ 0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05
+};
+static const u8 enc_assoc101[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce101[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key101[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input102[] __initconst = {
+ 0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0,
+ 0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d,
+ 0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73,
+ 0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b,
+ 0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b,
+ 0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c
+};
+static const u8 enc_output102[] __initconst = {
+ 0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39,
+ 0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e,
+ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
+ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
+ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
+ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
+ 0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56,
+ 0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04
+};
+static const u8 enc_assoc102[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce102[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key102[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input103[] __initconst = {
+ 0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d,
+ 0x44, 0xa1, 0xaf, 0x1e, 0xab, 0xde, 0xb4, 0xce,
+ 0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c,
+ 0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a,
+ 0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14,
+ 0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d
+};
+static const u8 enc_output103[] __initconst = {
+ 0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94,
+ 0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d,
+ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
+ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
+ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
+ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
+ 0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a,
+ 0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08
+};
+static const u8 enc_assoc103[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce103[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key103[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input104[] __initconst = {
+ 0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac,
+ 0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b,
+ 0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9,
+ 0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98,
+ 0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1,
+ 0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f
+};
+static const u8 enc_output104[] __initconst = {
+ 0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35,
+ 0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88,
+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
+ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
+ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
+ 0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d,
+ 0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d
+};
+static const u8 enc_assoc104[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce104[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key104[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input105[] __initconst = {
+ 0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5,
+ 0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99,
+ 0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e,
+ 0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a,
+ 0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46,
+ 0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d
+};
+static const u8 enc_output105[] __initconst = {
+ 0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 0x3c,
+ 0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a,
+ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
+ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
+ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
+ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
+ 0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88,
+ 0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd
+};
+static const u8 enc_assoc105[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce105[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key105[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input106[] __initconst = {
+ 0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06,
+ 0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5,
+ 0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68,
+ 0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a,
+ 0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10,
+ 0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d
+};
+static const u8 enc_output106[] __initconst = {
+ 0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f,
+ 0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76,
+ 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
+ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
+ 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
+ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
+ 0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88,
+ 0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a
+};
+static const u8 enc_assoc106[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce106[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key106[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input107[] __initconst = {
+ 0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71,
+ 0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44,
+ 0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c,
+ 0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a,
+ 0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3,
+ 0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13
+};
+static const u8 enc_output107[] __initconst = {
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87,
+ 0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03,
+ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
+ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
+ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
+ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
+ 0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02,
+ 0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88
+};
+static const u8 enc_assoc107[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce107[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key107[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input108[] __initconst = {
+ 0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43,
+ 0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6,
+ 0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11,
+ 0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99,
+ 0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69,
+ 0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e
+};
+static const u8 enc_output108[] __initconst = {
+ 0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda,
+ 0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15,
+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
+ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
+ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
+ 0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96,
+ 0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a
+};
+static const u8 enc_assoc108[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce108[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key108[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input109[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a,
+ 0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb,
+ 0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96,
+ 0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f,
+ 0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59,
+ 0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16
+};
+static const u8 enc_output109[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac,
+ 0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c,
+ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
+ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
+ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
+ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
+ 0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1,
+ 0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb
+};
+static const u8 enc_assoc109[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce109[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key109[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input110[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5,
+ 0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9,
+ 0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b,
+ 0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b,
+ 0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4,
+ 0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12
+};
+static const u8 enc_output110[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43,
+ 0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e,
+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
+ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
+ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
+ 0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6,
+ 0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe
+};
+static const u8 enc_assoc110[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce110[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key110[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input111[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda,
+ 0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55,
+ 0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5,
+ 0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f,
+ 0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a,
+ 0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16
+};
+static const u8 enc_output111[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c,
+ 0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12,
+ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
+ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
+ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
+ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
+ 0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4,
+ 0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9
+};
+static const u8 enc_assoc111[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce111[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key111[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input112[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38,
+ 0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a,
+ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65,
+ 0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a,
+ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa,
+ 0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13
+};
+static const u8 enc_output112[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce,
+ 0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d,
+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
+ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
+ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
+ 0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7,
+ 0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce
+};
+static const u8 enc_assoc112[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce112[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key112[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input113[] __initconst = {
+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86,
+ 0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f,
+ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1,
+ 0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f,
+ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e,
+ 0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16
+};
+static const u8 enc_output113[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70,
+ 0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48,
+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
+ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
+ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
+ 0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa,
+ 0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3
+};
+static const u8 enc_assoc113[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce113[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key113[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input114[] __initconst = {
+ 0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73,
+ 0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc,
+ 0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a,
+ 0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f,
+ 0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2,
+ 0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88
+};
+static const u8 enc_output114[] __initconst = {
+ 0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea,
+ 0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f,
+ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
+ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
+ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
+ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
+ 0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69,
+ 0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1
+};
+static const u8 enc_assoc114[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce114[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key114[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input115[] __initconst = {
+ 0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 0xda, 0xeb,
+ 0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68,
+ 0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
+ 0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33,
+ 0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98,
+ 0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
+ 0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b,
+ 0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f
+};
+static const u8 enc_output115[] __initconst = {
+ 0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72,
+ 0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb,
+ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
+ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
+ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
+ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
+ 0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74,
+ 0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05
+};
+static const u8 enc_assoc115[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce115[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key115[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input116[] __initconst = {
+ 0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd,
+ 0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44,
+ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea,
+ 0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a,
+ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25,
+ 0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13
+};
+static const u8 enc_output116[] __initconst = {
+ 0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b,
+ 0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03,
+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
+ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
+ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
+ 0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50,
+ 0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2
+};
+static const u8 enc_assoc116[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce116[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key116[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input117[] __initconst = {
+ 0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d,
+ 0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47,
+ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06,
+ 0x6a, 0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08,
+ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9,
+ 0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11
+};
+static const u8 enc_output117[] __initconst = {
+ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb,
+ 0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00,
+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
+ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
+ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
+ 0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53,
+ 0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7
+};
+static const u8 enc_assoc117[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce117[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key117[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - edge case intermediate sums in poly1305 */
+static const u8 enc_input118[] __initconst = {
+ 0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
+ 0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03,
+ 0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43,
+ 0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
+ 0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a,
+ 0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f,
+ 0x4f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
+ 0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85,
+ 0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16
+};
+static const u8 enc_output118[] __initconst = {
+ 0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5,
+ 0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04,
+ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
+ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
+ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
+ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
+ 0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0,
+ 0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1
+};
+static const u8 enc_assoc118[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce118[] __initconst = {
+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
+};
+static const u8 enc_key118[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+static const struct chacha20poly1305_testvec
+chacha20poly1305_enc_vectors[] __initconst = {
+ { enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001,
+ sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) },
+ { enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002,
+ sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) },
+ { enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003,
+ sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) },
+ { enc_input004, enc_output004, enc_assoc004, enc_nonce004, enc_key004,
+ sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) },
+ { enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005,
+ sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) },
+ { enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006,
+ sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) },
+ { enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007,
+ sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) },
+ { enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008,
+ sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) },
+ { enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009,
+ sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) },
+ { enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010,
+ sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) },
+ { enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011,
+ sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) },
+ { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012,
+ sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) },
+ { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053,
+ sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) },
+ { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054,
+ sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) },
+ { enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055,
+ sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) },
+ { enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056,
+ sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) },
+ { enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057,
+ sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) },
+ { enc_input058, enc_output058, enc_assoc058, enc_nonce058, enc_key058,
+ sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) },
+ { enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059,
+ sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) },
+ { enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060,
+ sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) },
+ { enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061,
+ sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) },
+ { enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062,
+ sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) },
+ { enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063,
+ sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) },
+ { enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064,
+ sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) },
+ { enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065,
+ sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) },
+ { enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066,
+ sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) },
+ { enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067,
+ sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) },
+ { enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068,
+ sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) },
+ { enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069,
+ sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) },
+ { enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070,
+ sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) },
+ { enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071,
+ sizeof(enc_input071), sizeof(enc_assoc071), sizeof(enc_nonce071) },
+ { enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072,
+ sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) },
+ { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073,
+ sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) },
+ { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076,
+ sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) },
+ { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077,
+ sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) },
+ { enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078,
+ sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) },
+ { enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079,
+ sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) },
+ { enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080,
+ sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) },
+ { enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081,
+ sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) },
+ { enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082,
+ sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) },
+ { enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083,
+ sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) },
+ { enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084,
+ sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) },
+ { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085,
+ sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) },
+ { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093,
+ sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) },
+ { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094,
+ sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) },
+ { enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095,
+ sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) },
+ { enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096,
+ sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) },
+ { enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097,
+ sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) },
+ { enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098,
+ sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) },
+ { enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099,
+ sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) },
+ { enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100,
+ sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) },
+ { enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101,
+ sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) },
+ { enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102,
+ sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) },
+ { enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103,
+ sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) },
+ { enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104,
+ sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) },
+ { enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105,
+ sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) },
+ { enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106,
+ sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) },
+ { enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107,
+ sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) },
+ { enc_input108, enc_output108, enc_assoc108, enc_nonce108, enc_key108,
+ sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) },
+ { enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109,
+ sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) },
+ { enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110,
+ sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) },
+ { enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111,
+ sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) },
+ { enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112,
+ sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) },
+ { enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113,
+ sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) },
+ { enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114,
+ sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) },
+ { enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115,
+ sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) },
+ { enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116,
+ sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) },
+ { enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117,
+ sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) },
+ { enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118,
+ sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) }
+};
+
+static const u8 dec_input001[] __initconst = {
+ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
+ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
+ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
+ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
+ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
+ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
+ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
+ 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
+ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
+ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
+ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
+ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
+ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
+ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
+ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
+ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
+ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
+ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
+ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
+ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
+ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
+ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
+ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
+ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
+ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
+ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
+ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
+ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
+ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
+ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
+ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
+ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
+ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
+ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
+ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
+ 0x38
+};
+static const u8 dec_output001[] __initconst = {
+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
+ 0x9d
+};
+static const u8 dec_assoc001[] __initconst = {
+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x4e, 0x91
+};
+static const u8 dec_nonce001[] __initconst = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+};
+static const u8 dec_key001[] __initconst = {
+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
+};
+
+static const u8 dec_input002[] __initconst = {
+ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
+ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
+};
+static const u8 dec_output002[] __initconst = { };
+static const u8 dec_assoc002[] __initconst = { };
+static const u8 dec_nonce002[] __initconst = {
+ 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
+};
+static const u8 dec_key002[] __initconst = {
+ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
+ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
+ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
+ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
+};
+
+static const u8 dec_input003[] __initconst = {
+ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
+ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
+};
+static const u8 dec_output003[] __initconst = { };
+static const u8 dec_assoc003[] __initconst = {
+ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
+};
+static const u8 dec_nonce003[] __initconst = {
+ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
+};
+static const u8 dec_key003[] __initconst = {
+ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
+ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
+ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
+ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
+};
+
+static const u8 dec_input004[] __initconst = {
+ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
+ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
+ 0x89
+};
+static const u8 dec_output004[] __initconst = {
+ 0xa4
+};
+static const u8 dec_assoc004[] __initconst = {
+ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
+};
+static const u8 dec_nonce004[] __initconst = {
+ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
+};
+static const u8 dec_key004[] __initconst = {
+ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
+ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
+ 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
+ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
+};
+
+static const u8 dec_input005[] __initconst = {
+ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
+ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
+ 0xac
+};
+static const u8 dec_output005[] __initconst = {
+ 0x2d
+};
+static const u8 dec_assoc005[] __initconst = { };
+static const u8 dec_nonce005[] __initconst = {
+ 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
+};
+static const u8 dec_key005[] __initconst = {
+ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
+ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
+ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
+ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
+};
+
+static const u8 dec_input006[] __initconst = {
+ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
+ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
+ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
+ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
+ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
+ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
+ 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
+ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
+ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
+ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
+ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
+ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
+ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
+ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
+ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
+ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
+ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
+ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
+ 0xeb
+};
+static const u8 dec_output006[] __initconst = {
+ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
+ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
+ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
+ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
+ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
+ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
+ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
+ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
+ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
+ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
+ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
+ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
+ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
+ 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
+ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
+ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
+ 0x8f
+};
+static const u8 dec_assoc006[] __initconst = {
+ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
+};
+static const u8 dec_nonce006[] __initconst = {
+ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
+};
+static const u8 dec_key006[] __initconst = {
+ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
+ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
+ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
+ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
+};
+
+static const u8 dec_input007[] __initconst = {
+ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
+ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
+ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
+ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
+ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
+ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
+ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
+ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
+ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
+ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
+ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
+ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
+ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
+ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
+ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
+ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
+ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
+ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
+ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
+ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
+ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
+ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
+ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
+ 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
+ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
+ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
+ 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
+ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
+ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
+ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
+ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
+ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
+ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
+ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
+};
+static const u8 dec_output007[] __initconst = {
+ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
+ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
+ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
+ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
+ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
+ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
+ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
+ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
+ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
+ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
+ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
+ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
+ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
+ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
+ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
+ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
+ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
+ 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
+ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
+ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
+ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
+ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
+ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
+ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
+ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
+ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
+ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
+ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
+ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
+ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
+ 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
+ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
+};
+static const u8 dec_assoc007[] __initconst = { };
+static const u8 dec_nonce007[] __initconst = {
+ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
+};
+static const u8 dec_key007[] __initconst = {
+ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
+ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
+ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
+ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
+};
+
+static const u8 dec_input008[] __initconst = {
+ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
+ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
+ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
+ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
+ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
+ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
+ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
+ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
+ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
+ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
+ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
+ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
+ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
+ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
+ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
+ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
+ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
+ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
+ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
+ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
+ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
+ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
+ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
+ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
+ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
+ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
+ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
+ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
+ 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
+ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
+ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
+ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
+ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
+ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
+ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
+ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
+ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
+ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
+ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
+ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
+ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
+ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
+ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
+ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
+ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
+ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
+ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
+ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
+ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
+ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
+ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
+ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
+ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
+ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
+ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
+ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
+ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
+ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
+ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
+ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
+ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
+ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
+ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
+ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
+ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
+ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
+};
+static const u8 dec_output008[] __initconst = {
+ 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
+ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
+ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
+ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
+ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
+ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
+ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
+ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
+ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
+ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
+ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
+ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
+ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
+ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
+ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
+ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
+ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
+ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
+ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
+ 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
+ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
+ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
+ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
+ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
+ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
+ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
+ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
+ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
+ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
+ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
+ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
+ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
+ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
+ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
+ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
+ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
+ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
+ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
+ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
+ 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
+ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
+ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
+ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
+ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
+ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
+ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
+ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
+ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
+ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
+ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
+ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
+ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
+ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
+ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
+ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
+ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
+ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
+ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
+ 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
+ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
+ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
+ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
+ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
+ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
+};
+static const u8 dec_assoc008[] __initconst = { };
+static const u8 dec_nonce008[] __initconst = {
+ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
+};
+static const u8 dec_key008[] __initconst = {
+ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
+ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
+ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
+ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
+};
+
+static const u8 dec_input009[] __initconst = {
+ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
+ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
+ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
+ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
+ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
+ 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
+ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
+ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
+ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
+ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
+ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
+ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
+ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
+ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
+ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
+ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
+ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
+ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
+ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
+ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
+ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
+ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
+ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
+ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
+ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
+ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
+ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
+ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
+ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
+ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
+ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
+ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
+ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
+ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
+ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
+ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
+ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
+ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
+ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
+ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
+ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
+ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
+ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
+ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
+ 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
+ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
+ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
+ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
+ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
+ 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
+ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
+ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
+ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
+ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
+ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
+ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
+ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
+ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
+ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
+ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
+ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
+ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
+ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
+ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
+ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
+ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
+ 0xae
+};
+static const u8 dec_output009[] __initconst = {
+ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
+ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
+ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
+ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
+ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
+ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
+ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
+ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
+ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
+ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
+ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
+ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
+ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
+ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
+ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
+ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
+ 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
+ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
+ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
+ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
+ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
+ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
+ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
+ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
+ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
+ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
+ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
+ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
+ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
+ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
+ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
+ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
+ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
+ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
+ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
+ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
+ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
+ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
+ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
+ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
+ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
+ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
+ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
+ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
+ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
+ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
+ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
+ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
+ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
+ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
+ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
+ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
+ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
+ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
+ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
+ 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
+ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
+ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
+ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
+ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
+ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
+ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
+ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
+ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
+ 0x65
+};
+static const u8 dec_assoc009[] __initconst = {
+ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
+ 0xef
+};
+static const u8 dec_nonce009[] __initconst = {
+ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
+};
+static const u8 dec_key009[] __initconst = {
+ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
+ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
+ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
+ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
+};
+
+static const u8 dec_input010[] __initconst = {
+ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
+ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
+ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
+ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
+ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
+ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
+ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
+ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
+ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
+ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
+ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
+ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
+ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
+ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
+ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
+ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
+ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
+ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
+ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
+ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
+ 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
+ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
+ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
+ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
+ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
+ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
+ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
+ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
+ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
+ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
+ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
+ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
+ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
+ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
+ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
+ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
+ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
+ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
+ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
+ 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
+ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
+ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
+ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
+ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
+ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
+ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
+ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
+ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
+ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
+ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
+ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
+ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
+ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
+ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
+ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
+ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
+ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
+ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
+ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
+ 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
+ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
+ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
+ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
+ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
+ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
+ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
+ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
+ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
+ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
+ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
+ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
+ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
+ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
+ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
+ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
+ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
+ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
+ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
+ 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
+ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
+ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
+ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
+ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
+ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
+ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
+ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
+ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
+ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
+ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
+ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
+ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
+ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
+ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
+ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
+ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
+ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
+ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
+ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
+ 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
+ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
+ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
+ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
+ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
+ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
+ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
+ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
+ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
+ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
+ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
+ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
+ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
+ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
+ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
+ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
+ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
+ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
+ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
+ 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
+ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
+ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
+ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
+ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
+ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
+ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
+ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
+ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
+ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
+ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
+ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
+ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
+};
+static const u8 dec_output010[] __initconst = {
+ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
+ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
+ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
+ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
+ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
+ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
+ 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
+ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
+ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
+ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
+ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
+ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
+ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
+ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
+ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
+ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
+ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
+ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
+ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
+ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
+ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
+ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
+ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
+ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
+ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
+ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
+ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
+ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
+ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
+ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
+ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
+ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
+ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
+ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
+ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
+ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
+ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
+ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
+ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
+ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
+ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
+ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
+ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
+ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
+ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
+ 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
+ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
+ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
+ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
+ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
+ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
+ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
+ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
+ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
+ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
+ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
+ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
+ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
+ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
+ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
+ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
+ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
+ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
+ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
+ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
+ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
+ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
+ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
+ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
+ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
+ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
+ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
+ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
+ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
+ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
+ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
+ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
+ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
+ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
+ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
+ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
+ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
+ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
+ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
+ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
+ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
+ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
+ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
+ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
+ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
+ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
+ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
+ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
+ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
+ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
+ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
+ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
+ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
+ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
+ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
+ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
+ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
+ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
+ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
+ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
+ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
+ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
+ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
+ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
+ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
+ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
+ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
+ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
+ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
+ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
+ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
+ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
+ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
+ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
+ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
+ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
+ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
+ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
+ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
+ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
+ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
+ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
+ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
+};
+static const u8 dec_assoc010[] __initconst = {
+ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
+ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
+};
+static const u8 dec_nonce010[] __initconst = {
+ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
+};
+static const u8 dec_key010[] __initconst = {
+ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
+ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
+ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
+ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
+};
+
+static const u8 dec_input011[] __initconst = {
+ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
+ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
+ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
+ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
+ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
+ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
+ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
+ 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
+ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
+ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
+ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
+ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
+ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
+ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
+ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
+ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
+ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
+ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
+ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
+ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
+ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
+ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
+ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
+ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
+ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
+ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
+ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
+ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
+ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
+ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
+ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
+ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
+ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
+ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
+ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
+ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
+ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
+ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
+ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
+ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
+ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
+ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
+ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
+ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
+ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
+ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
+ 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
+ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
+ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
+ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
+ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
+ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
+ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
+ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
+ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
+ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
+ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
+ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
+ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
+ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
+ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
+ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
+ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
+ 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
+ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
+ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
+ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
+ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
+ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
+ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
+ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
+ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
+ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
+ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
+ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
+ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
+ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
+ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
+ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
+ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
+ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
+ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
+ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
+ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
+ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
+ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
+ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
+ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
+ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
+ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
+ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
+ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
+ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
+ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
+ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
+ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
+ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
+ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
+ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
+ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
+ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
+ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
+ 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
+ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
+ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
+ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
+ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
+ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
+ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
+ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
+ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
+ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
+ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
+ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
+ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
+ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
+ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
+ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
+ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
+ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
+ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
+ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
+ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
+ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
+ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
+ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
+ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
+ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
+ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
+ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
+ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
+ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
+ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
+ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
+ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
+ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
+ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
+ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
+ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
+ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
+ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
+ 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
+ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
+ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
+ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
+ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
+ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
+ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
+ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
+ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
+ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
+ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
+ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
+ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
+ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
+ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
+ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
+ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
+ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
+ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
+ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
+ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
+ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
+ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
+ 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
+ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
+ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
+ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
+ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
+ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
+ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
+ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
+ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
+ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
+ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
+ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
+ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
+ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
+ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
+ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
+ 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
+ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
+ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
+ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
+ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
+ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
+ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
+ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
+ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
+ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
+ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
+ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
+ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
+ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
+ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
+ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
+ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
+ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
+ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
+ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
+ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
+ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
+ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
+ 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
+ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
+ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
+ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
+ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
+ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
+ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
+ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
+ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
+ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
+ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
+ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
+ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
+ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
+ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
+ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
+ 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
+ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
+ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
+ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
+ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
+ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
+ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
+ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
+ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
+ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
+ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
+ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
+ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
+ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
+ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
+ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
+ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
+ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
+ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
+ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
+ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
+ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
+ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
+ 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
+ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c
+};
+static const u8 dec_output011[] __initconst = {
+ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
+ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
+ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
+ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
+ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
+ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
+ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
+ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
+ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
+ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
+ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
+ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
+ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
+ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
+ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
+ 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
+ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
+ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
+ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
+ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
+ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
+ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
+ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
+ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
+ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
+ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
+ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
+ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
+ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
+ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
+ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
+ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
+ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
+ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
+ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
+ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
+ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
+ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
+ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
+ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
+ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
+ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
+ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
+ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
+ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
+ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
+ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
+ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
+ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
+ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
+ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
+ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
+ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
+ 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
+ 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
+ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
+ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
+ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
+ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
+ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
+ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
+ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
+ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
+ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
+ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
+ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
+ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
+ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
+ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
+ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
+ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
+ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
+ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
+ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
+ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
+ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
+ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
+ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
+ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
+ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
+ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
+ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
+ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
+ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
+ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
+ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
+ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
+ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
+ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
+ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
+ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
+ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
+ 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
+ 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
+ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
+ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
+ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
+ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
+ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
+ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
+ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
+ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
+ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
+ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
+ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
+ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
+ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
+ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
+ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
+ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
+ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
+ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
+ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
+ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
+ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
+ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
+ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
+ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
+ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
+ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
+ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
+ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
+ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
+ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
+ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
+ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
+ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
+ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
+ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
+ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
+ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
+ 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
+ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
+ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
+ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
+ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
+ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
+ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
+ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
+ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
+ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
+ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
+ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
+ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
+ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
+ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
+ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
+ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
+ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
+ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
+ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
+ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
+ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
+ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
+ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
+ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
+ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
+ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
+ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
+ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
+ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
+ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
+ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
+ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
+ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
+ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
+ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
+ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
+ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
+ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
+ 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
+ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
+ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
+ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
+ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
+ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
+ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
+ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
+ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
+ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
+ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
+ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
+ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
+ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
+ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
+ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
+ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
+ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
+ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
+ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
+ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
+ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
+ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
+ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
+ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
+ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
+ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
+ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
+ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
+ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
+ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
+ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
+ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
+ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
+ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
+ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
+ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
+ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
+ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
+ 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
+ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
+ 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
+ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
+ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
+ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
+ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
+ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
+ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
+ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
+ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
+ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
+ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
+ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
+ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
+ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
+ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
+ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
+ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
+ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
+ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
+ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
+ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
+ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
+ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
+ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
+ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
+ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
+ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
+ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
+ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
+ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
+ 0x10, 0x1e, 0xbf, 0xec, 0xa8
+};
+static const u8 dec_assoc011[] __initconst = {
+ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
+};
+static const u8 dec_nonce011[] __initconst = {
+ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
+};
+static const u8 dec_key011[] __initconst = {
+ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
+ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
+ 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
+ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
+};
+
+static const u8 dec_input012[] __initconst = {
+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
+ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
+ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
+ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
+ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
+ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
+ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
+ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
+ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
+ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
+ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
+ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
+ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
+ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
+ 0x70, 0xcf, 0xd6
+};
+static const u8 dec_output012[] __initconst = {
+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
+ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
+ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
+ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
+ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
+ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
+ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
+ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
+ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
+ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
+ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
+ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
+ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
+ 0x78, 0xec, 0x00
+};
+static const u8 dec_assoc012[] __initconst = {
+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
+};
+static const u8 dec_nonce012[] __initconst = {
+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
+};
+static const u8 dec_key012[] __initconst = {
+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
+};
+
+static const u8 dec_input013[] __initconst = {
+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
+ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
+ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
+ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
+ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
+ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
+ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
+ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
+ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
+ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
+ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
+ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
+ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
+ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
+ 0x70, 0xcf, 0xd7
+};
+static const u8 dec_output013[] __initconst = {
+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
+ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
+ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
+ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
+ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
+ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
+ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
+ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
+ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
+ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
+ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
+ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
+ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
+ 0x78, 0xec, 0x00
+};
+static const u8 dec_assoc013[] __initconst = {
+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
+};
+static const u8 dec_nonce013[] __initconst = {
+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
+};
+static const u8 dec_key013[] __initconst = {
+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
+};
+
+static const struct chacha20poly1305_testvec
+chacha20poly1305_dec_vectors[] __initconst = {
+ { dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001,
+ sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) },
+ { dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002,
+ sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) },
+ { dec_input003, dec_output003, dec_assoc003, dec_nonce003, dec_key003,
+ sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) },
+ { dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004,
+ sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) },
+ { dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005,
+ sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) },
+ { dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006,
+ sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) },
+ { dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007,
+ sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) },
+ { dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008,
+ sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) },
+ { dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009,
+ sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) },
+ { dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010,
+ sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) },
+ { dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011,
+ sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) },
+ { dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012,
+ sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) },
+ { dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013,
+ sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013),
+ true }
+};
+
+static const u8 xenc_input001[] __initconst = {
+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
+ 0x9d
+};
+static const u8 xenc_output001[] __initconst = {
+ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
+ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
+ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
+ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
+ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
+ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
+ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
+ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
+ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
+ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
+ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
+ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
+ 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
+ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
+ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
+ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
+ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
+ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
+ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
+ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
+ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
+ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
+ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
+ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
+ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
+ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
+ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
+ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
+ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
+ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
+ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
+ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
+ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
+ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
+ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
+ 0x9c
+};
+static const u8 xenc_assoc001[] __initconst = {
+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x4e, 0x91
+};
+static const u8 xenc_nonce001[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+};
+static const u8 xenc_key001[] __initconst = {
+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
+};
+
+static const struct chacha20poly1305_testvec
+xchacha20poly1305_enc_vectors[] __initconst = {
+ { xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001,
+ sizeof(xenc_input001), sizeof(xenc_assoc001), sizeof(xenc_nonce001) }
+};
+
+static const u8 xdec_input001[] __initconst = {
+ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
+ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
+ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
+ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
+ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
+ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
+ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
+ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
+ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
+ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
+ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
+ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
+ 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
+ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
+ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
+ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
+ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
+ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
+ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
+ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
+ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
+ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
+ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
+ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
+ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
+ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
+ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
+ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
+ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
+ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
+ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
+ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
+ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
+ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
+ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
+ 0x9c
+};
+static const u8 xdec_output001[] __initconst = {
+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
+ 0x9d
+};
+static const u8 xdec_assoc001[] __initconst = {
+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x4e, 0x91
+};
+static const u8 xdec_nonce001[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+};
+static const u8 xdec_key001[] __initconst = {
+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
+};
+
+static const struct chacha20poly1305_testvec
+xchacha20poly1305_dec_vectors[] __initconst = {
+ { xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001,
+ sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) }
+};
+
+static void __init
+chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 *nonce, const size_t nonce_len,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ if (nonce_len == 8)
+ chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
+ get_unaligned_le64(nonce), key);
+ else
+ BUG();
+}
+
+static bool __init
+decryption_success(bool func_ret, bool expect_failure, int memcmp_result)
+{
+ if (expect_failure)
+ return !func_ret;
+ return func_ret && !memcmp_result;
+}
+
+bool __init chacha20poly1305_selftest(void)
+{
+ enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
+ size_t i;
+ u8 *computed_output = NULL, *heap_src = NULL;
+ bool success = true, ret;
+
+ heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
+ computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
+ if (!heap_src || !computed_output) {
+ pr_err("chacha20poly1305 self-test malloc: FAIL\n");
+ success = false;
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
+ chacha20poly1305_selftest_encrypt(computed_output,
+ chacha20poly1305_enc_vectors[i].input,
+ chacha20poly1305_enc_vectors[i].ilen,
+ chacha20poly1305_enc_vectors[i].assoc,
+ chacha20poly1305_enc_vectors[i].alen,
+ chacha20poly1305_enc_vectors[i].nonce,
+ chacha20poly1305_enc_vectors[i].nlen,
+ chacha20poly1305_enc_vectors[i].key);
+ if (memcmp(computed_output,
+ chacha20poly1305_enc_vectors[i].output,
+ chacha20poly1305_enc_vectors[i].ilen +
+ POLY1305_DIGEST_SIZE)) {
+ pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
+ ret = chacha20poly1305_decrypt(computed_output,
+ chacha20poly1305_dec_vectors[i].input,
+ chacha20poly1305_dec_vectors[i].ilen,
+ chacha20poly1305_dec_vectors[i].assoc,
+ chacha20poly1305_dec_vectors[i].alen,
+ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
+ chacha20poly1305_dec_vectors[i].key);
+ if (!decryption_success(ret,
+ chacha20poly1305_dec_vectors[i].failure,
+ memcmp(computed_output,
+ chacha20poly1305_dec_vectors[i].output,
+ chacha20poly1305_dec_vectors[i].ilen -
+ POLY1305_DIGEST_SIZE))) {
+ pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+
+
+ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
+ xchacha20poly1305_encrypt(computed_output,
+ xchacha20poly1305_enc_vectors[i].input,
+ xchacha20poly1305_enc_vectors[i].ilen,
+ xchacha20poly1305_enc_vectors[i].assoc,
+ xchacha20poly1305_enc_vectors[i].alen,
+ xchacha20poly1305_enc_vectors[i].nonce,
+ xchacha20poly1305_enc_vectors[i].key);
+ if (memcmp(computed_output,
+ xchacha20poly1305_enc_vectors[i].output,
+ xchacha20poly1305_enc_vectors[i].ilen +
+ POLY1305_DIGEST_SIZE)) {
+ pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
+ ret = xchacha20poly1305_decrypt(computed_output,
+ xchacha20poly1305_dec_vectors[i].input,
+ xchacha20poly1305_dec_vectors[i].ilen,
+ xchacha20poly1305_dec_vectors[i].assoc,
+ xchacha20poly1305_dec_vectors[i].alen,
+ xchacha20poly1305_dec_vectors[i].nonce,
+ xchacha20poly1305_dec_vectors[i].key);
+ if (!decryption_success(ret,
+ xchacha20poly1305_dec_vectors[i].failure,
+ memcmp(computed_output,
+ xchacha20poly1305_dec_vectors[i].output,
+ xchacha20poly1305_dec_vectors[i].ilen -
+ POLY1305_DIGEST_SIZE))) {
+ pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+
+out:
+ kfree(heap_src);
+ kfree(computed_output);
+ return success;
+}
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
new file mode 100644
index 000000000000..c12ddbe9eb92
--- /dev/null
+++ b/lib/crypto/chacha20poly1305.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the ChaCha20Poly1305 AEAD construction.
+ *
+ * Information: https://tools.ietf.org/html/rfc8439
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20poly1305.h>
+#include <crypto/chacha.h>
+#include <crypto/poly1305.h>
+
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32))
+
+bool __init chacha20poly1305_selftest(void);
+
+static void chacha_load_key(u32 *k, const u8 *in)
+{
+ k[0] = get_unaligned_le32(in);
+ k[1] = get_unaligned_le32(in + 4);
+ k[2] = get_unaligned_le32(in + 8);
+ k[3] = get_unaligned_le32(in + 12);
+ k[4] = get_unaligned_le32(in + 16);
+ k[5] = get_unaligned_le32(in + 20);
+ k[6] = get_unaligned_le32(in + 24);
+ k[7] = get_unaligned_le32(in + 28);
+}
+
+static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce)
+{
+ u32 k[CHACHA_KEY_WORDS];
+ u8 iv[CHACHA_IV_SIZE];
+
+ memset(iv, 0, 8);
+ memcpy(iv + 8, nonce + 16, 8);
+
+ chacha_load_key(k, key);
+
+ /* Compute the subkey given the original key and first 128 nonce bits */
+ chacha_init(chacha_state, k, nonce);
+ hchacha_block(chacha_state, k, 20);
+
+ chacha_init(chacha_state, k, iv);
+
+ memzero_explicit(k, sizeof(k));
+ memzero_explicit(iv, sizeof(iv));
+}
+
+static void
+__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len, u32 *chacha_state)
+{
+ const u8 *pad0 = page_address(ZERO_PAGE(0));
+ struct poly1305_desc_ctx poly1305_state;
+ union {
+ u8 block0[POLY1305_KEY_SIZE];
+ __le64 lens[2];
+ } b;
+
+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ poly1305_init(&poly1305_state, b.block0);
+
+ poly1305_update(&poly1305_state, ad, ad_len);
+ if (ad_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
+
+ chacha_crypt(chacha_state, dst, src, src_len, 20);
+
+ poly1305_update(&poly1305_state, dst, src_len);
+ if (src_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
+
+ b.lens[0] = cpu_to_le64(ad_len);
+ b.lens[1] = cpu_to_le64(src_len);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
+
+ poly1305_final(&poly1305_state, dst + src_len);
+
+ memzero_explicit(chacha_state, CHACHA_STATE_WORDS * sizeof(u32));
+ memzero_explicit(&b, sizeof(b));
+}
+
+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u32 k[CHACHA_KEY_WORDS];
+ __le64 iv[2];
+
+ chacha_load_key(k, key);
+
+ iv[0] = 0;
+ iv[1] = cpu_to_le64(nonce);
+
+ chacha_init(chacha_state, k, (u8 *)iv);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
+
+ memzero_explicit(iv, sizeof(iv));
+ memzero_explicit(k, sizeof(k));
+}
+EXPORT_SYMBOL(chacha20poly1305_encrypt);
+
+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+
+ xchacha_init(chacha_state, key, nonce);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
+}
+EXPORT_SYMBOL(xchacha20poly1305_encrypt);
+
+static bool
+__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len, u32 *chacha_state)
+{
+ const u8 *pad0 = page_address(ZERO_PAGE(0));
+ struct poly1305_desc_ctx poly1305_state;
+ size_t dst_len;
+ int ret;
+ union {
+ u8 block0[POLY1305_KEY_SIZE];
+ u8 mac[POLY1305_DIGEST_SIZE];
+ __le64 lens[2];
+ } b;
+
+ if (unlikely(src_len < POLY1305_DIGEST_SIZE))
+ return false;
+
+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ poly1305_init(&poly1305_state, b.block0);
+
+ poly1305_update(&poly1305_state, ad, ad_len);
+ if (ad_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
+
+ dst_len = src_len - POLY1305_DIGEST_SIZE;
+ poly1305_update(&poly1305_state, src, dst_len);
+ if (dst_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (dst_len & 0xf));
+
+ b.lens[0] = cpu_to_le64(ad_len);
+ b.lens[1] = cpu_to_le64(dst_len);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
+
+ poly1305_final(&poly1305_state, b.mac);
+
+ ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE);
+ if (likely(!ret))
+ chacha_crypt(chacha_state, dst, src, dst_len, 20);
+
+ memzero_explicit(&b, sizeof(b));
+
+ return !ret;
+}
+
+bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u32 k[CHACHA_KEY_WORDS];
+ __le64 iv[2];
+ bool ret;
+
+ chacha_load_key(k, key);
+
+ iv[0] = 0;
+ iv[1] = cpu_to_le64(nonce);
+
+ chacha_init(chacha_state, k, (u8 *)iv);
+ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
+ chacha_state);
+
+ memzero_explicit(chacha_state, sizeof(chacha_state));
+ memzero_explicit(iv, sizeof(iv));
+ memzero_explicit(k, sizeof(k));
+ return ret;
+}
+EXPORT_SYMBOL(chacha20poly1305_decrypt);
+
+bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+
+ xchacha_init(chacha_state, key, nonce);
+ return __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
+ chacha_state);
+}
+EXPORT_SYMBOL(xchacha20poly1305_decrypt);
+
+static int __init mod_init(void)
+{
+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ WARN_ON(!chacha20poly1305_selftest()))
+ return -ENODEV;
+ return 0;
+}
+
+module_init(mod_init);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
--
2.18.2
From 265dc97191fdbe6f8bc4ea513b07ff27cc1885ad Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:40 +0100
Subject: [PATCH 033/100] crypto: lib/chacha20poly1305 - reimplement
crypt_from_sg() routine
commit d95312a3ccc0cd544d374be2fc45aeaa803e5fd9 upstream.
Reimplement the library routines to perform chacha20poly1305 en/decryption
on scatterlists, without [ab]using the [deprecated] blkcipher interface,
which is rather heavyweight and does things we don't really need.
Instead, we use the sg_miter API in a novel and clever way, to iterate
over the scatterlist in-place (i.e., source == destination, which is the
only way this library is expected to be used). That way, we don't have to
iterate over two scatterlists in parallel.
Another optimization is that, instead of relying on the blkcipher walker
to present the input in suitable chunks, we recognize that ChaCha is a
streamcipher, and so we can simply deal with partial blocks by keeping a
block of cipherstream on the stack and use crypto_xor() to mix it with
the in/output.
Finally, we omit the scatterwalk_and_copy() call if the last element of
the scatterlist covers the MAC as well (which is the common case),
avoiding the need to walk the scatterlist and kmap() the page twice.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/crypto/chacha20poly1305.h | 11 ++
lib/crypto/chacha20poly1305-selftest.c | 45 ++++++++
lib/crypto/chacha20poly1305.c | 150 +++++++++++++++++++++++++
3 files changed, 206 insertions(+)
diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
index ad3b1de58df8..234ee28078ef 100644
--- a/include/crypto/chacha20poly1305.h
+++ b/include/crypto/chacha20poly1305.h
@@ -7,6 +7,7 @@
#define __CHACHA20POLY1305_H
#include <linux/types.h>
+#include <linux/scatterlist.h>
enum chacha20poly1305_lengths {
XCHACHA20POLY1305_NONCE_SIZE = 24,
@@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decrypt(
const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
#endif /* __CHACHA20POLY1305_H */
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index d1ed0f27cfdb..465de46dbdef 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(void)
enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
size_t i;
u8 *computed_output = NULL, *heap_src = NULL;
+ struct scatterlist sg_src;
bool success = true, ret;
heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
@@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(void)
}
}
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
+ if (chacha20poly1305_enc_vectors[i].nlen != 8)
+ continue;
+ memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
+ chacha20poly1305_enc_vectors[i].ilen);
+ sg_init_one(&sg_src, heap_src,
+ chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
+ chacha20poly1305_encrypt_sg_inplace(&sg_src,
+ chacha20poly1305_enc_vectors[i].ilen,
+ chacha20poly1305_enc_vectors[i].assoc,
+ chacha20poly1305_enc_vectors[i].alen,
+ get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
+ chacha20poly1305_enc_vectors[i].key);
+ if (memcmp(heap_src,
+ chacha20poly1305_enc_vectors[i].output,
+ chacha20poly1305_enc_vectors[i].ilen +
+ POLY1305_DIGEST_SIZE)) {
+ pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
ret = chacha20poly1305_decrypt(computed_output,
@@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(void)
}
}
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
+ memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
+ chacha20poly1305_dec_vectors[i].ilen);
+ sg_init_one(&sg_src, heap_src,
+ chacha20poly1305_dec_vectors[i].ilen);
+ ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
+ chacha20poly1305_dec_vectors[i].ilen,
+ chacha20poly1305_dec_vectors[i].assoc,
+ chacha20poly1305_dec_vectors[i].alen,
+ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
+ chacha20poly1305_dec_vectors[i].key);
+ if (!decryption_success(ret,
+ chacha20poly1305_dec_vectors[i].failure,
+ memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
+ chacha20poly1305_dec_vectors[i].ilen -
+ POLY1305_DIGEST_SIZE))) {
+ pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
+ i + 1);
+ success = false;
+ }
+ }
for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index c12ddbe9eb92..821e5cc9b14e 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -11,6 +11,7 @@
#include <crypto/chacha20poly1305.h>
#include <crypto/chacha.h>
#include <crypto/poly1305.h>
+#include <crypto/scatterwalk.h>
#include <asm/unaligned.h>
#include <linux/kernel.h>
@@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
}
EXPORT_SYMBOL(xchacha20poly1305_decrypt);
+static
+bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
+ const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE],
+ int encrypt)
+{
+ const u8 *pad0 = page_address(ZERO_PAGE(0));
+ struct poly1305_desc_ctx poly1305_state;
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ struct sg_mapping_iter miter;
+ size_t partial = 0;
+ unsigned int flags;
+ bool ret = true;
+ int sl;
+ union {
+ struct {
+ u32 k[CHACHA_KEY_WORDS];
+ __le64 iv[2];
+ };
+ u8 block0[POLY1305_KEY_SIZE];
+ u8 chacha_stream[CHACHA_BLOCK_SIZE];
+ struct {
+ u8 mac[2][POLY1305_DIGEST_SIZE];
+ };
+ __le64 lens[2];
+ } b __aligned(16);
+
+ chacha_load_key(b.k, key);
+
+ b.iv[0] = 0;
+ b.iv[1] = cpu_to_le64(nonce);
+
+ chacha_init(chacha_state, b.k, (u8 *)b.iv);
+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ poly1305_init(&poly1305_state, b.block0);
+
+ if (unlikely(ad_len)) {
+ poly1305_update(&poly1305_state, ad, ad_len);
+ if (ad_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
+ }
+
+ flags = SG_MITER_TO_SG;
+ if (!preemptible())
+ flags |= SG_MITER_ATOMIC;
+
+ sg_miter_start(&miter, src, sg_nents(src), flags);
+
+ for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
+ u8 *addr = miter.addr;
+ size_t length = min_t(size_t, sl, miter.length);
+
+ if (!encrypt)
+ poly1305_update(&poly1305_state, addr, length);
+
+ if (unlikely(partial)) {
+ size_t l = min(length, CHACHA_BLOCK_SIZE - partial);
+
+ crypto_xor(addr, b.chacha_stream + partial, l);
+ partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1);
+
+ addr += l;
+ length -= l;
+ }
+
+ if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) {
+ size_t l = length;
+
+ if (unlikely(length < sl))
+ l &= ~(CHACHA_BLOCK_SIZE - 1);
+ chacha_crypt(chacha_state, addr, addr, l, 20);
+ addr += l;
+ length -= l;
+ }
+
+ if (unlikely(length > 0)) {
+ chacha_crypt(chacha_state, b.chacha_stream, pad0,
+ CHACHA_BLOCK_SIZE, 20);
+ crypto_xor(addr, b.chacha_stream, length);
+ partial = length;
+ }
+
+ if (encrypt)
+ poly1305_update(&poly1305_state, miter.addr,
+ min_t(size_t, sl, miter.length));
+ }
+
+ if (src_len & 0xf)
+ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
+
+ b.lens[0] = cpu_to_le64(ad_len);
+ b.lens[1] = cpu_to_le64(src_len);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
+
+ if (likely(sl <= -POLY1305_DIGEST_SIZE)) {
+ if (encrypt) {
+ poly1305_final(&poly1305_state,
+ miter.addr + miter.length + sl);
+ ret = true;
+ } else {
+ poly1305_final(&poly1305_state, b.mac[0]);
+ ret = !crypto_memneq(b.mac[0],
+ miter.addr + miter.length + sl,
+ POLY1305_DIGEST_SIZE);
+ }
+ }
+
+ sg_miter_stop(&miter);
+
+ if (unlikely(sl > -POLY1305_DIGEST_SIZE)) {
+ poly1305_final(&poly1305_state, b.mac[1]);
+ scatterwalk_map_and_copy(b.mac[encrypt], src, src_len,
+ sizeof(b.mac[1]), encrypt);
+ ret = encrypt ||
+ !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE);
+ }
+
+ memzero_explicit(chacha_state, sizeof(chacha_state));
+ memzero_explicit(&b, sizeof(b));
+
+ return ret;
+}
+
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len,
+ nonce, key, 1);
+}
+EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace);
+
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u64 nonce,
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ if (unlikely(src_len < POLY1305_DIGEST_SIZE))
+ return false;
+
+ return chacha20poly1305_crypt_sg_inplace(src,
+ src_len - POLY1305_DIGEST_SIZE,
+ ad, ad_len, nonce, key, 0);
+}
+EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
+
static int __init mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
--
2.18.2
From d43d24b8f78fd3653105b3c7fccaf48406459faf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 17 Nov 2019 23:21:29 -0800
Subject: [PATCH 034/100] crypto: chacha_generic - remove unnecessary setkey()
functions
commit 2043323a799a660bc84bbee404cf7a2617ec6157 upstream.
Use chacha20_setkey() and chacha12_setkey() from
<crypto/internal/chacha.h> instead of defining them again in
chacha_generic.c.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/chacha_generic.c | 18 +++---------------
1 file changed, 3 insertions(+), 15 deletions(-)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
index c1b147318393..8beea79ab117 100644
--- a/crypto/chacha_generic.c
+++ b/crypto/chacha_generic.c
@@ -37,18 +37,6 @@ static int chacha_stream_xor(struct skcipher_request *req,
return err;
}
-static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- return chacha_setkey(tfm, key, keysize, 20);
-}
-
-static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- return chacha_setkey(tfm, key, keysize, 12);
-}
-
static int crypto_chacha_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -91,7 +79,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = crypto_chacha_crypt,
.decrypt = crypto_chacha_crypt,
}, {
@@ -106,7 +94,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = crypto_xchacha_crypt,
.decrypt = crypto_xchacha_crypt,
}, {
@@ -121,7 +109,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = crypto_xchacha_crypt,
.decrypt = crypto_xchacha_crypt,
}
--
2.18.2
From a34dfabc1996f2c4cbcd1357e42af15c7cb212ea Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 17 Nov 2019 23:21:58 -0800
Subject: [PATCH 035/100] crypto: x86/chacha - only unregister algorithms if
registered
commit b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 upstream.
It's not valid to call crypto_unregister_skciphers() without a prior
call to crypto_register_skciphers().
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/chacha_glue.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index b391e13a9e41..a94e30b6f941 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(void)
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
--
2.18.2
From cca80e2b0611bf97f9bfad1148c6cb301dde2aec Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 17 Nov 2019 23:22:16 -0800
Subject: [PATCH 036/100] crypto: lib/chacha20poly1305 - use chacha20_crypt()
commit 413808b71e6204b0cc1eeaa77960f7c3cd381d33 upstream.
Use chacha20_crypt() instead of chacha_crypt(), since it's not really
appropriate for users of the ChaCha library API to be passing the number
of rounds as an argument.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/chacha20poly1305.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 821e5cc9b14e..6d83cafebc69 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -66,14 +66,14 @@ __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
__le64 lens[2];
} b;
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len);
if (ad_len & 0xf)
poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
- chacha_crypt(chacha_state, dst, src, src_len, 20);
+ chacha20_crypt(chacha_state, dst, src, src_len);
poly1305_update(&poly1305_state, dst, src_len);
if (src_len & 0xf)
@@ -140,7 +140,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
if (unlikely(src_len < POLY1305_DIGEST_SIZE))
return false;
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len);
@@ -160,7 +160,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE);
if (likely(!ret))
- chacha_crypt(chacha_state, dst, src, dst_len, 20);
+ chacha20_crypt(chacha_state, dst, src, dst_len);
memzero_explicit(&b, sizeof(b));
@@ -241,7 +241,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
b.iv[1] = cpu_to_le64(nonce);
chacha_init(chacha_state, b.k, (u8 *)b.iv);
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
poly1305_init(&poly1305_state, b.block0);
if (unlikely(ad_len)) {
@@ -278,14 +278,14 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
if (unlikely(length < sl))
l &= ~(CHACHA_BLOCK_SIZE - 1);
- chacha_crypt(chacha_state, addr, addr, l, 20);
+ chacha20_crypt(chacha_state, addr, addr, l);
addr += l;
length -= l;
}
if (unlikely(length > 0)) {
- chacha_crypt(chacha_state, b.chacha_stream, pad0,
- CHACHA_BLOCK_SIZE, 20);
+ chacha20_crypt(chacha_state, b.chacha_stream, pad0,
+ CHACHA_BLOCK_SIZE);
crypto_xor(addr, b.chacha_stream, length);
partial = length;
}
--
2.18.2
From f2be36196660d017639e2f1ac064665e3aa9cda4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 25 Nov 2019 11:31:12 +0100
Subject: [PATCH 037/100] crypto: arch - conditionalize crypto api in arch glue
for lib code
commit 8394bfec51e0e565556101bcc4e2fe7551104cd8 upstream.
For glue code that's used by Zinc, the actual Crypto API functions might
not necessarily exist, and don't need to exist either. Before this
patch, there are valid build configurations that lead to a unbuildable
kernel. This fixes it to conditionalize those symbols on the existence
of the proper config entry.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/chacha-glue.c | 26 ++++++++++++++++----------
arch/arm/crypto/curve25519-glue.c | 5 +++--
arch/arm/crypto/poly1305-glue.c | 9 ++++++---
arch/arm64/crypto/chacha-neon-glue.c | 5 +++--
arch/arm64/crypto/poly1305-glue.c | 5 +++--
arch/mips/crypto/chacha-glue.c | 6 ++++--
arch/mips/crypto/poly1305-glue.c | 6 ++++--
arch/x86/crypto/blake2s-glue.c | 6 ++++--
arch/x86/crypto/chacha_glue.c | 5 +++--
arch/x86/crypto/curve25519-x86_64.c | 7 ++++---
arch/x86/crypto/poly1305_glue.c | 5 +++--
11 files changed, 53 insertions(+), 32 deletions(-)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 3f0c057aa050..7bdf8823066d 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -286,11 +286,13 @@ static struct skcipher_alg neon_algs[] = {
static int __init chacha_simd_mod_init(void)
{
- int err;
+ int err = 0;
- err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
- if (err)
- return err;
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (err)
+ return err;
+ }
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
int i;
@@ -310,18 +312,22 @@ static int __init chacha_simd_mod_init(void)
static_branch_enable(&use_neon);
}
- err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
- if (err)
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ if (err)
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ }
}
return err;
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
- crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ }
}
module_init(chacha_simd_mod_init);
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
index 2e9e12d2f642..f3f42cf3b893 100644
--- a/arch/arm/crypto/curve25519-glue.c
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -108,14 +108,15 @@ static int __init mod_init(void)
{
if (elf_hwcap & HWCAP_NEON) {
static_branch_enable(&have_neon);
- return crypto_register_kpp(&curve25519_alg);
+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
+ crypto_register_kpp(&curve25519_alg) : 0;
}
return 0;
}
static void __exit mod_exit(void)
{
- if (elf_hwcap & HWCAP_NEON)
+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
crypto_unregister_kpp(&curve25519_alg);
}
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index 74a725ac89c9..abe3f2d587dc 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -249,16 +249,19 @@ static int __init arm_poly1305_mod_init(void)
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
(elf_hwcap & HWCAP_NEON))
static_branch_enable(&have_neon);
- else
+ else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
/* register only the first entry */
return crypto_register_shash(&arm_poly1305_algs[0]);
- return crypto_register_shashes(arm_poly1305_algs,
- ARRAY_SIZE(arm_poly1305_algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs)) : 0;
}
static void __exit arm_poly1305_mod_exit(void)
{
+ if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
+ return;
if (!static_branch_likely(&have_neon)) {
crypto_unregister_shash(&arm_poly1305_algs[0]);
return;
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index b08029d7bde6..71c11d2e9fcd 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -211,12 +211,13 @@ static int __init chacha_simd_mod_init(void)
static_branch_enable(&have_neon);
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
}
static void __exit chacha_simd_mod_fini(void)
{
- if (cpu_have_named_feature(ASIMD))
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && cpu_have_named_feature(ASIMD))
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index dd843d0ee83a..83a2338a8826 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -220,12 +220,13 @@ static int __init neon_poly1305_mod_init(void)
static_branch_enable(&have_neon);
- return crypto_register_shash(&neon_poly1305_alg);
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shash(&neon_poly1305_alg) : 0;
}
static void __exit neon_poly1305_mod_exit(void)
{
- if (cpu_have_named_feature(ASIMD))
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
crypto_unregister_shash(&neon_poly1305_alg);
}
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
index 779e399c9bef..90896029d0cd 100644
--- a/arch/mips/crypto/chacha-glue.c
+++ b/arch/mips/crypto/chacha-glue.c
@@ -128,12 +128,14 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
index b759b6ccc361..b37d29cf5d0a 100644
--- a/arch/mips/crypto/poly1305-glue.c
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -187,12 +187,14 @@ static struct shash_alg mips_poly1305_alg = {
static int __init mips_poly1305_mod_init(void)
{
- return crypto_register_shash(&mips_poly1305_alg);
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shash(&mips_poly1305_alg) : 0;
}
static void __exit mips_poly1305_mod_exit(void)
{
- crypto_unregister_shash(&mips_poly1305_alg);
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+ crypto_unregister_shash(&mips_poly1305_alg);
}
module_init(mips_poly1305_mod_init);
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 4a37ba7cdbe5..1d9ff8a45e1f 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -210,12 +210,14 @@ static int __init blake2s_mod_init(void)
XFEATURE_MASK_AVX512, NULL))
static_branch_enable(&blake2s_use_avx512);
- return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shashes(blake2s_algs,
+ ARRAY_SIZE(blake2s_algs)) : 0;
}
static void __exit blake2s_mod_exit(void)
{
- if (boot_cpu_has(X86_FEATURE_SSSE3))
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index a94e30b6f941..1bebe11b9ec9 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -299,12 +299,13 @@ static int __init chacha_simd_mod_init(void)
boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
static_branch_enable(&chacha_use_avx512vl);
}
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
}
static void __exit chacha_simd_mod_fini(void)
{
- if (boot_cpu_has(X86_FEATURE_SSSE3))
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index a52a3fb15727..eec7d2d24239 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -2457,13 +2457,14 @@ static int __init curve25519_mod_init(void)
static_branch_enable(&curve25519_use_adx);
else
return 0;
- return crypto_register_kpp(&curve25519_alg);
+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
+ crypto_register_kpp(&curve25519_alg) : 0;
}
static void __exit curve25519_mod_exit(void)
{
- if (boot_cpu_has(X86_FEATURE_BMI2) ||
- boot_cpu_has(X86_FEATURE_ADX))
+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
+ (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
crypto_unregister_kpp(&curve25519_alg);
}
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 370cd88068ec..0cc4537e6617 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -224,12 +224,13 @@ static int __init poly1305_simd_mod_init(void)
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
static_branch_enable(&poly1305_use_avx2);
- return crypto_register_shash(&alg);
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
}
static void __exit poly1305_simd_mod_exit(void)
{
- crypto_unregister_shash(&alg);
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+ crypto_unregister_shash(&alg);
}
module_init(poly1305_simd_mod_init);
--
2.18.2
From 59949ef5e1e03dbbe4cfc235b2f1ae4f7f84b90a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@vt.edu>
Date: Thu, 5 Dec 2019 20:58:36 -0500
Subject: [PATCH 038/100] crypto: chacha - fix warning message in header file
commit 579d705cd64e44f3fcda1a6cfd5f37468a5ddf63 upstream.
Building with W=1 causes a warning:
CC [M] arch/x86/crypto/chacha_glue.o
In file included from arch/x86/crypto/chacha_glue.c:10:
./include/crypto/internal/chacha.h:37:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
37 | static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
| ^~~~~~
Straighten out the order to match the rest of the header file.
Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/crypto/internal/chacha.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
index aa5d4a16aac5..b085dc1ac151 100644
--- a/include/crypto/internal/chacha.h
+++ b/include/crypto/internal/chacha.h
@@ -34,7 +34,7 @@ static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
return chacha_setkey(tfm, key, keysize, 20);
}
-static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize)
{
return chacha_setkey(tfm, key, keysize, 12);
--
2.18.2
From 543a947c79372b975b492c5e8706503a79a9831c Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 11 Dec 2019 10:26:39 +0100
Subject: [PATCH 039/100] crypto: arm/curve25519 - add arch-specific key
generation function
commit 84faa307249b341f6ad8de3e1869d77a65e26669 upstream.
Somehow this was forgotten when Zinc was being split into oddly shaped
pieces, resulting in linker errors. The x86_64 glue has a specific key
generation implementation, but the Arm one does not. However, it can
still receive the NEON speedups by calling the ordinary DH function
using the base point.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/curve25519-glue.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
index f3f42cf3b893..776ae07e0469 100644
--- a/arch/arm/crypto/curve25519-glue.c
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
}
EXPORT_SYMBOL(curve25519_arch);
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ return curve25519_arch(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
unsigned int len)
{
--
2.18.2
From 7b55c4f9819e6791a6b881b2cf8880e947adfe8d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 16 Dec 2019 19:53:26 +0100
Subject: [PATCH 040/100] crypto: lib/curve25519 - re-add selftests
commit aa127963f1cab2b93c74c9b128a84610203fb674 upstream.
Somehow these were dropped when Zinc was being integrated, which is
problematic, because testing the library interface for Curve25519 is
important.. This commit simply adds them back and wires them in in the
same way that the blake2s selftests are wired in.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/Makefile | 1 +
lib/crypto/curve25519-selftest.c | 1321 ++++++++++++++++++++++++++++++
lib/crypto/curve25519.c | 17 +
3 files changed, 1339 insertions(+)
create mode 100644 lib/crypto/curve25519-selftest.c
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 34a701ab8b92..f97f9b941110 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -36,4 +36,5 @@ libsha256-y := sha256.o
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
libchacha20poly1305-y += chacha20poly1305-selftest.o
+libcurve25519-y += curve25519-selftest.o
endif
diff --git a/lib/crypto/curve25519-selftest.c b/lib/crypto/curve25519-selftest.c
new file mode 100644
index 000000000000..c85e85381e78
--- /dev/null
+++ b/lib/crypto/curve25519-selftest.c
@@ -0,0 +1,1321 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/curve25519.h>
+
+struct curve25519_test_vector {
+ u8 private[CURVE25519_KEY_SIZE];
+ u8 public[CURVE25519_KEY_SIZE];
+ u8 result[CURVE25519_KEY_SIZE];
+ bool valid;
+};
+static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = {
+ {
+ .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
+ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
+ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
+ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
+ .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
+ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
+ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
+ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
+ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+ .valid = true
+ },
+ {
+ .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
+ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
+ 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
+ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
+ .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
+ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
+ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
+ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
+ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+ .valid = true
+ },
+ {
+ .private = { 1 },
+ .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
+ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
+ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
+ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
+ .valid = true
+ },
+ {
+ .private = { 1 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
+ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
+ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
+ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
+ .valid = true
+ },
+ {
+ .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
+ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+ .valid = true
+ },
+ {
+ .private = { 1, 2, 3, 4 },
+ .public = { 0 },
+ .result = { 0 },
+ .valid = false
+ },
+ {
+ .private = { 2, 4, 6, 8 },
+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 },
+ .result = { 0 },
+ .valid = false
+ },
+ {
+ .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
+ .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
+ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
+ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
+ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
+ .valid = true
+ },
+ {
+ .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
+ .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
+ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
+ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
+ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
+ .valid = true
+ },
+ /* wycheproof - normal case */
+ {
+ .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
+ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
+ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
+ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
+ .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
+ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
+ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
+ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
+ .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
+ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
+ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
+ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
+ .valid = true
+ },
+ /* wycheproof - public key on twist */
+ {
+ .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
+ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
+ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
+ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
+ .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
+ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
+ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
+ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
+ .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
+ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
+ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
+ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
+ .valid = true
+ },
+ /* wycheproof - public key on twist */
+ {
+ .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
+ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
+ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
+ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
+ .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
+ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
+ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
+ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
+ .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
+ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
+ 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
+ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
+ .valid = true
+ },
+ /* wycheproof - public key on twist */
+ {
+ .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
+ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
+ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
+ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
+ .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
+ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
+ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
+ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
+ .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
+ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
+ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
+ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
+ .valid = true
+ },
+ /* wycheproof - public key on twist */
+ {
+ .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
+ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
+ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
+ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
+ .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
+ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
+ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
+ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
+ .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
+ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
+ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
+ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
+ .valid = true
+ },
+ /* wycheproof - public key on twist */
+ {
+ .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
+ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
+ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
+ 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
+ .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
+ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
+ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
+ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
+ .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
+ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
+ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
+ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
+ .valid = true
+ },
+ /* wycheproof - public key = 0 */
+ {
+ .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11,
+ 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac,
+ 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b,
+ 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc },
+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key = 1 */
+ {
+ .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61,
+ 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea,
+ 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f,
+ 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab },
+ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
+ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
+ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
+ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
+ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
+ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
+ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
+ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
+ .valid = true
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
+ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
+ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
+ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
+ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
+ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
+ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
+ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
+ .valid = true
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
+ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
+ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
+ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
+ .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
+ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
+ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
+ .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
+ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
+ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
+ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
+ .valid = true
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
+ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
+ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
+ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
+ .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
+ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
+ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
+ .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
+ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
+ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
+ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
+ .valid = true
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
+ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
+ 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
+ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
+ .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
+ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
+ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
+ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
+ .valid = true
+ },
+ /* wycheproof - edge case on twist */
+ {
+ .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
+ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
+ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
+ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
+ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
+ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
+ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
+ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
+ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
+ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
+ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
+ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
+ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
+ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
+ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
+ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
+ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
+ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
+ .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
+ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
+ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
+ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
+ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
+ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
+ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+ .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
+ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
+ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
+ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
+ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
+ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
+ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
+ .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
+ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
+ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
+ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
+ .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
+ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
+ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
+ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
+ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
+ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
+ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
+ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
+ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
+ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
+ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
+ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
+ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
+ .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
+ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
+ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
+ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
+ .valid = true
+ },
+ /* wycheproof - edge case for public key */
+ {
+ .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
+ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
+ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
+ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
+ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
+ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
+ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
+ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
+ .valid = true
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30,
+ 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69,
+ 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14,
+ 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 },
+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3,
+ 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b,
+ 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef,
+ 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 },
+ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20,
+ 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf,
+ 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43,
+ 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 },
+ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f,
+ 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65,
+ 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06,
+ 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 },
+ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe,
+ 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9,
+ 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f,
+ 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f },
+ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8,
+ 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85,
+ 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c,
+ 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c },
+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8,
+ 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d,
+ 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0,
+ 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 },
+ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a,
+ 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b,
+ 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67,
+ 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 },
+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46,
+ 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02,
+ 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3,
+ 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 },
+ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30,
+ 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1,
+ 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6,
+ 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe },
+ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f,
+ 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77,
+ 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0,
+ 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c },
+ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key with low order */
+ {
+ .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e,
+ 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f,
+ 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77,
+ 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b },
+ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = false
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
+ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
+ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
+ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
+ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
+ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
+ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
+ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
+ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
+ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
+ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
+ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
+ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
+ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
+ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
+ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
+ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
+ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
+ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
+ 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
+ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
+ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
+ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
+ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
+ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
+ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
+ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
+ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
+ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
+ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
+ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
+ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
+ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
+ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
+ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
+ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
+ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
+ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
+ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
+ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
+ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
+ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
+ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
+ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
+ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
+ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+ .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
+ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
+ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
+ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
+ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
+ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
+ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
+ .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
+ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
+ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
+ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
+ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
+ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
+ 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
+ .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
+ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
+ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
+ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
+ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
+ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
+ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
+ .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
+ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
+ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
+ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
+ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
+ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
+ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
+ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
+ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
+ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
+ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
+ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
+ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
+ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
+ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
+ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
+ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
+ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
+ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
+ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
+ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
+ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
+ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
+ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
+ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
+ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
+ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
+ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
+ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
+ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
+ 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
+ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
+ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
+ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
+ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
+ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
+ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
+ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
+ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
+ .valid = true
+ },
+ /* wycheproof - public key >= p */
+ {
+ .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
+ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
+ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
+ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
+ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
+ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
+ 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
+ .valid = true
+ },
+ /* wycheproof - RFC 7748 */
+ {
+ .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
+ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+ .valid = true
+ },
+ /* wycheproof - RFC 7748 */
+ {
+ .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
+ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
+ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
+ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
+ .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
+ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
+ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
+ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
+ .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
+ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
+ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
+ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
+ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
+ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
+ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
+ .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
+ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
+ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
+ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
+ .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
+ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
+ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
+ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
+ .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
+ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
+ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
+ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
+ .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
+ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
+ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
+ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
+ .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
+ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
+ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
+ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
+ .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
+ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
+ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
+ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
+ .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
+ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
+ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
+ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
+ .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
+ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
+ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
+ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
+ .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
+ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
+ 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
+ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
+ .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
+ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
+ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
+ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
+ .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
+ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
+ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
+ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
+ .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
+ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
+ 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
+ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
+ .valid = true
+ },
+ /* wycheproof - edge case for shared secret */
+ {
+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+ .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
+ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
+ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
+ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
+ .valid = true
+ },
+ /* wycheproof - checking for overflow */
+ {
+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
+ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
+ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
+ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
+ .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
+ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
+ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
+ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
+ .valid = true
+ },
+ /* wycheproof - checking for overflow */
+ {
+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
+ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
+ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
+ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
+ .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
+ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
+ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
+ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
+ .valid = true
+ },
+ /* wycheproof - checking for overflow */
+ {
+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
+ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
+ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
+ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
+ .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
+ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
+ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
+ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
+ .valid = true
+ },
+ /* wycheproof - checking for overflow */
+ {
+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
+ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
+ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
+ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
+ .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
+ 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
+ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
+ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
+ .valid = true
+ },
+ /* wycheproof - checking for overflow */
+ {
+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+ .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
+ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
+ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
+ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
+ .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
+ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
+ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
+ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
+ .valid = true
+ },
+ /* wycheproof - private key == -1 (mod order) */
+ {
+ .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
+ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
+ .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+ .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+ .valid = true
+ },
+ /* wycheproof - private key == 1 (mod order) on twist */
+ {
+ .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
+ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
+ .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+ .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+ .valid = true
+ }
+};
+
+bool __init curve25519_selftest(void)
+{
+ bool success = true, ret, ret2;
+ size_t i = 0, j;
+ u8 in[CURVE25519_KEY_SIZE];
+ u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE],
+ out3[CURVE25519_KEY_SIZE];
+
+ for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) {
+ memset(out, 0, CURVE25519_KEY_SIZE);
+ ret = curve25519(out, curve25519_test_vectors[i].private,
+ curve25519_test_vectors[i].public);
+ if (ret != curve25519_test_vectors[i].valid ||
+ memcmp(out, curve25519_test_vectors[i].result,
+ CURVE25519_KEY_SIZE)) {
+ pr_err("curve25519 self-test %zu: FAIL\n", i + 1);
+ success = false;
+ }
+ }
+
+ for (i = 0; i < 5; ++i) {
+ get_random_bytes(in, sizeof(in));
+ ret = curve25519_generate_public(out, in);
+ ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 });
+ curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 });
+ if (ret != ret2 ||
+ memcmp(out, out2, CURVE25519_KEY_SIZE) ||
+ memcmp(out, out3, CURVE25519_KEY_SIZE)) {
+ pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x",
+ i + 1);
+ for (j = CURVE25519_KEY_SIZE; j-- > 0;)
+ printk(KERN_CONT "%02x", in[j]);
+ printk(KERN_CONT "\n");
+ success = false;
+ }
+ }
+
+ return success;
+}
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index 0106bebe6900..c03ccdb99434 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -13,6 +13,8 @@
#include <linux/module.h>
#include <linux/init.h>
+bool curve25519_selftest(void);
+
const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
@@ -20,6 +22,21 @@ EXPORT_SYMBOL(curve25519_null_point);
EXPORT_SYMBOL(curve25519_base_point);
EXPORT_SYMBOL(curve25519_generic);
+static int __init mod_init(void)
+{
+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ WARN_ON(!curve25519_selftest()))
+ return -ENODEV;
+ return 0;
+}
+
+static void __exit mod_exit(void)
+{
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
--
2.18.2
From 773a55c545ad6cb457b4eac8d71d68e695ec0ac4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 5 Jan 2020 22:40:46 -0500
Subject: [PATCH 041/100] crypto: poly1305 - add new 32 and 64-bit generic
versions
commit 1c08a104360f3e18f4ee6346c21cc3923efb952e upstream.
These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
depending on the platform -- come from Andrew Moon's public domain
poly1305-donna portable code, modified for usage in the kernel. The
precomputation in the 32-bit version and the use of 64x64 multiplies in
the 64-bit version make these perform better than the code it replaces.
Moon's code is also very widespread and has received many eyeballs of
scrutiny.
There's a bit of interference between the x86 implementation, which
relies on internal details of the old scalar implementation. In the next
commit, the x86 implementation will be replaced with a faster one that
doesn't rely on this, so none of this matters much. But for now, to keep
this passing the tests, we inline the bits of the old implementation
that the x86 implementation relied on. Also, since we now support a
slightly larger key space, via the union, some offsets had to be fixed
up.
Nonce calculation was folded in with the emit function, to take
advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
nonce handling in emit, so this path was conditionalized. We also
introduced a new struct, poly1305_core_key, to represent the precise
amount of space that particular implementation uses.
Testing with kbench9000, depending on the CPU, the update function for
the 32x32 version has been improved by 4%-7%, and for the 64x64 by
19%-30%. The 32x32 gains are small, but I think there's great value in
having a parallel implementation to the 64x64 one so that the two can be
compared side-by-side as nice stand-alone units.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305-avx2-x86_64.S | 20 +--
arch/x86/crypto/poly1305_glue.c | 215 +++++++++++++++++++++++--
crypto/adiantum.c | 4 +-
crypto/nhpoly1305.c | 2 +-
crypto/poly1305_generic.c | 25 ++-
include/crypto/internal/poly1305.h | 45 ++----
include/crypto/nhpoly1305.h | 4 +-
include/crypto/poly1305.h | 26 ++-
lib/crypto/Makefile | 4 +-
lib/crypto/poly1305-donna32.c | 204 +++++++++++++++++++++++
lib/crypto/poly1305-donna64.c | 185 +++++++++++++++++++++
lib/crypto/poly1305.c | 169 +------------------
12 files changed, 675 insertions(+), 228 deletions(-)
create mode 100644 lib/crypto/poly1305-donna32.c
create mode 100644 lib/crypto/poly1305-donna64.c
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 8b341bc29d41..1688fb551070 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -34,16 +34,16 @@ ORMASK: .octa 0x00000000010000000000000001000000
#define u2 0x08(%r8)
#define u3 0x0c(%r8)
#define u4 0x10(%r8)
-#define w0 0x14(%r8)
-#define w1 0x18(%r8)
-#define w2 0x1c(%r8)
-#define w3 0x20(%r8)
-#define w4 0x24(%r8)
-#define y0 0x28(%r8)
-#define y1 0x2c(%r8)
-#define y2 0x30(%r8)
-#define y3 0x34(%r8)
-#define y4 0x38(%r8)
+#define w0 0x18(%r8)
+#define w1 0x1c(%r8)
+#define w2 0x20(%r8)
+#define w3 0x24(%r8)
+#define w4 0x28(%r8)
+#define y0 0x30(%r8)
+#define y1 0x34(%r8)
+#define y2 0x38(%r8)
+#define y3 0x3c(%r8)
+#define y4 0x40(%r8)
#define m %rsi
#define hc0 %ymm0
#define hc1 %ymm1
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 0cc4537e6617..edb7113e36f3 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -25,6 +25,21 @@ asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
+static inline u64 mlt(u64 a, u64 b)
+{
+ return a * b;
+}
+
+static inline u32 sr(u64 v, u_char n)
+{
+ return v >> n;
+}
+
+static inline u32 and(u32 v, u32 mask)
+{
+ return v & mask;
+}
+
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
u8 m[POLY1305_BLOCK_SIZE];
@@ -36,6 +51,168 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
poly1305_block_sse2(a, m, b, 1);
}
+static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key)
+{
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
+}
+
+static void poly1305_integer_blocks(struct poly1305_state *state,
+ const struct poly1305_key *key,
+ const void *src,
+ unsigned int nblocks, u32 hibit)
+{
+ u32 r0, r1, r2, r3, r4;
+ u32 s1, s2, s3, s4;
+ u32 h0, h1, h2, h3, h4;
+ u64 d0, d1, d2, d3, d4;
+
+ if (!nblocks)
+ return;
+
+ r0 = key->r[0];
+ r1 = key->r[1];
+ r2 = key->r[2];
+ r3 = key->r[3];
+ r4 = key->r[4];
+
+ s1 = r1 * 5;
+ s2 = r2 * 5;
+ s3 = r3 * 5;
+ s4 = r4 * 5;
+
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ do {
+ /* h += m[i] */
+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
+
+ /* h *= r */
+ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
+ mlt(h3, s2) + mlt(h4, s1);
+ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
+ mlt(h3, s3) + mlt(h4, s2);
+ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
+ mlt(h3, s4) + mlt(h4, s3);
+ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
+ mlt(h3, r0) + mlt(h4, s4);
+ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
+ mlt(h3, r1) + mlt(h4, r0);
+
+ /* (partial) h %= p */
+ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
+ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
+ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
+ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
+ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
+ h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
+
+ src += POLY1305_BLOCK_SIZE;
+ } while (--nblocks);
+
+ state->h[0] = h0;
+ state->h[1] = h1;
+ state->h[2] = h2;
+ state->h[3] = h3;
+ state->h[4] = h4;
+}
+
+static void poly1305_integer_emit(const struct poly1305_state *state, void *dst)
+{
+ u32 h0, h1, h2, h3, h4;
+ u32 g0, g1, g2, g3, g4;
+ u32 mask;
+
+ /* fully carry h */
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
+ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
+ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
+ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
+ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
+ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
+ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
+ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
+}
+
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
+{
+ poly1305_integer_setkey(desc->opaque_r, key);
+ desc->s[0] = get_unaligned_le32(key + 16);
+ desc->s[1] = get_unaligned_le32(key + 20);
+ desc->s[2] = get_unaligned_le32(key + 24);
+ desc->s[3] = get_unaligned_le32(key + 28);
+ poly1305_core_init(&desc->h);
+ desc->buflen = 0;
+ desc->sset = true;
+ desc->rset = 1;
+}
+EXPORT_SYMBOL_GPL(poly1305_init_arch);
+
+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ if (!dctx->sset) {
+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_integer_setkey(dctx->r, src);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ }
+ return srclen;
+}
+
static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
@@ -47,8 +224,8 @@ static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
srclen = datalen;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_core_blocks(&dctx->h, dctx->r, src,
- srclen / POLY1305_BLOCK_SIZE, 1);
+ poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src,
+ srclen / POLY1305_BLOCK_SIZE, 1);
srclen %= POLY1305_BLOCK_SIZE;
}
return srclen;
@@ -105,12 +282,6 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
return srclen;
}
-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
-{
- poly1305_init_generic(desc, key);
-}
-EXPORT_SYMBOL(poly1305_init_arch);
-
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int srclen)
{
@@ -158,9 +329,31 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
}
EXPORT_SYMBOL(poly1305_update_arch);
-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst)
{
- poly1305_final_generic(desc, digest);
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(desc->buflen)) {
+ desc->buf[desc->buflen++] = 1;
+ memset(desc->buf + desc->buflen, 0,
+ POLY1305_BLOCK_SIZE - desc->buflen);
+ poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0);
+ }
+
+ poly1305_integer_emit(&desc->h, digest);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
+ put_unaligned_le32(f, dst + 0);
+ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
+ put_unaligned_le32(f, dst + 12);
+
+ *desc = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
@@ -183,7 +376,7 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
if (unlikely(!dctx->sset))
return -ENOKEY;
- poly1305_final_generic(dctx, dst);
+ poly1305_final_arch(dctx, dst);
return 0;
}
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
index aded26092268..c846a887abe1 100644
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@ -72,7 +72,7 @@ struct adiantum_tfm_ctx {
struct crypto_skcipher *streamcipher;
struct crypto_cipher *blockcipher;
struct crypto_shash *hash;
- struct poly1305_key header_hash_key;
+ struct poly1305_core_key header_hash_key;
};
struct adiantum_request_ctx {
@@ -249,7 +249,7 @@ static void adiantum_hash_header(struct skcipher_request *req)
poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
- poly1305_core_emit(&state, &rctx->header_hash);
+ poly1305_core_emit(&state, NULL, &rctx->header_hash);
}
/* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
index f6b6a52092b4..8a3006c3b51b 100644
--- a/crypto/nhpoly1305.c
+++ b/crypto/nhpoly1305.c
@@ -210,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
if (state->nh_remaining)
process_nh_hash_value(state, key);
- poly1305_core_emit(&state->poly_state, dst);
+ poly1305_core_emit(&state->poly_state, NULL, dst);
return 0;
}
EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 21edbd8c99fb..94af47eb6fa6 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -31,6 +31,29 @@ static int crypto_poly1305_init(struct shash_desc *desc)
return 0;
}
+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ if (!dctx->sset) {
+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_core_setkey(&dctx->core_r, src);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 2;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ srclen -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ }
+ return srclen;
+}
+
static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int srclen)
{
@@ -42,7 +65,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
srclen = datalen;
}
- poly1305_core_blocks(&dctx->h, dctx->r, src,
+ poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
srclen / POLY1305_BLOCK_SIZE, 1);
}
diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
index 479b0cab2a1a..064e52ca5248 100644
--- a/include/crypto/internal/poly1305.h
+++ b/include/crypto/internal/poly1305.h
@@ -11,48 +11,23 @@
#include <crypto/poly1305.h>
/*
- * Poly1305 core functions. These implement the ε-almost-∆-universal hash
- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
- * ("s key") at the end. They also only support block-aligned inputs.
+ * Poly1305 core functions. These only accept whole blocks; the caller must
+ * handle any needed block buffering and padding. 'hibit' must be 1 for any
+ * full blocks, or 0 for the final block if it had to be padded. If 'nonce' is
+ * non-NULL, then it's added at the end to compute the Poly1305 MAC. Otherwise,
+ * only the ε-almost-∆-universal hash function (not the full MAC) is computed.
*/
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
+
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
static inline void poly1305_core_init(struct poly1305_state *state)
{
*state = (struct poly1305_state){};
}
void poly1305_core_blocks(struct poly1305_state *state,
- const struct poly1305_key *key, const void *src,
+ const struct poly1305_core_key *key, const void *src,
unsigned int nblocks, u32 hibit);
-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
-
-/*
- * Poly1305 requires a unique key for each tag, which implies that we can't set
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
- * expect the key as the first 32 bytes in the update() call.
- */
-static inline
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (!dctx->sset) {
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_core_setkey(dctx->r, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return srclen;
-}
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
+ void *dst);
#endif
diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h
index 53c04423c582..306925fea190 100644
--- a/include/crypto/nhpoly1305.h
+++ b/include/crypto/nhpoly1305.h
@@ -7,7 +7,7 @@
#define _NHPOLY1305_H
#include <crypto/hash.h>
-#include <crypto/poly1305.h>
+#include <crypto/internal/poly1305.h>
/* NH parameterization: */
@@ -33,7 +33,7 @@
#define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
struct nhpoly1305_key {
- struct poly1305_key poly_key;
+ struct poly1305_core_key poly_key;
u32 nh_key[NH_KEY_WORDS];
};
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 74c6e1cd73ee..f1f67fc749cf 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -13,12 +13,29 @@
#define POLY1305_KEY_SIZE 32
#define POLY1305_DIGEST_SIZE 16
+/* The poly1305_key and poly1305_state types are mostly opaque and
+ * implementation-defined. Limbs might be in base 2^64 or base 2^26, or
+ * different yet. The union type provided keeps these 64-bit aligned for the
+ * case in which this is implemented using 64x64 multiplies.
+ */
+
struct poly1305_key {
- u32 r[5]; /* key, base 2^26 */
+ union {
+ u32 r[5];
+ u64 r64[3];
+ };
+};
+
+struct poly1305_core_key {
+ struct poly1305_key key;
+ struct poly1305_key precomputed_s;
};
struct poly1305_state {
- u32 h[5]; /* accumulator, base 2^26 */
+ union {
+ u32 h[5];
+ u64 h64[3];
+ };
};
struct poly1305_desc_ctx {
@@ -35,7 +52,10 @@ struct poly1305_desc_ctx {
/* accumulator */
struct poly1305_state h;
/* key */
- struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
+ union {
+ struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
+ struct poly1305_core_key core_r;
+ };
};
void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index f97f9b941110..6ecaf83a5a9a 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -28,7 +28,9 @@ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
libdes-y := des.o
obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
-libpoly1305-y := poly1305.o
+libpoly1305-y := poly1305-donna32.o
+libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o
+libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
diff --git a/lib/crypto/poly1305-donna32.c b/lib/crypto/poly1305-donna32.c
new file mode 100644
index 000000000000..3cc77d94390b
--- /dev/null
+++ b/lib/crypto/poly1305-donna32.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
+ * public domain.
+ */
+
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/poly1305.h>
+
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
+{
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff;
+ key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03;
+ key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff;
+ key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff;
+ key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff;
+
+ /* s = 5*r */
+ key->precomputed_s.r[0] = key->key.r[1] * 5;
+ key->precomputed_s.r[1] = key->key.r[2] * 5;
+ key->precomputed_s.r[2] = key->key.r[3] * 5;
+ key->precomputed_s.r[3] = key->key.r[4] * 5;
+}
+EXPORT_SYMBOL(poly1305_core_setkey);
+
+void poly1305_core_blocks(struct poly1305_state *state,
+ const struct poly1305_core_key *key, const void *src,
+ unsigned int nblocks, u32 hibit)
+{
+ const u8 *input = src;
+ u32 r0, r1, r2, r3, r4;
+ u32 s1, s2, s3, s4;
+ u32 h0, h1, h2, h3, h4;
+ u64 d0, d1, d2, d3, d4;
+ u32 c;
+
+ if (!nblocks)
+ return;
+
+ hibit <<= 24;
+
+ r0 = key->key.r[0];
+ r1 = key->key.r[1];
+ r2 = key->key.r[2];
+ r3 = key->key.r[3];
+ r4 = key->key.r[4];
+
+ s1 = key->precomputed_s.r[0];
+ s2 = key->precomputed_s.r[1];
+ s3 = key->precomputed_s.r[2];
+ s4 = key->precomputed_s.r[3];
+
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ do {
+ /* h += m[i] */
+ h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
+ h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
+
+ /* h *= r */
+ d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
+ ((u64)h2 * s3) + ((u64)h3 * s2) +
+ ((u64)h4 * s1);
+ d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
+ ((u64)h2 * s4) + ((u64)h3 * s3) +
+ ((u64)h4 * s2);
+ d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
+ ((u64)h2 * r0) + ((u64)h3 * s4) +
+ ((u64)h4 * s3);
+ d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
+ ((u64)h2 * r1) + ((u64)h3 * r0) +
+ ((u64)h4 * s4);
+ d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
+ ((u64)h2 * r2) + ((u64)h3 * r1) +
+ ((u64)h4 * r0);
+
+ /* (partial) h %= p */
+ c = (u32)(d0 >> 26);
+ h0 = (u32)d0 & 0x3ffffff;
+ d1 += c;
+ c = (u32)(d1 >> 26);
+ h1 = (u32)d1 & 0x3ffffff;
+ d2 += c;
+ c = (u32)(d2 >> 26);
+ h2 = (u32)d2 & 0x3ffffff;
+ d3 += c;
+ c = (u32)(d3 >> 26);
+ h3 = (u32)d3 & 0x3ffffff;
+ d4 += c;
+ c = (u32)(d4 >> 26);
+ h4 = (u32)d4 & 0x3ffffff;
+ h0 += c * 5;
+ c = (h0 >> 26);
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ input += POLY1305_BLOCK_SIZE;
+ } while (--nblocks);
+
+ state->h[0] = h0;
+ state->h[1] = h1;
+ state->h[2] = h2;
+ state->h[3] = h3;
+ state->h[4] = h4;
+}
+EXPORT_SYMBOL(poly1305_core_blocks);
+
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
+ void *dst)
+{
+ u8 *mac = dst;
+ u32 h0, h1, h2, h3, h4, c;
+ u32 g0, g1, g2, g3, g4;
+ u64 f;
+ u32 mask;
+
+ /* fully carry h */
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+
+ c = h1 >> 26;
+ h1 = h1 & 0x3ffffff;
+ h2 += c;
+ c = h2 >> 26;
+ h2 = h2 & 0x3ffffff;
+ h3 += c;
+ c = h3 >> 26;
+ h3 = h3 & 0x3ffffff;
+ h4 += c;
+ c = h4 >> 26;
+ h4 = h4 & 0x3ffffff;
+ h0 += c * 5;
+ c = h0 >> 26;
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ c = g0 >> 26;
+ g0 &= 0x3ffffff;
+ g1 = h1 + c;
+ c = g1 >> 26;
+ g1 &= 0x3ffffff;
+ g2 = h2 + c;
+ c = g2 >> 26;
+ g2 &= 0x3ffffff;
+ g3 = h3 + c;
+ c = g3 >> 26;
+ g3 &= 0x3ffffff;
+ g4 = h4 + c - (1UL << 26);
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ h0 = ((h0) | (h1 << 26)) & 0xffffffff;
+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+ if (likely(nonce)) {
+ /* mac = (h + nonce) % (2^128) */
+ f = (u64)h0 + nonce[0];
+ h0 = (u32)f;
+ f = (u64)h1 + nonce[1] + (f >> 32);
+ h1 = (u32)f;
+ f = (u64)h2 + nonce[2] + (f >> 32);
+ h2 = (u32)f;
+ f = (u64)h3 + nonce[3] + (f >> 32);
+ h3 = (u32)f;
+ }
+
+ put_unaligned_le32(h0, &mac[0]);
+ put_unaligned_le32(h1, &mac[4]);
+ put_unaligned_le32(h2, &mac[8]);
+ put_unaligned_le32(h3, &mac[12]);
+}
+EXPORT_SYMBOL(poly1305_core_emit);
diff --git a/lib/crypto/poly1305-donna64.c b/lib/crypto/poly1305-donna64.c
new file mode 100644
index 000000000000..6ae181bb4345
--- /dev/null
+++ b/lib/crypto/poly1305-donna64.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
+ * public domain.
+ */
+
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/poly1305.h>
+
+typedef __uint128_t u128;
+
+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
+{
+ u64 t0, t1;
+
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ t0 = get_unaligned_le64(&raw_key[0]);
+ t1 = get_unaligned_le64(&raw_key[8]);
+
+ key->key.r64[0] = t0 & 0xffc0fffffffULL;
+ key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
+ key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
+
+ /* s = 20*r */
+ key->precomputed_s.r64[0] = key->key.r64[1] * 20;
+ key->precomputed_s.r64[1] = key->key.r64[2] * 20;
+}
+EXPORT_SYMBOL(poly1305_core_setkey);
+
+void poly1305_core_blocks(struct poly1305_state *state,
+ const struct poly1305_core_key *key, const void *src,
+ unsigned int nblocks, u32 hibit)
+{
+ const u8 *input = src;
+ u64 hibit64;
+ u64 r0, r1, r2;
+ u64 s1, s2;
+ u64 h0, h1, h2;
+ u64 c;
+ u128 d0, d1, d2, d;
+
+ if (!nblocks)
+ return;
+
+ hibit64 = ((u64)hibit) << 40;
+
+ r0 = key->key.r64[0];
+ r1 = key->key.r64[1];
+ r2 = key->key.r64[2];
+
+ h0 = state->h64[0];
+ h1 = state->h64[1];
+ h2 = state->h64[2];
+
+ s1 = key->precomputed_s.r64[0];
+ s2 = key->precomputed_s.r64[1];
+
+ do {
+ u64 t0, t1;
+
+ /* h += m[i] */
+ t0 = get_unaligned_le64(&input[0]);
+ t1 = get_unaligned_le64(&input[8]);
+
+ h0 += t0 & 0xfffffffffffULL;
+ h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
+ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64;
+
+ /* h *= r */
+ d0 = (u128)h0 * r0;
+ d = (u128)h1 * s2;
+ d0 += d;
+ d = (u128)h2 * s1;
+ d0 += d;
+ d1 = (u128)h0 * r1;
+ d = (u128)h1 * r0;
+ d1 += d;
+ d = (u128)h2 * s2;
+ d1 += d;
+ d2 = (u128)h0 * r2;
+ d = (u128)h1 * r1;
+ d2 += d;
+ d = (u128)h2 * r0;
+ d2 += d;
+
+ /* (partial) h %= p */
+ c = (u64)(d0 >> 44);
+ h0 = (u64)d0 & 0xfffffffffffULL;
+ d1 += c;
+ c = (u64)(d1 >> 44);
+ h1 = (u64)d1 & 0xfffffffffffULL;
+ d2 += c;
+ c = (u64)(d2 >> 42);
+ h2 = (u64)d2 & 0x3ffffffffffULL;
+ h0 += c * 5;
+ c = h0 >> 44;
+ h0 = h0 & 0xfffffffffffULL;
+ h1 += c;
+
+ input += POLY1305_BLOCK_SIZE;
+ } while (--nblocks);
+
+ state->h64[0] = h0;
+ state->h64[1] = h1;
+ state->h64[2] = h2;
+}
+EXPORT_SYMBOL(poly1305_core_blocks);
+
+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
+ void *dst)
+{
+ u8 *mac = dst;
+ u64 h0, h1, h2, c;
+ u64 g0, g1, g2;
+ u64 t0, t1;
+
+ /* fully carry h */
+ h0 = state->h64[0];
+ h1 = state->h64[1];
+ h2 = state->h64[2];
+
+ c = h1 >> 44;
+ h1 &= 0xfffffffffffULL;
+ h2 += c;
+ c = h2 >> 42;
+ h2 &= 0x3ffffffffffULL;
+ h0 += c * 5;
+ c = h0 >> 44;
+ h0 &= 0xfffffffffffULL;
+ h1 += c;
+ c = h1 >> 44;
+ h1 &= 0xfffffffffffULL;
+ h2 += c;
+ c = h2 >> 42;
+ h2 &= 0x3ffffffffffULL;
+ h0 += c * 5;
+ c = h0 >> 44;
+ h0 &= 0xfffffffffffULL;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ c = g0 >> 44;
+ g0 &= 0xfffffffffffULL;
+ g1 = h1 + c;
+ c = g1 >> 44;
+ g1 &= 0xfffffffffffULL;
+ g2 = h2 + c - (1ULL << 42);
+
+ /* select h if h < p, or h + -p if h >= p */
+ c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
+ g0 &= c;
+ g1 &= c;
+ g2 &= c;
+ c = ~c;
+ h0 = (h0 & c) | g0;
+ h1 = (h1 & c) | g1;
+ h2 = (h2 & c) | g2;
+
+ if (likely(nonce)) {
+ /* h = (h + nonce) */
+ t0 = ((u64)nonce[1] << 32) | nonce[0];
+ t1 = ((u64)nonce[3] << 32) | nonce[2];
+
+ h0 += t0 & 0xfffffffffffULL;
+ c = h0 >> 44;
+ h0 &= 0xfffffffffffULL;
+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
+ c = h1 >> 44;
+ h1 &= 0xfffffffffffULL;
+ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
+ h2 &= 0x3ffffffffffULL;
+ }
+
+ /* mac = h % (2^128) */
+ h0 = h0 | (h1 << 44);
+ h1 = (h1 >> 20) | (h2 << 24);
+
+ put_unaligned_le64(h0, &mac[0]);
+ put_unaligned_le64(h1, &mac[8]);
+}
+EXPORT_SYMBOL(poly1305_core_emit);
diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
index 32ec293c65ae..9d2d14df0fee 100644
--- a/lib/crypto/poly1305.c
+++ b/lib/crypto/poly1305.c
@@ -12,151 +12,9 @@
#include <linux/module.h>
#include <asm/unaligned.h>
-static inline u64 mlt(u64 a, u64 b)
-{
- return a * b;
-}
-
-static inline u32 sr(u64 v, u_char n)
-{
- return v >> n;
-}
-
-static inline u32 and(u32 v, u32 mask)
-{
- return v & mask;
-}
-
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
-{
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
-}
-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
-
-void poly1305_core_blocks(struct poly1305_state *state,
- const struct poly1305_key *key, const void *src,
- unsigned int nblocks, u32 hibit)
-{
- u32 r0, r1, r2, r3, r4;
- u32 s1, s2, s3, s4;
- u32 h0, h1, h2, h3, h4;
- u64 d0, d1, d2, d3, d4;
-
- if (!nblocks)
- return;
-
- r0 = key->r[0];
- r1 = key->r[1];
- r2 = key->r[2];
- r3 = key->r[3];
- r4 = key->r[4];
-
- s1 = r1 * 5;
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- do {
- /* h += m[i] */
- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
-
- /* h *= r */
- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
- mlt(h3, s2) + mlt(h4, s1);
- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
- mlt(h3, s3) + mlt(h4, s2);
- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
- mlt(h3, s4) + mlt(h4, s3);
- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
- mlt(h3, r0) + mlt(h4, s4);
- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
- mlt(h3, r1) + mlt(h4, r0);
-
- /* (partial) h %= p */
- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
-
- src += POLY1305_BLOCK_SIZE;
- } while (--nblocks);
-
- state->h[0] = h0;
- state->h[1] = h1;
- state->h[2] = h2;
- state->h[3] = h3;
- state->h[4] = h4;
-}
-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
-
-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
-{
- u32 h0, h1, h2, h3, h4;
- u32 g0, g1, g2, g3, g4;
- u32 mask;
-
- /* fully carry h */
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
-
- /* compute h + -p */
- g0 = h0 + 5;
- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
- /* select h if h < p, or h + -p if h >= p */
- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128) */
- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
-}
-EXPORT_SYMBOL_GPL(poly1305_core_emit);
-
void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
{
- poly1305_core_setkey(desc->r, key);
+ poly1305_core_setkey(&desc->core_r, key);
desc->s[0] = get_unaligned_le32(key + 16);
desc->s[1] = get_unaligned_le32(key + 20);
desc->s[2] = get_unaligned_le32(key + 24);
@@ -164,7 +22,7 @@ void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
poly1305_core_init(&desc->h);
desc->buflen = 0;
desc->sset = true;
- desc->rset = 1;
+ desc->rset = 2;
}
EXPORT_SYMBOL_GPL(poly1305_init_generic);
@@ -181,13 +39,14 @@ void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
desc->buflen += bytes;
if (desc->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
+ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf,
+ 1, 1);
desc->buflen = 0;
}
}
if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
- poly1305_core_blocks(&desc->h, desc->r, src,
+ poly1305_core_blocks(&desc->h, &desc->core_r, src,
nbytes / POLY1305_BLOCK_SIZE, 1);
src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
nbytes %= POLY1305_BLOCK_SIZE;
@@ -202,28 +61,14 @@ EXPORT_SYMBOL_GPL(poly1305_update_generic);
void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
{
- __le32 digest[4];
- u64 f = 0;
-
if (unlikely(desc->buflen)) {
desc->buf[desc->buflen++] = 1;
memset(desc->buf + desc->buflen, 0,
POLY1305_BLOCK_SIZE - desc->buflen);
- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
+ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0);
}
- poly1305_core_emit(&desc->h, digest);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
- put_unaligned_le32(f, dst + 0);
- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
- put_unaligned_le32(f, dst + 12);
-
+ poly1305_core_emit(&desc->h, desc->s, dst);
*desc = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL_GPL(poly1305_final_generic);
--
2.18.2
From c416a98eb307de061d222f9db53651c9b0eb0964 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 5 Jan 2020 22:40:47 -0500
Subject: [PATCH 042/100] crypto: x86/poly1305 - import unmodified cryptogams
implementation
commit 0896ca2a0cb6127e8a129f1f2a680d49b6b0f65c upstream.
These x86_64 vectorized implementations come from Andy Polyakov's
CRYPTOGAMS implementation, and are included here in raw form without
modification, so that subsequent commits that fix these up for the
kernel can see how it has changed.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 4159 +++++++++++++++++
1 file changed, 4159 insertions(+)
create mode 100644 arch/x86/crypto/poly1305-x86_64-cryptogams.pl
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
new file mode 100644
index 000000000000..342ad7f18aa7
--- /dev/null
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -0,0 +1,4159 @@
+#! /usr/bin/env perl
+# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements Poly1305 hash for x86_64.
+#
+# March 2015
+#
+# Initial release.
+#
+# December 2016
+#
+# Add AVX512F+VL+BW code path.
+#
+# November 2017
+#
+# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be
+# executed even on Knights Landing. Trigger for modification was
+# observation that AVX512 code paths can negatively affect overall
+# Skylake-X system performance. Since we are likely to suppress
+# AVX512F capability flag [at least on Skylake-X], conversion serves
+# as kind of "investment protection". Note that next *lake processor,
+# Cannolake, has AVX512IFMA code path to execute...
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone,
+# measured with rdtsc at fixed clock frequency.
+#
+# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512
+# P4 4.46/+120% -
+# Core 2 2.41/+90% -
+# Westmere 1.88/+120% -
+# Sandy Bridge 1.39/+140% 1.10
+# Haswell 1.14/+175% 1.11 0.65
+# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35]
+# Silvermont 2.83/+95% -
+# Knights L 3.60/? 1.65 1.10 0.41(***)
+# Goldmont 1.70/+180% -
+# VIA Nano 1.82/+150% -
+# Sledgehammer 1.38/+160% -
+# Bulldozer 2.30/+130% 0.97
+# Ryzen 1.15/+200% 1.08 1.18
+#
+# (*) improvement coefficients relative to clang are more modest and
+# are ~50% on most processors, in both cases we are comparing to
+# __int128 code;
+# (**) SSE2 implementation was attempted, but among non-AVX processors
+# it was faster than integer-only code only on older Intel P4 and
+# Core processors, 50-30%, less newer processor is, but slower on
+# contemporary ones, for example almost 2x slower on Atom, and as
+# former are naturally disappearing, SSE2 is deemed unnecessary;
+# (***) strangely enough performance seems to vary from core to core,
+# listed result is best case;
+
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26);
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
+ $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12);
+ $avx += 2 if ($1==2.11 && $2>=8);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+ `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+ $avx = ($1>=10) + ($1>=12);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
+ $avx = ($2>=3.0) + ($2>3.0);
+}
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
+
+my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
+my ($mac,$nonce)=($inp,$len); # *_emit arguments
+my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13));
+my ($h0,$h1,$h2)=("%r14","%rbx","%rbp");
+
+sub poly1305_iteration {
+# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1
+# output: $h0-$h2 *= $r0-$r1
+$code.=<<___;
+ mulq $h0 # h0*r1
+ mov %rax,$d2
+ mov $r0,%rax
+ mov %rdx,$d3
+
+ mulq $h0 # h0*r0
+ mov %rax,$h0 # future $h0
+ mov $r0,%rax
+ mov %rdx,$d1
+
+ mulq $h1 # h1*r0
+ add %rax,$d2
+ mov $s1,%rax
+ adc %rdx,$d3
+
+ mulq $h1 # h1*s1
+ mov $h2,$h1 # borrow $h1
+ add %rax,$h0
+ adc %rdx,$d1
+
+ imulq $s1,$h1 # h2*s1
+ add $h1,$d2
+ mov $d1,$h1
+ adc \$0,$d3
+
+ imulq $r0,$h2 # h2*r0
+ add $d2,$h1
+ mov \$-4,%rax # mask value
+ adc $h2,$d3
+
+ and $d3,%rax # last reduction step
+ mov $d3,$h2
+ shr \$2,$d3
+ and \$3,$h2
+ add $d3,%rax
+ add %rax,$h0
+ adc \$0,$h1
+ adc \$0,$h2
+___
+}
+
+########################################################################
+# Layout of opaque area is following.
+#
+# unsigned __int64 h[3]; # current hash value base 2^64
+# unsigned __int64 r[2]; # key value base 2^64
+
+$code.=<<___;
+.text
+
+.extern OPENSSL_ia32cap_P
+
+.globl poly1305_init
+.hidden poly1305_init
+.globl poly1305_blocks
+.hidden poly1305_blocks
+.globl poly1305_emit
+.hidden poly1305_emit
+
+.type poly1305_init,\@function,3
+.align 32
+poly1305_init:
+ xor %rax,%rax
+ mov %rax,0($ctx) # initialize hash value
+ mov %rax,8($ctx)
+ mov %rax,16($ctx)
+
+ cmp \$0,$inp
+ je .Lno_key
+
+ lea poly1305_blocks(%rip),%r10
+ lea poly1305_emit(%rip),%r11
+___
+$code.=<<___ if ($avx);
+ mov OPENSSL_ia32cap_P+4(%rip),%r9
+ lea poly1305_blocks_avx(%rip),%rax
+ lea poly1305_emit_avx(%rip),%rcx
+ bt \$`60-32`,%r9 # AVX?
+ cmovc %rax,%r10
+ cmovc %rcx,%r11
+___
+$code.=<<___ if ($avx>1);
+ lea poly1305_blocks_avx2(%rip),%rax
+ bt \$`5+32`,%r9 # AVX2?
+ cmovc %rax,%r10
+___
+$code.=<<___ if ($avx>3);
+ mov \$`(1<<31|1<<21|1<<16)`,%rax
+ shr \$32,%r9
+ and %rax,%r9
+ cmp %rax,%r9
+ je .Linit_base2_44
+___
+$code.=<<___;
+ mov \$0x0ffffffc0fffffff,%rax
+ mov \$0x0ffffffc0ffffffc,%rcx
+ and 0($inp),%rax
+ and 8($inp),%rcx
+ mov %rax,24($ctx)
+ mov %rcx,32($ctx)
+___
+$code.=<<___ if ($flavour !~ /elf32/);
+ mov %r10,0(%rdx)
+ mov %r11,8(%rdx)
+___
+$code.=<<___ if ($flavour =~ /elf32/);
+ mov %r10d,0(%rdx)
+ mov %r11d,4(%rdx)
+___
+$code.=<<___;
+ mov \$1,%eax
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,\@function,4
+.align 32
+poly1305_blocks:
+.cfi_startproc
+.Lblocks:
+ shr \$4,$len
+ jz .Lno_data # too short
+
+ push %rbx
+.cfi_push %rbx
+ push %rbp
+.cfi_push %rbp
+ push %r12
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+.Lblocks_body:
+
+ mov $len,%r15 # reassign $len
+
+ mov 24($ctx),$r0 # load r
+ mov 32($ctx),$s1
+
+ mov 0($ctx),$h0 # load hash value
+ mov 8($ctx),$h1
+ mov 16($ctx),$h2
+
+ mov $s1,$r1
+ shr \$2,$s1
+ mov $r1,%rax
+ add $r1,$s1 # s1 = r1 + (r1 >> 2)
+ jmp .Loop
+
+.align 32
+.Loop:
+ add 0($inp),$h0 # accumulate input
+ adc 8($inp),$h1
+ lea 16($inp),$inp
+ adc $padbit,$h2
+___
+ &poly1305_iteration();
+$code.=<<___;
+ mov $r1,%rax
+ dec %r15 # len-=16
+ jnz .Loop
+
+ mov $h0,0($ctx) # store hash value
+ mov $h1,8($ctx)
+ mov $h2,16($ctx)
+
+ mov 0(%rsp),%r15
+.cfi_restore %r15
+ mov 8(%rsp),%r14
+.cfi_restore %r14
+ mov 16(%rsp),%r13
+.cfi_restore %r13
+ mov 24(%rsp),%r12
+.cfi_restore %r12
+ mov 32(%rsp),%rbp
+.cfi_restore %rbp
+ mov 40(%rsp),%rbx
+.cfi_restore %rbx
+ lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lno_data:
+.Lblocks_epilogue:
+ ret
+.cfi_endproc
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,\@function,3
+.align 32
+poly1305_emit:
+.Lemit:
+ mov 0($ctx),%r8 # load hash value
+ mov 8($ctx),%r9
+ mov 16($ctx),%r10
+
+ mov %r8,%rax
+ add \$5,%r8 # compare to modulus
+ mov %r9,%rcx
+ adc \$0,%r9
+ adc \$0,%r10
+ shr \$2,%r10 # did 130-bit value overflow?
+ cmovnz %r8,%rax
+ cmovnz %r9,%rcx
+
+ add 0($nonce),%rax # accumulate nonce
+ adc 8($nonce),%rcx
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+___
+if ($avx) {
+
+########################################################################
+# Layout of opaque area is following.
+#
+# unsigned __int32 h[5]; # current hash value base 2^26
+# unsigned __int32 is_base2_26;
+# unsigned __int64 r[2]; # key value base 2^64
+# unsigned __int64 pad;
+# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9];
+#
+# where r^n are base 2^26 digits of degrees of multiplier key. There are
+# 5 digits, but last four are interleaved with multiples of 5, totalling
+# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4.
+
+my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
+ map("%xmm$_",(0..15));
+
+$code.=<<___;
+.type __poly1305_block,\@abi-omnipotent
+.align 32
+__poly1305_block:
+___
+ &poly1305_iteration();
+$code.=<<___;
+ ret
+.size __poly1305_block,.-__poly1305_block
+
+.type __poly1305_init_avx,\@abi-omnipotent
+.align 32
+__poly1305_init_avx:
+ mov $r0,$h0
+ mov $r1,$h1
+ xor $h2,$h2
+
+ lea 48+64($ctx),$ctx # size optimization
+
+ mov $r1,%rax
+ call __poly1305_block # r^2
+
+ mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26
+ mov \$0x3ffffff,%edx
+ mov $h0,$d1
+ and $h0#d,%eax
+ mov $r0,$d2
+ and $r0#d,%edx
+ mov %eax,`16*0+0-64`($ctx)
+ shr \$26,$d1
+ mov %edx,`16*0+4-64`($ctx)
+ shr \$26,$d2
+
+ mov \$0x3ffffff,%eax
+ mov \$0x3ffffff,%edx
+ and $d1#d,%eax
+ and $d2#d,%edx
+ mov %eax,`16*1+0-64`($ctx)
+ lea (%rax,%rax,4),%eax # *5
+ mov %edx,`16*1+4-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ mov %eax,`16*2+0-64`($ctx)
+ shr \$26,$d1
+ mov %edx,`16*2+4-64`($ctx)
+ shr \$26,$d2
+
+ mov $h1,%rax
+ mov $r1,%rdx
+ shl \$12,%rax
+ shl \$12,%rdx
+ or $d1,%rax
+ or $d2,%rdx
+ and \$0x3ffffff,%eax
+ and \$0x3ffffff,%edx
+ mov %eax,`16*3+0-64`($ctx)
+ lea (%rax,%rax,4),%eax # *5
+ mov %edx,`16*3+4-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ mov %eax,`16*4+0-64`($ctx)
+ mov $h1,$d1
+ mov %edx,`16*4+4-64`($ctx)
+ mov $r1,$d2
+
+ mov \$0x3ffffff,%eax
+ mov \$0x3ffffff,%edx
+ shr \$14,$d1
+ shr \$14,$d2
+ and $d1#d,%eax
+ and $d2#d,%edx
+ mov %eax,`16*5+0-64`($ctx)
+ lea (%rax,%rax,4),%eax # *5
+ mov %edx,`16*5+4-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ mov %eax,`16*6+0-64`($ctx)
+ shr \$26,$d1
+ mov %edx,`16*6+4-64`($ctx)
+ shr \$26,$d2
+
+ mov $h2,%rax
+ shl \$24,%rax
+ or %rax,$d1
+ mov $d1#d,`16*7+0-64`($ctx)
+ lea ($d1,$d1,4),$d1 # *5
+ mov $d2#d,`16*7+4-64`($ctx)
+ lea ($d2,$d2,4),$d2 # *5
+ mov $d1#d,`16*8+0-64`($ctx)
+ mov $d2#d,`16*8+4-64`($ctx)
+
+ mov $r1,%rax
+ call __poly1305_block # r^3
+
+ mov \$0x3ffffff,%eax # save r^3 base 2^26
+ mov $h0,$d1
+ and $h0#d,%eax
+ shr \$26,$d1
+ mov %eax,`16*0+12-64`($ctx)
+
+ mov \$0x3ffffff,%edx
+ and $d1#d,%edx
+ mov %edx,`16*1+12-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ shr \$26,$d1
+ mov %edx,`16*2+12-64`($ctx)
+
+ mov $h1,%rax
+ shl \$12,%rax
+ or $d1,%rax
+ and \$0x3ffffff,%eax
+ mov %eax,`16*3+12-64`($ctx)
+ lea (%rax,%rax,4),%eax # *5
+ mov $h1,$d1
+ mov %eax,`16*4+12-64`($ctx)
+
+ mov \$0x3ffffff,%edx
+ shr \$14,$d1
+ and $d1#d,%edx
+ mov %edx,`16*5+12-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ shr \$26,$d1
+ mov %edx,`16*6+12-64`($ctx)
+
+ mov $h2,%rax
+ shl \$24,%rax
+ or %rax,$d1
+ mov $d1#d,`16*7+12-64`($ctx)
+ lea ($d1,$d1,4),$d1 # *5
+ mov $d1#d,`16*8+12-64`($ctx)
+
+ mov $r1,%rax
+ call __poly1305_block # r^4
+
+ mov \$0x3ffffff,%eax # save r^4 base 2^26
+ mov $h0,$d1
+ and $h0#d,%eax
+ shr \$26,$d1
+ mov %eax,`16*0+8-64`($ctx)
+
+ mov \$0x3ffffff,%edx
+ and $d1#d,%edx
+ mov %edx,`16*1+8-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ shr \$26,$d1
+ mov %edx,`16*2+8-64`($ctx)
+
+ mov $h1,%rax
+ shl \$12,%rax
+ or $d1,%rax
+ and \$0x3ffffff,%eax
+ mov %eax,`16*3+8-64`($ctx)
+ lea (%rax,%rax,4),%eax # *5
+ mov $h1,$d1
+ mov %eax,`16*4+8-64`($ctx)
+
+ mov \$0x3ffffff,%edx
+ shr \$14,$d1
+ and $d1#d,%edx
+ mov %edx,`16*5+8-64`($ctx)
+ lea (%rdx,%rdx,4),%edx # *5
+ shr \$26,$d1
+ mov %edx,`16*6+8-64`($ctx)
+
+ mov $h2,%rax
+ shl \$24,%rax
+ or %rax,$d1
+ mov $d1#d,`16*7+8-64`($ctx)
+ lea ($d1,$d1,4),$d1 # *5
+ mov $d1#d,`16*8+8-64`($ctx)
+
+ lea -48-64($ctx),$ctx # size [de-]optimization
+ ret
+.size __poly1305_init_avx,.-__poly1305_init_avx
+
+.type poly1305_blocks_avx,\@function,4
+.align 32
+poly1305_blocks_avx:
+.cfi_startproc
+ mov 20($ctx),%r8d # is_base2_26
+ cmp \$128,$len
+ jae .Lblocks_avx
+ test %r8d,%r8d
+ jz .Lblocks
+
+.Lblocks_avx:
+ and \$-16,$len
+ jz .Lno_data_avx
+
+ vzeroupper
+
+ test %r8d,%r8d
+ jz .Lbase2_64_avx
+
+ test \$31,$len
+ jz .Leven_avx
+
+ push %rbx
+.cfi_push %rbx
+ push %rbp
+.cfi_push %rbp
+ push %r12
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+.Lblocks_avx_body:
+
+ mov $len,%r15 # reassign $len
+
+ mov 0($ctx),$d1 # load hash value
+ mov 8($ctx),$d2
+ mov 16($ctx),$h2#d
+
+ mov 24($ctx),$r0 # load r
+ mov 32($ctx),$s1
+
+ ################################# base 2^26 -> base 2^64
+ mov $d1#d,$h0#d
+ and \$`-1*(1<<31)`,$d1
+ mov $d2,$r1 # borrow $r1
+ mov $d2#d,$h1#d
+ and \$`-1*(1<<31)`,$d2
+
+ shr \$6,$d1
+ shl \$52,$r1
+ add $d1,$h0
+ shr \$12,$h1
+ shr \$18,$d2
+ add $r1,$h0
+ adc $d2,$h1
+
+ mov $h2,$d1
+ shl \$40,$d1
+ shr \$24,$h2
+ add $d1,$h1
+ adc \$0,$h2 # can be partially reduced...
+
+ mov \$-4,$d2 # ... so reduce
+ mov $h2,$d1
+ and $h2,$d2
+ shr \$2,$d1
+ and \$3,$h2
+ add $d2,$d1 # =*5
+ add $d1,$h0
+ adc \$0,$h1
+ adc \$0,$h2
+
+ mov $s1,$r1
+ mov $s1,%rax
+ shr \$2,$s1
+ add $r1,$s1 # s1 = r1 + (r1 >> 2)
+
+ add 0($inp),$h0 # accumulate input
+ adc 8($inp),$h1
+ lea 16($inp),$inp
+ adc $padbit,$h2
+
+ call __poly1305_block
+
+ test $padbit,$padbit # if $padbit is zero,
+ jz .Lstore_base2_64_avx # store hash in base 2^64 format
+
+ ################################# base 2^64 -> base 2^26
+ mov $h0,%rax
+ mov $h0,%rdx
+ shr \$52,$h0
+ mov $h1,$r0
+ mov $h1,$r1
+ shr \$26,%rdx
+ and \$0x3ffffff,%rax # h[0]
+ shl \$12,$r0
+ and \$0x3ffffff,%rdx # h[1]
+ shr \$14,$h1
+ or $r0,$h0
+ shl \$24,$h2
+ and \$0x3ffffff,$h0 # h[2]
+ shr \$40,$r1
+ and \$0x3ffffff,$h1 # h[3]
+ or $r1,$h2 # h[4]
+
+ sub \$16,%r15
+ jz .Lstore_base2_26_avx
+
+ vmovd %rax#d,$H0
+ vmovd %rdx#d,$H1
+ vmovd $h0#d,$H2
+ vmovd $h1#d,$H3
+ vmovd $h2#d,$H4
+ jmp .Lproceed_avx
+
+.align 32
+.Lstore_base2_64_avx:
+ mov $h0,0($ctx)
+ mov $h1,8($ctx)
+ mov $h2,16($ctx) # note that is_base2_26 is zeroed
+ jmp .Ldone_avx
+
+.align 16
+.Lstore_base2_26_avx:
+ mov %rax#d,0($ctx) # store hash value base 2^26
+ mov %rdx#d,4($ctx)
+ mov $h0#d,8($ctx)
+ mov $h1#d,12($ctx)
+ mov $h2#d,16($ctx)
+.align 16
+.Ldone_avx:
+ mov 0(%rsp),%r15
+.cfi_restore %r15
+ mov 8(%rsp),%r14
+.cfi_restore %r14
+ mov 16(%rsp),%r13
+.cfi_restore %r13
+ mov 24(%rsp),%r12
+.cfi_restore %r12
+ mov 32(%rsp),%rbp
+.cfi_restore %rbp
+ mov 40(%rsp),%rbx
+.cfi_restore %rbx
+ lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lno_data_avx:
+.Lblocks_avx_epilogue:
+ ret
+.cfi_endproc
+
+.align 32
+.Lbase2_64_avx:
+.cfi_startproc
+ push %rbx
+.cfi_push %rbx
+ push %rbp
+.cfi_push %rbp
+ push %r12
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+.Lbase2_64_avx_body:
+
+ mov $len,%r15 # reassign $len
+
+ mov 24($ctx),$r0 # load r
+ mov 32($ctx),$s1
+
+ mov 0($ctx),$h0 # load hash value
+ mov 8($ctx),$h1
+ mov 16($ctx),$h2#d
+
+ mov $s1,$r1
+ mov $s1,%rax
+ shr \$2,$s1
+ add $r1,$s1 # s1 = r1 + (r1 >> 2)
+
+ test \$31,$len
+ jz .Linit_avx
+
+ add 0($inp),$h0 # accumulate input
+ adc 8($inp),$h1
+ lea 16($inp),$inp
+ adc $padbit,$h2
+ sub \$16,%r15
+
+ call __poly1305_block
+
+.Linit_avx:
+ ################################# base 2^64 -> base 2^26
+ mov $h0,%rax
+ mov $h0,%rdx
+ shr \$52,$h0
+ mov $h1,$d1
+ mov $h1,$d2
+ shr \$26,%rdx
+ and \$0x3ffffff,%rax # h[0]
+ shl \$12,$d1
+ and \$0x3ffffff,%rdx # h[1]
+ shr \$14,$h1
+ or $d1,$h0
+ shl \$24,$h2
+ and \$0x3ffffff,$h0 # h[2]
+ shr \$40,$d2
+ and \$0x3ffffff,$h1 # h[3]
+ or $d2,$h2 # h[4]
+
+ vmovd %rax#d,$H0
+ vmovd %rdx#d,$H1
+ vmovd $h0#d,$H2
+ vmovd $h1#d,$H3
+ vmovd $h2#d,$H4
+ movl \$1,20($ctx) # set is_base2_26
+
+ call __poly1305_init_avx
+
+.Lproceed_avx:
+ mov %r15,$len
+
+ mov 0(%rsp),%r15
+.cfi_restore %r15
+ mov 8(%rsp),%r14
+.cfi_restore %r14
+ mov 16(%rsp),%r13
+.cfi_restore %r13
+ mov 24(%rsp),%r12
+.cfi_restore %r12
+ mov 32(%rsp),%rbp
+.cfi_restore %rbp
+ mov 40(%rsp),%rbx
+.cfi_restore %rbx
+ lea 48(%rsp),%rax
+ lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lbase2_64_avx_epilogue:
+ jmp .Ldo_avx
+.cfi_endproc
+
+.align 32
+.Leven_avx:
+.cfi_startproc
+ vmovd 4*0($ctx),$H0 # load hash value
+ vmovd 4*1($ctx),$H1
+ vmovd 4*2($ctx),$H2
+ vmovd 4*3($ctx),$H3
+ vmovd 4*4($ctx),$H4
+
+.Ldo_avx:
+___
+$code.=<<___ if (!$win64);
+ lea -0x58(%rsp),%r11
+.cfi_def_cfa %r11,0x60
+ sub \$0x178,%rsp
+___
+$code.=<<___ if ($win64);
+ lea -0xf8(%rsp),%r11
+ sub \$0x218,%rsp
+ vmovdqa %xmm6,0x50(%r11)
+ vmovdqa %xmm7,0x60(%r11)
+ vmovdqa %xmm8,0x70(%r11)
+ vmovdqa %xmm9,0x80(%r11)
+ vmovdqa %xmm10,0x90(%r11)
+ vmovdqa %xmm11,0xa0(%r11)
+ vmovdqa %xmm12,0xb0(%r11)
+ vmovdqa %xmm13,0xc0(%r11)
+ vmovdqa %xmm14,0xd0(%r11)
+ vmovdqa %xmm15,0xe0(%r11)
+.Ldo_avx_body:
+___
+$code.=<<___;
+ sub \$64,$len
+ lea -32($inp),%rax
+ cmovc %rax,$inp
+
+ vmovdqu `16*3`($ctx),$D4 # preload r0^2
+ lea `16*3+64`($ctx),$ctx # size optimization
+ lea .Lconst(%rip),%rcx
+
+ ################################################################
+ # load input
+ vmovdqu 16*2($inp),$T0
+ vmovdqu 16*3($inp),$T1
+ vmovdqa 64(%rcx),$MASK # .Lmask26
+
+ vpsrldq \$6,$T0,$T2 # splat input
+ vpsrldq \$6,$T1,$T3
+ vpunpckhqdq $T1,$T0,$T4 # 4
+ vpunpcklqdq $T1,$T0,$T0 # 0:1
+ vpunpcklqdq $T3,$T2,$T3 # 2:3
+
+ vpsrlq \$40,$T4,$T4 # 4
+ vpsrlq \$26,$T0,$T1
+ vpand $MASK,$T0,$T0 # 0
+ vpsrlq \$4,$T3,$T2
+ vpand $MASK,$T1,$T1 # 1
+ vpsrlq \$30,$T3,$T3
+ vpand $MASK,$T2,$T2 # 2
+ vpand $MASK,$T3,$T3 # 3
+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
+
+ jbe .Lskip_loop_avx
+
+ # expand and copy pre-calculated table to stack
+ vmovdqu `16*1-64`($ctx),$D1
+ vmovdqu `16*2-64`($ctx),$D2
+ vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434
+ vpshufd \$0x44,$D4,$D0 # xx12 -> 1212
+ vmovdqa $D3,-0x90(%r11)
+ vmovdqa $D0,0x00(%rsp)
+ vpshufd \$0xEE,$D1,$D4
+ vmovdqu `16*3-64`($ctx),$D0
+ vpshufd \$0x44,$D1,$D1
+ vmovdqa $D4,-0x80(%r11)
+ vmovdqa $D1,0x10(%rsp)
+ vpshufd \$0xEE,$D2,$D3
+ vmovdqu `16*4-64`($ctx),$D1
+ vpshufd \$0x44,$D2,$D2
+ vmovdqa $D3,-0x70(%r11)
+ vmovdqa $D2,0x20(%rsp)
+ vpshufd \$0xEE,$D0,$D4
+ vmovdqu `16*5-64`($ctx),$D2
+ vpshufd \$0x44,$D0,$D0
+ vmovdqa $D4,-0x60(%r11)
+ vmovdqa $D0,0x30(%rsp)
+ vpshufd \$0xEE,$D1,$D3
+ vmovdqu `16*6-64`($ctx),$D0
+ vpshufd \$0x44,$D1,$D1
+ vmovdqa $D3,-0x50(%r11)
+ vmovdqa $D1,0x40(%rsp)
+ vpshufd \$0xEE,$D2,$D4
+ vmovdqu `16*7-64`($ctx),$D1
+ vpshufd \$0x44,$D2,$D2
+ vmovdqa $D4,-0x40(%r11)
+ vmovdqa $D2,0x50(%rsp)
+ vpshufd \$0xEE,$D0,$D3
+ vmovdqu `16*8-64`($ctx),$D2
+ vpshufd \$0x44,$D0,$D0
+ vmovdqa $D3,-0x30(%r11)
+ vmovdqa $D0,0x60(%rsp)
+ vpshufd \$0xEE,$D1,$D4
+ vpshufd \$0x44,$D1,$D1
+ vmovdqa $D4,-0x20(%r11)
+ vmovdqa $D1,0x70(%rsp)
+ vpshufd \$0xEE,$D2,$D3
+ vmovdqa 0x00(%rsp),$D4 # preload r0^2
+ vpshufd \$0x44,$D2,$D2
+ vmovdqa $D3,-0x10(%r11)
+ vmovdqa $D2,0x80(%rsp)
+
+ jmp .Loop_avx
+
+.align 32
+.Loop_avx:
+ ################################################################
+ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ # \___________________/
+ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ # \___________________/ \____________________/
+ #
+ # Note that we start with inp[2:3]*r^2. This is because it
+ # doesn't depend on reduction in previous iteration.
+ ################################################################
+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ #
+ # though note that $Tx and $Hx are "reversed" in this section,
+ # and $D4 is preloaded with r0^2...
+
+ vpmuludq $T0,$D4,$D0 # d0 = h0*r0
+ vpmuludq $T1,$D4,$D1 # d1 = h1*r0
+ vmovdqa $H2,0x20(%r11) # offload hash
+ vpmuludq $T2,$D4,$D2 # d3 = h2*r0
+ vmovdqa 0x10(%rsp),$H2 # r1^2
+ vpmuludq $T3,$D4,$D3 # d3 = h3*r0
+ vpmuludq $T4,$D4,$D4 # d4 = h4*r0
+
+ vmovdqa $H0,0x00(%r11) #
+ vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1
+ vmovdqa $H1,0x10(%r11) #
+ vpmuludq $T3,$H2,$H1 # h3*r1
+ vpaddq $H0,$D0,$D0 # d0 += h4*s1
+ vpaddq $H1,$D4,$D4 # d4 += h3*r1
+ vmovdqa $H3,0x30(%r11) #
+ vpmuludq $T2,$H2,$H0 # h2*r1
+ vpmuludq $T1,$H2,$H1 # h1*r1
+ vpaddq $H0,$D3,$D3 # d3 += h2*r1
+ vmovdqa 0x30(%rsp),$H3 # r2^2
+ vpaddq $H1,$D2,$D2 # d2 += h1*r1
+ vmovdqa $H4,0x40(%r11) #
+ vpmuludq $T0,$H2,$H2 # h0*r1
+ vpmuludq $T2,$H3,$H0 # h2*r2
+ vpaddq $H2,$D1,$D1 # d1 += h0*r1
+
+ vmovdqa 0x40(%rsp),$H4 # s2^2
+ vpaddq $H0,$D4,$D4 # d4 += h2*r2
+ vpmuludq $T1,$H3,$H1 # h1*r2
+ vpmuludq $T0,$H3,$H3 # h0*r2
+ vpaddq $H1,$D3,$D3 # d3 += h1*r2
+ vmovdqa 0x50(%rsp),$H2 # r3^2
+ vpaddq $H3,$D2,$D2 # d2 += h0*r2
+ vpmuludq $T4,$H4,$H0 # h4*s2
+ vpmuludq $T3,$H4,$H4 # h3*s2
+ vpaddq $H0,$D1,$D1 # d1 += h4*s2
+ vmovdqa 0x60(%rsp),$H3 # s3^2
+ vpaddq $H4,$D0,$D0 # d0 += h3*s2
+
+ vmovdqa 0x80(%rsp),$H4 # s4^2
+ vpmuludq $T1,$H2,$H1 # h1*r3
+ vpmuludq $T0,$H2,$H2 # h0*r3
+ vpaddq $H1,$D4,$D4 # d4 += h1*r3
+ vpaddq $H2,$D3,$D3 # d3 += h0*r3
+ vpmuludq $T4,$H3,$H0 # h4*s3
+ vpmuludq $T3,$H3,$H1 # h3*s3
+ vpaddq $H0,$D2,$D2 # d2 += h4*s3
+ vmovdqu 16*0($inp),$H0 # load input
+ vpaddq $H1,$D1,$D1 # d1 += h3*s3
+ vpmuludq $T2,$H3,$H3 # h2*s3
+ vpmuludq $T2,$H4,$T2 # h2*s4
+ vpaddq $H3,$D0,$D0 # d0 += h2*s3
+
+ vmovdqu 16*1($inp),$H1 #
+ vpaddq $T2,$D1,$D1 # d1 += h2*s4
+ vpmuludq $T3,$H4,$T3 # h3*s4
+ vpmuludq $T4,$H4,$T4 # h4*s4
+ vpsrldq \$6,$H0,$H2 # splat input
+ vpaddq $T3,$D2,$D2 # d2 += h3*s4
+ vpaddq $T4,$D3,$D3 # d3 += h4*s4
+ vpsrldq \$6,$H1,$H3 #
+ vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
+ vpmuludq $T1,$H4,$T0 # h1*s4
+ vpunpckhqdq $H1,$H0,$H4 # 4
+ vpaddq $T4,$D4,$D4 # d4 += h0*r4
+ vmovdqa -0x90(%r11),$T4 # r0^4
+ vpaddq $T0,$D0,$D0 # d0 += h1*s4
+
+ vpunpcklqdq $H1,$H0,$H0 # 0:1
+ vpunpcklqdq $H3,$H2,$H3 # 2:3
+
+ #vpsrlq \$40,$H4,$H4 # 4
+ vpsrldq \$`40/8`,$H4,$H4 # 4
+ vpsrlq \$26,$H0,$H1
+ vpand $MASK,$H0,$H0 # 0
+ vpsrlq \$4,$H3,$H2
+ vpand $MASK,$H1,$H1 # 1
+ vpand 0(%rcx),$H4,$H4 # .Lmask24
+ vpsrlq \$30,$H3,$H3
+ vpand $MASK,$H2,$H2 # 2
+ vpand $MASK,$H3,$H3 # 3
+ vpor 32(%rcx),$H4,$H4 # padbit, yes, always
+
+ vpaddq 0x00(%r11),$H0,$H0 # add hash value
+ vpaddq 0x10(%r11),$H1,$H1
+ vpaddq 0x20(%r11),$H2,$H2
+ vpaddq 0x30(%r11),$H3,$H3
+ vpaddq 0x40(%r11),$H4,$H4
+
+ lea 16*2($inp),%rax
+ lea 16*4($inp),$inp
+ sub \$64,$len
+ cmovc %rax,$inp
+
+ ################################################################
+ # Now we accumulate (inp[0:1]+hash)*r^4
+ ################################################################
+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ vpmuludq $H0,$T4,$T0 # h0*r0
+ vpmuludq $H1,$T4,$T1 # h1*r0
+ vpaddq $T0,$D0,$D0
+ vpaddq $T1,$D1,$D1
+ vmovdqa -0x80(%r11),$T2 # r1^4
+ vpmuludq $H2,$T4,$T0 # h2*r0
+ vpmuludq $H3,$T4,$T1 # h3*r0
+ vpaddq $T0,$D2,$D2
+ vpaddq $T1,$D3,$D3
+ vpmuludq $H4,$T4,$T4 # h4*r0
+ vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
+ vpaddq $T4,$D4,$D4
+
+ vpaddq $T0,$D0,$D0 # d0 += h4*s1
+ vpmuludq $H2,$T2,$T1 # h2*r1
+ vpmuludq $H3,$T2,$T0 # h3*r1
+ vpaddq $T1,$D3,$D3 # d3 += h2*r1
+ vmovdqa -0x60(%r11),$T3 # r2^4
+ vpaddq $T0,$D4,$D4 # d4 += h3*r1
+ vpmuludq $H1,$T2,$T1 # h1*r1
+ vpmuludq $H0,$T2,$T2 # h0*r1
+ vpaddq $T1,$D2,$D2 # d2 += h1*r1
+ vpaddq $T2,$D1,$D1 # d1 += h0*r1
+
+ vmovdqa -0x50(%r11),$T4 # s2^4
+ vpmuludq $H2,$T3,$T0 # h2*r2
+ vpmuludq $H1,$T3,$T1 # h1*r2
+ vpaddq $T0,$D4,$D4 # d4 += h2*r2
+ vpaddq $T1,$D3,$D3 # d3 += h1*r2
+ vmovdqa -0x40(%r11),$T2 # r3^4
+ vpmuludq $H0,$T3,$T3 # h0*r2
+ vpmuludq $H4,$T4,$T0 # h4*s2
+ vpaddq $T3,$D2,$D2 # d2 += h0*r2
+ vpaddq $T0,$D1,$D1 # d1 += h4*s2
+ vmovdqa -0x30(%r11),$T3 # s3^4
+ vpmuludq $H3,$T4,$T4 # h3*s2
+ vpmuludq $H1,$T2,$T1 # h1*r3
+ vpaddq $T4,$D0,$D0 # d0 += h3*s2
+
+ vmovdqa -0x10(%r11),$T4 # s4^4
+ vpaddq $T1,$D4,$D4 # d4 += h1*r3
+ vpmuludq $H0,$T2,$T2 # h0*r3
+ vpmuludq $H4,$T3,$T0 # h4*s3
+ vpaddq $T2,$D3,$D3 # d3 += h0*r3
+ vpaddq $T0,$D2,$D2 # d2 += h4*s3
+ vmovdqu 16*2($inp),$T0 # load input
+ vpmuludq $H3,$T3,$T2 # h3*s3
+ vpmuludq $H2,$T3,$T3 # h2*s3
+ vpaddq $T2,$D1,$D1 # d1 += h3*s3
+ vmovdqu 16*3($inp),$T1 #
+ vpaddq $T3,$D0,$D0 # d0 += h2*s3
+
+ vpmuludq $H2,$T4,$H2 # h2*s4
+ vpmuludq $H3,$T4,$H3 # h3*s4
+ vpsrldq \$6,$T0,$T2 # splat input
+ vpaddq $H2,$D1,$D1 # d1 += h2*s4
+ vpmuludq $H4,$T4,$H4 # h4*s4
+ vpsrldq \$6,$T1,$T3 #
+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4
+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4
+ vpmuludq -0x20(%r11),$H0,$H4 # h0*r4
+ vpmuludq $H1,$T4,$H0
+ vpunpckhqdq $T1,$T0,$T4 # 4
+ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
+
+ vpunpcklqdq $T1,$T0,$T0 # 0:1
+ vpunpcklqdq $T3,$T2,$T3 # 2:3
+
+ #vpsrlq \$40,$T4,$T4 # 4
+ vpsrldq \$`40/8`,$T4,$T4 # 4
+ vpsrlq \$26,$T0,$T1
+ vmovdqa 0x00(%rsp),$D4 # preload r0^2
+ vpand $MASK,$T0,$T0 # 0
+ vpsrlq \$4,$T3,$T2
+ vpand $MASK,$T1,$T1 # 1
+ vpand 0(%rcx),$T4,$T4 # .Lmask24
+ vpsrlq \$30,$T3,$T3
+ vpand $MASK,$T2,$T2 # 2
+ vpand $MASK,$T3,$T3 # 3
+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
+
+ ################################################################
+ # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ # and P. Schwabe
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$D1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H4,$D0
+ vpand $MASK,$H4,$H4
+
+ vpsrlq \$26,$H1,$D1
+ vpand $MASK,$H1,$H1
+ vpaddq $D1,$H2,$H2 # h1 -> h2
+
+ vpaddq $D0,$H0,$H0
+ vpsllq \$2,$D0,$D0
+ vpaddq $D0,$H0,$H0 # h4 -> h0
+
+ vpsrlq \$26,$H2,$D2
+ vpand $MASK,$H2,$H2
+ vpaddq $D2,$H3,$H3 # h2 -> h3
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ ja .Loop_avx
+
+.Lskip_loop_avx:
+ ################################################################
+ # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2
+ add \$32,$len
+ jnz .Long_tail_avx
+
+ vpaddq $H2,$T2,$T2
+ vpaddq $H0,$T0,$T0
+ vpaddq $H1,$T1,$T1
+ vpaddq $H3,$T3,$T3
+ vpaddq $H4,$T4,$T4
+
+.Long_tail_avx:
+ vmovdqa $H2,0x20(%r11)
+ vmovdqa $H0,0x00(%r11)
+ vmovdqa $H1,0x10(%r11)
+ vmovdqa $H3,0x30(%r11)
+ vmovdqa $H4,0x40(%r11)
+
+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ vpmuludq $T2,$D4,$D2 # d2 = h2*r0
+ vpmuludq $T0,$D4,$D0 # d0 = h0*r0
+ vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n
+ vpmuludq $T1,$D4,$D1 # d1 = h1*r0
+ vpmuludq $T3,$D4,$D3 # d3 = h3*r0
+ vpmuludq $T4,$D4,$D4 # d4 = h4*r0
+
+ vpmuludq $T3,$H2,$H0 # h3*r1
+ vpaddq $H0,$D4,$D4 # d4 += h3*r1
+ vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n
+ vpmuludq $T2,$H2,$H1 # h2*r1
+ vpaddq $H1,$D3,$D3 # d3 += h2*r1
+ vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n
+ vpmuludq $T1,$H2,$H0 # h1*r1
+ vpaddq $H0,$D2,$D2 # d2 += h1*r1
+ vpmuludq $T0,$H2,$H2 # h0*r1
+ vpaddq $H2,$D1,$D1 # d1 += h0*r1
+ vpmuludq $T4,$H3,$H3 # h4*s1
+ vpaddq $H3,$D0,$D0 # d0 += h4*s1
+
+ vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n
+ vpmuludq $T2,$H4,$H1 # h2*r2
+ vpaddq $H1,$D4,$D4 # d4 += h2*r2
+ vpmuludq $T1,$H4,$H0 # h1*r2
+ vpaddq $H0,$D3,$D3 # d3 += h1*r2
+ vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n
+ vpmuludq $T0,$H4,$H4 # h0*r2
+ vpaddq $H4,$D2,$D2 # d2 += h0*r2
+ vpmuludq $T4,$H2,$H1 # h4*s2
+ vpaddq $H1,$D1,$D1 # d1 += h4*s2
+ vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n
+ vpmuludq $T3,$H2,$H2 # h3*s2
+ vpaddq $H2,$D0,$D0 # d0 += h3*s2
+
+ vpmuludq $T1,$H3,$H0 # h1*r3
+ vpaddq $H0,$D4,$D4 # d4 += h1*r3
+ vpmuludq $T0,$H3,$H3 # h0*r3
+ vpaddq $H3,$D3,$D3 # d3 += h0*r3
+ vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n
+ vpmuludq $T4,$H4,$H1 # h4*s3
+ vpaddq $H1,$D2,$D2 # d2 += h4*s3
+ vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n
+ vpmuludq $T3,$H4,$H0 # h3*s3
+ vpaddq $H0,$D1,$D1 # d1 += h3*s3
+ vpmuludq $T2,$H4,$H4 # h2*s3
+ vpaddq $H4,$D0,$D0 # d0 += h2*s3
+
+ vpmuludq $T0,$H2,$H2 # h0*r4
+ vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4
+ vpmuludq $T4,$H3,$H1 # h4*s4
+ vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4
+ vpmuludq $T3,$H3,$H0 # h3*s4
+ vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4
+ vpmuludq $T2,$H3,$H1 # h2*s4
+ vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4
+ vpmuludq $T1,$H3,$H3 # h1*s4
+ vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4
+
+ jz .Lshort_tail_avx
+
+ vmovdqu 16*0($inp),$H0 # load input
+ vmovdqu 16*1($inp),$H1
+
+ vpsrldq \$6,$H0,$H2 # splat input
+ vpsrldq \$6,$H1,$H3
+ vpunpckhqdq $H1,$H0,$H4 # 4
+ vpunpcklqdq $H1,$H0,$H0 # 0:1
+ vpunpcklqdq $H3,$H2,$H3 # 2:3
+
+ vpsrlq \$40,$H4,$H4 # 4
+ vpsrlq \$26,$H0,$H1
+ vpand $MASK,$H0,$H0 # 0
+ vpsrlq \$4,$H3,$H2
+ vpand $MASK,$H1,$H1 # 1
+ vpsrlq \$30,$H3,$H3
+ vpand $MASK,$H2,$H2 # 2
+ vpand $MASK,$H3,$H3 # 3
+ vpor 32(%rcx),$H4,$H4 # padbit, yes, always
+
+ vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4
+ vpaddq 0x00(%r11),$H0,$H0
+ vpaddq 0x10(%r11),$H1,$H1
+ vpaddq 0x20(%r11),$H2,$H2
+ vpaddq 0x30(%r11),$H3,$H3
+ vpaddq 0x40(%r11),$H4,$H4
+
+ ################################################################
+ # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate
+
+ vpmuludq $H0,$T4,$T0 # h0*r0
+ vpaddq $T0,$D0,$D0 # d0 += h0*r0
+ vpmuludq $H1,$T4,$T1 # h1*r0
+ vpaddq $T1,$D1,$D1 # d1 += h1*r0
+ vpmuludq $H2,$T4,$T0 # h2*r0
+ vpaddq $T0,$D2,$D2 # d2 += h2*r0
+ vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n
+ vpmuludq $H3,$T4,$T1 # h3*r0
+ vpaddq $T1,$D3,$D3 # d3 += h3*r0
+ vpmuludq $H4,$T4,$T4 # h4*r0
+ vpaddq $T4,$D4,$D4 # d4 += h4*r0
+
+ vpmuludq $H3,$T2,$T0 # h3*r1
+ vpaddq $T0,$D4,$D4 # d4 += h3*r1
+ vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1
+ vpmuludq $H2,$T2,$T1 # h2*r1
+ vpaddq $T1,$D3,$D3 # d3 += h2*r1
+ vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2
+ vpmuludq $H1,$T2,$T0 # h1*r1
+ vpaddq $T0,$D2,$D2 # d2 += h1*r1
+ vpmuludq $H0,$T2,$T2 # h0*r1
+ vpaddq $T2,$D1,$D1 # d1 += h0*r1
+ vpmuludq $H4,$T3,$T3 # h4*s1
+ vpaddq $T3,$D0,$D0 # d0 += h4*s1
+
+ vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2
+ vpmuludq $H2,$T4,$T1 # h2*r2
+ vpaddq $T1,$D4,$D4 # d4 += h2*r2
+ vpmuludq $H1,$T4,$T0 # h1*r2
+ vpaddq $T0,$D3,$D3 # d3 += h1*r2
+ vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3
+ vpmuludq $H0,$T4,$T4 # h0*r2
+ vpaddq $T4,$D2,$D2 # d2 += h0*r2
+ vpmuludq $H4,$T2,$T1 # h4*s2
+ vpaddq $T1,$D1,$D1 # d1 += h4*s2
+ vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3
+ vpmuludq $H3,$T2,$T2 # h3*s2
+ vpaddq $T2,$D0,$D0 # d0 += h3*s2
+
+ vpmuludq $H1,$T3,$T0 # h1*r3
+ vpaddq $T0,$D4,$D4 # d4 += h1*r3
+ vpmuludq $H0,$T3,$T3 # h0*r3
+ vpaddq $T3,$D3,$D3 # d3 += h0*r3
+ vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4
+ vpmuludq $H4,$T4,$T1 # h4*s3
+ vpaddq $T1,$D2,$D2 # d2 += h4*s3
+ vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4
+ vpmuludq $H3,$T4,$T0 # h3*s3
+ vpaddq $T0,$D1,$D1 # d1 += h3*s3
+ vpmuludq $H2,$T4,$T4 # h2*s3
+ vpaddq $T4,$D0,$D0 # d0 += h2*s3
+
+ vpmuludq $H0,$T2,$T2 # h0*r4
+ vpaddq $T2,$D4,$D4 # d4 += h0*r4
+ vpmuludq $H4,$T3,$T1 # h4*s4
+ vpaddq $T1,$D3,$D3 # d3 += h4*s4
+ vpmuludq $H3,$T3,$T0 # h3*s4
+ vpaddq $T0,$D2,$D2 # d2 += h3*s4
+ vpmuludq $H2,$T3,$T1 # h2*s4
+ vpaddq $T1,$D1,$D1 # d1 += h2*s4
+ vpmuludq $H1,$T3,$T3 # h1*s4
+ vpaddq $T3,$D0,$D0 # d0 += h1*s4
+
+.Lshort_tail_avx:
+ ################################################################
+ # horizontal addition
+
+ vpsrldq \$8,$D4,$T4
+ vpsrldq \$8,$D3,$T3
+ vpsrldq \$8,$D1,$T1
+ vpsrldq \$8,$D0,$T0
+ vpsrldq \$8,$D2,$T2
+ vpaddq $T3,$D3,$D3
+ vpaddq $T4,$D4,$D4
+ vpaddq $T0,$D0,$D0
+ vpaddq $T1,$D1,$D1
+ vpaddq $T2,$D2,$D2
+
+ ################################################################
+ # lazy reduction
+
+ vpsrlq \$26,$D3,$H3
+ vpand $MASK,$D3,$D3
+ vpaddq $H3,$D4,$D4 # h3 -> h4
+
+ vpsrlq \$26,$D0,$H0
+ vpand $MASK,$D0,$D0
+ vpaddq $H0,$D1,$D1 # h0 -> h1
+
+ vpsrlq \$26,$D4,$H4
+ vpand $MASK,$D4,$D4
+
+ vpsrlq \$26,$D1,$H1
+ vpand $MASK,$D1,$D1
+ vpaddq $H1,$D2,$D2 # h1 -> h2
+
+ vpaddq $H4,$D0,$D0
+ vpsllq \$2,$H4,$H4
+ vpaddq $H4,$D0,$D0 # h4 -> h0
+
+ vpsrlq \$26,$D2,$H2
+ vpand $MASK,$D2,$D2
+ vpaddq $H2,$D3,$D3 # h2 -> h3
+
+ vpsrlq \$26,$D0,$H0
+ vpand $MASK,$D0,$D0
+ vpaddq $H0,$D1,$D1 # h0 -> h1
+
+ vpsrlq \$26,$D3,$H3
+ vpand $MASK,$D3,$D3
+ vpaddq $H3,$D4,$D4 # h3 -> h4
+
+ vmovd $D0,`4*0-48-64`($ctx) # save partially reduced
+ vmovd $D1,`4*1-48-64`($ctx)
+ vmovd $D2,`4*2-48-64`($ctx)
+ vmovd $D3,`4*3-48-64`($ctx)
+ vmovd $D4,`4*4-48-64`($ctx)
+___
+$code.=<<___ if ($win64);
+ vmovdqa 0x50(%r11),%xmm6
+ vmovdqa 0x60(%r11),%xmm7
+ vmovdqa 0x70(%r11),%xmm8
+ vmovdqa 0x80(%r11),%xmm9
+ vmovdqa 0x90(%r11),%xmm10
+ vmovdqa 0xa0(%r11),%xmm11
+ vmovdqa 0xb0(%r11),%xmm12
+ vmovdqa 0xc0(%r11),%xmm13
+ vmovdqa 0xd0(%r11),%xmm14
+ vmovdqa 0xe0(%r11),%xmm15
+ lea 0xf8(%r11),%rsp
+.Ldo_avx_epilogue:
+___
+$code.=<<___ if (!$win64);
+ lea 0x58(%r11),%rsp
+.cfi_def_cfa %rsp,8
+___
+$code.=<<___;
+ vzeroupper
+ ret
+.cfi_endproc
+.size poly1305_blocks_avx,.-poly1305_blocks_avx
+
+.type poly1305_emit_avx,\@function,3
+.align 32
+poly1305_emit_avx:
+ cmpl \$0,20($ctx) # is_base2_26?
+ je .Lemit
+
+ mov 0($ctx),%eax # load hash value base 2^26
+ mov 4($ctx),%ecx
+ mov 8($ctx),%r8d
+ mov 12($ctx),%r11d
+ mov 16($ctx),%r10d
+
+ shl \$26,%rcx # base 2^26 -> base 2^64
+ mov %r8,%r9
+ shl \$52,%r8
+ add %rcx,%rax
+ shr \$12,%r9
+ add %rax,%r8 # h0
+ adc \$0,%r9
+
+ shl \$14,%r11
+ mov %r10,%rax
+ shr \$24,%r10
+ add %r11,%r9
+ shl \$40,%rax
+ add %rax,%r9 # h1
+ adc \$0,%r10 # h2
+
+ mov %r10,%rax # could be partially reduced, so reduce
+ mov %r10,%rcx
+ and \$3,%r10
+ shr \$2,%rax
+ and \$-4,%rcx
+ add %rcx,%rax
+ add %rax,%r8
+ adc \$0,%r9
+ adc \$0,%r10
+
+ mov %r8,%rax
+ add \$5,%r8 # compare to modulus
+ mov %r9,%rcx
+ adc \$0,%r9
+ adc \$0,%r10
+ shr \$2,%r10 # did 130-bit value overflow?
+ cmovnz %r8,%rax
+ cmovnz %r9,%rcx
+
+ add 0($nonce),%rax # accumulate nonce
+ adc 8($nonce),%rcx
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+ ret
+.size poly1305_emit_avx,.-poly1305_emit_avx
+___
+
+if ($avx>1) {
+my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
+ map("%ymm$_",(0..15));
+my $S4=$MASK;
+
+$code.=<<___;
+.type poly1305_blocks_avx2,\@function,4
+.align 32
+poly1305_blocks_avx2:
+.cfi_startproc
+ mov 20($ctx),%r8d # is_base2_26
+ cmp \$128,$len
+ jae .Lblocks_avx2
+ test %r8d,%r8d
+ jz .Lblocks
+
+.Lblocks_avx2:
+ and \$-16,$len
+ jz .Lno_data_avx2
+
+ vzeroupper
+
+ test %r8d,%r8d
+ jz .Lbase2_64_avx2
+
+ test \$63,$len
+ jz .Leven_avx2
+
+ push %rbx
+.cfi_push %rbx
+ push %rbp
+.cfi_push %rbp
+ push %r12
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+.Lblocks_avx2_body:
+
+ mov $len,%r15 # reassign $len
+
+ mov 0($ctx),$d1 # load hash value
+ mov 8($ctx),$d2
+ mov 16($ctx),$h2#d
+
+ mov 24($ctx),$r0 # load r
+ mov 32($ctx),$s1
+
+ ################################# base 2^26 -> base 2^64
+ mov $d1#d,$h0#d
+ and \$`-1*(1<<31)`,$d1
+ mov $d2,$r1 # borrow $r1
+ mov $d2#d,$h1#d
+ and \$`-1*(1<<31)`,$d2
+
+ shr \$6,$d1
+ shl \$52,$r1
+ add $d1,$h0
+ shr \$12,$h1
+ shr \$18,$d2
+ add $r1,$h0
+ adc $d2,$h1
+
+ mov $h2,$d1
+ shl \$40,$d1
+ shr \$24,$h2
+ add $d1,$h1
+ adc \$0,$h2 # can be partially reduced...
+
+ mov \$-4,$d2 # ... so reduce
+ mov $h2,$d1
+ and $h2,$d2
+ shr \$2,$d1
+ and \$3,$h2
+ add $d2,$d1 # =*5
+ add $d1,$h0
+ adc \$0,$h1
+ adc \$0,$h2
+
+ mov $s1,$r1
+ mov $s1,%rax
+ shr \$2,$s1
+ add $r1,$s1 # s1 = r1 + (r1 >> 2)
+
+.Lbase2_26_pre_avx2:
+ add 0($inp),$h0 # accumulate input
+ adc 8($inp),$h1
+ lea 16($inp),$inp
+ adc $padbit,$h2
+ sub \$16,%r15
+
+ call __poly1305_block
+ mov $r1,%rax
+
+ test \$63,%r15
+ jnz .Lbase2_26_pre_avx2
+
+ test $padbit,$padbit # if $padbit is zero,
+ jz .Lstore_base2_64_avx2 # store hash in base 2^64 format
+
+ ################################# base 2^64 -> base 2^26
+ mov $h0,%rax
+ mov $h0,%rdx
+ shr \$52,$h0
+ mov $h1,$r0
+ mov $h1,$r1
+ shr \$26,%rdx
+ and \$0x3ffffff,%rax # h[0]
+ shl \$12,$r0
+ and \$0x3ffffff,%rdx # h[1]
+ shr \$14,$h1
+ or $r0,$h0
+ shl \$24,$h2
+ and \$0x3ffffff,$h0 # h[2]
+ shr \$40,$r1
+ and \$0x3ffffff,$h1 # h[3]
+ or $r1,$h2 # h[4]
+
+ test %r15,%r15
+ jz .Lstore_base2_26_avx2
+
+ vmovd %rax#d,%x#$H0
+ vmovd %rdx#d,%x#$H1
+ vmovd $h0#d,%x#$H2
+ vmovd $h1#d,%x#$H3
+ vmovd $h2#d,%x#$H4
+ jmp .Lproceed_avx2
+
+.align 32
+.Lstore_base2_64_avx2:
+ mov $h0,0($ctx)
+ mov $h1,8($ctx)
+ mov $h2,16($ctx) # note that is_base2_26 is zeroed
+ jmp .Ldone_avx2
+
+.align 16
+.Lstore_base2_26_avx2:
+ mov %rax#d,0($ctx) # store hash value base 2^26
+ mov %rdx#d,4($ctx)
+ mov $h0#d,8($ctx)
+ mov $h1#d,12($ctx)
+ mov $h2#d,16($ctx)
+.align 16
+.Ldone_avx2:
+ mov 0(%rsp),%r15
+.cfi_restore %r15
+ mov 8(%rsp),%r14
+.cfi_restore %r14
+ mov 16(%rsp),%r13
+.cfi_restore %r13
+ mov 24(%rsp),%r12
+.cfi_restore %r12
+ mov 32(%rsp),%rbp
+.cfi_restore %rbp
+ mov 40(%rsp),%rbx
+.cfi_restore %rbx
+ lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lno_data_avx2:
+.Lblocks_avx2_epilogue:
+ ret
+.cfi_endproc
+
+.align 32
+.Lbase2_64_avx2:
+.cfi_startproc
+ push %rbx
+.cfi_push %rbx
+ push %rbp
+.cfi_push %rbp
+ push %r12
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+.Lbase2_64_avx2_body:
+
+ mov $len,%r15 # reassign $len
+
+ mov 24($ctx),$r0 # load r
+ mov 32($ctx),$s1
+
+ mov 0($ctx),$h0 # load hash value
+ mov 8($ctx),$h1
+ mov 16($ctx),$h2#d
+
+ mov $s1,$r1
+ mov $s1,%rax
+ shr \$2,$s1
+ add $r1,$s1 # s1 = r1 + (r1 >> 2)
+
+ test \$63,$len
+ jz .Linit_avx2
+
+.Lbase2_64_pre_avx2:
+ add 0($inp),$h0 # accumulate input
+ adc 8($inp),$h1
+ lea 16($inp),$inp
+ adc $padbit,$h2
+ sub \$16,%r15
+
+ call __poly1305_block
+ mov $r1,%rax
+
+ test \$63,%r15
+ jnz .Lbase2_64_pre_avx2
+
+.Linit_avx2:
+ ################################# base 2^64 -> base 2^26
+ mov $h0,%rax
+ mov $h0,%rdx
+ shr \$52,$h0
+ mov $h1,$d1
+ mov $h1,$d2
+ shr \$26,%rdx
+ and \$0x3ffffff,%rax # h[0]
+ shl \$12,$d1
+ and \$0x3ffffff,%rdx # h[1]
+ shr \$14,$h1
+ or $d1,$h0
+ shl \$24,$h2
+ and \$0x3ffffff,$h0 # h[2]
+ shr \$40,$d2
+ and \$0x3ffffff,$h1 # h[3]
+ or $d2,$h2 # h[4]
+
+ vmovd %rax#d,%x#$H0
+ vmovd %rdx#d,%x#$H1
+ vmovd $h0#d,%x#$H2
+ vmovd $h1#d,%x#$H3
+ vmovd $h2#d,%x#$H4
+ movl \$1,20($ctx) # set is_base2_26
+
+ call __poly1305_init_avx
+
+.Lproceed_avx2:
+ mov %r15,$len # restore $len
+ mov OPENSSL_ia32cap_P+8(%rip),%r10d
+ mov \$`(1<<31|1<<30|1<<16)`,%r11d
+
+ mov 0(%rsp),%r15
+.cfi_restore %r15
+ mov 8(%rsp),%r14
+.cfi_restore %r14
+ mov 16(%rsp),%r13
+.cfi_restore %r13
+ mov 24(%rsp),%r12
+.cfi_restore %r12
+ mov 32(%rsp),%rbp
+.cfi_restore %rbp
+ mov 40(%rsp),%rbx
+.cfi_restore %rbx
+ lea 48(%rsp),%rax
+ lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lbase2_64_avx2_epilogue:
+ jmp .Ldo_avx2
+.cfi_endproc
+
+.align 32
+.Leven_avx2:
+.cfi_startproc
+ mov OPENSSL_ia32cap_P+8(%rip),%r10d
+ vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26
+ vmovd 4*1($ctx),%x#$H1
+ vmovd 4*2($ctx),%x#$H2
+ vmovd 4*3($ctx),%x#$H3
+ vmovd 4*4($ctx),%x#$H4
+
+.Ldo_avx2:
+___
+$code.=<<___ if ($avx>2);
+ cmp \$512,$len
+ jb .Lskip_avx512
+ and %r11d,%r10d
+ test \$`1<<16`,%r10d # check for AVX512F
+ jnz .Lblocks_avx512
+.Lskip_avx512:
+___
+$code.=<<___ if (!$win64);
+ lea -8(%rsp),%r11
+.cfi_def_cfa %r11,16
+ sub \$0x128,%rsp
+___
+$code.=<<___ if ($win64);
+ lea -0xf8(%rsp),%r11
+ sub \$0x1c8,%rsp
+ vmovdqa %xmm6,0x50(%r11)
+ vmovdqa %xmm7,0x60(%r11)
+ vmovdqa %xmm8,0x70(%r11)
+ vmovdqa %xmm9,0x80(%r11)
+ vmovdqa %xmm10,0x90(%r11)
+ vmovdqa %xmm11,0xa0(%r11)
+ vmovdqa %xmm12,0xb0(%r11)
+ vmovdqa %xmm13,0xc0(%r11)
+ vmovdqa %xmm14,0xd0(%r11)
+ vmovdqa %xmm15,0xe0(%r11)
+.Ldo_avx2_body:
+___
+$code.=<<___;
+ lea .Lconst(%rip),%rcx
+ lea 48+64($ctx),$ctx # size optimization
+ vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
+
+ # expand and copy pre-calculated table to stack
+ vmovdqu `16*0-64`($ctx),%x#$T2
+ and \$-512,%rsp
+ vmovdqu `16*1-64`($ctx),%x#$T3
+ vmovdqu `16*2-64`($ctx),%x#$T4
+ vmovdqu `16*3-64`($ctx),%x#$D0
+ vmovdqu `16*4-64`($ctx),%x#$D1
+ vmovdqu `16*5-64`($ctx),%x#$D2
+ lea 0x90(%rsp),%rax # size optimization
+ vmovdqu `16*6-64`($ctx),%x#$D3
+ vpermd $T2,$T0,$T2 # 00003412 -> 14243444
+ vmovdqu `16*7-64`($ctx),%x#$D4
+ vpermd $T3,$T0,$T3
+ vmovdqu `16*8-64`($ctx),%x#$MASK
+ vpermd $T4,$T0,$T4
+ vmovdqa $T2,0x00(%rsp)
+ vpermd $D0,$T0,$D0
+ vmovdqa $T3,0x20-0x90(%rax)
+ vpermd $D1,$T0,$D1
+ vmovdqa $T4,0x40-0x90(%rax)
+ vpermd $D2,$T0,$D2
+ vmovdqa $D0,0x60-0x90(%rax)
+ vpermd $D3,$T0,$D3
+ vmovdqa $D1,0x80-0x90(%rax)
+ vpermd $D4,$T0,$D4
+ vmovdqa $D2,0xa0-0x90(%rax)
+ vpermd $MASK,$T0,$MASK
+ vmovdqa $D3,0xc0-0x90(%rax)
+ vmovdqa $D4,0xe0-0x90(%rax)
+ vmovdqa $MASK,0x100-0x90(%rax)
+ vmovdqa 64(%rcx),$MASK # .Lmask26
+
+ ################################################################
+ # load input
+ vmovdqu 16*0($inp),%x#$T0
+ vmovdqu 16*1($inp),%x#$T1
+ vinserti128 \$1,16*2($inp),$T0,$T0
+ vinserti128 \$1,16*3($inp),$T1,$T1
+ lea 16*4($inp),$inp
+
+ vpsrldq \$6,$T0,$T2 # splat input
+ vpsrldq \$6,$T1,$T3
+ vpunpckhqdq $T1,$T0,$T4 # 4
+ vpunpcklqdq $T3,$T2,$T2 # 2:3
+ vpunpcklqdq $T1,$T0,$T0 # 0:1
+
+ vpsrlq \$30,$T2,$T3
+ vpsrlq \$4,$T2,$T2
+ vpsrlq \$26,$T0,$T1
+ vpsrlq \$40,$T4,$T4 # 4
+ vpand $MASK,$T2,$T2 # 2
+ vpand $MASK,$T0,$T0 # 0
+ vpand $MASK,$T1,$T1 # 1
+ vpand $MASK,$T3,$T3 # 3
+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
+
+ vpaddq $H2,$T2,$H2 # accumulate input
+ sub \$64,$len
+ jz .Ltail_avx2
+ jmp .Loop_avx2
+
+.align 32
+.Loop_avx2:
+ ################################################################
+ # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
+ # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
+ # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2
+ # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1
+ # \________/\__________/
+ ################################################################
+ #vpaddq $H2,$T2,$H2 # accumulate input
+ vpaddq $H0,$T0,$H0
+ vmovdqa `32*0`(%rsp),$T0 # r0^4
+ vpaddq $H1,$T1,$H1
+ vmovdqa `32*1`(%rsp),$T1 # r1^4
+ vpaddq $H3,$T3,$H3
+ vmovdqa `32*3`(%rsp),$T2 # r2^4
+ vpaddq $H4,$T4,$H4
+ vmovdqa `32*6-0x90`(%rax),$T3 # s3^4
+ vmovdqa `32*8-0x90`(%rax),$S4 # s4^4
+
+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ #
+ # however, as h2 is "chronologically" first one available pull
+ # corresponding operations up, so it's
+ #
+ # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4
+ # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3
+ # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4
+
+ vpmuludq $H2,$T0,$D2 # d2 = h2*r0
+ vpmuludq $H2,$T1,$D3 # d3 = h2*r1
+ vpmuludq $H2,$T2,$D4 # d4 = h2*r2
+ vpmuludq $H2,$T3,$D0 # d0 = h2*s3
+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
+
+ vpmuludq $H0,$T1,$T4 # h0*r1
+ vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp
+ vpaddq $T4,$D1,$D1 # d1 += h0*r1
+ vpaddq $H2,$D2,$D2 # d2 += h1*r1
+ vpmuludq $H3,$T1,$T4 # h3*r1
+ vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1
+ vpaddq $T4,$D4,$D4 # d4 += h3*r1
+ vpaddq $H2,$D0,$D0 # d0 += h4*s1
+ vmovdqa `32*4-0x90`(%rax),$T1 # s2
+
+ vpmuludq $H0,$T0,$T4 # h0*r0
+ vpmuludq $H1,$T0,$H2 # h1*r0
+ vpaddq $T4,$D0,$D0 # d0 += h0*r0
+ vpaddq $H2,$D1,$D1 # d1 += h1*r0
+ vpmuludq $H3,$T0,$T4 # h3*r0
+ vpmuludq $H4,$T0,$H2 # h4*r0
+ vmovdqu 16*0($inp),%x#$T0 # load input
+ vpaddq $T4,$D3,$D3 # d3 += h3*r0
+ vpaddq $H2,$D4,$D4 # d4 += h4*r0
+ vinserti128 \$1,16*2($inp),$T0,$T0
+
+ vpmuludq $H3,$T1,$T4 # h3*s2
+ vpmuludq $H4,$T1,$H2 # h4*s2
+ vmovdqu 16*1($inp),%x#$T1
+ vpaddq $T4,$D0,$D0 # d0 += h3*s2
+ vpaddq $H2,$D1,$D1 # d1 += h4*s2
+ vmovdqa `32*5-0x90`(%rax),$H2 # r3
+ vpmuludq $H1,$T2,$T4 # h1*r2
+ vpmuludq $H0,$T2,$T2 # h0*r2
+ vpaddq $T4,$D3,$D3 # d3 += h1*r2
+ vpaddq $T2,$D2,$D2 # d2 += h0*r2
+ vinserti128 \$1,16*3($inp),$T1,$T1
+ lea 16*4($inp),$inp
+
+ vpmuludq $H1,$H2,$T4 # h1*r3
+ vpmuludq $H0,$H2,$H2 # h0*r3
+ vpsrldq \$6,$T0,$T2 # splat input
+ vpaddq $T4,$D4,$D4 # d4 += h1*r3
+ vpaddq $H2,$D3,$D3 # d3 += h0*r3
+ vpmuludq $H3,$T3,$T4 # h3*s3
+ vpmuludq $H4,$T3,$H2 # h4*s3
+ vpsrldq \$6,$T1,$T3
+ vpaddq $T4,$D1,$D1 # d1 += h3*s3
+ vpaddq $H2,$D2,$D2 # d2 += h4*s3
+ vpunpckhqdq $T1,$T0,$T4 # 4
+
+ vpmuludq $H3,$S4,$H3 # h3*s4
+ vpmuludq $H4,$S4,$H4 # h4*s4
+ vpunpcklqdq $T1,$T0,$T0 # 0:1
+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4
+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4
+ vpunpcklqdq $T3,$T2,$T3 # 2:3
+ vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4
+ vpmuludq $H1,$S4,$H0 # h1*s4
+ vmovdqa 64(%rcx),$MASK # .Lmask26
+ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
+
+ ################################################################
+ # lazy reduction (interleaved with tail of input splat)
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$D1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H4,$D4
+ vpand $MASK,$H4,$H4
+
+ vpsrlq \$4,$T3,$T2
+
+ vpsrlq \$26,$H1,$D1
+ vpand $MASK,$H1,$H1
+ vpaddq $D1,$H2,$H2 # h1 -> h2
+
+ vpaddq $D4,$H0,$H0
+ vpsllq \$2,$D4,$D4
+ vpaddq $D4,$H0,$H0 # h4 -> h0
+
+ vpand $MASK,$T2,$T2 # 2
+ vpsrlq \$26,$T0,$T1
+
+ vpsrlq \$26,$H2,$D2
+ vpand $MASK,$H2,$H2
+ vpaddq $D2,$H3,$H3 # h2 -> h3
+
+ vpaddq $T2,$H2,$H2 # modulo-scheduled
+ vpsrlq \$30,$T3,$T3
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$40,$T4,$T4 # 4
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpand $MASK,$T0,$T0 # 0
+ vpand $MASK,$T1,$T1 # 1
+ vpand $MASK,$T3,$T3 # 3
+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
+
+ sub \$64,$len
+ jnz .Loop_avx2
+
+ .byte 0x66,0x90
+.Ltail_avx2:
+ ################################################################
+ # while above multiplications were by r^4 in all lanes, in last
+ # iteration we multiply least significant lane by r^4 and most
+ # significant one by r, so copy of above except that references
+ # to the precomputed table are displaced by 4...
+
+ #vpaddq $H2,$T2,$H2 # accumulate input
+ vpaddq $H0,$T0,$H0
+ vmovdqu `32*0+4`(%rsp),$T0 # r0^4
+ vpaddq $H1,$T1,$H1
+ vmovdqu `32*1+4`(%rsp),$T1 # r1^4
+ vpaddq $H3,$T3,$H3
+ vmovdqu `32*3+4`(%rsp),$T2 # r2^4
+ vpaddq $H4,$T4,$H4
+ vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4
+ vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4
+
+ vpmuludq $H2,$T0,$D2 # d2 = h2*r0
+ vpmuludq $H2,$T1,$D3 # d3 = h2*r1
+ vpmuludq $H2,$T2,$D4 # d4 = h2*r2
+ vpmuludq $H2,$T3,$D0 # d0 = h2*s3
+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
+
+ vpmuludq $H0,$T1,$T4 # h0*r1
+ vpmuludq $H1,$T1,$H2 # h1*r1
+ vpaddq $T4,$D1,$D1 # d1 += h0*r1
+ vpaddq $H2,$D2,$D2 # d2 += h1*r1
+ vpmuludq $H3,$T1,$T4 # h3*r1
+ vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1
+ vpaddq $T4,$D4,$D4 # d4 += h3*r1
+ vpaddq $H2,$D0,$D0 # d0 += h4*s1
+
+ vpmuludq $H0,$T0,$T4 # h0*r0
+ vpmuludq $H1,$T0,$H2 # h1*r0
+ vpaddq $T4,$D0,$D0 # d0 += h0*r0
+ vmovdqu `32*4+4-0x90`(%rax),$T1 # s2
+ vpaddq $H2,$D1,$D1 # d1 += h1*r0
+ vpmuludq $H3,$T0,$T4 # h3*r0
+ vpmuludq $H4,$T0,$H2 # h4*r0
+ vpaddq $T4,$D3,$D3 # d3 += h3*r0
+ vpaddq $H2,$D4,$D4 # d4 += h4*r0
+
+ vpmuludq $H3,$T1,$T4 # h3*s2
+ vpmuludq $H4,$T1,$H2 # h4*s2
+ vpaddq $T4,$D0,$D0 # d0 += h3*s2
+ vpaddq $H2,$D1,$D1 # d1 += h4*s2
+ vmovdqu `32*5+4-0x90`(%rax),$H2 # r3
+ vpmuludq $H1,$T2,$T4 # h1*r2
+ vpmuludq $H0,$T2,$T2 # h0*r2
+ vpaddq $T4,$D3,$D3 # d3 += h1*r2
+ vpaddq $T2,$D2,$D2 # d2 += h0*r2
+
+ vpmuludq $H1,$H2,$T4 # h1*r3
+ vpmuludq $H0,$H2,$H2 # h0*r3
+ vpaddq $T4,$D4,$D4 # d4 += h1*r3
+ vpaddq $H2,$D3,$D3 # d3 += h0*r3
+ vpmuludq $H3,$T3,$T4 # h3*s3
+ vpmuludq $H4,$T3,$H2 # h4*s3
+ vpaddq $T4,$D1,$D1 # d1 += h3*s3
+ vpaddq $H2,$D2,$D2 # d2 += h4*s3
+
+ vpmuludq $H3,$S4,$H3 # h3*s4
+ vpmuludq $H4,$S4,$H4 # h4*s4
+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4
+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4
+ vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4
+ vpmuludq $H1,$S4,$H0 # h1*s4
+ vmovdqa 64(%rcx),$MASK # .Lmask26
+ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
+
+ ################################################################
+ # horizontal addition
+
+ vpsrldq \$8,$D1,$T1
+ vpsrldq \$8,$H2,$T2
+ vpsrldq \$8,$H3,$T3
+ vpsrldq \$8,$H4,$T4
+ vpsrldq \$8,$H0,$T0
+ vpaddq $T1,$D1,$D1
+ vpaddq $T2,$H2,$H2
+ vpaddq $T3,$H3,$H3
+ vpaddq $T4,$H4,$H4
+ vpaddq $T0,$H0,$H0
+
+ vpermq \$0x2,$H3,$T3
+ vpermq \$0x2,$H4,$T4
+ vpermq \$0x2,$H0,$T0
+ vpermq \$0x2,$D1,$T1
+ vpermq \$0x2,$H2,$T2
+ vpaddq $T3,$H3,$H3
+ vpaddq $T4,$H4,$H4
+ vpaddq $T0,$H0,$H0
+ vpaddq $T1,$D1,$D1
+ vpaddq $T2,$H2,$H2
+
+ ################################################################
+ # lazy reduction
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$D1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H4,$D4
+ vpand $MASK,$H4,$H4
+
+ vpsrlq \$26,$H1,$D1
+ vpand $MASK,$H1,$H1
+ vpaddq $D1,$H2,$H2 # h1 -> h2
+
+ vpaddq $D4,$H0,$H0
+ vpsllq \$2,$D4,$D4
+ vpaddq $D4,$H0,$H0 # h4 -> h0
+
+ vpsrlq \$26,$H2,$D2
+ vpand $MASK,$H2,$H2
+ vpaddq $D2,$H3,$H3 # h2 -> h3
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
+ vmovd %x#$H1,`4*1-48-64`($ctx)
+ vmovd %x#$H2,`4*2-48-64`($ctx)
+ vmovd %x#$H3,`4*3-48-64`($ctx)
+ vmovd %x#$H4,`4*4-48-64`($ctx)
+___
+$code.=<<___ if ($win64);
+ vmovdqa 0x50(%r11),%xmm6
+ vmovdqa 0x60(%r11),%xmm7
+ vmovdqa 0x70(%r11),%xmm8
+ vmovdqa 0x80(%r11),%xmm9
+ vmovdqa 0x90(%r11),%xmm10
+ vmovdqa 0xa0(%r11),%xmm11
+ vmovdqa 0xb0(%r11),%xmm12
+ vmovdqa 0xc0(%r11),%xmm13
+ vmovdqa 0xd0(%r11),%xmm14
+ vmovdqa 0xe0(%r11),%xmm15
+ lea 0xf8(%r11),%rsp
+.Ldo_avx2_epilogue:
+___
+$code.=<<___ if (!$win64);
+ lea 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
+___
+$code.=<<___;
+ vzeroupper
+ ret
+.cfi_endproc
+.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
+___
+#######################################################################
+if ($avx>2) {
+# On entry we have input length divisible by 64. But since inner loop
+# processes 128 bytes per iteration, cases when length is not divisible
+# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
+# reason stack layout is kept identical to poly1305_blocks_avx2. If not
+# for this tail, we wouldn't have to even allocate stack frame...
+
+my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
+my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
+my $PADBIT="%zmm30";
+
+map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
+map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4));
+map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
+map(s/%y/%z/,($MASK));
+
+$code.=<<___;
+.type poly1305_blocks_avx512,\@function,4
+.align 32
+poly1305_blocks_avx512:
+.cfi_startproc
+.Lblocks_avx512:
+ mov \$15,%eax
+ kmovw %eax,%k2
+___
+$code.=<<___ if (!$win64);
+ lea -8(%rsp),%r11
+.cfi_def_cfa %r11,16
+ sub \$0x128,%rsp
+___
+$code.=<<___ if ($win64);
+ lea -0xf8(%rsp),%r11
+ sub \$0x1c8,%rsp
+ vmovdqa %xmm6,0x50(%r11)
+ vmovdqa %xmm7,0x60(%r11)
+ vmovdqa %xmm8,0x70(%r11)
+ vmovdqa %xmm9,0x80(%r11)
+ vmovdqa %xmm10,0x90(%r11)
+ vmovdqa %xmm11,0xa0(%r11)
+ vmovdqa %xmm12,0xb0(%r11)
+ vmovdqa %xmm13,0xc0(%r11)
+ vmovdqa %xmm14,0xd0(%r11)
+ vmovdqa %xmm15,0xe0(%r11)
+.Ldo_avx512_body:
+___
+$code.=<<___;
+ lea .Lconst(%rip),%rcx
+ lea 48+64($ctx),$ctx # size optimization
+ vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2
+
+ # expand pre-calculated table
+ vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0}
+ and \$-512,%rsp
+ vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1}
+ mov \$0x20,%rax
+ vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
+ vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2}
+ vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2}
+ vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3}
+ vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3}
+ vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4}
+ vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4}
+ vpermd $D0,$T2,$R0 # 00003412 -> 14243444
+ vpbroadcastq 64(%rcx),$MASK # .Lmask26
+ vpermd $D1,$T2,$R1
+ vpermd $T0,$T2,$S1
+ vpermd $D2,$T2,$R2
+ vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0
+ vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
+ vpermd $T1,$T2,$S2
+ vmovdqu64 $R1,0x00(%rsp,%rax){%k2}
+ vpsrlq \$32,$R1,$T1
+ vpermd $D3,$T2,$R3
+ vmovdqa64 $S1,0x40(%rsp){%k2}
+ vpermd $T3,$T2,$S3
+ vpermd $D4,$T2,$R4
+ vmovdqu64 $R2,0x40(%rsp,%rax){%k2}
+ vpermd $T4,$T2,$S4
+ vmovdqa64 $S2,0x80(%rsp){%k2}
+ vmovdqu64 $R3,0x80(%rsp,%rax){%k2}
+ vmovdqa64 $S3,0xc0(%rsp){%k2}
+ vmovdqu64 $R4,0xc0(%rsp,%rax){%k2}
+ vmovdqa64 $S4,0x100(%rsp){%k2}
+
+ ################################################################
+ # calculate 5th through 8th powers of the key
+ #
+ # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1
+ # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2
+ # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3
+ # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4
+ # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0
+
+ vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
+ vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
+ vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
+ vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
+ vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
+ vpsrlq \$32,$R2,$T2
+
+ vpmuludq $T1,$S4,$M0
+ vpmuludq $T1,$R0,$M1
+ vpmuludq $T1,$R1,$M2
+ vpmuludq $T1,$R2,$M3
+ vpmuludq $T1,$R3,$M4
+ vpsrlq \$32,$R3,$T3
+ vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4
+ vpaddq $M1,$D1,$D1 # d1 += r1'*r0
+ vpaddq $M2,$D2,$D2 # d2 += r1'*r1
+ vpaddq $M3,$D3,$D3 # d3 += r1'*r2
+ vpaddq $M4,$D4,$D4 # d4 += r1'*r3
+
+ vpmuludq $T2,$S3,$M0
+ vpmuludq $T2,$S4,$M1
+ vpmuludq $T2,$R1,$M3
+ vpmuludq $T2,$R2,$M4
+ vpmuludq $T2,$R0,$M2
+ vpsrlq \$32,$R4,$T4
+ vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3
+ vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4
+ vpaddq $M3,$D3,$D3 # d3 += r2'*r1
+ vpaddq $M4,$D4,$D4 # d4 += r2'*r2
+ vpaddq $M2,$D2,$D2 # d2 += r2'*r0
+
+ vpmuludq $T3,$S2,$M0
+ vpmuludq $T3,$R0,$M3
+ vpmuludq $T3,$R1,$M4
+ vpmuludq $T3,$S3,$M1
+ vpmuludq $T3,$S4,$M2
+ vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2
+ vpaddq $M3,$D3,$D3 # d3 += r3'*r0
+ vpaddq $M4,$D4,$D4 # d4 += r3'*r1
+ vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3
+ vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4
+
+ vpmuludq $T4,$S4,$M3
+ vpmuludq $T4,$R0,$M4
+ vpmuludq $T4,$S1,$M0
+ vpmuludq $T4,$S2,$M1
+ vpmuludq $T4,$S3,$M2
+ vpaddq $M3,$D3,$D3 # d3 += r2'*5*r4
+ vpaddq $M4,$D4,$D4 # d4 += r2'*r0
+ vpaddq $M0,$D0,$D0 # d0 += r2'*5*r1
+ vpaddq $M1,$D1,$D1 # d1 += r2'*5*r2
+ vpaddq $M2,$D2,$D2 # d2 += r2'*5*r3
+
+ ################################################################
+ # load input
+ vmovdqu64 16*0($inp),%z#$T3
+ vmovdqu64 16*4($inp),%z#$T4
+ lea 16*8($inp),$inp
+
+ ################################################################
+ # lazy reduction
+
+ vpsrlq \$26,$D3,$M3
+ vpandq $MASK,$D3,$D3
+ vpaddq $M3,$D4,$D4 # d3 -> d4
+
+ vpsrlq \$26,$D0,$M0
+ vpandq $MASK,$D0,$D0
+ vpaddq $M0,$D1,$D1 # d0 -> d1
+
+ vpsrlq \$26,$D4,$M4
+ vpandq $MASK,$D4,$D4
+
+ vpsrlq \$26,$D1,$M1
+ vpandq $MASK,$D1,$D1
+ vpaddq $M1,$D2,$D2 # d1 -> d2
+
+ vpaddq $M4,$D0,$D0
+ vpsllq \$2,$M4,$M4
+ vpaddq $M4,$D0,$D0 # d4 -> d0
+
+ vpsrlq \$26,$D2,$M2
+ vpandq $MASK,$D2,$D2
+ vpaddq $M2,$D3,$D3 # d2 -> d3
+
+ vpsrlq \$26,$D0,$M0
+ vpandq $MASK,$D0,$D0
+ vpaddq $M0,$D1,$D1 # d0 -> d1
+
+ vpsrlq \$26,$D3,$M3
+ vpandq $MASK,$D3,$D3
+ vpaddq $M3,$D4,$D4 # d3 -> d4
+
+ ################################################################
+ # at this point we have 14243444 in $R0-$S4 and 05060708 in
+ # $D0-$D4, ...
+
+ vpunpcklqdq $T4,$T3,$T0 # transpose input
+ vpunpckhqdq $T4,$T3,$T4
+
+ # ... since input 64-bit lanes are ordered as 73625140, we could
+ # "vperm" it to 76543210 (here and in each loop iteration), *or*
+ # we could just flow along, hence the goal for $R0-$S4 is
+ # 1858286838784888 ...
+
+ vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512:
+ mov \$0x7777,%eax
+ kmovw %eax,%k1
+
+ vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4---
+ vpermd $R1,$M0,$R1
+ vpermd $R2,$M0,$R2
+ vpermd $R3,$M0,$R3
+ vpermd $R4,$M0,$R4
+
+ vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888
+ vpermd $D1,$M0,${R1}{%k1}
+ vpermd $D2,$M0,${R2}{%k1}
+ vpermd $D3,$M0,${R3}{%k1}
+ vpermd $D4,$M0,${R4}{%k1}
+
+ vpslld \$2,$R1,$S1 # *5
+ vpslld \$2,$R2,$S2
+ vpslld \$2,$R3,$S3
+ vpslld \$2,$R4,$S4
+ vpaddd $R1,$S1,$S1
+ vpaddd $R2,$S2,$S2
+ vpaddd $R3,$S3,$S3
+ vpaddd $R4,$S4,$S4
+
+ vpbroadcastq 32(%rcx),$PADBIT # .L129
+
+ vpsrlq \$52,$T0,$T2 # splat input
+ vpsllq \$12,$T4,$T3
+ vporq $T3,$T2,$T2
+ vpsrlq \$26,$T0,$T1
+ vpsrlq \$14,$T4,$T3
+ vpsrlq \$40,$T4,$T4 # 4
+ vpandq $MASK,$T2,$T2 # 2
+ vpandq $MASK,$T0,$T0 # 0
+ #vpandq $MASK,$T1,$T1 # 1
+ #vpandq $MASK,$T3,$T3 # 3
+ #vporq $PADBIT,$T4,$T4 # padbit, yes, always
+
+ vpaddq $H2,$T2,$H2 # accumulate input
+ sub \$192,$len
+ jbe .Ltail_avx512
+ jmp .Loop_avx512
+
+.align 32
+.Loop_avx512:
+ ################################################################
+ # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8
+ # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7
+ # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6
+ # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5
+ # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4
+ # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3
+ # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2
+ # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1
+ # \________/\___________/
+ ################################################################
+ #vpaddq $H2,$T2,$H2 # accumulate input
+
+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ #
+ # however, as h2 is "chronologically" first one available pull
+ # corresponding operations up, so it's
+ #
+ # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4
+ # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0
+ # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1
+ # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2
+ # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3
+
+ vpmuludq $H2,$R1,$D3 # d3 = h2*r1
+ vpaddq $H0,$T0,$H0
+ vpmuludq $H2,$R2,$D4 # d4 = h2*r2
+ vpandq $MASK,$T1,$T1 # 1
+ vpmuludq $H2,$S3,$D0 # d0 = h2*s3
+ vpandq $MASK,$T3,$T3 # 3
+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
+ vporq $PADBIT,$T4,$T4 # padbit, yes, always
+ vpmuludq $H2,$R0,$D2 # d2 = h2*r0
+ vpaddq $H1,$T1,$H1 # accumulate input
+ vpaddq $H3,$T3,$H3
+ vpaddq $H4,$T4,$H4
+
+ vmovdqu64 16*0($inp),$T3 # load input
+ vmovdqu64 16*4($inp),$T4
+ lea 16*8($inp),$inp
+ vpmuludq $H0,$R3,$M3
+ vpmuludq $H0,$R4,$M4
+ vpmuludq $H0,$R0,$M0
+ vpmuludq $H0,$R1,$M1
+ vpaddq $M3,$D3,$D3 # d3 += h0*r3
+ vpaddq $M4,$D4,$D4 # d4 += h0*r4
+ vpaddq $M0,$D0,$D0 # d0 += h0*r0
+ vpaddq $M1,$D1,$D1 # d1 += h0*r1
+
+ vpmuludq $H1,$R2,$M3
+ vpmuludq $H1,$R3,$M4
+ vpmuludq $H1,$S4,$M0
+ vpmuludq $H0,$R2,$M2
+ vpaddq $M3,$D3,$D3 # d3 += h1*r2
+ vpaddq $M4,$D4,$D4 # d4 += h1*r3
+ vpaddq $M0,$D0,$D0 # d0 += h1*s4
+ vpaddq $M2,$D2,$D2 # d2 += h0*r2
+
+ vpunpcklqdq $T4,$T3,$T0 # transpose input
+ vpunpckhqdq $T4,$T3,$T4
+
+ vpmuludq $H3,$R0,$M3
+ vpmuludq $H3,$R1,$M4
+ vpmuludq $H1,$R0,$M1
+ vpmuludq $H1,$R1,$M2
+ vpaddq $M3,$D3,$D3 # d3 += h3*r0
+ vpaddq $M4,$D4,$D4 # d4 += h3*r1
+ vpaddq $M1,$D1,$D1 # d1 += h1*r0
+ vpaddq $M2,$D2,$D2 # d2 += h1*r1
+
+ vpmuludq $H4,$S4,$M3
+ vpmuludq $H4,$R0,$M4
+ vpmuludq $H3,$S2,$M0
+ vpmuludq $H3,$S3,$M1
+ vpaddq $M3,$D3,$D3 # d3 += h4*s4
+ vpmuludq $H3,$S4,$M2
+ vpaddq $M4,$D4,$D4 # d4 += h4*r0
+ vpaddq $M0,$D0,$D0 # d0 += h3*s2
+ vpaddq $M1,$D1,$D1 # d1 += h3*s3
+ vpaddq $M2,$D2,$D2 # d2 += h3*s4
+
+ vpmuludq $H4,$S1,$M0
+ vpmuludq $H4,$S2,$M1
+ vpmuludq $H4,$S3,$M2
+ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1
+ vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2
+ vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3
+
+ ################################################################
+ # lazy reduction (interleaved with input splat)
+
+ vpsrlq \$52,$T0,$T2 # splat input
+ vpsllq \$12,$T4,$T3
+
+ vpsrlq \$26,$D3,$H3
+ vpandq $MASK,$D3,$D3
+ vpaddq $H3,$D4,$H4 # h3 -> h4
+
+ vporq $T3,$T2,$T2
+
+ vpsrlq \$26,$H0,$D0
+ vpandq $MASK,$H0,$H0
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpandq $MASK,$T2,$T2 # 2
+
+ vpsrlq \$26,$H4,$D4
+ vpandq $MASK,$H4,$H4
+
+ vpsrlq \$26,$H1,$D1
+ vpandq $MASK,$H1,$H1
+ vpaddq $D1,$H2,$H2 # h1 -> h2
+
+ vpaddq $D4,$H0,$H0
+ vpsllq \$2,$D4,$D4
+ vpaddq $D4,$H0,$H0 # h4 -> h0
+
+ vpaddq $T2,$H2,$H2 # modulo-scheduled
+ vpsrlq \$26,$T0,$T1
+
+ vpsrlq \$26,$H2,$D2
+ vpandq $MASK,$H2,$H2
+ vpaddq $D2,$D3,$H3 # h2 -> h3
+
+ vpsrlq \$14,$T4,$T3
+
+ vpsrlq \$26,$H0,$D0
+ vpandq $MASK,$H0,$H0
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$40,$T4,$T4 # 4
+
+ vpsrlq \$26,$H3,$D3
+ vpandq $MASK,$H3,$H3
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpandq $MASK,$T0,$T0 # 0
+ #vpandq $MASK,$T1,$T1 # 1
+ #vpandq $MASK,$T3,$T3 # 3
+ #vporq $PADBIT,$T4,$T4 # padbit, yes, always
+
+ sub \$128,$len
+ ja .Loop_avx512
+
+.Ltail_avx512:
+ ################################################################
+ # while above multiplications were by r^8 in all lanes, in last
+ # iteration we multiply least significant lane by r^8 and most
+ # significant one by r, that's why table gets shifted...
+
+ vpsrlq \$32,$R0,$R0 # 0105020603070408
+ vpsrlq \$32,$R1,$R1
+ vpsrlq \$32,$R2,$R2
+ vpsrlq \$32,$S3,$S3
+ vpsrlq \$32,$S4,$S4
+ vpsrlq \$32,$R3,$R3
+ vpsrlq \$32,$R4,$R4
+ vpsrlq \$32,$S1,$S1
+ vpsrlq \$32,$S2,$S2
+
+ ################################################################
+ # load either next or last 64 byte of input
+ lea ($inp,$len),$inp
+
+ #vpaddq $H2,$T2,$H2 # accumulate input
+ vpaddq $H0,$T0,$H0
+
+ vpmuludq $H2,$R1,$D3 # d3 = h2*r1
+ vpmuludq $H2,$R2,$D4 # d4 = h2*r2
+ vpmuludq $H2,$S3,$D0 # d0 = h2*s3
+ vpandq $MASK,$T1,$T1 # 1
+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
+ vpandq $MASK,$T3,$T3 # 3
+ vpmuludq $H2,$R0,$D2 # d2 = h2*r0
+ vporq $PADBIT,$T4,$T4 # padbit, yes, always
+ vpaddq $H1,$T1,$H1 # accumulate input
+ vpaddq $H3,$T3,$H3
+ vpaddq $H4,$T4,$H4
+
+ vmovdqu 16*0($inp),%x#$T0
+ vpmuludq $H0,$R3,$M3
+ vpmuludq $H0,$R4,$M4
+ vpmuludq $H0,$R0,$M0
+ vpmuludq $H0,$R1,$M1
+ vpaddq $M3,$D3,$D3 # d3 += h0*r3
+ vpaddq $M4,$D4,$D4 # d4 += h0*r4
+ vpaddq $M0,$D0,$D0 # d0 += h0*r0
+ vpaddq $M1,$D1,$D1 # d1 += h0*r1
+
+ vmovdqu 16*1($inp),%x#$T1
+ vpmuludq $H1,$R2,$M3
+ vpmuludq $H1,$R3,$M4
+ vpmuludq $H1,$S4,$M0
+ vpmuludq $H0,$R2,$M2
+ vpaddq $M3,$D3,$D3 # d3 += h1*r2
+ vpaddq $M4,$D4,$D4 # d4 += h1*r3
+ vpaddq $M0,$D0,$D0 # d0 += h1*s4
+ vpaddq $M2,$D2,$D2 # d2 += h0*r2
+
+ vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
+ vpmuludq $H3,$R0,$M3
+ vpmuludq $H3,$R1,$M4
+ vpmuludq $H1,$R0,$M1
+ vpmuludq $H1,$R1,$M2
+ vpaddq $M3,$D3,$D3 # d3 += h3*r0
+ vpaddq $M4,$D4,$D4 # d4 += h3*r1
+ vpaddq $M1,$D1,$D1 # d1 += h1*r0
+ vpaddq $M2,$D2,$D2 # d2 += h1*r1
+
+ vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1
+ vpmuludq $H4,$S4,$M3
+ vpmuludq $H4,$R0,$M4
+ vpmuludq $H3,$S2,$M0
+ vpmuludq $H3,$S3,$M1
+ vpmuludq $H3,$S4,$M2
+ vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4
+ vpaddq $M4,$D4,$D4 # d4 += h4*r0
+ vpaddq $M0,$D0,$D0 # d0 += h3*s2
+ vpaddq $M1,$D1,$D1 # d1 += h3*s3
+ vpaddq $M2,$D2,$D2 # d2 += h3*s4
+
+ vpmuludq $H4,$S1,$M0
+ vpmuludq $H4,$S2,$M1
+ vpmuludq $H4,$S3,$M2
+ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1
+ vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2
+ vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3
+
+ ################################################################
+ # horizontal addition
+
+ mov \$1,%eax
+ vpermq \$0xb1,$H3,$D3
+ vpermq \$0xb1,$D4,$H4
+ vpermq \$0xb1,$H0,$D0
+ vpermq \$0xb1,$H1,$D1
+ vpermq \$0xb1,$H2,$D2
+ vpaddq $D3,$H3,$H3
+ vpaddq $D4,$H4,$H4
+ vpaddq $D0,$H0,$H0
+ vpaddq $D1,$H1,$H1
+ vpaddq $D2,$H2,$H2
+
+ kmovw %eax,%k3
+ vpermq \$0x2,$H3,$D3
+ vpermq \$0x2,$H4,$D4
+ vpermq \$0x2,$H0,$D0
+ vpermq \$0x2,$H1,$D1
+ vpermq \$0x2,$H2,$D2
+ vpaddq $D3,$H3,$H3
+ vpaddq $D4,$H4,$H4
+ vpaddq $D0,$H0,$H0
+ vpaddq $D1,$H1,$H1
+ vpaddq $D2,$H2,$H2
+
+ vextracti64x4 \$0x1,$H3,%y#$D3
+ vextracti64x4 \$0x1,$H4,%y#$D4
+ vextracti64x4 \$0x1,$H0,%y#$D0
+ vextracti64x4 \$0x1,$H1,%y#$D1
+ vextracti64x4 \$0x1,$H2,%y#$D2
+ vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case
+ vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2
+ vpaddq $D0,$H0,${H0}{%k3}{z}
+ vpaddq $D1,$H1,${H1}{%k3}{z}
+ vpaddq $D2,$H2,${H2}{%k3}{z}
+___
+map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
+map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
+$code.=<<___;
+ ################################################################
+ # lazy reduction (interleaved with input splat)
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpsrldq \$6,$T0,$T2 # splat input
+ vpsrldq \$6,$T1,$T3
+ vpunpckhqdq $T1,$T0,$T4 # 4
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpunpcklqdq $T3,$T2,$T2 # 2:3
+ vpunpcklqdq $T1,$T0,$T0 # 0:1
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H4,$D4
+ vpand $MASK,$H4,$H4
+
+ vpsrlq \$26,$H1,$D1
+ vpand $MASK,$H1,$H1
+ vpsrlq \$30,$T2,$T3
+ vpsrlq \$4,$T2,$T2
+ vpaddq $D1,$H2,$H2 # h1 -> h2
+
+ vpaddq $D4,$H0,$H0
+ vpsllq \$2,$D4,$D4
+ vpsrlq \$26,$T0,$T1
+ vpsrlq \$40,$T4,$T4 # 4
+ vpaddq $D4,$H0,$H0 # h4 -> h0
+
+ vpsrlq \$26,$H2,$D2
+ vpand $MASK,$H2,$H2
+ vpand $MASK,$T2,$T2 # 2
+ vpand $MASK,$T0,$T0 # 0
+ vpaddq $D2,$H3,$H3 # h2 -> h3
+
+ vpsrlq \$26,$H0,$D0
+ vpand $MASK,$H0,$H0
+ vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2
+ vpand $MASK,$T1,$T1 # 1
+ vpaddq $D0,$H1,$H1 # h0 -> h1
+
+ vpsrlq \$26,$H3,$D3
+ vpand $MASK,$H3,$H3
+ vpand $MASK,$T3,$T3 # 3
+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
+ vpaddq $D3,$H4,$H4 # h3 -> h4
+
+ lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2
+ add \$64,$len
+ jnz .Ltail_avx2
+
+ vpsubq $T2,$H2,$H2 # undo input accumulation
+ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
+ vmovd %x#$H1,`4*1-48-64`($ctx)
+ vmovd %x#$H2,`4*2-48-64`($ctx)
+ vmovd %x#$H3,`4*3-48-64`($ctx)
+ vmovd %x#$H4,`4*4-48-64`($ctx)
+ vzeroall
+___
+$code.=<<___ if ($win64);
+ movdqa 0x50(%r11),%xmm6
+ movdqa 0x60(%r11),%xmm7
+ movdqa 0x70(%r11),%xmm8
+ movdqa 0x80(%r11),%xmm9
+ movdqa 0x90(%r11),%xmm10
+ movdqa 0xa0(%r11),%xmm11
+ movdqa 0xb0(%r11),%xmm12
+ movdqa 0xc0(%r11),%xmm13
+ movdqa 0xd0(%r11),%xmm14
+ movdqa 0xe0(%r11),%xmm15
+ lea 0xf8(%r11),%rsp
+.Ldo_avx512_epilogue:
+___
+$code.=<<___ if (!$win64);
+ lea 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
+___
+$code.=<<___;
+ ret
+.cfi_endproc
+.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
+___
+if ($avx>3) {
+########################################################################
+# VPMADD52 version using 2^44 radix.
+#
+# One can argue that base 2^52 would be more natural. Well, even though
+# some operations would be more natural, one has to recognize couple of
+# things. Base 2^52 doesn't provide advantage over base 2^44 if you look
+# at amount of multiply-n-accumulate operations. Secondly, it makes it
+# impossible to pre-compute multiples of 5 [referred to as s[]/sN in
+# reference implementations], which means that more such operations
+# would have to be performed in inner loop, which in turn makes critical
+# path longer. In other words, even though base 2^44 reduction might
+# look less elegant, overall critical path is actually shorter...
+
+########################################################################
+# Layout of opaque area is following.
+#
+# unsigned __int64 h[3]; # current hash value base 2^44
+# unsigned __int64 s[2]; # key value*20 base 2^44
+# unsigned __int64 r[3]; # key value base 2^44
+# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4];
+# # r^n positions reflect
+# # placement in register, not
+# # memory, R[3] is R[1]*20
+
+$code.=<<___;
+.type poly1305_init_base2_44,\@function,3
+.align 32
+poly1305_init_base2_44:
+ xor %rax,%rax
+ mov %rax,0($ctx) # initialize hash value
+ mov %rax,8($ctx)
+ mov %rax,16($ctx)
+
+.Linit_base2_44:
+ lea poly1305_blocks_vpmadd52(%rip),%r10
+ lea poly1305_emit_base2_44(%rip),%r11
+
+ mov \$0x0ffffffc0fffffff,%rax
+ mov \$0x0ffffffc0ffffffc,%rcx
+ and 0($inp),%rax
+ mov \$0x00000fffffffffff,%r8
+ and 8($inp),%rcx
+ mov \$0x00000fffffffffff,%r9
+ and %rax,%r8
+ shrd \$44,%rcx,%rax
+ mov %r8,40($ctx) # r0
+ and %r9,%rax
+ shr \$24,%rcx
+ mov %rax,48($ctx) # r1
+ lea (%rax,%rax,4),%rax # *5
+ mov %rcx,56($ctx) # r2
+ shl \$2,%rax # magic <<2
+ lea (%rcx,%rcx,4),%rcx # *5
+ shl \$2,%rcx # magic <<2
+ mov %rax,24($ctx) # s1
+ mov %rcx,32($ctx) # s2
+ movq \$-1,64($ctx) # write impossible value
+___
+$code.=<<___ if ($flavour !~ /elf32/);
+ mov %r10,0(%rdx)
+ mov %r11,8(%rdx)
+___
+$code.=<<___ if ($flavour =~ /elf32/);
+ mov %r10d,0(%rdx)
+ mov %r11d,4(%rdx)
+___
+$code.=<<___;
+ mov \$1,%eax
+ ret
+.size poly1305_init_base2_44,.-poly1305_init_base2_44
+___
+{
+my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
+my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
+my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25));
+
+$code.=<<___;
+.type poly1305_blocks_vpmadd52,\@function,4
+.align 32
+poly1305_blocks_vpmadd52:
+ shr \$4,$len
+ jz .Lno_data_vpmadd52 # too short
+
+ shl \$40,$padbit
+ mov 64($ctx),%r8 # peek on power of the key
+
+ # if powers of the key are not calculated yet, process up to 3
+ # blocks with this single-block subroutine, otherwise ensure that
+ # length is divisible by 2 blocks and pass the rest down to next
+ # subroutine...
+
+ mov \$3,%rax
+ mov \$1,%r10
+ cmp \$4,$len # is input long
+ cmovae %r10,%rax
+ test %r8,%r8 # is power value impossible?
+ cmovns %r10,%rax
+
+ and $len,%rax # is input of favourable length?
+ jz .Lblocks_vpmadd52_4x
+
+ sub %rax,$len
+ mov \$7,%r10d
+ mov \$1,%r11d
+ kmovw %r10d,%k7
+ lea .L2_44_inp_permd(%rip),%r10
+ kmovw %r11d,%k1
+
+ vmovq $padbit,%x#$PAD
+ vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd
+ vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift
+ vpermq \$0xcf,$PAD,$PAD
+ vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask
+
+ vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value
+ vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys
+ vmovdqu64 32($ctx),${r1r0s2}{%k7}{z}
+ vmovdqu64 24($ctx),${r0s2s1}{%k7}{z}
+
+ vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt
+ vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft
+
+ jmp .Loop_vpmadd52
+
+.align 32
+.Loop_vpmadd52:
+ vmovdqu32 0($inp),%x#$T0 # load input as ----3210
+ lea 16($inp),$inp
+
+ vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
+ vpsrlvq $inp_shift,$T0,$T0
+ vpandq $reduc_mask,$T0,$T0
+ vporq $PAD,$T0,$T0
+
+ vpaddq $T0,$Dlo,$Dlo # accumulate input
+
+ vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value
+ vpermq \$0b01010101,$Dlo,${H1}{%k7}{z}
+ vpermq \$0b10101010,$Dlo,${H2}{%k7}{z}
+
+ vpxord $Dlo,$Dlo,$Dlo
+ vpxord $Dhi,$Dhi,$Dhi
+
+ vpmadd52luq $r2r1r0,$H0,$Dlo
+ vpmadd52huq $r2r1r0,$H0,$Dhi
+
+ vpmadd52luq $r1r0s2,$H1,$Dlo
+ vpmadd52huq $r1r0s2,$H1,$Dhi
+
+ vpmadd52luq $r0s2s1,$H2,$Dlo
+ vpmadd52huq $r0s2s1,$H2,$Dhi
+
+ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
+ vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword
+ vpandq $reduc_mask,$Dlo,$Dlo
+
+ vpaddq $T0,$Dhi,$Dhi
+
+ vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword
+
+ vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-)
+
+ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word
+ vpandq $reduc_mask,$Dlo,$Dlo
+
+ vpermq \$0b10010011,$T0,$T0
+
+ vpaddq $T0,$Dlo,$Dlo
+
+ vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
+
+ vpaddq $T0,$Dlo,$Dlo
+ vpsllq \$2,$T0,$T0
+
+ vpaddq $T0,$Dlo,$Dlo
+
+ dec %rax # len-=16
+ jnz .Loop_vpmadd52
+
+ vmovdqu64 $Dlo,0($ctx){%k7} # store hash value
+
+ test $len,$len
+ jnz .Lblocks_vpmadd52_4x
+
+.Lno_data_vpmadd52:
+ ret
+.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+___
+}
+{
+########################################################################
+# As implied by its name 4x subroutine processes 4 blocks in parallel
+# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power
+# and is handled in 256-bit %ymm registers.
+
+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
+
+$code.=<<___;
+.type poly1305_blocks_vpmadd52_4x,\@function,4
+.align 32
+poly1305_blocks_vpmadd52_4x:
+ shr \$4,$len
+ jz .Lno_data_vpmadd52_4x # too short
+
+ shl \$40,$padbit
+ mov 64($ctx),%r8 # peek on power of the key
+
+.Lblocks_vpmadd52_4x:
+ vpbroadcastq $padbit,$PAD
+
+ vmovdqa64 .Lx_mask44(%rip),$mask44
+ mov \$5,%eax
+ vmovdqa64 .Lx_mask42(%rip),$mask42
+ kmovw %eax,%k1 # used in 2x path
+
+ test %r8,%r8 # is power value impossible?
+ js .Linit_vpmadd52 # if it is, then init R[4]
+
+ vmovq 0($ctx),%x#$H0 # load current hash value
+ vmovq 8($ctx),%x#$H1
+ vmovq 16($ctx),%x#$H2
+
+ test \$3,$len # is length 4*n+2?
+ jnz .Lblocks_vpmadd52_2x_do
+
+.Lblocks_vpmadd52_4x_do:
+ vpbroadcastq 64($ctx),$R0 # load 4th power of the key
+ vpbroadcastq 96($ctx),$R1
+ vpbroadcastq 128($ctx),$R2
+ vpbroadcastq 160($ctx),$S1
+
+.Lblocks_vpmadd52_4x_key_loaded:
+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
+ vpaddq $R2,$S2,$S2
+ vpsllq \$2,$S2,$S2
+
+ test \$7,$len # is len 8*n?
+ jz .Lblocks_vpmadd52_8x
+
+ vmovdqu64 16*0($inp),$T2 # load data
+ vmovdqu64 16*2($inp),$T3
+ lea 16*4($inp),$inp
+
+ vpunpcklqdq $T3,$T2,$T1 # transpose data
+ vpunpckhqdq $T3,$T2,$T3
+
+ # at this point 64-bit lanes are ordered as 3-1-2-0
+
+ vpsrlq \$24,$T3,$T2 # splat the data
+ vporq $PAD,$T2,$T2
+ vpaddq $T2,$H2,$H2 # accumulate input
+ vpandq $mask44,$T1,$T0
+ vpsrlq \$44,$T1,$T1
+ vpsllq \$20,$T3,$T3
+ vporq $T3,$T1,$T1
+ vpandq $mask44,$T1,$T1
+
+ sub \$4,$len
+ jz .Ltail_vpmadd52_4x
+ jmp .Loop_vpmadd52_4x
+ ud2
+
+.align 32
+.Linit_vpmadd52:
+ vmovq 24($ctx),%x#$S1 # load key
+ vmovq 56($ctx),%x#$H2
+ vmovq 32($ctx),%x#$S2
+ vmovq 40($ctx),%x#$R0
+ vmovq 48($ctx),%x#$R1
+
+ vmovdqa $R0,$H0
+ vmovdqa $R1,$H1
+ vmovdqa $H2,$R2
+
+ mov \$2,%eax
+
+.Lmul_init_vpmadd52:
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $H2,$S1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $H2,$S1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $H2,$S2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $H2,$S2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $H2,$R0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $H2,$R0,$D2hi
+
+ vpmadd52luq $H0,$R0,$D0lo
+ vpmadd52huq $H0,$R0,$D0hi
+ vpmadd52luq $H0,$R1,$D1lo
+ vpmadd52huq $H0,$R1,$D1hi
+ vpmadd52luq $H0,$R2,$D2lo
+ vpmadd52huq $H0,$R2,$D2hi
+
+ vpmadd52luq $H1,$S2,$D0lo
+ vpmadd52huq $H1,$S2,$D0hi
+ vpmadd52luq $H1,$R0,$D1lo
+ vpmadd52huq $H1,$R0,$D1hi
+ vpmadd52luq $H1,$R1,$D2lo
+ vpmadd52huq $H1,$R1,$D2hi
+
+ ################################################################
+ # partial reduction
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$H0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$H1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$H2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+
+ vpsrlq \$44,$H0,$tmp # additional step
+ vpandq $mask44,$H0,$H0
+
+ vpaddq $tmp,$H1,$H1
+
+ dec %eax
+ jz .Ldone_init_vpmadd52
+
+ vpunpcklqdq $R1,$H1,$R1 # 1,2
+ vpbroadcastq %x#$H1,%x#$H1 # 2,2
+ vpunpcklqdq $R2,$H2,$R2
+ vpbroadcastq %x#$H2,%x#$H2
+ vpunpcklqdq $R0,$H0,$R0
+ vpbroadcastq %x#$H0,%x#$H0
+
+ vpsllq \$2,$R1,$S1 # S1 = R1*5*4
+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
+ vpaddq $R1,$S1,$S1
+ vpaddq $R2,$S2,$S2
+ vpsllq \$2,$S1,$S1
+ vpsllq \$2,$S2,$S2
+
+ jmp .Lmul_init_vpmadd52
+ ud2
+
+.align 32
+.Ldone_init_vpmadd52:
+ vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4
+ vinserti128 \$1,%x#$R2,$H2,$R2
+ vinserti128 \$1,%x#$R0,$H0,$R0
+
+ vpermq \$0b11011000,$R1,$R1 # 1,3,2,4
+ vpermq \$0b11011000,$R2,$R2
+ vpermq \$0b11011000,$R0,$R0
+
+ vpsllq \$2,$R1,$S1 # S1 = R1*5*4
+ vpaddq $R1,$S1,$S1
+ vpsllq \$2,$S1,$S1
+
+ vmovq 0($ctx),%x#$H0 # load current hash value
+ vmovq 8($ctx),%x#$H1
+ vmovq 16($ctx),%x#$H2
+
+ test \$3,$len # is length 4*n+2?
+ jnz .Ldone_init_vpmadd52_2x
+
+ vmovdqu64 $R0,64($ctx) # save key powers
+ vpbroadcastq %x#$R0,$R0 # broadcast 4th power
+ vmovdqu64 $R1,96($ctx)
+ vpbroadcastq %x#$R1,$R1
+ vmovdqu64 $R2,128($ctx)
+ vpbroadcastq %x#$R2,$R2
+ vmovdqu64 $S1,160($ctx)
+ vpbroadcastq %x#$S1,$S1
+
+ jmp .Lblocks_vpmadd52_4x_key_loaded
+ ud2
+
+.align 32
+.Ldone_init_vpmadd52_2x:
+ vmovdqu64 $R0,64($ctx) # save key powers
+ vpsrldq \$8,$R0,$R0 # 0-1-0-2
+ vmovdqu64 $R1,96($ctx)
+ vpsrldq \$8,$R1,$R1
+ vmovdqu64 $R2,128($ctx)
+ vpsrldq \$8,$R2,$R2
+ vmovdqu64 $S1,160($ctx)
+ vpsrldq \$8,$S1,$S1
+ jmp .Lblocks_vpmadd52_2x_key_loaded
+ ud2
+
+.align 32
+.Lblocks_vpmadd52_2x_do:
+ vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers
+ vmovdqu64 160+8($ctx),${S1}{%k1}{z}
+ vmovdqu64 64+8($ctx),${R0}{%k1}{z}
+ vmovdqu64 96+8($ctx),${R1}{%k1}{z}
+
+.Lblocks_vpmadd52_2x_key_loaded:
+ vmovdqu64 16*0($inp),$T2 # load data
+ vpxorq $T3,$T3,$T3
+ lea 16*2($inp),$inp
+
+ vpunpcklqdq $T3,$T2,$T1 # transpose data
+ vpunpckhqdq $T3,$T2,$T3
+
+ # at this point 64-bit lanes are ordered as x-1-x-0
+
+ vpsrlq \$24,$T3,$T2 # splat the data
+ vporq $PAD,$T2,$T2
+ vpaddq $T2,$H2,$H2 # accumulate input
+ vpandq $mask44,$T1,$T0
+ vpsrlq \$44,$T1,$T1
+ vpsllq \$20,$T3,$T3
+ vporq $T3,$T1,$T1
+ vpandq $mask44,$T1,$T1
+
+ jmp .Ltail_vpmadd52_2x
+ ud2
+
+.align 32
+.Loop_vpmadd52_4x:
+ #vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $T0,$H0,$H0
+ vpaddq $T1,$H1,$H1
+
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $H2,$S1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $H2,$S1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $H2,$S2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $H2,$S2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $H2,$R0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $H2,$R0,$D2hi
+
+ vmovdqu64 16*0($inp),$T2 # load data
+ vmovdqu64 16*2($inp),$T3
+ lea 16*4($inp),$inp
+ vpmadd52luq $H0,$R0,$D0lo
+ vpmadd52huq $H0,$R0,$D0hi
+ vpmadd52luq $H0,$R1,$D1lo
+ vpmadd52huq $H0,$R1,$D1hi
+ vpmadd52luq $H0,$R2,$D2lo
+ vpmadd52huq $H0,$R2,$D2hi
+
+ vpunpcklqdq $T3,$T2,$T1 # transpose data
+ vpunpckhqdq $T3,$T2,$T3
+ vpmadd52luq $H1,$S2,$D0lo
+ vpmadd52huq $H1,$S2,$D0hi
+ vpmadd52luq $H1,$R0,$D1lo
+ vpmadd52huq $H1,$R0,$D1hi
+ vpmadd52luq $H1,$R1,$D2lo
+ vpmadd52huq $H1,$R1,$D2hi
+
+ ################################################################
+ # partial reduction (interleaved with data splat)
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$H0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpsrlq \$24,$T3,$T2
+ vporq $PAD,$T2,$T2
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$H1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpandq $mask44,$T1,$T0
+ vpsrlq \$44,$T1,$T1
+ vpsllq \$20,$T3,$T3
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$H2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $D2hi,$H0,$H0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+ vporq $T3,$T1,$T1
+ vpandq $mask44,$T1,$T1
+
+ vpsrlq \$44,$H0,$tmp # additional step
+ vpandq $mask44,$H0,$H0
+
+ vpaddq $tmp,$H1,$H1
+
+ sub \$4,$len # len-=64
+ jnz .Loop_vpmadd52_4x
+
+.Ltail_vpmadd52_4x:
+ vmovdqu64 128($ctx),$R2 # load all key powers
+ vmovdqu64 160($ctx),$S1
+ vmovdqu64 64($ctx),$R0
+ vmovdqu64 96($ctx),$R1
+
+.Ltail_vpmadd52_2x:
+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
+ vpaddq $R2,$S2,$S2
+ vpsllq \$2,$S2,$S2
+
+ #vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $T0,$H0,$H0
+ vpaddq $T1,$H1,$H1
+
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $H2,$S1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $H2,$S1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $H2,$S2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $H2,$S2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $H2,$R0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $H2,$R0,$D2hi
+
+ vpmadd52luq $H0,$R0,$D0lo
+ vpmadd52huq $H0,$R0,$D0hi
+ vpmadd52luq $H0,$R1,$D1lo
+ vpmadd52huq $H0,$R1,$D1hi
+ vpmadd52luq $H0,$R2,$D2lo
+ vpmadd52huq $H0,$R2,$D2hi
+
+ vpmadd52luq $H1,$S2,$D0lo
+ vpmadd52huq $H1,$S2,$D0hi
+ vpmadd52luq $H1,$R0,$D1lo
+ vpmadd52huq $H1,$R0,$D1hi
+ vpmadd52luq $H1,$R1,$D2lo
+ vpmadd52huq $H1,$R1,$D2hi
+
+ ################################################################
+ # horizontal addition
+
+ mov \$1,%eax
+ kmovw %eax,%k1
+ vpsrldq \$8,$D0lo,$T0
+ vpsrldq \$8,$D0hi,$H0
+ vpsrldq \$8,$D1lo,$T1
+ vpsrldq \$8,$D1hi,$H1
+ vpaddq $T0,$D0lo,$D0lo
+ vpaddq $H0,$D0hi,$D0hi
+ vpsrldq \$8,$D2lo,$T2
+ vpsrldq \$8,$D2hi,$H2
+ vpaddq $T1,$D1lo,$D1lo
+ vpaddq $H1,$D1hi,$D1hi
+ vpermq \$0x2,$D0lo,$T0
+ vpermq \$0x2,$D0hi,$H0
+ vpaddq $T2,$D2lo,$D2lo
+ vpaddq $H2,$D2hi,$D2hi
+
+ vpermq \$0x2,$D1lo,$T1
+ vpermq \$0x2,$D1hi,$H1
+ vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
+ vpaddq $H0,$D0hi,${D0hi}{%k1}{z}
+ vpermq \$0x2,$D2lo,$T2
+ vpermq \$0x2,$D2hi,$H2
+ vpaddq $T1,$D1lo,${D1lo}{%k1}{z}
+ vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
+ vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
+ vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
+
+ ################################################################
+ # partial reduction
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$H0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$H1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$H2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+
+ vpsrlq \$44,$H0,$tmp # additional step
+ vpandq $mask44,$H0,$H0
+
+ vpaddq $tmp,$H1,$H1
+ # at this point $len is
+ # either 4*n+2 or 0...
+ sub \$2,$len # len-=32
+ ja .Lblocks_vpmadd52_4x_do
+
+ vmovq %x#$H0,0($ctx)
+ vmovq %x#$H1,8($ctx)
+ vmovq %x#$H2,16($ctx)
+ vzeroall
+
+.Lno_data_vpmadd52_4x:
+ ret
+.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+___
+}
+{
+########################################################################
+# As implied by its name 8x subroutine processes 8 blocks in parallel...
+# This is intermediate version, as it's used only in cases when input
+# length is either 8*n, 8*n+1 or 8*n+2...
+
+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
+my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10));
+
+$code.=<<___;
+.type poly1305_blocks_vpmadd52_8x,\@function,4
+.align 32
+poly1305_blocks_vpmadd52_8x:
+ shr \$4,$len
+ jz .Lno_data_vpmadd52_8x # too short
+
+ shl \$40,$padbit
+ mov 64($ctx),%r8 # peek on power of the key
+
+ vmovdqa64 .Lx_mask44(%rip),$mask44
+ vmovdqa64 .Lx_mask42(%rip),$mask42
+
+ test %r8,%r8 # is power value impossible?
+ js .Linit_vpmadd52 # if it is, then init R[4]
+
+ vmovq 0($ctx),%x#$H0 # load current hash value
+ vmovq 8($ctx),%x#$H1
+ vmovq 16($ctx),%x#$H2
+
+.Lblocks_vpmadd52_8x:
+ ################################################################
+ # fist we calculate more key powers
+
+ vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers
+ vmovdqu64 160($ctx),$S1
+ vmovdqu64 64($ctx),$R0
+ vmovdqu64 96($ctx),$R1
+
+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
+ vpaddq $R2,$S2,$S2
+ vpsllq \$2,$S2,$S2
+
+ vpbroadcastq %x#$R2,$RR2 # broadcast 4th power
+ vpbroadcastq %x#$R0,$RR0
+ vpbroadcastq %x#$R1,$RR1
+
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $RR2,$S1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $RR2,$S1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $RR2,$S2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $RR2,$S2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $RR2,$R0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $RR2,$R0,$D2hi
+
+ vpmadd52luq $RR0,$R0,$D0lo
+ vpmadd52huq $RR0,$R0,$D0hi
+ vpmadd52luq $RR0,$R1,$D1lo
+ vpmadd52huq $RR0,$R1,$D1hi
+ vpmadd52luq $RR0,$R2,$D2lo
+ vpmadd52huq $RR0,$R2,$D2hi
+
+ vpmadd52luq $RR1,$S2,$D0lo
+ vpmadd52huq $RR1,$S2,$D0hi
+ vpmadd52luq $RR1,$R0,$D1lo
+ vpmadd52huq $RR1,$R0,$D1hi
+ vpmadd52luq $RR1,$R1,$D2lo
+ vpmadd52huq $RR1,$R1,$D2hi
+
+ ################################################################
+ # partial reduction
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$RR0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$RR1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$RR2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $D2hi,$RR0,$RR0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$RR0,$RR0
+
+ vpsrlq \$44,$RR0,$tmp # additional step
+ vpandq $mask44,$RR0,$RR0
+
+ vpaddq $tmp,$RR1,$RR1
+
+ ################################################################
+ # At this point Rx holds 1324 powers, RRx - 5768, and the goal
+ # is 15263748, which reflects how data is loaded...
+
+ vpunpcklqdq $R2,$RR2,$T2 # 3748
+ vpunpckhqdq $R2,$RR2,$R2 # 1526
+ vpunpcklqdq $R0,$RR0,$T0
+ vpunpckhqdq $R0,$RR0,$R0
+ vpunpcklqdq $R1,$RR1,$T1
+ vpunpckhqdq $R1,$RR1,$R1
+___
+######## switch to %zmm
+map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
+map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
+map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
+map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2);
+
+$code.=<<___;
+ vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748
+ vshufi64x2 \$0x44,$R0,$T0,$RR0
+ vshufi64x2 \$0x44,$R1,$T1,$RR1
+
+ vmovdqu64 16*0($inp),$T2 # load data
+ vmovdqu64 16*4($inp),$T3
+ lea 16*8($inp),$inp
+
+ vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4
+ vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4
+ vpaddq $RR2,$SS2,$SS2
+ vpaddq $RR1,$SS1,$SS1
+ vpsllq \$2,$SS2,$SS2
+ vpsllq \$2,$SS1,$SS1
+
+ vpbroadcastq $padbit,$PAD
+ vpbroadcastq %x#$mask44,$mask44
+ vpbroadcastq %x#$mask42,$mask42
+
+ vpbroadcastq %x#$SS1,$S1 # broadcast 8th power
+ vpbroadcastq %x#$SS2,$S2
+ vpbroadcastq %x#$RR0,$R0
+ vpbroadcastq %x#$RR1,$R1
+ vpbroadcastq %x#$RR2,$R2
+
+ vpunpcklqdq $T3,$T2,$T1 # transpose data
+ vpunpckhqdq $T3,$T2,$T3
+
+ # at this point 64-bit lanes are ordered as 73625140
+
+ vpsrlq \$24,$T3,$T2 # splat the data
+ vporq $PAD,$T2,$T2
+ vpaddq $T2,$H2,$H2 # accumulate input
+ vpandq $mask44,$T1,$T0
+ vpsrlq \$44,$T1,$T1
+ vpsllq \$20,$T3,$T3
+ vporq $T3,$T1,$T1
+ vpandq $mask44,$T1,$T1
+
+ sub \$8,$len
+ jz .Ltail_vpmadd52_8x
+ jmp .Loop_vpmadd52_8x
+
+.align 32
+.Loop_vpmadd52_8x:
+ #vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $T0,$H0,$H0
+ vpaddq $T1,$H1,$H1
+
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $H2,$S1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $H2,$S1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $H2,$S2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $H2,$S2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $H2,$R0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $H2,$R0,$D2hi
+
+ vmovdqu64 16*0($inp),$T2 # load data
+ vmovdqu64 16*4($inp),$T3
+ lea 16*8($inp),$inp
+ vpmadd52luq $H0,$R0,$D0lo
+ vpmadd52huq $H0,$R0,$D0hi
+ vpmadd52luq $H0,$R1,$D1lo
+ vpmadd52huq $H0,$R1,$D1hi
+ vpmadd52luq $H0,$R2,$D2lo
+ vpmadd52huq $H0,$R2,$D2hi
+
+ vpunpcklqdq $T3,$T2,$T1 # transpose data
+ vpunpckhqdq $T3,$T2,$T3
+ vpmadd52luq $H1,$S2,$D0lo
+ vpmadd52huq $H1,$S2,$D0hi
+ vpmadd52luq $H1,$R0,$D1lo
+ vpmadd52huq $H1,$R0,$D1hi
+ vpmadd52luq $H1,$R1,$D2lo
+ vpmadd52huq $H1,$R1,$D2hi
+
+ ################################################################
+ # partial reduction (interleaved with data splat)
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$H0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpsrlq \$24,$T3,$T2
+ vporq $PAD,$T2,$T2
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$H1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpandq $mask44,$T1,$T0
+ vpsrlq \$44,$T1,$T1
+ vpsllq \$20,$T3,$T3
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$H2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $D2hi,$H0,$H0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+ vporq $T3,$T1,$T1
+ vpandq $mask44,$T1,$T1
+
+ vpsrlq \$44,$H0,$tmp # additional step
+ vpandq $mask44,$H0,$H0
+
+ vpaddq $tmp,$H1,$H1
+
+ sub \$8,$len # len-=128
+ jnz .Loop_vpmadd52_8x
+
+.Ltail_vpmadd52_8x:
+ #vpaddq $T2,$H2,$H2 # accumulate input
+ vpaddq $T0,$H0,$H0
+ vpaddq $T1,$H1,$H1
+
+ vpxorq $D0lo,$D0lo,$D0lo
+ vpmadd52luq $H2,$SS1,$D0lo
+ vpxorq $D0hi,$D0hi,$D0hi
+ vpmadd52huq $H2,$SS1,$D0hi
+ vpxorq $D1lo,$D1lo,$D1lo
+ vpmadd52luq $H2,$SS2,$D1lo
+ vpxorq $D1hi,$D1hi,$D1hi
+ vpmadd52huq $H2,$SS2,$D1hi
+ vpxorq $D2lo,$D2lo,$D2lo
+ vpmadd52luq $H2,$RR0,$D2lo
+ vpxorq $D2hi,$D2hi,$D2hi
+ vpmadd52huq $H2,$RR0,$D2hi
+
+ vpmadd52luq $H0,$RR0,$D0lo
+ vpmadd52huq $H0,$RR0,$D0hi
+ vpmadd52luq $H0,$RR1,$D1lo
+ vpmadd52huq $H0,$RR1,$D1hi
+ vpmadd52luq $H0,$RR2,$D2lo
+ vpmadd52huq $H0,$RR2,$D2hi
+
+ vpmadd52luq $H1,$SS2,$D0lo
+ vpmadd52huq $H1,$SS2,$D0hi
+ vpmadd52luq $H1,$RR0,$D1lo
+ vpmadd52huq $H1,$RR0,$D1hi
+ vpmadd52luq $H1,$RR1,$D2lo
+ vpmadd52huq $H1,$RR1,$D2hi
+
+ ################################################################
+ # horizontal addition
+
+ mov \$1,%eax
+ kmovw %eax,%k1
+ vpsrldq \$8,$D0lo,$T0
+ vpsrldq \$8,$D0hi,$H0
+ vpsrldq \$8,$D1lo,$T1
+ vpsrldq \$8,$D1hi,$H1
+ vpaddq $T0,$D0lo,$D0lo
+ vpaddq $H0,$D0hi,$D0hi
+ vpsrldq \$8,$D2lo,$T2
+ vpsrldq \$8,$D2hi,$H2
+ vpaddq $T1,$D1lo,$D1lo
+ vpaddq $H1,$D1hi,$D1hi
+ vpermq \$0x2,$D0lo,$T0
+ vpermq \$0x2,$D0hi,$H0
+ vpaddq $T2,$D2lo,$D2lo
+ vpaddq $H2,$D2hi,$D2hi
+
+ vpermq \$0x2,$D1lo,$T1
+ vpermq \$0x2,$D1hi,$H1
+ vpaddq $T0,$D0lo,$D0lo
+ vpaddq $H0,$D0hi,$D0hi
+ vpermq \$0x2,$D2lo,$T2
+ vpermq \$0x2,$D2hi,$H2
+ vpaddq $T1,$D1lo,$D1lo
+ vpaddq $H1,$D1hi,$D1hi
+ vextracti64x4 \$1,$D0lo,%y#$T0
+ vextracti64x4 \$1,$D0hi,%y#$H0
+ vpaddq $T2,$D2lo,$D2lo
+ vpaddq $H2,$D2hi,$D2hi
+
+ vextracti64x4 \$1,$D1lo,%y#$T1
+ vextracti64x4 \$1,$D1hi,%y#$H1
+ vextracti64x4 \$1,$D2lo,%y#$T2
+ vextracti64x4 \$1,$D2hi,%y#$H2
+___
+######## switch back to %ymm
+map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
+map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
+map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
+
+$code.=<<___;
+ vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
+ vpaddq $H0,$D0hi,${D0hi}{%k1}{z}
+ vpaddq $T1,$D1lo,${D1lo}{%k1}{z}
+ vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
+ vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
+ vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
+
+ ################################################################
+ # partial reduction
+ vpsrlq \$44,$D0lo,$tmp
+ vpsllq \$8,$D0hi,$D0hi
+ vpandq $mask44,$D0lo,$H0
+ vpaddq $tmp,$D0hi,$D0hi
+
+ vpaddq $D0hi,$D1lo,$D1lo
+
+ vpsrlq \$44,$D1lo,$tmp
+ vpsllq \$8,$D1hi,$D1hi
+ vpandq $mask44,$D1lo,$H1
+ vpaddq $tmp,$D1hi,$D1hi
+
+ vpaddq $D1hi,$D2lo,$D2lo
+
+ vpsrlq \$42,$D2lo,$tmp
+ vpsllq \$10,$D2hi,$D2hi
+ vpandq $mask42,$D2lo,$H2
+ vpaddq $tmp,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+ vpsllq \$2,$D2hi,$D2hi
+
+ vpaddq $D2hi,$H0,$H0
+
+ vpsrlq \$44,$H0,$tmp # additional step
+ vpandq $mask44,$H0,$H0
+
+ vpaddq $tmp,$H1,$H1
+
+ ################################################################
+
+ vmovq %x#$H0,0($ctx)
+ vmovq %x#$H1,8($ctx)
+ vmovq %x#$H2,16($ctx)
+ vzeroall
+
+.Lno_data_vpmadd52_8x:
+ ret
+.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+___
+}
+$code.=<<___;
+.type poly1305_emit_base2_44,\@function,3
+.align 32
+poly1305_emit_base2_44:
+ mov 0($ctx),%r8 # load hash value
+ mov 8($ctx),%r9
+ mov 16($ctx),%r10
+
+ mov %r9,%rax
+ shr \$20,%r9
+ shl \$44,%rax
+ mov %r10,%rcx
+ shr \$40,%r10
+ shl \$24,%rcx
+
+ add %rax,%r8
+ adc %rcx,%r9
+ adc \$0,%r10
+
+ mov %r8,%rax
+ add \$5,%r8 # compare to modulus
+ mov %r9,%rcx
+ adc \$0,%r9
+ adc \$0,%r10
+ shr \$2,%r10 # did 130-bit value overflow?
+ cmovnz %r8,%rax
+ cmovnz %r9,%rcx
+
+ add 0($nonce),%rax # accumulate nonce
+ adc 8($nonce),%rcx
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+ ret
+.size poly1305_emit_base2_44,.-poly1305_emit_base2_44
+___
+} } }
+$code.=<<___;
+.align 64
+.Lconst:
+.Lmask24:
+.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
+.L129:
+.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
+.Lmask26:
+.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
+.Lpermd_avx2:
+.long 2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
+
+.L2_44_inp_permd:
+.long 0,1,1,2,2,3,7,7
+.L2_44_inp_shift:
+.quad 0,12,24,64
+.L2_44_mask:
+.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
+.L2_44_shift_rgt:
+.quad 44,44,42,64
+.L2_44_shift_lft:
+.quad 8,8,10,64
+
+.align 64
+.Lx_mask44:
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.Lx_mask42:
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+___
+}
+$code.=<<___;
+.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 16
+___
+
+{ # chacha20-poly1305 helpers
+my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
+$code.=<<___;
+.globl xor128_encrypt_n_pad
+.type xor128_encrypt_n_pad,\@abi-omnipotent
+.align 16
+xor128_encrypt_n_pad:
+ sub $otp,$inp
+ sub $otp,$out
+ mov $len,%r10 # put len aside
+ shr \$4,$len # len / 16
+ jz .Ltail_enc
+ nop
+.Loop_enc_xmm:
+ movdqu ($inp,$otp),%xmm0
+ pxor ($otp),%xmm0
+ movdqu %xmm0,($out,$otp)
+ movdqa %xmm0,($otp)
+ lea 16($otp),$otp
+ dec $len
+ jnz .Loop_enc_xmm
+
+ and \$15,%r10 # len % 16
+ jz .Ldone_enc
+
+.Ltail_enc:
+ mov \$16,$len
+ sub %r10,$len
+ xor %eax,%eax
+.Loop_enc_byte:
+ mov ($inp,$otp),%al
+ xor ($otp),%al
+ mov %al,($out,$otp)
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec %r10
+ jnz .Loop_enc_byte
+
+ xor %eax,%eax
+.Loop_enc_pad:
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec $len
+ jnz .Loop_enc_pad
+
+.Ldone_enc:
+ mov $otp,%rax
+ ret
+.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+.globl xor128_decrypt_n_pad
+.type xor128_decrypt_n_pad,\@abi-omnipotent
+.align 16
+xor128_decrypt_n_pad:
+ sub $otp,$inp
+ sub $otp,$out
+ mov $len,%r10 # put len aside
+ shr \$4,$len # len / 16
+ jz .Ltail_dec
+ nop
+.Loop_dec_xmm:
+ movdqu ($inp,$otp),%xmm0
+ movdqa ($otp),%xmm1
+ pxor %xmm0,%xmm1
+ movdqu %xmm1,($out,$otp)
+ movdqa %xmm0,($otp)
+ lea 16($otp),$otp
+ dec $len
+ jnz .Loop_dec_xmm
+
+ pxor %xmm1,%xmm1
+ and \$15,%r10 # len % 16
+ jz .Ldone_dec
+
+.Ltail_dec:
+ mov \$16,$len
+ sub %r10,$len
+ xor %eax,%eax
+ xor %r11,%r11
+.Loop_dec_byte:
+ mov ($inp,$otp),%r11b
+ mov ($otp),%al
+ xor %r11b,%al
+ mov %al,($out,$otp)
+ mov %r11b,($otp)
+ lea 1($otp),$otp
+ dec %r10
+ jnz .Loop_dec_byte
+
+ xor %eax,%eax
+.Loop_dec_pad:
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec $len
+ jnz .Loop_dec_pad
+
+.Ldone_dec:
+ mov $otp,%rax
+ ret
+.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+___
+}
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+.type se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData
+
+ mov 0(%r11),%r10d # HandlerData[0]
+ lea (%rsi,%r10),%r10 # prologue label
+ cmp %r10,%rbx # context->Rip<.Lprologue
+ jb .Lcommon_seh_tail
+
+ mov 152($context),%rax # pull context->Rsp
+
+ mov 4(%r11),%r10d # HandlerData[1]
+ lea (%rsi,%r10),%r10 # epilogue label
+ cmp %r10,%rbx # context->Rip>=.Lepilogue
+ jae .Lcommon_seh_tail
+
+ lea 48(%rax),%rax
+
+ mov -8(%rax),%rbx
+ mov -16(%rax),%rbp
+ mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
+ mov %rbx,144($context) # restore context->Rbx
+ mov %rbp,160($context) # restore context->Rbp
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R14
+
+ jmp .Lcommon_seh_tail
+.size se_handler,.-se_handler
+
+.type avx_handler,\@abi-omnipotent
+.align 16
+avx_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData
+
+ mov 0(%r11),%r10d # HandlerData[0]
+ lea (%rsi,%r10),%r10 # prologue label
+ cmp %r10,%rbx # context->Rip<prologue label
+ jb .Lcommon_seh_tail
+
+ mov 152($context),%rax # pull context->Rsp
+
+ mov 4(%r11),%r10d # HandlerData[1]
+ lea (%rsi,%r10),%r10 # epilogue label
+ cmp %r10,%rbx # context->Rip>=epilogue label
+ jae .Lcommon_seh_tail
+
+ mov 208($context),%rax # pull context->R11
+
+ lea 0x50(%rax),%rsi
+ lea 0xf8(%rax),%rax
+ lea 512($context),%rdi # &context.Xmm6
+ mov \$20,%ecx
+ .long 0xa548f3fc # cld; rep movsq
+
+.Lcommon_seh_tail:
+ mov 8(%rax),%rdi
+ mov 16(%rax),%rsi
+ mov %rax,152($context) # restore context->Rsp
+ mov %rsi,168($context) # restore context->Rsi
+ mov %rdi,176($context) # restore context->Rdi
+
+ mov 40($disp),%rdi # disp->ContextRecord
+ mov $context,%rsi # context
+ mov \$154,%ecx # sizeof(CONTEXT)
+ .long 0xa548f3fc # cld; rep movsq
+
+ mov $disp,%rsi
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
+ mov 40(%rsi),%r10 # disp->ContextRecord
+ lea 56(%rsi),%r11 # &disp->HandlerData
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
+ mov %r10,32(%rsp) # arg5
+ mov %r11,40(%rsp) # arg6
+ mov %r12,48(%rsp) # arg7
+ mov %rcx,56(%rsp) # arg8, (NULL)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ mov \$1,%eax # ExceptionContinueSearch
+ add \$64,%rsp
+ popfq
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ pop %rdi
+ pop %rsi
+ ret
+.size avx_handler,.-avx_handler
+
+.section .pdata
+.align 4
+ .rva .LSEH_begin_poly1305_init
+ .rva .LSEH_end_poly1305_init
+ .rva .LSEH_info_poly1305_init
+
+ .rva .LSEH_begin_poly1305_blocks
+ .rva .LSEH_end_poly1305_blocks
+ .rva .LSEH_info_poly1305_blocks
+
+ .rva .LSEH_begin_poly1305_emit
+ .rva .LSEH_end_poly1305_emit
+ .rva .LSEH_info_poly1305_emit
+___
+$code.=<<___ if ($avx);
+ .rva .LSEH_begin_poly1305_blocks_avx
+ .rva .Lbase2_64_avx
+ .rva .LSEH_info_poly1305_blocks_avx_1
+
+ .rva .Lbase2_64_avx
+ .rva .Leven_avx
+ .rva .LSEH_info_poly1305_blocks_avx_2
+
+ .rva .Leven_avx
+ .rva .LSEH_end_poly1305_blocks_avx
+ .rva .LSEH_info_poly1305_blocks_avx_3
+
+ .rva .LSEH_begin_poly1305_emit_avx
+ .rva .LSEH_end_poly1305_emit_avx
+ .rva .LSEH_info_poly1305_emit_avx
+___
+$code.=<<___ if ($avx>1);
+ .rva .LSEH_begin_poly1305_blocks_avx2
+ .rva .Lbase2_64_avx2
+ .rva .LSEH_info_poly1305_blocks_avx2_1
+
+ .rva .Lbase2_64_avx2
+ .rva .Leven_avx2
+ .rva .LSEH_info_poly1305_blocks_avx2_2
+
+ .rva .Leven_avx2
+ .rva .LSEH_end_poly1305_blocks_avx2
+ .rva .LSEH_info_poly1305_blocks_avx2_3
+___
+$code.=<<___ if ($avx>2);
+ .rva .LSEH_begin_poly1305_blocks_avx512
+ .rva .LSEH_end_poly1305_blocks_avx512
+ .rva .LSEH_info_poly1305_blocks_avx512
+___
+$code.=<<___;
+.section .xdata
+.align 8
+.LSEH_info_poly1305_init:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init
+
+.LSEH_info_poly1305_blocks:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lblocks_body,.Lblocks_epilogue
+
+.LSEH_info_poly1305_emit:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit
+___
+$code.=<<___ if ($avx);
+.LSEH_info_poly1305_blocks_avx_1:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx_2:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx_3:
+ .byte 9,0,0,0
+ .rva avx_handler
+ .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[]
+
+.LSEH_info_poly1305_emit_avx:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_poly1305_blocks_avx2_1:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx2_2:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx2_3:
+ .byte 9,0,0,0
+ .rva avx_handler
+ .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[]
+___
+$code.=<<___ if ($avx>2);
+.LSEH_info_poly1305_blocks_avx512:
+ .byte 9,0,0,0
+ .rva avx_handler
+ .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[]
+___
+}
+
+foreach (split('\n',$code)) {
+ s/\`([^\`]*)\`/eval($1)/ge;
+ s/%r([a-z]+)#d/%e$1/g;
+ s/%r([0-9]+)#d/%r$1d/g;
+ s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;
+
+ print $_,"\n";
+}
+close STDOUT;
--
2.18.2
From cc9daa3df108633221ac63d297bb8cd45057d3eb Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 5 Jan 2020 22:40:48 -0500
Subject: [PATCH 043/100] crypto: x86/poly1305 - wire up faster implementations
for kernel
commit d7d7b853566254648df59f7ea27ea05952a6cfa8 upstream.
These x86_64 vectorized implementations support AVX, AVX-2, and AVX512F.
The AVX-512F implementation is disabled on Skylake, due to throttling,
but it is quite fast on >= Cannonlake.
On the left is cycle counts on a Core i7 6700HQ using the AVX-2
codepath, comparing this implementation ("new") to the implementation in
the current crypto api ("old"). On the right are benchmarks on a Xeon
Gold 5120 using the AVX-512 codepath. The new implementation is faster
on all benchmarks.
AVX-2 AVX-512
--------- -----------
size old new size old new
---- ---- ---- ---- ---- ----
0 70 68 0 74 70
16 92 90 16 96 92
32 134 104 32 136 106
48 172 120 48 184 124
64 218 136 64 218 138
80 254 158 80 260 160
96 298 174 96 300 176
112 342 192 112 342 194
128 388 212 128 384 212
144 428 228 144 420 226
160 466 246 160 464 248
176 510 264 176 504 264
192 550 282 192 544 282
208 594 302 208 582 300
224 628 316 224 624 318
240 676 334 240 662 338
256 716 354 256 708 358
272 764 374 272 748 372
288 802 352 288 788 358
304 420 366 304 422 370
320 428 360 320 432 364
336 484 378 336 486 380
352 426 384 352 434 390
368 478 400 368 480 408
384 488 394 384 490 398
400 542 408 400 542 412
416 486 416 416 492 426
432 534 430 432 538 436
448 544 422 448 546 432
464 600 438 464 600 448
480 540 448 480 548 456
496 594 464 496 594 476
512 602 456 512 606 470
528 656 476 528 656 480
544 600 480 544 606 498
560 650 494 560 652 512
576 664 490 576 662 508
592 714 508 592 716 522
608 656 514 608 664 538
624 708 532 624 710 552
640 716 524 640 720 516
656 770 536 656 772 526
672 716 548 672 722 544
688 770 562 688 768 556
704 774 552 704 778 556
720 826 568 720 832 568
736 768 574 736 780 584
752 822 592 752 826 600
768 830 584 768 836 560
784 884 602 784 888 572
800 828 610 800 838 588
816 884 628 816 884 604
832 888 618 832 894 598
848 942 632 848 946 612
864 884 644 864 896 628
880 936 660 880 942 644
896 948 652 896 952 608
912 1000 664 912 1004 616
928 942 676 928 954 634
944 994 690 944 1000 646
960 1002 680 960 1008 646
976 1054 694 976 1062 658
992 1002 706 992 1012 674
1008 1052 720 1008 1058 690
This commit wires in the prior implementation from Andy, and makes the
following changes to be suitable for kernel land.
- Some cosmetic and structural changes, like renaming labels to
.Lname, constants, and other Linux conventions, as well as making
the code easy for us to maintain moving forward.
- CPU feature checking is done in C by the glue code.
- We avoid jumping into the middle of functions, to appease objtool,
and instead parameterize shared code.
- We maintain frame pointers so that stack traces make sense.
- We remove the dependency on the perl xlate code, which transforms
the output into things that assemblers we don't care about use.
Importantly, none of our changes affect the arithmetic or core code, but
just involve the differing environment of kernel space.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/.gitignore | 1 +
arch/x86/crypto/Makefile | 11 +-
arch/x86/crypto/poly1305-avx2-x86_64.S | 390 ----------
arch/x86/crypto/poly1305-sse2-x86_64.S | 590 ---------------
arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 682 ++++++++++--------
arch/x86/crypto/poly1305_glue.c | 473 +++++-------
lib/crypto/Kconfig | 2 +-
7 files changed, 572 insertions(+), 1577 deletions(-)
create mode 100644 arch/x86/crypto/.gitignore
delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S
delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S
diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore
new file mode 100644
index 000000000000..c406ea6571fa
--- /dev/null
+++ b/arch/x86/crypto/.gitignore
@@ -0,0 +1 @@
+poly1305-x86_64.S
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 958440eae27e..b69e00bf20b8 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -73,6 +73,10 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
+ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),)
+targets += poly1305-x86_64-cryptogams.S
+endif
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
@@ -101,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
-poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
ifeq ($(avx2_supported),yes)
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
-poly1305-x86_64-y += poly1305-avx2-x86_64.o
endif
ifeq ($(sha1_ni_supported),yes)
sha1-ssse3-y += sha1_ni_asm.o
@@ -118,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o
endif
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $< > $@
+$(obj)/%.S: $(src)/%.pl FORCE
+ $(call if_changed,perlasm)
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
deleted file mode 100644
index 1688fb551070..000000000000
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ /dev/null
@@ -1,390 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst32.ANMASK, "aM", @progbits, 32
-.align 32
-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
- .octa 0x0000000003ffffff0000000003ffffff
-
-.section .rodata.cst32.ORMASK, "aM", @progbits, 32
-.align 32
-ORMASK: .octa 0x00000000010000000000000001000000
- .octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define w0 0x18(%r8)
-#define w1 0x1c(%r8)
-#define w2 0x20(%r8)
-#define w3 0x24(%r8)
-#define w4 0x28(%r8)
-#define y0 0x30(%r8)
-#define y1 0x34(%r8)
-#define y2 0x38(%r8)
-#define y3 0x3c(%r8)
-#define y4 0x40(%r8)
-#define m %rsi
-#define hc0 %ymm0
-#define hc1 %ymm1
-#define hc2 %ymm2
-#define hc3 %ymm3
-#define hc4 %ymm4
-#define hc0x %xmm0
-#define hc1x %xmm1
-#define hc2x %xmm2
-#define hc3x %xmm3
-#define hc4x %xmm4
-#define t1 %ymm5
-#define t2 %ymm6
-#define t1x %xmm5
-#define t2x %xmm6
-#define ruwy0 %ymm7
-#define ruwy1 %ymm8
-#define ruwy2 %ymm9
-#define ruwy3 %ymm10
-#define ruwy4 %ymm11
-#define ruwy0x %xmm7
-#define ruwy1x %xmm8
-#define ruwy2x %xmm9
-#define ruwy3x %xmm10
-#define ruwy4x %xmm11
-#define svxz1 %ymm12
-#define svxz2 %ymm13
-#define svxz3 %ymm14
-#define svxz4 %ymm15
-#define d0 %r9
-#define d1 %r10
-#define d2 %r11
-#define d3 %r12
-#define d4 %r13
-
-ENTRY(poly1305_4block_avx2)
- # %rdi: Accumulator h[5]
- # %rsi: 64 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Quadblock count
- # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
-
- # This four-block variant uses loop unrolled block processing. It
- # requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
- # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
-
- vzeroupper
- push %rbx
- push %r12
- push %r13
-
- # combine r0,u0,w0,y0
- vmovd y0,ruwy0x
- vmovd w0,t1x
- vpunpcklqdq t1,ruwy0,ruwy0
- vmovd u0,t1x
- vmovd r0,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy0,ruwy0
-
- # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
- vmovd y1,ruwy1x
- vmovd w1,t1x
- vpunpcklqdq t1,ruwy1,ruwy1
- vmovd u1,t1x
- vmovd r1,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy1,ruwy1
- vpslld $2,ruwy1,svxz1
- vpaddd ruwy1,svxz1,svxz1
-
- # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
- vmovd y2,ruwy2x
- vmovd w2,t1x
- vpunpcklqdq t1,ruwy2,ruwy2
- vmovd u2,t1x
- vmovd r2,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy2,ruwy2
- vpslld $2,ruwy2,svxz2
- vpaddd ruwy2,svxz2,svxz2
-
- # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
- vmovd y3,ruwy3x
- vmovd w3,t1x
- vpunpcklqdq t1,ruwy3,ruwy3
- vmovd u3,t1x
- vmovd r3,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy3,ruwy3
- vpslld $2,ruwy3,svxz3
- vpaddd ruwy3,svxz3,svxz3
-
- # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
- vmovd y4,ruwy4x
- vmovd w4,t1x
- vpunpcklqdq t1,ruwy4,ruwy4
- vmovd u4,t1x
- vmovd r4,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy4,ruwy4
- vpslld $2,ruwy4,svxz4
- vpaddd ruwy4,svxz4,svxz4
-
-.Ldoblock4:
- # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
- # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
- vmovd 0x00(m),hc0x
- vmovd 0x10(m),t1x
- vpunpcklqdq t1,hc0,hc0
- vmovd 0x20(m),t1x
- vmovd 0x30(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc0,hc0
- vpand ANMASK(%rip),hc0,hc0
- vmovd h0,t1x
- vpaddd t1,hc0,hc0
- # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
- # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
- vmovd 0x03(m),hc1x
- vmovd 0x13(m),t1x
- vpunpcklqdq t1,hc1,hc1
- vmovd 0x23(m),t1x
- vmovd 0x33(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc1,hc1
- vpsrld $2,hc1,hc1
- vpand ANMASK(%rip),hc1,hc1
- vmovd h1,t1x
- vpaddd t1,hc1,hc1
- # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
- # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
- vmovd 0x06(m),hc2x
- vmovd 0x16(m),t1x
- vpunpcklqdq t1,hc2,hc2
- vmovd 0x26(m),t1x
- vmovd 0x36(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc2,hc2
- vpsrld $4,hc2,hc2
- vpand ANMASK(%rip),hc2,hc2
- vmovd h2,t1x
- vpaddd t1,hc2,hc2
- # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
- # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
- vmovd 0x09(m),hc3x
- vmovd 0x19(m),t1x
- vpunpcklqdq t1,hc3,hc3
- vmovd 0x29(m),t1x
- vmovd 0x39(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc3,hc3
- vpsrld $6,hc3,hc3
- vpand ANMASK(%rip),hc3,hc3
- vmovd h3,t1x
- vpaddd t1,hc3,hc3
- # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
- # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
- vmovd 0x0c(m),hc4x
- vmovd 0x1c(m),t1x
- vpunpcklqdq t1,hc4,hc4
- vmovd 0x2c(m),t1x
- vmovd 0x3c(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc4,hc4
- vpsrld $8,hc4,hc4
- vpor ORMASK(%rip),hc4,hc4
- vmovd h4,t1x
- vpaddd t1,hc4,hc4
-
- # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
- vpmuludq hc0,ruwy0,t1
- # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
- vpmuludq hc1,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
- vpmuludq hc2,svxz3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
- vpmuludq hc3,svxz2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
- vpmuludq hc4,svxz1,t2
- vpaddq t2,t1,t1
- # d0 = t1[0] + t1[1] + t[2] + t[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d0
-
- # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
- vpmuludq hc0,ruwy1,t1
- # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
- vpmuludq hc1,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
- vpmuludq hc2,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
- vpmuludq hc3,svxz3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
- vpmuludq hc4,svxz2,t2
- vpaddq t2,t1,t1
- # d1 = t1[0] + t1[1] + t1[3] + t1[4]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d1
-
- # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
- vpmuludq hc0,ruwy2,t1
- # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
- vpmuludq hc1,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
- vpmuludq hc2,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
- vpmuludq hc3,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
- vpmuludq hc4,svxz3,t2
- vpaddq t2,t1,t1
- # d2 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d2
-
- # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
- vpmuludq hc0,ruwy3,t1
- # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
- vpmuludq hc1,ruwy2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
- vpmuludq hc2,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
- vpmuludq hc3,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
- vpmuludq hc4,svxz4,t2
- vpaddq t2,t1,t1
- # d3 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d3
-
- # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
- vpmuludq hc0,ruwy4,t1
- # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
- vpmuludq hc1,ruwy3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
- vpmuludq hc2,ruwy2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
- vpmuludq hc3,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
- vpmuludq hc4,ruwy0,t2
- vpaddq t2,t1,t1
- # d4 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d4
-
- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
- # amount. Careful: we must not assume the carry bits 'd0 >> 26',
- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
- # integers. It's true in a single-block implementation, but not here.
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%rax,%rax,4),%rax
- add %rax,%rbx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %rbx,%rax
- shr $26,%rax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x40,m
- dec %rcx
- jnz .Ldoblock4
-
- vzeroupper
- pop %r13
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
deleted file mode 100644
index 5578f846e622..000000000000
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ /dev/null
@@ -1,590 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
- *
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst16.ANMASK, "aM", @progbits, 16
-.align 16
-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
-
-.section .rodata.cst16.ORMASK, "aM", @progbits, 16
-.align 16
-ORMASK: .octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define s1 0x00(%rsp)
-#define s2 0x04(%rsp)
-#define s3 0x08(%rsp)
-#define s4 0x0c(%rsp)
-#define m %rsi
-#define h01 %xmm0
-#define h23 %xmm1
-#define h44 %xmm2
-#define t1 %xmm3
-#define t2 %xmm4
-#define t3 %xmm5
-#define t4 %xmm6
-#define mask %xmm7
-#define d0 %r8
-#define d1 %r9
-#define d2 %r10
-#define d3 %r11
-#define d4 %r12
-
-ENTRY(poly1305_block_sse2)
- # %rdi: Accumulator h[5]
- # %rsi: 16 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Block count
-
- # This single block variant tries to improve performance by doing two
- # multiplications in parallel using SSE instructions. There is quite
- # some quardword packing involved, hence the speedup is marginal.
-
- push %rbx
- push %r12
- sub $0x10,%rsp
-
- # s1..s4 = r1..r4 * 5
- mov r1,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s1
- mov r2,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s2
- mov r3,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s3
- mov r4,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s4
-
- movdqa ANMASK(%rip),mask
-
-.Ldoblock:
- # h01 = [0, h1, 0, h0]
- # h23 = [0, h3, 0, h2]
- # h44 = [0, h4, 0, h4]
- movd h0,h01
- movd h1,t1
- movd h2,h23
- movd h3,t2
- movd h4,h44
- punpcklqdq t1,h01
- punpcklqdq t2,h23
- punpcklqdq h44,h44
-
- # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
- movd 0x00(m),t1
- movd 0x03(m),t2
- psrld $2,t2
- punpcklqdq t2,t1
- pand mask,t1
- paddd t1,h01
- # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
- movd 0x06(m),t1
- movd 0x09(m),t2
- psrld $4,t1
- psrld $6,t2
- punpcklqdq t2,t1
- pand mask,t1
- paddd t1,h23
- # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
- mov 0x0c(m),%eax
- shr $8,%eax
- or $0x01000000,%eax
- movd %eax,t1
- pshufd $0xc4,t1,t1
- paddd t1,h44
-
- # t1[0] = h0 * r0 + h2 * s3
- # t1[1] = h1 * s4 + h3 * s2
- movd r0,t1
- movd s4,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd s3,t2
- movd s2,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t2[0] = h0 * r1 + h2 * s4
- # t2[1] = h1 * r0 + h3 * s3
- movd r1,t2
- movd r0,t3
- punpcklqdq t3,t2
- pmuludq h01,t2
- movd s4,t3
- movd s3,t4
- punpcklqdq t4,t3
- pmuludq h23,t3
- paddq t3,t2
- # t3[0] = h4 * s1
- # t3[1] = h4 * s2
- movd s1,t3
- movd s2,t4
- punpcklqdq t4,t3
- pmuludq h44,t3
- # d0 = t1[0] + t1[1] + t3[0]
- # d1 = t2[0] + t2[1] + t3[1]
- movdqa t1,t4
- punpcklqdq t2,t4
- punpckhqdq t2,t1
- paddq t4,t1
- paddq t3,t1
- movq t1,d0
- psrldq $8,t1
- movq t1,d1
-
- # t1[0] = h0 * r2 + h2 * r0
- # t1[1] = h1 * r1 + h3 * s4
- movd r2,t1
- movd r1,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd r0,t2
- movd s4,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t2[0] = h0 * r3 + h2 * r1
- # t2[1] = h1 * r2 + h3 * r0
- movd r3,t2
- movd r2,t3
- punpcklqdq t3,t2
- pmuludq h01,t2
- movd r1,t3
- movd r0,t4
- punpcklqdq t4,t3
- pmuludq h23,t3
- paddq t3,t2
- # t3[0] = h4 * s3
- # t3[1] = h4 * s4
- movd s3,t3
- movd s4,t4
- punpcklqdq t4,t3
- pmuludq h44,t3
- # d2 = t1[0] + t1[1] + t3[0]
- # d3 = t2[0] + t2[1] + t3[1]
- movdqa t1,t4
- punpcklqdq t2,t4
- punpckhqdq t2,t1
- paddq t4,t1
- paddq t3,t1
- movq t1,d2
- psrldq $8,t1
- movq t1,d3
-
- # t1[0] = h0 * r4 + h2 * r2
- # t1[1] = h1 * r3 + h3 * r1
- movd r4,t1
- movd r3,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd r2,t2
- movd r1,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t3[0] = h4 * r0
- movd r0,t3
- pmuludq h44,t3
- # d4 = t1[0] + t1[1] + t3[0]
- movdqa t1,t4
- psrldq $8,t4
- paddq t4,t1
- paddq t3,t1
- movq t1,d4
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%rax,%rax,4),%rax
- add %rax,%rbx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %rbx,%rax
- shr $26,%rax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x10,m
- dec %rcx
- jnz .Ldoblock
-
- # Zeroing of key material
- mov %rcx,0x00(%rsp)
- mov %rcx,0x08(%rsp)
-
- add $0x10,%rsp
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_block_sse2)
-
-
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define hc0 %xmm0
-#define hc1 %xmm1
-#define hc2 %xmm2
-#define hc3 %xmm5
-#define hc4 %xmm6
-#define ru0 %xmm7
-#define ru1 %xmm8
-#define ru2 %xmm9
-#define ru3 %xmm10
-#define ru4 %xmm11
-#define sv1 %xmm12
-#define sv2 %xmm13
-#define sv3 %xmm14
-#define sv4 %xmm15
-#undef d0
-#define d0 %r13
-
-ENTRY(poly1305_2block_sse2)
- # %rdi: Accumulator h[5]
- # %rsi: 16 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Doubleblock count
- # %r8: Poly1305 derived key r^2 u[5]
-
- # This two-block variant further improves performance by using loop
- # unrolled block processing. This is more straight forward and does
- # less byte shuffling, but requires a second Poly1305 key r^2:
- # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r
-
- push %rbx
- push %r12
- push %r13
-
- # combine r0,u0
- movd u0,ru0
- movd r0,t1
- punpcklqdq t1,ru0
-
- # combine r1,u1 and s1=r1*5,v1=u1*5
- movd u1,ru1
- movd r1,t1
- punpcklqdq t1,ru1
- movdqa ru1,sv1
- pslld $2,sv1
- paddd ru1,sv1
-
- # combine r2,u2 and s2=r2*5,v2=u2*5
- movd u2,ru2
- movd r2,t1
- punpcklqdq t1,ru2
- movdqa ru2,sv2
- pslld $2,sv2
- paddd ru2,sv2
-
- # combine r3,u3 and s3=r3*5,v3=u3*5
- movd u3,ru3
- movd r3,t1
- punpcklqdq t1,ru3
- movdqa ru3,sv3
- pslld $2,sv3
- paddd ru3,sv3
-
- # combine r4,u4 and s4=r4*5,v4=u4*5
- movd u4,ru4
- movd r4,t1
- punpcklqdq t1,ru4
- movdqa ru4,sv4
- pslld $2,sv4
- paddd ru4,sv4
-
-.Ldoblock2:
- # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
- movd 0x00(m),hc0
- movd 0x10(m),t1
- punpcklqdq t1,hc0
- pand ANMASK(%rip),hc0
- movd h0,t1
- paddd t1,hc0
- # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
- movd 0x03(m),hc1
- movd 0x13(m),t1
- punpcklqdq t1,hc1
- psrld $2,hc1
- pand ANMASK(%rip),hc1
- movd h1,t1
- paddd t1,hc1
- # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
- movd 0x06(m),hc2
- movd 0x16(m),t1
- punpcklqdq t1,hc2
- psrld $4,hc2
- pand ANMASK(%rip),hc2
- movd h2,t1
- paddd t1,hc2
- # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
- movd 0x09(m),hc3
- movd 0x19(m),t1
- punpcklqdq t1,hc3
- psrld $6,hc3
- pand ANMASK(%rip),hc3
- movd h3,t1
- paddd t1,hc3
- # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
- movd 0x0c(m),hc4
- movd 0x1c(m),t1
- punpcklqdq t1,hc4
- psrld $8,hc4
- por ORMASK(%rip),hc4
- movd h4,t1
- paddd t1,hc4
-
- # t1 = [ hc0[1] * r0, hc0[0] * u0 ]
- movdqa ru0,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * s4, hc1[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * s3, hc2[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s2, hc3[0] * v2 ]
- movdqa sv2,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s1, hc4[0] * v1 ]
- movdqa sv1,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d0 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d0
-
- # t1 = [ hc0[1] * r1, hc0[0] * u1 ]
- movdqa ru1,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r0, hc1[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * s4, hc2[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s3, hc3[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s2, hc4[0] * v2 ]
- movdqa sv2,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d1 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d1
-
- # t1 = [ hc0[1] * r2, hc0[0] * u2 ]
- movdqa ru2,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r1, hc1[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r0, hc2[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s4, hc3[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s3, hc4[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d2 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d2
-
- # t1 = [ hc0[1] * r3, hc0[0] * u3 ]
- movdqa ru3,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r2, hc1[0] * u2 ]
- movdqa ru2,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r1, hc2[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * r0, hc3[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s4, hc4[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d3 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d3
-
- # t1 = [ hc0[1] * r4, hc0[0] * u4 ]
- movdqa ru4,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r3, hc1[0] * u3 ]
- movdqa ru3,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r2, hc2[0] * u2 ]
- movdqa ru2,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * r1, hc3[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * r0, hc4[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d4 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d4
-
- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
- # amount. Careful: we must not assume the carry bits 'd0 >> 26',
- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
- # integers. It's true in a single-block implementation, but not here.
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%rax,%rax,4),%rax
- add %rax,%rbx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %rbx,%rax
- shr $26,%rax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x20,m
- dec %rcx
- jnz .Ldoblock2
-
- pop %r13
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 342ad7f18aa7..80061bea6b16 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -1,11 +1,14 @@
-#! /usr/bin/env perl
-# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
+# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+#
+# This code is taken from the OpenSSL project but the author, Andy Polyakov,
+# has relicensed it under the licenses specified in the SPDX header above.
+# The original headers, including the original license headers, are
+# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -32,7 +35,7 @@
# Skylake-X system performance. Since we are likely to suppress
# AVX512F capability flag [at least on Skylake-X], conversion serves
# as kind of "investment protection". Note that next *lake processor,
-# Cannolake, has AVX512IFMA code path to execute...
+# Cannonlake, has AVX512IFMA code path to execute...
#
# Numbers are cycles per processed byte with poly1305_blocks alone,
# measured with rdtsc at fixed clock frequency.
@@ -68,39 +71,114 @@ $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
- =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26);
+$kernel=0; $kernel=1 if (!$flavour && !$output);
+
+if (!$kernel) {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+ die "can't locate x86_64-xlate.pl";
+
+ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+ *STDOUT=*OUT;
+
+ if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25);
+ }
+
+ if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
+ $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12);
+ $avx += 1 if ($1==2.11 && $2>=8);
+ }
+
+ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+ `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+ $avx = ($1>=10) + ($1>=11);
+ }
+
+ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
+ $avx = ($2>=3.0) + ($2>3.0);
+ }
+} else {
+ $avx = 4; # The kernel uses ifdefs for this.
}
-if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
- $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12);
- $avx += 2 if ($1==2.11 && $2>=8);
+sub declare_function() {
+ my ($name, $align, $nargs) = @_;
+ if($kernel) {
+ $code .= ".align $align\n";
+ $code .= "ENTRY($name)\n";
+ $code .= ".L$name:\n";
+ } else {
+ $code .= ".globl $name\n";
+ $code .= ".type $name,\@function,$nargs\n";
+ $code .= ".align $align\n";
+ $code .= "$name:\n";
+ }
}
-if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
- `ml64 2>&1` =~ /Version ([0-9]+)\./) {
- $avx = ($1>=10) + ($1>=12);
+sub end_function() {
+ my ($name) = @_;
+ if($kernel) {
+ $code .= "ENDPROC($name)\n";
+ } else {
+ $code .= ".size $name,.-$name\n";
+ }
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
- $avx = ($2>=3.0) + ($2>3.0);
-}
+$code.=<<___ if $kernel;
+#include <linux/linkage.h>
+___
+
+if ($avx) {
+$code.=<<___ if $kernel;
+.section .rodata
+___
+$code.=<<___;
+.align 64
+.Lconst:
+.Lmask24:
+.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
+.L129:
+.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
+.Lmask26:
+.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
+.Lpermd_avx2:
+.long 2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
+
+.L2_44_inp_permd:
+.long 0,1,1,2,2,3,7,7
+.L2_44_inp_shift:
+.quad 0,12,24,64
+.L2_44_mask:
+.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
+.L2_44_shift_rgt:
+.quad 44,44,42,64
+.L2_44_shift_lft:
+.quad 8,8,10,64
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT=*OUT;
+.align 64
+.Lx_mask44:
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.Lx_mask42:
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+___
+}
+$code.=<<___ if (!$kernel);
+.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 16
+___
my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
my ($mac,$nonce)=($inp,$len); # *_emit arguments
-my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13));
-my ($h0,$h1,$h2)=("%r14","%rbx","%rbp");
+my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13");
+my ($h0,$h1,$h2)=("%r14","%rbx","%r10");
sub poly1305_iteration {
# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1
@@ -155,19 +233,19 @@ ___
$code.=<<___;
.text
-
+___
+$code.=<<___ if (!$kernel);
.extern OPENSSL_ia32cap_P
-.globl poly1305_init
-.hidden poly1305_init
-.globl poly1305_blocks
-.hidden poly1305_blocks
-.globl poly1305_emit
-.hidden poly1305_emit
-
-.type poly1305_init,\@function,3
-.align 32
-poly1305_init:
+.globl poly1305_init_x86_64
+.hidden poly1305_init_x86_64
+.globl poly1305_blocks_x86_64
+.hidden poly1305_blocks_x86_64
+.globl poly1305_emit_x86_64
+.hidden poly1305_emit_x86_64
+___
+&declare_function("poly1305_init_x86_64", 32, 3);
+$code.=<<___;
xor %rax,%rax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
@@ -175,11 +253,12 @@ poly1305_init:
cmp \$0,$inp
je .Lno_key
-
- lea poly1305_blocks(%rip),%r10
- lea poly1305_emit(%rip),%r11
___
-$code.=<<___ if ($avx);
+$code.=<<___ if (!$kernel);
+ lea poly1305_blocks_x86_64(%rip),%r10
+ lea poly1305_emit_x86_64(%rip),%r11
+___
+$code.=<<___ if (!$kernel && $avx);
mov OPENSSL_ia32cap_P+4(%rip),%r9
lea poly1305_blocks_avx(%rip),%rax
lea poly1305_emit_avx(%rip),%rcx
@@ -187,12 +266,12 @@ $code.=<<___ if ($avx);
cmovc %rax,%r10
cmovc %rcx,%r11
___
-$code.=<<___ if ($avx>1);
+$code.=<<___ if (!$kernel && $avx>1);
lea poly1305_blocks_avx2(%rip),%rax
bt \$`5+32`,%r9 # AVX2?
cmovc %rax,%r10
___
-$code.=<<___ if ($avx>3);
+$code.=<<___ if (!$kernel && $avx>3);
mov \$`(1<<31|1<<21|1<<16)`,%rax
shr \$32,%r9
and %rax,%r9
@@ -207,11 +286,11 @@ $code.=<<___;
mov %rax,24($ctx)
mov %rcx,32($ctx)
___
-$code.=<<___ if ($flavour !~ /elf32/);
+$code.=<<___ if (!$kernel && $flavour !~ /elf32/);
mov %r10,0(%rdx)
mov %r11,8(%rdx)
___
-$code.=<<___ if ($flavour =~ /elf32/);
+$code.=<<___ if (!$kernel && $flavour =~ /elf32/);
mov %r10d,0(%rdx)
mov %r11d,4(%rdx)
___
@@ -219,11 +298,11 @@ $code.=<<___;
mov \$1,%eax
.Lno_key:
ret
-.size poly1305_init,.-poly1305_init
+___
+&end_function("poly1305_init_x86_64");
-.type poly1305_blocks,\@function,4
-.align 32
-poly1305_blocks:
+&declare_function("poly1305_blocks_x86_64", 32, 4);
+$code.=<<___;
.cfi_startproc
.Lblocks:
shr \$4,$len
@@ -231,8 +310,6 @@ poly1305_blocks:
push %rbx
.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
@@ -241,6 +318,8 @@ poly1305_blocks:
.cfi_push %r14
push %r15
.cfi_push %r15
+ push $ctx
+.cfi_push $ctx
.Lblocks_body:
mov $len,%r15 # reassign $len
@@ -265,26 +344,29 @@ poly1305_blocks:
lea 16($inp),$inp
adc $padbit,$h2
___
+
&poly1305_iteration();
+
$code.=<<___;
mov $r1,%rax
dec %r15 # len-=16
jnz .Loop
+ mov 0(%rsp),$ctx
+.cfi_restore $ctx
+
mov $h0,0($ctx) # store hash value
mov $h1,8($ctx)
mov $h2,16($ctx)
- mov 0(%rsp),%r15
+ mov 8(%rsp),%r15
.cfi_restore %r15
- mov 8(%rsp),%r14
+ mov 16(%rsp),%r14
.cfi_restore %r14
- mov 16(%rsp),%r13
+ mov 24(%rsp),%r13
.cfi_restore %r13
- mov 24(%rsp),%r12
+ mov 32(%rsp),%r12
.cfi_restore %r12
- mov 32(%rsp),%rbp
-.cfi_restore %rbp
mov 40(%rsp),%rbx
.cfi_restore %rbx
lea 48(%rsp),%rsp
@@ -293,11 +375,11 @@ $code.=<<___;
.Lblocks_epilogue:
ret
.cfi_endproc
-.size poly1305_blocks,.-poly1305_blocks
+___
+&end_function("poly1305_blocks_x86_64");
-.type poly1305_emit,\@function,3
-.align 32
-poly1305_emit:
+&declare_function("poly1305_emit_x86_64", 32, 3);
+$code.=<<___;
.Lemit:
mov 0($ctx),%r8 # load hash value
mov 8($ctx),%r9
@@ -318,10 +400,14 @@ poly1305_emit:
mov %rcx,8($mac)
ret
-.size poly1305_emit,.-poly1305_emit
___
+&end_function("poly1305_emit_x86_64");
if ($avx) {
+if($kernel) {
+ $code .= "#ifdef CONFIG_AS_AVX\n";
+}
+
########################################################################
# Layout of opaque area is following.
#
@@ -342,15 +428,19 @@ $code.=<<___;
.type __poly1305_block,\@abi-omnipotent
.align 32
__poly1305_block:
+ push $ctx
___
&poly1305_iteration();
$code.=<<___;
+ pop $ctx
ret
.size __poly1305_block,.-__poly1305_block
.type __poly1305_init_avx,\@abi-omnipotent
.align 32
__poly1305_init_avx:
+ push %rbp
+ mov %rsp,%rbp
mov $r0,$h0
mov $r1,$h1
xor $h2,$h2
@@ -507,12 +597,13 @@ __poly1305_init_avx:
mov $d1#d,`16*8+8-64`($ctx)
lea -48-64($ctx),$ctx # size [de-]optimization
+ pop %rbp
ret
.size __poly1305_init_avx,.-__poly1305_init_avx
+___
-.type poly1305_blocks_avx,\@function,4
-.align 32
-poly1305_blocks_avx:
+&declare_function("poly1305_blocks_avx", 32, 4);
+$code.=<<___;
.cfi_startproc
mov 20($ctx),%r8d # is_base2_26
cmp \$128,$len
@@ -532,10 +623,11 @@ poly1305_blocks_avx:
test \$31,$len
jz .Leven_avx
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+ mov %rsp,%rbp
+ push %rbx
+.cfi_push %rbx
push %r12
.cfi_push %r12
push %r13
@@ -645,20 +737,18 @@ poly1305_blocks_avx:
mov $h2#d,16($ctx)
.align 16
.Ldone_avx:
- mov 0(%rsp),%r15
+ pop %r15
.cfi_restore %r15
- mov 8(%rsp),%r14
+ pop %r14
.cfi_restore %r14
- mov 16(%rsp),%r13
+ pop %r13
.cfi_restore %r13
- mov 24(%rsp),%r12
+ pop %r12
.cfi_restore %r12
- mov 32(%rsp),%rbp
-.cfi_restore %rbp
- mov 40(%rsp),%rbx
+ pop %rbx
.cfi_restore %rbx
- lea 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
+ pop %rbp
+.cfi_restore %rbp
.Lno_data_avx:
.Lblocks_avx_epilogue:
ret
@@ -667,10 +757,11 @@ poly1305_blocks_avx:
.align 32
.Lbase2_64_avx:
.cfi_startproc
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+ mov %rsp,%rbp
+ push %rbx
+.cfi_push %rbx
push %r12
.cfi_push %r12
push %r13
@@ -736,22 +827,18 @@ poly1305_blocks_avx:
.Lproceed_avx:
mov %r15,$len
-
- mov 0(%rsp),%r15
+ pop %r15
.cfi_restore %r15
- mov 8(%rsp),%r14
+ pop %r14
.cfi_restore %r14
- mov 16(%rsp),%r13
+ pop %r13
.cfi_restore %r13
- mov 24(%rsp),%r12
+ pop %r12
.cfi_restore %r12
- mov 32(%rsp),%rbp
-.cfi_restore %rbp
- mov 40(%rsp),%rbx
+ pop %rbx
.cfi_restore %rbx
- lea 48(%rsp),%rax
- lea 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
+ pop %rbp
+.cfi_restore %rbp
.Lbase2_64_avx_epilogue:
jmp .Ldo_avx
.cfi_endproc
@@ -768,8 +855,11 @@ poly1305_blocks_avx:
.Ldo_avx:
___
$code.=<<___ if (!$win64);
+ lea 8(%rsp),%r10
+.cfi_def_cfa_register %r10
+ and \$-32,%rsp
+ sub \$-8,%rsp
lea -0x58(%rsp),%r11
-.cfi_def_cfa %r11,0x60
sub \$0x178,%rsp
___
$code.=<<___ if ($win64);
@@ -1361,18 +1451,18 @@ $code.=<<___ if ($win64);
.Ldo_avx_epilogue:
___
$code.=<<___ if (!$win64);
- lea 0x58(%r11),%rsp
-.cfi_def_cfa %rsp,8
+ lea -8(%r10),%rsp
+.cfi_def_cfa_register %rsp
___
$code.=<<___;
vzeroupper
ret
.cfi_endproc
-.size poly1305_blocks_avx,.-poly1305_blocks_avx
+___
+&end_function("poly1305_blocks_avx");
-.type poly1305_emit_avx,\@function,3
-.align 32
-poly1305_emit_avx:
+&declare_function("poly1305_emit_avx", 32, 3);
+$code.=<<___;
cmpl \$0,20($ctx) # is_base2_26?
je .Lemit
@@ -1423,41 +1513,51 @@ poly1305_emit_avx:
mov %rcx,8($mac)
ret
-.size poly1305_emit_avx,.-poly1305_emit_avx
___
+&end_function("poly1305_emit_avx");
+
+if ($kernel) {
+ $code .= "#endif\n";
+}
if ($avx>1) {
+
+if ($kernel) {
+ $code .= "#ifdef CONFIG_AS_AVX2\n";
+}
+
my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
map("%ymm$_",(0..15));
my $S4=$MASK;
+sub poly1305_blocks_avxN {
+ my ($avx512) = @_;
+ my $suffix = $avx512 ? "_avx512" : "";
$code.=<<___;
-.type poly1305_blocks_avx2,\@function,4
-.align 32
-poly1305_blocks_avx2:
.cfi_startproc
mov 20($ctx),%r8d # is_base2_26
cmp \$128,$len
- jae .Lblocks_avx2
+ jae .Lblocks_avx2$suffix
test %r8d,%r8d
jz .Lblocks
-.Lblocks_avx2:
+.Lblocks_avx2$suffix:
and \$-16,$len
- jz .Lno_data_avx2
+ jz .Lno_data_avx2$suffix
vzeroupper
test %r8d,%r8d
- jz .Lbase2_64_avx2
+ jz .Lbase2_64_avx2$suffix
test \$63,$len
- jz .Leven_avx2
+ jz .Leven_avx2$suffix
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+ mov %rsp,%rbp
+ push %rbx
+.cfi_push %rbx
push %r12
.cfi_push %r12
push %r13
@@ -1466,7 +1566,7 @@ poly1305_blocks_avx2:
.cfi_push %r14
push %r15
.cfi_push %r15
-.Lblocks_avx2_body:
+.Lblocks_avx2_body$suffix:
mov $len,%r15 # reassign $len
@@ -1513,7 +1613,7 @@ poly1305_blocks_avx2:
shr \$2,$s1
add $r1,$s1 # s1 = r1 + (r1 >> 2)
-.Lbase2_26_pre_avx2:
+.Lbase2_26_pre_avx2$suffix:
add 0($inp),$h0 # accumulate input
adc 8($inp),$h1
lea 16($inp),$inp
@@ -1524,10 +1624,10 @@ poly1305_blocks_avx2:
mov $r1,%rax
test \$63,%r15
- jnz .Lbase2_26_pre_avx2
+ jnz .Lbase2_26_pre_avx2$suffix
test $padbit,$padbit # if $padbit is zero,
- jz .Lstore_base2_64_avx2 # store hash in base 2^64 format
+ jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format
################################# base 2^64 -> base 2^26
mov $h0,%rax
@@ -1548,57 +1648,56 @@ poly1305_blocks_avx2:
or $r1,$h2 # h[4]
test %r15,%r15
- jz .Lstore_base2_26_avx2
+ jz .Lstore_base2_26_avx2$suffix
vmovd %rax#d,%x#$H0
vmovd %rdx#d,%x#$H1
vmovd $h0#d,%x#$H2
vmovd $h1#d,%x#$H3
vmovd $h2#d,%x#$H4
- jmp .Lproceed_avx2
+ jmp .Lproceed_avx2$suffix
.align 32
-.Lstore_base2_64_avx2:
+.Lstore_base2_64_avx2$suffix:
mov $h0,0($ctx)
mov $h1,8($ctx)
mov $h2,16($ctx) # note that is_base2_26 is zeroed
- jmp .Ldone_avx2
+ jmp .Ldone_avx2$suffix
.align 16
-.Lstore_base2_26_avx2:
+.Lstore_base2_26_avx2$suffix:
mov %rax#d,0($ctx) # store hash value base 2^26
mov %rdx#d,4($ctx)
mov $h0#d,8($ctx)
mov $h1#d,12($ctx)
mov $h2#d,16($ctx)
.align 16
-.Ldone_avx2:
- mov 0(%rsp),%r15
+.Ldone_avx2$suffix:
+ pop %r15
.cfi_restore %r15
- mov 8(%rsp),%r14
+ pop %r14
.cfi_restore %r14
- mov 16(%rsp),%r13
+ pop %r13
.cfi_restore %r13
- mov 24(%rsp),%r12
+ pop %r12
.cfi_restore %r12
- mov 32(%rsp),%rbp
-.cfi_restore %rbp
- mov 40(%rsp),%rbx
+ pop %rbx
.cfi_restore %rbx
- lea 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lno_data_avx2:
-.Lblocks_avx2_epilogue:
+ pop %rbp
+.cfi_restore %rbp
+.Lno_data_avx2$suffix:
+.Lblocks_avx2_epilogue$suffix:
ret
.cfi_endproc
.align 32
-.Lbase2_64_avx2:
+.Lbase2_64_avx2$suffix:
.cfi_startproc
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+ mov %rsp,%rbp
+ push %rbx
+.cfi_push %rbx
push %r12
.cfi_push %r12
push %r13
@@ -1607,7 +1706,7 @@ poly1305_blocks_avx2:
.cfi_push %r14
push %r15
.cfi_push %r15
-.Lbase2_64_avx2_body:
+.Lbase2_64_avx2_body$suffix:
mov $len,%r15 # reassign $len
@@ -1624,9 +1723,9 @@ poly1305_blocks_avx2:
add $r1,$s1 # s1 = r1 + (r1 >> 2)
test \$63,$len
- jz .Linit_avx2
+ jz .Linit_avx2$suffix
-.Lbase2_64_pre_avx2:
+.Lbase2_64_pre_avx2$suffix:
add 0($inp),$h0 # accumulate input
adc 8($inp),$h1
lea 16($inp),$inp
@@ -1637,9 +1736,9 @@ poly1305_blocks_avx2:
mov $r1,%rax
test \$63,%r15
- jnz .Lbase2_64_pre_avx2
+ jnz .Lbase2_64_pre_avx2$suffix
-.Linit_avx2:
+.Linit_avx2$suffix:
################################# base 2^64 -> base 2^26
mov $h0,%rax
mov $h0,%rdx
@@ -1667,69 +1766,77 @@ poly1305_blocks_avx2:
call __poly1305_init_avx
-.Lproceed_avx2:
+.Lproceed_avx2$suffix:
mov %r15,$len # restore $len
- mov OPENSSL_ia32cap_P+8(%rip),%r10d
+___
+$code.=<<___ if (!$kernel);
+ mov OPENSSL_ia32cap_P+8(%rip),%r9d
mov \$`(1<<31|1<<30|1<<16)`,%r11d
-
- mov 0(%rsp),%r15
+___
+$code.=<<___;
+ pop %r15
.cfi_restore %r15
- mov 8(%rsp),%r14
+ pop %r14
.cfi_restore %r14
- mov 16(%rsp),%r13
+ pop %r13
.cfi_restore %r13
- mov 24(%rsp),%r12
+ pop %r12
.cfi_restore %r12
- mov 32(%rsp),%rbp
-.cfi_restore %rbp
- mov 40(%rsp),%rbx
+ pop %rbx
.cfi_restore %rbx
- lea 48(%rsp),%rax
- lea 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lbase2_64_avx2_epilogue:
- jmp .Ldo_avx2
+ pop %rbp
+.cfi_restore %rbp
+.Lbase2_64_avx2_epilogue$suffix:
+ jmp .Ldo_avx2$suffix
.cfi_endproc
.align 32
-.Leven_avx2:
+.Leven_avx2$suffix:
.cfi_startproc
- mov OPENSSL_ia32cap_P+8(%rip),%r10d
+___
+$code.=<<___ if (!$kernel);
+ mov OPENSSL_ia32cap_P+8(%rip),%r9d
+___
+$code.=<<___;
vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26
vmovd 4*1($ctx),%x#$H1
vmovd 4*2($ctx),%x#$H2
vmovd 4*3($ctx),%x#$H3
vmovd 4*4($ctx),%x#$H4
-.Ldo_avx2:
+.Ldo_avx2$suffix:
___
-$code.=<<___ if ($avx>2);
+$code.=<<___ if (!$kernel && $avx>2);
cmp \$512,$len
jb .Lskip_avx512
- and %r11d,%r10d
- test \$`1<<16`,%r10d # check for AVX512F
+ and %r11d,%r9d
+ test \$`1<<16`,%r9d # check for AVX512F
jnz .Lblocks_avx512
-.Lskip_avx512:
+.Lskip_avx512$suffix:
+___
+$code.=<<___ if ($avx > 2 && $avx512 && $kernel);
+ cmp \$512,$len
+ jae .Lblocks_avx512
___
$code.=<<___ if (!$win64);
- lea -8(%rsp),%r11
-.cfi_def_cfa %r11,16
+ lea 8(%rsp),%r10
+.cfi_def_cfa_register %r10
sub \$0x128,%rsp
___
$code.=<<___ if ($win64);
- lea -0xf8(%rsp),%r11
+ lea 8(%rsp),%r10
sub \$0x1c8,%rsp
- vmovdqa %xmm6,0x50(%r11)
- vmovdqa %xmm7,0x60(%r11)
- vmovdqa %xmm8,0x70(%r11)
- vmovdqa %xmm9,0x80(%r11)
- vmovdqa %xmm10,0x90(%r11)
- vmovdqa %xmm11,0xa0(%r11)
- vmovdqa %xmm12,0xb0(%r11)
- vmovdqa %xmm13,0xc0(%r11)
- vmovdqa %xmm14,0xd0(%r11)
- vmovdqa %xmm15,0xe0(%r11)
-.Ldo_avx2_body:
+ vmovdqa %xmm6,-0xb0(%r10)
+ vmovdqa %xmm7,-0xa0(%r10)
+ vmovdqa %xmm8,-0x90(%r10)
+ vmovdqa %xmm9,-0x80(%r10)
+ vmovdqa %xmm10,-0x70(%r10)
+ vmovdqa %xmm11,-0x60(%r10)
+ vmovdqa %xmm12,-0x50(%r10)
+ vmovdqa %xmm13,-0x40(%r10)
+ vmovdqa %xmm14,-0x30(%r10)
+ vmovdqa %xmm15,-0x20(%r10)
+.Ldo_avx2_body$suffix:
___
$code.=<<___;
lea .Lconst(%rip),%rcx
@@ -1794,11 +1901,11 @@ $code.=<<___;
vpaddq $H2,$T2,$H2 # accumulate input
sub \$64,$len
- jz .Ltail_avx2
- jmp .Loop_avx2
+ jz .Ltail_avx2$suffix
+ jmp .Loop_avx2$suffix
.align 32
-.Loop_avx2:
+.Loop_avx2$suffix:
################################################################
# ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
# ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
@@ -1946,10 +2053,10 @@ $code.=<<___;
vpor 32(%rcx),$T4,$T4 # padbit, yes, always
sub \$64,$len
- jnz .Loop_avx2
+ jnz .Loop_avx2$suffix
.byte 0x66,0x90
-.Ltail_avx2:
+.Ltail_avx2$suffix:
################################################################
# while above multiplications were by r^4 in all lanes, in last
# iteration we multiply least significant lane by r^4 and most
@@ -2087,37 +2194,29 @@ $code.=<<___;
vmovd %x#$H4,`4*4-48-64`($ctx)
___
$code.=<<___ if ($win64);
- vmovdqa 0x50(%r11),%xmm6
- vmovdqa 0x60(%r11),%xmm7
- vmovdqa 0x70(%r11),%xmm8
- vmovdqa 0x80(%r11),%xmm9
- vmovdqa 0x90(%r11),%xmm10
- vmovdqa 0xa0(%r11),%xmm11
- vmovdqa 0xb0(%r11),%xmm12
- vmovdqa 0xc0(%r11),%xmm13
- vmovdqa 0xd0(%r11),%xmm14
- vmovdqa 0xe0(%r11),%xmm15
- lea 0xf8(%r11),%rsp
-.Ldo_avx2_epilogue:
+ vmovdqa -0xb0(%r10),%xmm6
+ vmovdqa -0xa0(%r10),%xmm7
+ vmovdqa -0x90(%r10),%xmm8
+ vmovdqa -0x80(%r10),%xmm9
+ vmovdqa -0x70(%r10),%xmm10
+ vmovdqa -0x60(%r10),%xmm11
+ vmovdqa -0x50(%r10),%xmm12
+ vmovdqa -0x40(%r10),%xmm13
+ vmovdqa -0x30(%r10),%xmm14
+ vmovdqa -0x20(%r10),%xmm15
+ lea -8(%r10),%rsp
+.Ldo_avx2_epilogue$suffix:
___
$code.=<<___ if (!$win64);
- lea 8(%r11),%rsp
-.cfi_def_cfa %rsp,8
+ lea -8(%r10),%rsp
+.cfi_def_cfa_register %rsp
___
$code.=<<___;
vzeroupper
ret
.cfi_endproc
-.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
___
-#######################################################################
-if ($avx>2) {
-# On entry we have input length divisible by 64. But since inner loop
-# processes 128 bytes per iteration, cases when length is not divisible
-# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
-# reason stack layout is kept identical to poly1305_blocks_avx2. If not
-# for this tail, we wouldn't have to even allocate stack frame...
-
+if($avx > 2 && $avx512) {
my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
my $PADBIT="%zmm30";
@@ -2128,32 +2227,29 @@ map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
map(s/%y/%z/,($MASK));
$code.=<<___;
-.type poly1305_blocks_avx512,\@function,4
-.align 32
-poly1305_blocks_avx512:
.cfi_startproc
.Lblocks_avx512:
mov \$15,%eax
kmovw %eax,%k2
___
$code.=<<___ if (!$win64);
- lea -8(%rsp),%r11
-.cfi_def_cfa %r11,16
+ lea 8(%rsp),%r10
+.cfi_def_cfa_register %r10
sub \$0x128,%rsp
___
$code.=<<___ if ($win64);
- lea -0xf8(%rsp),%r11
+ lea 8(%rsp),%r10
sub \$0x1c8,%rsp
- vmovdqa %xmm6,0x50(%r11)
- vmovdqa %xmm7,0x60(%r11)
- vmovdqa %xmm8,0x70(%r11)
- vmovdqa %xmm9,0x80(%r11)
- vmovdqa %xmm10,0x90(%r11)
- vmovdqa %xmm11,0xa0(%r11)
- vmovdqa %xmm12,0xb0(%r11)
- vmovdqa %xmm13,0xc0(%r11)
- vmovdqa %xmm14,0xd0(%r11)
- vmovdqa %xmm15,0xe0(%r11)
+ vmovdqa %xmm6,-0xb0(%r10)
+ vmovdqa %xmm7,-0xa0(%r10)
+ vmovdqa %xmm8,-0x90(%r10)
+ vmovdqa %xmm9,-0x80(%r10)
+ vmovdqa %xmm10,-0x70(%r10)
+ vmovdqa %xmm11,-0x60(%r10)
+ vmovdqa %xmm12,-0x50(%r10)
+ vmovdqa %xmm13,-0x40(%r10)
+ vmovdqa %xmm14,-0x30(%r10)
+ vmovdqa %xmm15,-0x20(%r10)
.Ldo_avx512_body:
___
$code.=<<___;
@@ -2679,7 +2775,7 @@ $code.=<<___;
lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2
add \$64,$len
- jnz .Ltail_avx2
+ jnz .Ltail_avx2$suffix
vpsubq $T2,$H2,$H2 # undo input accumulation
vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
@@ -2690,29 +2786,61 @@ $code.=<<___;
vzeroall
___
$code.=<<___ if ($win64);
- movdqa 0x50(%r11),%xmm6
- movdqa 0x60(%r11),%xmm7
- movdqa 0x70(%r11),%xmm8
- movdqa 0x80(%r11),%xmm9
- movdqa 0x90(%r11),%xmm10
- movdqa 0xa0(%r11),%xmm11
- movdqa 0xb0(%r11),%xmm12
- movdqa 0xc0(%r11),%xmm13
- movdqa 0xd0(%r11),%xmm14
- movdqa 0xe0(%r11),%xmm15
- lea 0xf8(%r11),%rsp
+ movdqa -0xb0(%r10),%xmm6
+ movdqa -0xa0(%r10),%xmm7
+ movdqa -0x90(%r10),%xmm8
+ movdqa -0x80(%r10),%xmm9
+ movdqa -0x70(%r10),%xmm10
+ movdqa -0x60(%r10),%xmm11
+ movdqa -0x50(%r10),%xmm12
+ movdqa -0x40(%r10),%xmm13
+ movdqa -0x30(%r10),%xmm14
+ movdqa -0x20(%r10),%xmm15
+ lea -8(%r10),%rsp
.Ldo_avx512_epilogue:
___
$code.=<<___ if (!$win64);
- lea 8(%r11),%rsp
-.cfi_def_cfa %rsp,8
+ lea -8(%r10),%rsp
+.cfi_def_cfa_register %rsp
___
$code.=<<___;
ret
.cfi_endproc
-.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
___
-if ($avx>3) {
+
+}
+
+}
+
+&declare_function("poly1305_blocks_avx2", 32, 4);
+poly1305_blocks_avxN(0);
+&end_function("poly1305_blocks_avx2");
+
+if($kernel) {
+ $code .= "#endif\n";
+}
+
+#######################################################################
+if ($avx>2) {
+# On entry we have input length divisible by 64. But since inner loop
+# processes 128 bytes per iteration, cases when length is not divisible
+# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
+# reason stack layout is kept identical to poly1305_blocks_avx2. If not
+# for this tail, we wouldn't have to even allocate stack frame...
+
+if($kernel) {
+ $code .= "#ifdef CONFIG_AS_AVX512\n";
+}
+
+&declare_function("poly1305_blocks_avx512", 32, 4);
+poly1305_blocks_avxN(1);
+&end_function("poly1305_blocks_avx512");
+
+if ($kernel) {
+ $code .= "#endif\n";
+}
+
+if (!$kernel && $avx>3) {
########################################################################
# VPMADD52 version using 2^44 radix.
#
@@ -3753,45 +3881,9 @@ poly1305_emit_base2_44:
.size poly1305_emit_base2_44,.-poly1305_emit_base2_44
___
} } }
-$code.=<<___;
-.align 64
-.Lconst:
-.Lmask24:
-.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
-.L129:
-.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
-.Lmask26:
-.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
-.Lpermd_avx2:
-.long 2,2,2,3,2,0,2,1
-.Lpermd_avx512:
-.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
-
-.L2_44_inp_permd:
-.long 0,1,1,2,2,3,7,7
-.L2_44_inp_shift:
-.quad 0,12,24,64
-.L2_44_mask:
-.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
-.L2_44_shift_rgt:
-.quad 44,44,42,64
-.L2_44_shift_lft:
-.quad 8,8,10,64
-
-.align 64
-.Lx_mask44:
-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
-.Lx_mask42:
-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
-___
}
-$code.=<<___;
-.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
-.align 16
-___
+if (!$kernel)
{ # chacha20-poly1305 helpers
my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
("%rdi","%rsi","%rdx","%rcx"); # Unix order
@@ -4038,17 +4130,17 @@ avx_handler:
.section .pdata
.align 4
- .rva .LSEH_begin_poly1305_init
- .rva .LSEH_end_poly1305_init
- .rva .LSEH_info_poly1305_init
+ .rva .LSEH_begin_poly1305_init_x86_64
+ .rva .LSEH_end_poly1305_init_x86_64
+ .rva .LSEH_info_poly1305_init_x86_64
- .rva .LSEH_begin_poly1305_blocks
- .rva .LSEH_end_poly1305_blocks
- .rva .LSEH_info_poly1305_blocks
+ .rva .LSEH_begin_poly1305_blocks_x86_64
+ .rva .LSEH_end_poly1305_blocks_x86_64
+ .rva .LSEH_info_poly1305_blocks_x86_64
- .rva .LSEH_begin_poly1305_emit
- .rva .LSEH_end_poly1305_emit
- .rva .LSEH_info_poly1305_emit
+ .rva .LSEH_begin_poly1305_emit_x86_64
+ .rva .LSEH_end_poly1305_emit_x86_64
+ .rva .LSEH_info_poly1305_emit_x86_64
___
$code.=<<___ if ($avx);
.rva .LSEH_begin_poly1305_blocks_avx
@@ -4088,20 +4180,20 @@ ___
$code.=<<___;
.section .xdata
.align 8
-.LSEH_info_poly1305_init:
+.LSEH_info_poly1305_init_x86_64:
.byte 9,0,0,0
.rva se_handler
- .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init
+ .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64
-.LSEH_info_poly1305_blocks:
+.LSEH_info_poly1305_blocks_x86_64:
.byte 9,0,0,0
.rva se_handler
.rva .Lblocks_body,.Lblocks_epilogue
-.LSEH_info_poly1305_emit:
+.LSEH_info_poly1305_emit_x86_64:
.byte 9,0,0,0
.rva se_handler
- .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit
+ .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64
___
$code.=<<___ if ($avx);
.LSEH_info_poly1305_blocks_avx_1:
@@ -4148,12 +4240,26 @@ $code.=<<___ if ($avx>2);
___
}
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/\/\// and !/^$/);
+ print;
+}
+close SELF;
+
foreach (split('\n',$code)) {
s/\`([^\`]*)\`/eval($1)/ge;
s/%r([a-z]+)#d/%e$1/g;
s/%r([0-9]+)#d/%r$1d/g;
s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;
+ if ($kernel) {
+ s/(^\.type.*),[0-9]+$/\1/;
+ s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/;
+ next if /^\.cfi.*/;
+ }
+
print $_,"\n";
}
close STDOUT;
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index edb7113e36f3..657363588e0c 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -1,8 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <crypto/algapi.h>
@@ -13,279 +11,170 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <asm/intel-family.h>
#include <asm/simd.h>
-asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
- const u32 *r, unsigned int blocks);
-asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
- unsigned int blocks, const u32 *u);
-asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
- unsigned int blocks, const u32 *u);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
+asmlinkage void poly1305_init_x86_64(void *ctx,
+ const u8 key[POLY1305_KEY_SIZE]);
+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
+ const size_t len, const u32 padbit);
+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
+ const u32 padbit);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
+ const u32 padbit);
+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
+ const size_t len, const u32 padbit);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
+
+struct poly1305_arch_internal {
+ union {
+ struct {
+ u32 h[5];
+ u32 is_base2_26;
+ };
+ u64 hs[3];
+ };
+ u64 r[2];
+ u64 pad;
+ struct { u32 r2, r1, r4, r3; } rn[9];
+};
-static inline u64 mlt(u64 a, u64 b)
+/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
{
- return a * b;
-}
+ struct poly1305_arch_internal *state = ctx;
+ u32 cy;
-static inline u32 sr(u64 v, u_char n)
-{
- return v >> n;
-}
+ if (!state->is_base2_26)
+ return;
-static inline u32 and(u32 v, u32 mask)
-{
- return v & mask;
+ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+ state->hs[2] = state->h[4] >> 24;
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+ state->hs[2] &= 3;
+ state->hs[0] += cy;
+ state->hs[1] += (cy = ULT(state->hs[0], cy));
+ state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+ state->is_base2_26 = 0;
}
-static void poly1305_simd_mult(u32 *a, const u32 *b)
+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
{
- u8 m[POLY1305_BLOCK_SIZE];
-
- memset(m, 0, sizeof(m));
- /* The poly1305 block function adds a hi-bit to the accumulator which
- * we don't need for key multiplication; compensate for it. */
- a[4] -= 1 << 24;
- poly1305_block_sse2(a, m, b, 1);
+ poly1305_init_x86_64(ctx, key);
}
-static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key)
+static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
+ const u32 padbit)
{
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
-}
+ struct poly1305_arch_internal *state = ctx;
-static void poly1305_integer_blocks(struct poly1305_state *state,
- const struct poly1305_key *key,
- const void *src,
- unsigned int nblocks, u32 hibit)
-{
- u32 r0, r1, r2, r3, r4;
- u32 s1, s2, s3, s4;
- u32 h0, h1, h2, h3, h4;
- u64 d0, d1, d2, d3, d4;
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
+ PAGE_SIZE % POLY1305_BLOCK_SIZE);
- if (!nblocks)
+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
+ (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
+ !crypto_simd_usable()) {
+ convert_to_base2_64(ctx);
+ poly1305_blocks_x86_64(ctx, inp, len, padbit);
return;
+ }
- r0 = key->r[0];
- r1 = key->r[1];
- r2 = key->r[2];
- r3 = key->r[3];
- r4 = key->r[4];
-
- s1 = r1 * 5;
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- do {
- /* h += m[i] */
- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
-
- /* h *= r */
- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
- mlt(h3, s2) + mlt(h4, s1);
- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
- mlt(h3, s3) + mlt(h4, s2);
- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
- mlt(h3, s4) + mlt(h4, s3);
- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
- mlt(h3, r0) + mlt(h4, s4);
- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
- mlt(h3, r1) + mlt(h4, r0);
-
- /* (partial) h %= p */
- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
-
- src += POLY1305_BLOCK_SIZE;
- } while (--nblocks);
-
- state->h[0] = h0;
- state->h[1] = h1;
- state->h[2] = h2;
- state->h[3] = h3;
- state->h[4] = h4;
+ for (;;) {
+ const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+ kernel_fpu_begin();
+ if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
+ poly1305_blocks_avx512(ctx, inp, bytes, padbit);
+ else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2))
+ poly1305_blocks_avx2(ctx, inp, bytes, padbit);
+ else
+ poly1305_blocks_avx(ctx, inp, bytes, padbit);
+ kernel_fpu_end();
+ len -= bytes;
+ if (!len)
+ break;
+ inp += bytes;
+ }
}
-static void poly1305_integer_emit(const struct poly1305_state *state, void *dst)
+static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4])
{
- u32 h0, h1, h2, h3, h4;
- u32 g0, g1, g2, g3, g4;
- u32 mask;
-
- /* fully carry h */
- h0 = state->h[0];
- h1 = state->h[1];
- h2 = state->h[2];
- h3 = state->h[3];
- h4 = state->h[4];
-
- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
-
- /* compute h + -p */
- g0 = h0 + 5;
- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
- /* select h if h < p, or h + -p if h >= p */
- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128) */
- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
+ struct poly1305_arch_internal *state = ctx;
+
+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
+ !state->is_base2_26 || !crypto_simd_usable()) {
+ convert_to_base2_64(ctx);
+ poly1305_emit_x86_64(ctx, mac, nonce);
+ } else
+ poly1305_emit_avx(ctx, mac, nonce);
}
-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
- poly1305_integer_setkey(desc->opaque_r, key);
- desc->s[0] = get_unaligned_le32(key + 16);
- desc->s[1] = get_unaligned_le32(key + 20);
- desc->s[2] = get_unaligned_le32(key + 24);
- desc->s[3] = get_unaligned_le32(key + 28);
- poly1305_core_init(&desc->h);
- desc->buflen = 0;
- desc->sset = true;
- desc->rset = 1;
+ poly1305_simd_init(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(&key[16]);
+ dctx->s[1] = get_unaligned_le32(&key[20]);
+ dctx->s[2] = get_unaligned_le32(&key[24]);
+ dctx->s[3] = get_unaligned_le32(&key[28]);
+ dctx->buflen = 0;
+ dctx->sset = true;
}
-EXPORT_SYMBOL_GPL(poly1305_init_arch);
+EXPORT_SYMBOL(poly1305_init_arch);
-static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+ const u8 *inp, unsigned int len)
{
- if (!dctx->sset) {
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_integer_setkey(dctx->r, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
+ unsigned int acc = 0;
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+ poly1305_simd_init(&dctx->h, inp);
+ inp += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ acc += POLY1305_BLOCK_SIZE;
dctx->rset = 1;
}
- if (srclen >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(&inp[0]);
+ dctx->s[1] = get_unaligned_le32(&inp[4]);
+ dctx->s[2] = get_unaligned_le32(&inp[8]);
+ dctx->s[3] = get_unaligned_le32(&inp[12]);
+ inp += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ acc += POLY1305_BLOCK_SIZE;
dctx->sset = true;
}
}
- return srclen;
-}
-
-static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- unsigned int datalen;
-
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src,
- srclen / POLY1305_BLOCK_SIZE, 1);
- srclen %= POLY1305_BLOCK_SIZE;
- }
- return srclen;
-}
-
-static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- unsigned int blocks, datalen;
-
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
-
- if (IS_ENABLED(CONFIG_AS_AVX2) &&
- static_branch_likely(&poly1305_use_avx2) &&
- srclen >= POLY1305_BLOCK_SIZE * 4) {
- if (unlikely(dctx->rset < 4)) {
- if (dctx->rset < 2) {
- dctx->r[1] = dctx->r[0];
- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
- }
- dctx->r[2] = dctx->r[1];
- poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
- dctx->r[3] = dctx->r[2];
- poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
- dctx->rset = 4;
- }
- blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
- dctx->r[1].r);
- src += POLY1305_BLOCK_SIZE * 4 * blocks;
- srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
- if (unlikely(dctx->rset < 2)) {
- dctx->r[1] = dctx->r[0];
- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
- dctx->rset = 2;
- }
- blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
- blocks, dctx->r[1].r);
- src += POLY1305_BLOCK_SIZE * 2 * blocks;
- srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
- srclen -= POLY1305_BLOCK_SIZE;
- }
- return srclen;
+ return acc;
}
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int srclen)
{
- unsigned int bytes;
+ unsigned int bytes, used;
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
@@ -295,31 +184,19 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- if (static_branch_likely(&poly1305_use_simd) &&
- likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- poly1305_simd_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
- kernel_fpu_end();
- } else {
- poly1305_scalar_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
- }
+ if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
+ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- if (static_branch_likely(&poly1305_use_simd) &&
- likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- bytes = poly1305_simd_blocks(dctx, src, srclen);
- kernel_fpu_end();
- } else {
- bytes = poly1305_scalar_blocks(dctx, src, srclen);
- }
- src += srclen - bytes;
- srclen = bytes;
+ bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+ srclen -= bytes;
+ used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+ if (likely(bytes - used))
+ poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
+ src += bytes;
}
if (unlikely(srclen)) {
@@ -329,31 +206,17 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
}
EXPORT_SYMBOL(poly1305_update_arch);
-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst)
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
- __le32 digest[4];
- u64 f = 0;
-
- if (unlikely(desc->buflen)) {
- desc->buf[desc->buflen++] = 1;
- memset(desc->buf + desc->buflen, 0,
- POLY1305_BLOCK_SIZE - desc->buflen);
- poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0);
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
}
- poly1305_integer_emit(&desc->h, digest);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
- put_unaligned_le32(f, dst + 0);
- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
- put_unaligned_le32(f, dst + 12);
-
- *desc = (struct poly1305_desc_ctx){};
+ poly1305_simd_emit(&dctx->h, dst, dctx->s);
+ *dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
@@ -361,38 +224,34 @@ static int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- poly1305_core_init(&dctx->h);
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
+ *dctx = (struct poly1305_desc_ctx){};
return 0;
}
-static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+static int crypto_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_arch(dctx, dst);
+ poly1305_update_arch(dctx, src, srclen);
return 0;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- poly1305_update_arch(dctx, src, srclen);
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
return 0;
}
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
.init = crypto_poly1305_init,
- .update = poly1305_simd_update,
+ .update = crypto_poly1305_update,
.final = crypto_poly1305_final,
.descsize = sizeof(struct poly1305_desc_ctx),
.base = {
@@ -406,17 +265,19 @@ static struct shash_alg alg = {
static int __init poly1305_simd_mod_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_XMM2))
- return 0;
-
- static_branch_enable(&poly1305_use_simd);
-
- if (IS_ENABLED(CONFIG_AS_AVX2) &&
- boot_cpu_has(X86_FEATURE_AVX) &&
+ if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx);
+ if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
static_branch_enable(&poly1305_use_avx2);
-
+ if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
+ /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
+ boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X)
+ static_branch_enable(&poly1305_use_avx512);
return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
}
@@ -430,7 +291,7 @@ module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 0b2c4fce26d9..14c032de276e 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -90,7 +90,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
default 2 if MIPS
- default 4 if X86_64
+ default 11 if X86_64
default 9 if ARM || ARM64
default 1
--
2.18.2
From 35dd657ae5c12cfd9ff4322040b3a8f2ae079f96 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 5 Jan 2020 22:40:49 -0500
Subject: [PATCH 044/100] crypto: {arm,arm64,mips}/poly1305 - remove redundant
non-reduction from emit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit 31899908a0d248b030b4464425b86c717e0007d4 upstream.
This appears to be some kind of copy and paste error, and is actually
dead code.
Pre: f = 0 ⇒ (f >> 32) = 0
f = (f >> 32) + le32_to_cpu(digest[0]);
Post: 0 ≤ f < 2³²
put_unaligned_le32(f, dst);
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
f = (f >> 32) + le32_to_cpu(digest[1]);
Post: 0 ≤ f < 2³²
put_unaligned_le32(f, dst + 4);
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
f = (f >> 32) + le32_to_cpu(digest[2]);
Post: 0 ≤ f < 2³²
put_unaligned_le32(f, dst + 8);
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
f = (f >> 32) + le32_to_cpu(digest[3]);
Post: 0 ≤ f < 2³²
put_unaligned_le32(f, dst + 12);
Therefore this sequence is redundant. And Andy's code appears to handle
misalignment acceptably.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/poly1305-glue.c | 18 ++----------------
arch/arm64/crypto/poly1305-glue.c | 18 ++----------------
arch/mips/crypto/poly1305-glue.c | 18 ++----------------
3 files changed, 6 insertions(+), 48 deletions(-)
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index abe3f2d587dc..ceec04ec2f40 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -20,7 +20,7 @@
void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
-void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
+void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
@@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
- __le32 digest[4];
- u64 f = 0;
-
if (unlikely(dctx->buflen)) {
dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0,
@@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
}
- poly1305_emit_arm(&dctx->h, digest, dctx->s);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]);
- put_unaligned_le32(f, dst);
- f = (f >> 32) + le32_to_cpu(digest[1]);
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]);
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]);
- put_unaligned_le32(f, dst + 12);
-
+ poly1305_emit_arm(&dctx->h, dst, dctx->s);
*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index 83a2338a8826..e97b092f56b8 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -21,7 +21,7 @@
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
-asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
+asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
- __le32 digest[4];
- u64 f = 0;
-
if (unlikely(dctx->buflen)) {
dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0,
@@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
}
- poly1305_emit(&dctx->h, digest, dctx->s);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]);
- put_unaligned_le32(f, dst);
- f = (f >> 32) + le32_to_cpu(digest[1]);
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]);
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]);
- put_unaligned_le32(f, dst + 12);
-
+ poly1305_emit(&dctx->h, dst, dctx->s);
*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
index b37d29cf5d0a..fc881b46d911 100644
--- a/arch/mips/crypto/poly1305-glue.c
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -15,7 +15,7 @@
asmlinkage void poly1305_init_mips(void *state, const u8 *key);
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
-asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
+asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
@@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
- __le32 digest[4];
- u64 f = 0;
-
if (unlikely(dctx->buflen)) {
dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0,
@@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
}
- poly1305_emit_mips(&dctx->h, digest, dctx->s);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + le32_to_cpu(digest[0]);
- put_unaligned_le32(f, dst);
- f = (f >> 32) + le32_to_cpu(digest[1]);
- put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + le32_to_cpu(digest[2]);
- put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + le32_to_cpu(digest[3]);
- put_unaligned_le32(f, dst + 12);
-
+ poly1305_emit_mips(&dctx->h, dst, dctx->s);
*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
--
2.18.2
From 10debe0a7c7c0cfbd09cd459ffed05c68f21a31b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 8 Jan 2020 12:37:35 +0800
Subject: [PATCH 045/100] crypto: curve25519 - Fix selftest build error
commit a8bdf2c42ee4d1ee42af1f3601f85de94e70a421 upstream.
If CRYPTO_CURVE25519 is y, CRYPTO_LIB_CURVE25519_GENERIC will be
y, but CRYPTO_LIB_CURVE25519 may be set to m, this causes build
errors:
lib/crypto/curve25519-selftest.o: In function `curve25519':
curve25519-selftest.c:(.text.unlikely+0xc): undefined reference to `curve25519_arch'
lib/crypto/curve25519-selftest.o: In function `curve25519_selftest':
curve25519-selftest.c:(.init.text+0x17e): undefined reference to `curve25519_base_arch'
This is because the curve25519 self-test code is being controlled
by the GENERIC option rather than the overall CURVE25519 option,
as is the case with blake2s. To recap, the GENERIC and ARCH options
for CURVE25519 are internal only and selected by users such as
the Crypto API, or the externally visible CURVE25519 option which
in turn is selected by wireguard. The self-test is specific to the
the external CURVE25519 option and should not be enabled by the
Crypto API.
This patch fixes this by splitting the GENERIC module from the
CURVE25519 module with the latter now containing just the self-test.
Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: aa127963f1ca ("crypto: lib/curve25519 - re-add selftests")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/Makefile | 9 ++++++---
lib/crypto/curve25519-generic.c | 24 ++++++++++++++++++++++++
lib/crypto/curve25519.c | 7 -------
3 files changed, 30 insertions(+), 10 deletions(-)
create mode 100644 lib/crypto/curve25519-generic.c
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 6ecaf83a5a9a..3a435629d9ce 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -19,9 +19,12 @@ libblake2s-y += blake2s.o
obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
libchacha20poly1305-y += chacha20poly1305.o
-obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
-libcurve25519-y := curve25519-fiat32.o
-libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
+obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o
+libcurve25519-generic-y := curve25519-fiat32.o
+libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
+libcurve25519-generic-y += curve25519-generic.o
+
+obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o
libcurve25519-y += curve25519.o
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
diff --git a/lib/crypto/curve25519-generic.c b/lib/crypto/curve25519-generic.c
new file mode 100644
index 000000000000..de7c99172fa2
--- /dev/null
+++ b/lib/crypto/curve25519-generic.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the Curve25519 ECDH algorithm, using either
+ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
+ * depending on what is supported by the target compiler.
+ *
+ * Information: https://cr.yp.to/ecdh.html
+ */
+
+#include <crypto/curve25519.h>
+#include <linux/module.h>
+
+const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
+const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
+
+EXPORT_SYMBOL(curve25519_null_point);
+EXPORT_SYMBOL(curve25519_base_point);
+EXPORT_SYMBOL(curve25519_generic);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Curve25519 scalar multiplication");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index c03ccdb99434..288a62cd29b2 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -15,13 +15,6 @@
bool curve25519_selftest(void);
-const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
-const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
-
-EXPORT_SYMBOL(curve25519_null_point);
-EXPORT_SYMBOL(curve25519_base_point);
-EXPORT_SYMBOL(curve25519_generic);
-
static int __init mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
--
2.18.2
From e82348f61b5731bae5b1d97cb6f3aefd4a6d6b95 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 16 Jan 2020 18:23:55 +0100
Subject: [PATCH 046/100] crypto: x86/poly1305 - fix .gitignore typo
commit 1f6868995326cc82102049e349d8dbd116bdb656 upstream.
Admist the kbuild robot induced changes, the .gitignore file for the
generated file wasn't updated with the non-clashing filename. This
commit adjusts that.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/.gitignore | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore
index c406ea6571fa..30be0400a439 100644
--- a/arch/x86/crypto/.gitignore
+++ b/arch/x86/crypto/.gitignore
@@ -1 +1 @@
-poly1305-x86_64.S
+poly1305-x86_64-cryptogams.S
--
2.18.2
From 50ab09a1e41c36049b0b4d194ac3998b6e98bf09 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 16 Jan 2020 21:26:34 +0100
Subject: [PATCH 047/100] crypto: chacha20poly1305 - add back missing test
vectors and test chunking
commit 72c7943792c9e7788ddd182337bcf8f650cf56f5 upstream.
When this was originally ported, the 12-byte nonce vectors were left out
to keep things simple. I agree that we don't need nor want a library
interface for 12-byte nonces. But these test vectors were specially
crafted to look at issues in the underlying primitives and related
interactions. Therefore, we actually want to keep around all of the
test vectors, and simply have a helper function to test them with.
Secondly, the sglist-based chunking code in the library interface is
rather complicated, so this adds a developer-only test for ensuring that
all the book keeping is correct, across a wide array of possibilities.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/chacha20poly1305-selftest.c | 1712 +++++++++++++++++++++++-
1 file changed, 1698 insertions(+), 14 deletions(-)
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index 465de46dbdef..c391a91364e9 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -4,6 +4,7 @@
*/
#include <crypto/chacha20poly1305.h>
+#include <crypto/chacha.h>
#include <crypto/poly1305.h>
#include <asm/unaligned.h>
@@ -1926,6 +1927,1104 @@ static const u8 enc_key012[] __initconst = {
0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
};
+/* wycheproof - rfc7539 */
+static const u8 enc_input013[] __initconst = {
+ 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61,
+ 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
+ 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
+ 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39,
+ 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
+ 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66,
+ 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
+ 0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20,
+ 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75,
+ 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
+ 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f,
+ 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
+ 0x74, 0x2e
+};
+static const u8 enc_output013[] __initconst = {
+ 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
+ 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
+ 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
+ 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
+ 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
+ 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
+ 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
+ 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
+ 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
+ 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
+ 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
+ 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
+ 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
+ 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
+ 0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09,
+ 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60,
+ 0x06, 0x91
+};
+static const u8 enc_assoc013[] __initconst = {
+ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
+ 0xc4, 0xc5, 0xc6, 0xc7
+};
+static const u8 enc_nonce013[] __initconst = {
+ 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43,
+ 0x44, 0x45, 0x46, 0x47
+};
+static const u8 enc_key013[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input014[] __initconst = { };
+static const u8 enc_output014[] __initconst = {
+ 0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5,
+ 0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d
+};
+static const u8 enc_assoc014[] __initconst = { };
+static const u8 enc_nonce014[] __initconst = {
+ 0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1,
+ 0xea, 0x12, 0x37, 0x9d
+};
+static const u8 enc_key014[] __initconst = {
+ 0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96,
+ 0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0,
+ 0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08,
+ 0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0
+};
+
+/* wycheproof - misc */
+static const u8 enc_input015[] __initconst = { };
+static const u8 enc_output015[] __initconst = {
+ 0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b,
+ 0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09
+};
+static const u8 enc_assoc015[] __initconst = {
+ 0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10
+};
+static const u8 enc_nonce015[] __initconst = {
+ 0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16,
+ 0xa3, 0xc6, 0xf6, 0x89
+};
+static const u8 enc_key015[] __initconst = {
+ 0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb,
+ 0x20, 0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22,
+ 0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63,
+ 0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42
+};
+
+/* wycheproof - misc */
+static const u8 enc_input016[] __initconst = {
+ 0x2a
+};
+static const u8 enc_output016[] __initconst = {
+ 0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80,
+ 0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75,
+ 0x22
+};
+static const u8 enc_assoc016[] __initconst = { };
+static const u8 enc_nonce016[] __initconst = {
+ 0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd,
+ 0xee, 0xab, 0x60, 0xf1
+};
+static const u8 enc_key016[] __initconst = {
+ 0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50,
+ 0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa,
+ 0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a,
+ 0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73
+};
+
+/* wycheproof - misc */
+static const u8 enc_input017[] __initconst = {
+ 0x51
+};
+static const u8 enc_output017[] __initconst = {
+ 0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7,
+ 0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72,
+ 0xb9
+};
+static const u8 enc_assoc017[] __initconst = {
+ 0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53
+};
+static const u8 enc_nonce017[] __initconst = {
+ 0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2,
+ 0x78, 0x2f, 0x44, 0x03
+};
+static const u8 enc_key017[] __initconst = {
+ 0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5,
+ 0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f,
+ 0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5,
+ 0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee
+};
+
+/* wycheproof - misc */
+static const u8 enc_input018[] __initconst = {
+ 0x5c, 0x60
+};
+static const u8 enc_output018[] __initconst = {
+ 0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39,
+ 0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22,
+ 0xe5, 0xe2
+};
+static const u8 enc_assoc018[] __initconst = { };
+static const u8 enc_nonce018[] __initconst = {
+ 0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 0xe0, 0x34,
+ 0x7e, 0x03, 0xf2, 0xdb
+};
+static const u8 enc_key018[] __initconst = {
+ 0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89,
+ 0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e,
+ 0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f,
+ 0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00
+};
+
+/* wycheproof - misc */
+static const u8 enc_input019[] __initconst = {
+ 0xdd, 0xf2
+};
+static const u8 enc_output019[] __initconst = {
+ 0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec,
+ 0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24,
+ 0x37, 0xa2
+};
+static const u8 enc_assoc019[] __initconst = {
+ 0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf
+};
+static const u8 enc_nonce019[] __initconst = {
+ 0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90,
+ 0xb6, 0x04, 0x0a, 0xc6
+};
+static const u8 enc_key019[] __initconst = {
+ 0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48,
+ 0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff,
+ 0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44,
+ 0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58
+};
+
+/* wycheproof - misc */
+static const u8 enc_input020[] __initconst = {
+ 0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31
+};
+static const u8 enc_output020[] __initconst = {
+ 0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed,
+ 0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28,
+ 0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42
+};
+static const u8 enc_assoc020[] __initconst = { };
+static const u8 enc_nonce020[] __initconst = {
+ 0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59,
+ 0x6d, 0xc5, 0x5b, 0xb7
+};
+static const u8 enc_key020[] __initconst = {
+ 0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f,
+ 0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f,
+ 0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3,
+ 0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7
+};
+
+/* wycheproof - misc */
+static const u8 enc_input021[] __initconst = {
+ 0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90
+};
+static const u8 enc_output021[] __initconst = {
+ 0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18,
+ 0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5,
+ 0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba
+};
+static const u8 enc_assoc021[] __initconst = {
+ 0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53
+};
+static const u8 enc_nonce021[] __initconst = {
+ 0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98,
+ 0xde, 0x94, 0x83, 0x96
+};
+static const u8 enc_key021[] __initconst = {
+ 0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5,
+ 0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4,
+ 0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda,
+ 0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input022[] __initconst = {
+ 0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed
+};
+static const u8 enc_output022[] __initconst = {
+ 0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17,
+ 0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93,
+ 0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21
+};
+static const u8 enc_assoc022[] __initconst = { };
+static const u8 enc_nonce022[] __initconst = {
+ 0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2,
+ 0x36, 0x18, 0x23, 0xd3
+};
+static const u8 enc_key022[] __initconst = {
+ 0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf,
+ 0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d,
+ 0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7,
+ 0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52
+};
+
+/* wycheproof - misc */
+static const u8 enc_input023[] __initconst = {
+ 0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf
+};
+static const u8 enc_output023[] __initconst = {
+ 0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0,
+ 0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0,
+ 0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18
+};
+static const u8 enc_assoc023[] __initconst = {
+ 0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d
+};
+static const u8 enc_nonce023[] __initconst = {
+ 0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33,
+ 0x66, 0x48, 0x4a, 0x78
+};
+static const u8 enc_key023[] __initconst = {
+ 0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54,
+ 0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a,
+ 0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe,
+ 0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd
+};
+
+/* wycheproof - misc */
+static const u8 enc_input024[] __initconst = {
+ 0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c,
+ 0x36, 0x8d, 0x14, 0xe0
+};
+static const u8 enc_output024[] __initconst = {
+ 0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a,
+ 0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27,
+ 0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10,
+ 0x6d, 0xcb, 0x29, 0xb4
+};
+static const u8 enc_assoc024[] __initconst = { };
+static const u8 enc_nonce024[] __initconst = {
+ 0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e,
+ 0x07, 0x53, 0x86, 0x56
+};
+static const u8 enc_key024[] __initconst = {
+ 0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0,
+ 0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39,
+ 0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5,
+ 0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe
+};
+
+/* wycheproof - misc */
+static const u8 enc_input025[] __initconst = {
+ 0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7,
+ 0x59, 0xb1, 0xa0, 0xda
+};
+static const u8 enc_output025[] __initconst = {
+ 0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38,
+ 0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c,
+ 0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7,
+ 0xcf, 0x05, 0x07, 0x2f
+};
+static const u8 enc_assoc025[] __initconst = {
+ 0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9
+};
+static const u8 enc_nonce025[] __initconst = {
+ 0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d,
+ 0xd9, 0x06, 0xe9, 0xce
+};
+static const u8 enc_key025[] __initconst = {
+ 0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40,
+ 0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70,
+ 0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e,
+ 0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57
+};
+
+/* wycheproof - misc */
+static const u8 enc_input026[] __initconst = {
+ 0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac,
+ 0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07
+};
+static const u8 enc_output026[] __initconst = {
+ 0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf,
+ 0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a,
+ 0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79,
+ 0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2
+};
+static const u8 enc_assoc026[] __initconst = { };
+static const u8 enc_nonce026[] __initconst = {
+ 0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda,
+ 0xd4, 0x61, 0xd2, 0x3c
+};
+static const u8 enc_key026[] __initconst = {
+ 0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda,
+ 0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77,
+ 0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0,
+ 0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb
+};
+
+/* wycheproof - misc */
+static const u8 enc_input027[] __initconst = {
+ 0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f,
+ 0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73
+};
+static const u8 enc_output027[] __initconst = {
+ 0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81,
+ 0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe,
+ 0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9,
+ 0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17
+};
+static const u8 enc_assoc027[] __initconst = {
+ 0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12
+};
+static const u8 enc_nonce027[] __initconst = {
+ 0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14,
+ 0xc5, 0x03, 0x5b, 0x6a
+};
+static const u8 enc_key027[] __initconst = {
+ 0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f,
+ 0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38,
+ 0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b,
+ 0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d
+};
+
+/* wycheproof - misc */
+static const u8 enc_input028[] __initconst = {
+ 0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0,
+ 0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88
+};
+static const u8 enc_output028[] __initconst = {
+ 0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4,
+ 0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13,
+ 0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a,
+ 0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c
+};
+static const u8 enc_assoc028[] __initconst = { };
+static const u8 enc_nonce028[] __initconst = {
+ 0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8,
+ 0x47, 0x40, 0xad, 0x9b
+};
+static const u8 enc_key028[] __initconst = {
+ 0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7,
+ 0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7,
+ 0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63,
+ 0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a
+};
+
+/* wycheproof - misc */
+static const u8 enc_input029[] __initconst = {
+ 0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09,
+ 0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6
+};
+static const u8 enc_output029[] __initconst = {
+ 0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3,
+ 0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c,
+ 0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc,
+ 0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d
+};
+static const u8 enc_assoc029[] __initconst = {
+ 0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff
+};
+static const u8 enc_nonce029[] __initconst = {
+ 0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80,
+ 0x07, 0x1f, 0x52, 0x66
+};
+static const u8 enc_key029[] __initconst = {
+ 0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8,
+ 0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14,
+ 0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d,
+ 0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e
+};
+
+/* wycheproof - misc */
+static const u8 enc_input030[] __initconst = {
+ 0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15,
+ 0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e,
+ 0x1f
+};
+static const u8 enc_output030[] __initconst = {
+ 0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd,
+ 0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74,
+ 0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80,
+ 0x0f, 0xe9, 0x58, 0xc2, 0x8e, 0xaa, 0x29, 0x08,
+ 0x13
+};
+static const u8 enc_assoc030[] __initconst = { };
+static const u8 enc_nonce030[] __initconst = {
+ 0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42,
+ 0xdc, 0x03, 0x44, 0x5d
+};
+static const u8 enc_key030[] __initconst = {
+ 0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21,
+ 0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e,
+ 0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b,
+ 0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11
+};
+
+/* wycheproof - misc */
+static const u8 enc_input031[] __initconst = {
+ 0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88,
+ 0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c,
+ 0x9c
+};
+static const u8 enc_output031[] __initconst = {
+ 0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b,
+ 0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e,
+ 0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38,
+ 0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c,
+ 0xeb
+};
+static const u8 enc_assoc031[] __initconst = {
+ 0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79
+};
+static const u8 enc_nonce031[] __initconst = {
+ 0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10,
+ 0x63, 0x3d, 0x99, 0x3d
+};
+static const u8 enc_key031[] __initconst = {
+ 0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7,
+ 0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f,
+ 0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82,
+ 0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70
+};
+
+/* wycheproof - misc */
+static const u8 enc_input032[] __initconst = {
+ 0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66,
+ 0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7,
+ 0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11
+};
+static const u8 enc_output032[] __initconst = {
+ 0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d,
+ 0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0,
+ 0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64,
+ 0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d,
+ 0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a
+};
+static const u8 enc_assoc032[] __initconst = { };
+static const u8 enc_nonce032[] __initconst = {
+ 0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76,
+ 0x2c, 0x65, 0xf3, 0x1b
+};
+static const u8 enc_key032[] __initconst = {
+ 0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87,
+ 0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f,
+ 0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2,
+ 0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7
+};
+
+/* wycheproof - misc */
+static const u8 enc_input033[] __initconst = {
+ 0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d,
+ 0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c,
+ 0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93
+};
+static const u8 enc_output033[] __initconst = {
+ 0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69,
+ 0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4,
+ 0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00,
+ 0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63,
+ 0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65
+};
+static const u8 enc_assoc033[] __initconst = {
+ 0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08
+};
+static const u8 enc_nonce033[] __initconst = {
+ 0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd,
+ 0x78, 0x34, 0xed, 0x55
+};
+static const u8 enc_key033[] __initconst = {
+ 0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed,
+ 0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda,
+ 0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d,
+ 0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3
+};
+
+/* wycheproof - misc */
+static const u8 enc_input034[] __initconst = {
+ 0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f,
+ 0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c,
+ 0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26,
+ 0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29
+};
+static const u8 enc_output034[] __initconst = {
+ 0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab,
+ 0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb,
+ 0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1,
+ 0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56,
+ 0x6d, 0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f,
+ 0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93
+};
+static const u8 enc_assoc034[] __initconst = { };
+static const u8 enc_nonce034[] __initconst = {
+ 0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31,
+ 0x37, 0x1a, 0x6f, 0xd2
+};
+static const u8 enc_key034[] __initconst = {
+ 0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e,
+ 0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2,
+ 0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15,
+ 0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71
+};
+
+/* wycheproof - misc */
+static const u8 enc_input035[] __initconst = {
+ 0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2,
+ 0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f,
+ 0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56,
+ 0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb
+};
+static const u8 enc_output035[] __initconst = {
+ 0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20,
+ 0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5,
+ 0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e,
+ 0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53,
+ 0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d,
+ 0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f
+};
+static const u8 enc_assoc035[] __initconst = {
+ 0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48
+};
+static const u8 enc_nonce035[] __initconst = {
+ 0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8,
+ 0xb8, 0x1a, 0x1f, 0x8b
+};
+static const u8 enc_key035[] __initconst = {
+ 0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f,
+ 0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40,
+ 0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4,
+ 0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4
+};
+
+/* wycheproof - misc */
+static const u8 enc_input036[] __initconst = {
+ 0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a,
+ 0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb,
+ 0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce,
+ 0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78
+};
+static const u8 enc_output036[] __initconst = {
+ 0xea, 0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76,
+ 0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e,
+ 0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50,
+ 0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21,
+ 0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33,
+ 0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea
+};
+static const u8 enc_assoc036[] __initconst = { };
+static const u8 enc_nonce036[] __initconst = {
+ 0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2,
+ 0x2b, 0x7e, 0x6e, 0x6a
+};
+static const u8 enc_key036[] __initconst = {
+ 0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c,
+ 0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb,
+ 0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9,
+ 0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3
+};
+
+/* wycheproof - misc */
+static const u8 enc_input037[] __initconst = {
+ 0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14,
+ 0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3,
+ 0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79,
+ 0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e
+};
+static const u8 enc_output037[] __initconst = {
+ 0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b,
+ 0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2,
+ 0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29,
+ 0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4,
+ 0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d,
+ 0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32
+};
+static const u8 enc_assoc037[] __initconst = {
+ 0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59
+};
+static const u8 enc_nonce037[] __initconst = {
+ 0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5,
+ 0x34, 0x0d, 0xd1, 0xb8
+};
+static const u8 enc_key037[] __initconst = {
+ 0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4,
+ 0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f,
+ 0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd,
+ 0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45
+};
+
+/* wycheproof - misc */
+static const u8 enc_input038[] __initconst = {
+ 0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 0xe4,
+ 0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e,
+ 0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11,
+ 0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04,
+ 0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46,
+ 0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a
+};
+static const u8 enc_output038[] __initconst = {
+ 0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6,
+ 0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00,
+ 0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6,
+ 0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27,
+ 0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24,
+ 0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f,
+ 0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08,
+ 0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9
+};
+static const u8 enc_assoc038[] __initconst = { };
+static const u8 enc_nonce038[] __initconst = {
+ 0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9,
+ 0x0b, 0xef, 0x55, 0xd2
+};
+static const u8 enc_key038[] __initconst = {
+ 0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51,
+ 0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63,
+ 0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3,
+ 0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64
+};
+
+/* wycheproof - misc */
+static const u8 enc_input039[] __initconst = {
+ 0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74,
+ 0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3,
+ 0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d,
+ 0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59,
+ 0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c,
+ 0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9
+};
+static const u8 enc_output039[] __initconst = {
+ 0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec,
+ 0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04,
+ 0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e,
+ 0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d,
+ 0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3,
+ 0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4,
+ 0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42,
+ 0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3
+};
+static const u8 enc_assoc039[] __initconst = {
+ 0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84
+};
+static const u8 enc_nonce039[] __initconst = {
+ 0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b,
+ 0x31, 0xcd, 0x4d, 0x95
+};
+static const u8 enc_key039[] __initconst = {
+ 0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c,
+ 0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25,
+ 0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4,
+ 0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac
+};
+
+/* wycheproof - misc */
+static const u8 enc_input040[] __initconst = {
+ 0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e,
+ 0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd,
+ 0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f,
+ 0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f,
+ 0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88,
+ 0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76,
+ 0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d,
+ 0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82
+};
+static const u8 enc_output040[] __initconst = {
+ 0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5,
+ 0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8,
+ 0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c,
+ 0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8,
+ 0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc,
+ 0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33,
+ 0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd,
+ 0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50,
+ 0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56,
+ 0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13
+};
+static const u8 enc_assoc040[] __initconst = { };
+static const u8 enc_nonce040[] __initconst = {
+ 0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28,
+ 0x23, 0xcc, 0x06, 0x5b
+};
+static const u8 enc_key040[] __initconst = {
+ 0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0,
+ 0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8,
+ 0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30,
+ 0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01
+};
+
+/* wycheproof - misc */
+static const u8 enc_input041[] __initconst = {
+ 0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88,
+ 0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d,
+ 0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19,
+ 0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44,
+ 0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec,
+ 0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d,
+ 0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c,
+ 0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17
+};
+static const u8 enc_output041[] __initconst = {
+ 0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16,
+ 0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1,
+ 0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8,
+ 0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97,
+ 0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84,
+ 0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3,
+ 0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20,
+ 0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e,
+ 0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14,
+ 0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec
+};
+static const u8 enc_assoc041[] __initconst = {
+ 0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2
+};
+static const u8 enc_nonce041[] __initconst = {
+ 0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d,
+ 0xe4, 0xeb, 0xb1, 0x9c
+};
+static const u8 enc_key041[] __initconst = {
+ 0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9,
+ 0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2,
+ 0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5,
+ 0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e
+};
+
+/* wycheproof - misc */
+static const u8 enc_input042[] __initconst = {
+ 0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba,
+ 0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58,
+ 0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06,
+ 0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64,
+ 0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5,
+ 0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74,
+ 0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61,
+ 0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 0x9e,
+ 0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6,
+ 0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd,
+ 0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4,
+ 0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5,
+ 0x92
+};
+static const u8 enc_output042[] __initconst = {
+ 0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97,
+ 0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf,
+ 0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98,
+ 0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5,
+ 0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45,
+ 0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10,
+ 0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28,
+ 0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe,
+ 0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c,
+ 0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a,
+ 0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2,
+ 0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3,
+ 0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b,
+ 0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b,
+ 0xb0
+};
+static const u8 enc_assoc042[] __initconst = { };
+static const u8 enc_nonce042[] __initconst = {
+ 0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03,
+ 0xac, 0xde, 0x27, 0x99
+};
+static const u8 enc_key042[] __initconst = {
+ 0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28,
+ 0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d,
+ 0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a,
+ 0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01
+};
+
+/* wycheproof - misc */
+static const u8 enc_input043[] __initconst = {
+ 0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff,
+ 0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc,
+ 0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc,
+ 0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5,
+ 0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd,
+ 0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55,
+ 0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85,
+ 0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b,
+ 0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3,
+ 0x3f, 0xc5, 0xfc, 0x24, 0xa9, 0xa2, 0x05, 0xad,
+ 0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92,
+ 0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31,
+ 0x76
+};
+static const u8 enc_output043[] __initconst = {
+ 0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5,
+ 0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06,
+ 0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e,
+ 0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae,
+ 0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37,
+ 0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8,
+ 0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3,
+ 0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d,
+ 0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70,
+ 0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07,
+ 0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6,
+ 0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54,
+ 0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5,
+ 0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac,
+ 0xf6
+};
+static const u8 enc_assoc043[] __initconst = {
+ 0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30
+};
+static const u8 enc_nonce043[] __initconst = {
+ 0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63,
+ 0xe5, 0x22, 0x94, 0x60
+};
+static const u8 enc_key043[] __initconst = {
+ 0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c,
+ 0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4,
+ 0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b,
+ 0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e
+};
+
+/* wycheproof - misc */
+static const u8 enc_input044[] __initconst = {
+ 0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68,
+ 0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2
+};
+static const u8 enc_output044[] __initconst = {
+ 0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6,
+ 0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b,
+ 0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02,
+ 0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4
+};
+static const u8 enc_assoc044[] __initconst = {
+ 0x02
+};
+static const u8 enc_nonce044[] __initconst = {
+ 0x87, 0x34, 0x5f, 0x10, 0x55, 0xfd, 0x9e, 0x21,
+ 0x02, 0xd5, 0x06, 0x56
+};
+static const u8 enc_key044[] __initconst = {
+ 0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32,
+ 0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50,
+ 0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0,
+ 0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c
+};
+
+/* wycheproof - misc */
+static const u8 enc_input045[] __initconst = {
+ 0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44,
+ 0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8
+};
+static const u8 enc_output045[] __initconst = {
+ 0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1,
+ 0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60,
+ 0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1,
+ 0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58
+};
+static const u8 enc_assoc045[] __initconst = {
+ 0xb6, 0x48
+};
+static const u8 enc_nonce045[] __initconst = {
+ 0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9,
+ 0x5b, 0x3a, 0xa7, 0x13
+};
+static const u8 enc_key045[] __initconst = {
+ 0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41,
+ 0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0,
+ 0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44,
+ 0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc
+};
+
+/* wycheproof - misc */
+static const u8 enc_input046[] __initconst = {
+ 0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23,
+ 0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47
+};
+static const u8 enc_output046[] __initconst = {
+ 0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda,
+ 0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1,
+ 0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b,
+ 0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8
+};
+static const u8 enc_assoc046[] __initconst = {
+ 0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd,
+ 0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0
+};
+static const u8 enc_nonce046[] __initconst = {
+ 0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b,
+ 0xa4, 0x65, 0x96, 0xdf
+};
+static const u8 enc_key046[] __initconst = {
+ 0x8e, 0x34, 0xcf, 0x73, 0xd2, 0x45, 0xa1, 0x08,
+ 0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96,
+ 0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89,
+ 0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f
+};
+
+/* wycheproof - misc */
+static const u8 enc_input047[] __initconst = {
+ 0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf,
+ 0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2
+};
+static const u8 enc_output047[] __initconst = {
+ 0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb,
+ 0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95,
+ 0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34,
+ 0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f
+};
+static const u8 enc_assoc047[] __initconst = {
+ 0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07,
+ 0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b
+};
+static const u8 enc_nonce047[] __initconst = {
+ 0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6,
+ 0x80, 0x92, 0x66, 0xd9
+};
+static const u8 enc_key047[] __initconst = {
+ 0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91,
+ 0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23,
+ 0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6,
+ 0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16
+};
+
+/* wycheproof - misc */
+static const u8 enc_input048[] __initconst = {
+ 0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32,
+ 0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3
+};
+static const u8 enc_output048[] __initconst = {
+ 0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b,
+ 0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68,
+ 0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84,
+ 0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3
+};
+static const u8 enc_assoc048[] __initconst = {
+ 0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab,
+ 0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c,
+ 0x0e
+};
+static const u8 enc_nonce048[] __initconst = {
+ 0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40,
+ 0xfc, 0x10, 0x68, 0xc3
+};
+static const u8 enc_key048[] __initconst = {
+ 0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b,
+ 0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97,
+ 0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b,
+ 0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57
+};
+
+/* wycheproof - misc */
+static const u8 enc_input049[] __initconst = {
+ 0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2,
+ 0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6
+};
+static const u8 enc_output049[] __initconst = {
+ 0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87,
+ 0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11,
+ 0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e,
+ 0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6
+};
+static const u8 enc_assoc049[] __initconst = {
+ 0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8,
+ 0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94,
+ 0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5,
+ 0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda
+};
+static const u8 enc_nonce049[] __initconst = {
+ 0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf,
+ 0xad, 0x14, 0xd5, 0x3e
+};
+static const u8 enc_key049[] __initconst = {
+ 0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d,
+ 0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc,
+ 0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47,
+ 0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0
+};
+
+/* wycheproof - misc */
+static const u8 enc_input050[] __initconst = {
+ 0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18,
+ 0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c
+};
+static const u8 enc_output050[] __initconst = {
+ 0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6,
+ 0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca,
+ 0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b,
+ 0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04
+};
+static const u8 enc_assoc050[] __initconst = {
+ 0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22,
+ 0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07,
+ 0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90,
+ 0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37
+};
+static const u8 enc_nonce050[] __initconst = {
+ 0x8d, 0xf4, 0xb1, 0x5a, 0x88, 0x8c, 0x33, 0x28,
+ 0x6a, 0x7b, 0x76, 0x51
+};
+static const u8 enc_key050[] __initconst = {
+ 0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1,
+ 0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c,
+ 0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a,
+ 0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30
+};
+
+/* wycheproof - misc */
+static const u8 enc_input051[] __initconst = {
+ 0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59,
+ 0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6
+};
+static const u8 enc_output051[] __initconst = {
+ 0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70,
+ 0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd,
+ 0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4,
+ 0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43
+};
+static const u8 enc_assoc051[] __initconst = {
+ 0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92,
+ 0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57,
+ 0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75,
+ 0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88,
+ 0x73
+};
+static const u8 enc_nonce051[] __initconst = {
+ 0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0,
+ 0xa8, 0xfa, 0x89, 0x49
+};
+static const u8 enc_key051[] __initconst = {
+ 0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27,
+ 0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74,
+ 0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c,
+ 0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99
+};
+
+/* wycheproof - misc */
+static const u8 enc_input052[] __initconst = {
+ 0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3,
+ 0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed
+};
+static const u8 enc_output052[] __initconst = {
+ 0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc,
+ 0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b,
+ 0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed,
+ 0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32
+};
+static const u8 enc_assoc052[] __initconst = {
+ 0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a,
+ 0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 0x14,
+ 0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae,
+ 0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c,
+ 0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92,
+ 0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61
+};
+static const u8 enc_nonce052[] __initconst = {
+ 0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73,
+ 0x0e, 0xc3, 0x5d, 0x12
+};
+static const u8 enc_key052[] __initconst = {
+ 0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5,
+ 0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf,
+ 0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c,
+ 0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4
+};
+
/* wycheproof - misc */
static const u8 enc_input053[] __initconst = {
0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
@@ -2759,6 +3858,126 @@ static const u8 enc_key073[] __initconst = {
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
};
+/* wycheproof - checking for int overflows */
+static const u8 enc_input074[] __initconst = {
+ 0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51,
+ 0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69,
+ 0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f,
+ 0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90,
+ 0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0,
+ 0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac,
+ 0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2,
+ 0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5,
+ 0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b,
+ 0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49,
+ 0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16,
+ 0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58,
+ 0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73,
+ 0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3,
+ 0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c,
+ 0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a
+};
+static const u8 enc_output074[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85,
+ 0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca
+};
+static const u8 enc_assoc074[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce074[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x00, 0x02, 0x50, 0x6e
+};
+static const u8 enc_key074[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
+/* wycheproof - checking for int overflows */
+static const u8 enc_input075[] __initconst = {
+ 0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75,
+ 0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56,
+ 0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2,
+ 0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed,
+ 0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f,
+ 0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f,
+ 0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0,
+ 0xb4, 0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8,
+ 0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32,
+ 0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8,
+ 0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b,
+ 0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b,
+ 0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd,
+ 0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f,
+ 0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1,
+ 0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94
+};
+static const u8 enc_output075[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7,
+ 0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5
+};
+static const u8 enc_assoc075[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_nonce075[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x00, 0x03, 0x18, 0xa5
+};
+static const u8 enc_key075[] __initconst = {
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
+};
+
/* wycheproof - checking for int overflows */
static const u8 enc_input076[] __initconst = {
0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85,
@@ -3349,6 +4568,286 @@ static const u8 enc_key085[] __initconst = {
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
};
+/* wycheproof - special case tag */
+static const u8 enc_input086[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output086[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+};
+static const u8 enc_assoc086[] __initconst = {
+ 0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1,
+ 0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00
+};
+static const u8 enc_nonce086[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key086[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input087[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output087[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_assoc087[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f,
+ 0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58
+};
+static const u8 enc_nonce087[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key087[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input088[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output088[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+static const u8 enc_assoc088[] __initconst = {
+ 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f,
+ 0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00
+};
+static const u8 enc_nonce088[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key088[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input089[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output089[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
+};
+static const u8 enc_assoc089[] __initconst = {
+ 0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae,
+ 0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02
+};
+static const u8 enc_nonce089[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key089[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input090[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output090[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
+};
+static const u8 enc_assoc090[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe,
+ 0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3
+};
+static const u8 enc_nonce090[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key090[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input091[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output091[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00
+};
+static const u8 enc_assoc091[] __initconst = {
+ 0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30,
+ 0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01
+};
+static const u8 enc_nonce091[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key091[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
+/* wycheproof - special case tag */
+static const u8 enc_input092[] __initconst = {
+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
+};
+static const u8 enc_output092[] __initconst = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static const u8 enc_assoc092[] __initconst = {
+ 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11,
+ 0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01
+};
+static const u8 enc_nonce092[] __initconst = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b
+};
+static const u8 enc_key092[] __initconst = {
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+};
+
/* wycheproof - edge case intermediate sums in poly1305 */
static const u8 enc_input093[] __initconst = {
0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d,
@@ -4455,6 +5954,86 @@ chacha20poly1305_enc_vectors[] __initconst = {
sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) },
{ enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012,
sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) },
+ { enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013,
+ sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) },
+ { enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014,
+ sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) },
+ { enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015,
+ sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) },
+ { enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016,
+ sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) },
+ { enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017,
+ sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) },
+ { enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018,
+ sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) },
+ { enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019,
+ sizeof(enc_input019), sizeof(enc_assoc019), sizeof(enc_nonce019) },
+ { enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020,
+ sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) },
+ { enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021,
+ sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) },
+ { enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022,
+ sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) },
+ { enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023,
+ sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) },
+ { enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024,
+ sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) },
+ { enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025,
+ sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) },
+ { enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026,
+ sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) },
+ { enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027,
+ sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) },
+ { enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028,
+ sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) },
+ { enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029,
+ sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) },
+ { enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030,
+ sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) },
+ { enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031,
+ sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) },
+ { enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032,
+ sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) },
+ { enc_input033, enc_output033, enc_assoc033, enc_nonce033, enc_key033,
+ sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) },
+ { enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034,
+ sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) },
+ { enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035,
+ sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) },
+ { enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036,
+ sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) },
+ { enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037,
+ sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) },
+ { enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038,
+ sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) },
+ { enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039,
+ sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) },
+ { enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040,
+ sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) },
+ { enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041,
+ sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) },
+ { enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042,
+ sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) },
+ { enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043,
+ sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) },
+ { enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044,
+ sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) },
+ { enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045,
+ sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) },
+ { enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046,
+ sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) },
+ { enc_input047, enc_output047, enc_assoc047, enc_nonce047, enc_key047,
+ sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) },
+ { enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048,
+ sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) },
+ { enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049,
+ sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) },
+ { enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050,
+ sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) },
+ { enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051,
+ sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) },
+ { enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052,
+ sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) },
{ enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053,
sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) },
{ enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054,
@@ -4497,6 +6076,10 @@ chacha20poly1305_enc_vectors[] __initconst = {
sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) },
{ enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073,
sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) },
+ { enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074,
+ sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) },
+ { enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075,
+ sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) },
{ enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076,
sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) },
{ enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077,
@@ -4517,6 +6100,20 @@ chacha20poly1305_enc_vectors[] __initconst = {
sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) },
{ enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085,
sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) },
+ { enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086,
+ sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) },
+ { enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087,
+ sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) },
+ { enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088,
+ sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) },
+ { enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089,
+ sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) },
+ { enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090,
+ sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) },
+ { enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091,
+ sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) },
+ { enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092,
+ sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) },
{ enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093,
sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) },
{ enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094,
@@ -7224,6 +8821,43 @@ xchacha20poly1305_dec_vectors[] __initconst = {
sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) }
};
+/* This is for the selftests-only, since it is only useful for the purpose of
+ * testing the underlying primitives and interactions.
+ */
+static void __init
+chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 nonce[12],
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ const u8 *pad0 = page_address(ZERO_PAGE(0));
+ struct poly1305_desc_ctx poly1305_state;
+ u32 chacha20_state[CHACHA_STATE_WORDS];
+ union {
+ u8 block0[POLY1305_KEY_SIZE];
+ __le64 lens[2];
+ } b = {{ 0 }};
+ u8 bottom_row[16] = { 0 };
+ u32 le_key[8];
+ int i;
+
+ memcpy(&bottom_row[4], nonce, 12);
+ for (i = 0; i < 8; ++i)
+ le_key[i] = get_unaligned_le32(key + sizeof(le_key[i]) * i);
+ chacha_init(chacha20_state, le_key, bottom_row);
+ chacha20_crypt(chacha20_state, b.block0, b.block0, sizeof(b.block0));
+ poly1305_init(&poly1305_state, b.block0);
+ poly1305_update(&poly1305_state, ad, ad_len);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
+ chacha20_crypt(chacha20_state, dst, src, src_len);
+ poly1305_update(&poly1305_state, dst, src_len);
+ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
+ b.lens[0] = cpu_to_le64(ad_len);
+ b.lens[1] = cpu_to_le64(src_len);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
+ poly1305_final(&poly1305_state, dst + src_len);
+}
+
static void __init
chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
@@ -7233,6 +8867,9 @@ chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
if (nonce_len == 8)
chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
get_unaligned_le64(nonce), key);
+ else if (nonce_len == 12)
+ chacha20poly1305_encrypt_bignonce(dst, src, src_len, ad,
+ ad_len, nonce, key);
else
BUG();
}
@@ -7248,14 +8885,14 @@ decryption_success(bool func_ret, bool expect_failure, int memcmp_result)
bool __init chacha20poly1305_selftest(void)
{
enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
- size_t i;
- u8 *computed_output = NULL, *heap_src = NULL;
- struct scatterlist sg_src;
+ size_t i, j, k, total_len;
+ u8 *computed_output = NULL, *input = NULL;
bool success = true, ret;
+ struct scatterlist sg_src[3];
- heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
- if (!heap_src || !computed_output) {
+ input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
+ if (!computed_output || !input) {
pr_err("chacha20poly1305 self-test malloc: FAIL\n");
success = false;
goto out;
@@ -7284,17 +8921,17 @@ bool __init chacha20poly1305_selftest(void)
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
if (chacha20poly1305_enc_vectors[i].nlen != 8)
continue;
- memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
+ memcpy(computed_output, chacha20poly1305_enc_vectors[i].input,
chacha20poly1305_enc_vectors[i].ilen);
- sg_init_one(&sg_src, heap_src,
+ sg_init_one(sg_src, computed_output,
chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
- chacha20poly1305_encrypt_sg_inplace(&sg_src,
+ ret = chacha20poly1305_encrypt_sg_inplace(sg_src,
chacha20poly1305_enc_vectors[i].ilen,
chacha20poly1305_enc_vectors[i].assoc,
chacha20poly1305_enc_vectors[i].alen,
get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
chacha20poly1305_enc_vectors[i].key);
- if (memcmp(heap_src,
+ if (!ret || memcmp(computed_output,
chacha20poly1305_enc_vectors[i].output,
chacha20poly1305_enc_vectors[i].ilen +
POLY1305_DIGEST_SIZE)) {
@@ -7326,11 +8963,11 @@ bool __init chacha20poly1305_selftest(void)
}
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
- memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
+ memcpy(computed_output, chacha20poly1305_dec_vectors[i].input,
chacha20poly1305_dec_vectors[i].ilen);
- sg_init_one(&sg_src, heap_src,
+ sg_init_one(sg_src, computed_output,
chacha20poly1305_dec_vectors[i].ilen);
- ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
+ ret = chacha20poly1305_decrypt_sg_inplace(sg_src,
chacha20poly1305_dec_vectors[i].ilen,
chacha20poly1305_dec_vectors[i].assoc,
chacha20poly1305_dec_vectors[i].alen,
@@ -7338,7 +8975,7 @@ bool __init chacha20poly1305_selftest(void)
chacha20poly1305_dec_vectors[i].key);
if (!decryption_success(ret,
chacha20poly1305_dec_vectors[i].failure,
- memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
+ memcmp(computed_output, chacha20poly1305_dec_vectors[i].output,
chacha20poly1305_dec_vectors[i].ilen -
POLY1305_DIGEST_SIZE))) {
pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
@@ -7365,6 +9002,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
+
for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
ret = xchacha20poly1305_decrypt(computed_output,
@@ -7386,8 +9024,54 @@ bool __init chacha20poly1305_selftest(void)
}
}
+ for (total_len = POLY1305_DIGEST_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST)
+ && total_len <= 1 << 10; ++total_len) {
+ for (i = 0; i <= total_len; ++i) {
+ for (j = i; j <= total_len; ++j) {
+ sg_init_table(sg_src, 3);
+ sg_set_buf(&sg_src[0], input, i);
+ sg_set_buf(&sg_src[1], input + i, j - i);
+ sg_set_buf(&sg_src[2], input + j, total_len - j);
+ memset(computed_output, 0, total_len);
+ memset(input, 0, total_len);
+
+ if (!chacha20poly1305_encrypt_sg_inplace(sg_src,
+ total_len - POLY1305_DIGEST_SIZE, NULL, 0,
+ 0, enc_key001))
+ goto chunkfail;
+ chacha20poly1305_encrypt(computed_output,
+ computed_output,
+ total_len - POLY1305_DIGEST_SIZE, NULL, 0, 0,
+ enc_key001);
+ if (memcmp(computed_output, input, total_len))
+ goto chunkfail;
+ if (!chacha20poly1305_decrypt(computed_output,
+ input, total_len, NULL, 0, 0, enc_key001))
+ goto chunkfail;
+ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) {
+ if (computed_output[k])
+ goto chunkfail;
+ }
+ if (!chacha20poly1305_decrypt_sg_inplace(sg_src,
+ total_len, NULL, 0, 0, enc_key001))
+ goto chunkfail;
+ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) {
+ if (input[k])
+ goto chunkfail;
+ }
+ continue;
+
+ chunkfail:
+ pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n",
+ total_len, i, j);
+ success = false;
+ }
+
+ }
+ }
+
out:
- kfree(heap_src);
kfree(computed_output);
+ kfree(input);
return success;
}
--
2.18.2
From 1cb521f8391f1fa9eaaf55c56d9ecdc97030f5d9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 17 Jan 2020 11:42:22 +0100
Subject: [PATCH 048/100] crypto: x86/poly1305 - emit does base conversion
itself
commit f9e7fe32a792726186301423ff63a465d63386e1 upstream.
The emit code does optional base conversion itself in assembly, so we
don't need to do that here. Also, neither one of these functions uses
simd instructions, so checking for that doesn't make sense either.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/poly1305_glue.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 657363588e0c..79bb58737d52 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -123,13 +123,9 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
const u32 nonce[4])
{
- struct poly1305_arch_internal *state = ctx;
-
- if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
- !state->is_base2_26 || !crypto_simd_usable()) {
- convert_to_base2_64(ctx);
+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx))
poly1305_emit_x86_64(ctx, mac, nonce);
- } else
+ else
poly1305_emit_avx(ctx, mac, nonce);
}
--
2.18.2
From 286fdbb5f1330fe73266f14df8398eac1c6db2d3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 17 Jan 2020 17:43:18 +0100
Subject: [PATCH 049/100] crypto: arm/chacha - fix build failured when kernel
mode NEON is disabled
commit 0bc81767c5bd9d005fae1099fb39eb3688370cb1 upstream.
When the ARM accelerated ChaCha driver is built as part of a configuration
that has kernel mode NEON disabled, we expect the compiler to propagate
the build time constant expression IS_ENABLED(CONFIG_KERNEL_MODE_NEON) in
a way that eliminates all the cross-object references to the actual NEON
routines, which allows the chacha-neon-core.o object to be omitted from
the build entirely.
Unfortunately, this fails to work as expected in some cases, and we may
end up with a build error such as
chacha-glue.c:(.text+0xc0): undefined reference to `chacha_4block_xor_neon'
caused by the fact that chacha_doneon() has not been eliminated from the
object code, even though it will never be called in practice.
Let's fix this by adding some IS_ENABLED(CONFIG_KERNEL_MODE_NEON) tests
that are not strictly needed from a logical point of view, but should
help the compiler infer that the NEON code paths are unreachable in
those cases.
Fixes: b36d8c09e710c71f ("crypto: arm/chacha - remove dependency on generic ...")
Reported-by: Russell King <linux@armlinux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/chacha-glue.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 7bdf8823066d..893692ed12b7 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
- if (!neon) {
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
nbytes, state, ctx->nrounds);
state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
@@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_request *req, bool neon)
chacha_init_generic(state, ctx->key, req->iv);
- if (!neon) {
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
hchacha_block_arm(state, subctx.key, ctx->nrounds);
} else {
kernel_neon_begin();
--
2.18.2
From 0754cc145e417709e9be36785aeac0c4c4d5d5c1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 17 Jan 2020 12:01:36 +0100
Subject: [PATCH 050/100] crypto: Kconfig - allow tests to be disabled when
manager is disabled
commit 2343d1529aff8b552589f622c23932035ed7a05d upstream.
The library code uses CRYPTO_MANAGER_DISABLE_TESTS to conditionalize its
tests, but the library code can also exist without CRYPTO_MANAGER. That
means on minimal configs, the test code winds up being built with no way
to disable it.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/Kconfig | 4 ----
1 file changed, 4 deletions(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b8b738bcc312..8fcf630471dc 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -136,8 +136,6 @@ config CRYPTO_USER
Userspace configuration for cryptographic instantiations such as
cbc(aes).
-if CRYPTO_MANAGER2
-
config CRYPTO_MANAGER_DISABLE_TESTS
bool "Disable run-time self tests"
default y
@@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
-endif # if CRYPTO_MANAGER2
-
config CRYPTO_GF128MUL
tristate
--
2.18.2
From 6de0d949776ff25dda2069b744db5e011e33056c Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 6 Feb 2020 12:42:01 +0100
Subject: [PATCH 051/100] crypto: chacha20poly1305 - prevent integer overflow
on large input
commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream.
This code assigns src_len (size_t) to sl (int), which causes problems
when src_len is very large. Probably nobody in the kernel should be
passing this much data to chacha20poly1305 all in one go anyway, so I
don't think we need to change the algorithm or introduce larger types
or anything. But we should at least error out early in this case and
print a warning so that we get reports if this does happen and can look
into why anybody is possibly passing it that much data or if they're
accidently passing -1 or similar.
Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine")
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: stable@vger.kernel.org # 5.5+
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
lib/crypto/chacha20poly1305.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 6d83cafebc69..ad0699ce702f 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
__le64 lens[2];
} b __aligned(16);
+ if (WARN_ON(src_len > INT_MAX))
+ return false;
+
chacha_load_key(b.k, key);
b.iv[0] = 0;
--
2.18.2
From 214f482df388cc9b22d443813acfb8eeebb9b7c6 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 1 Mar 2020 22:52:35 +0800
Subject: [PATCH 052/100] crypto: x86/curve25519 - support assemblers with no
adx support
commit 1579f1bc3b753d17a44de3457d5c6f4a5b14c752 upstream.
Some older version of GAS do not support the ADX instructions, similarly
to how they also don't support AVX and such. This commit adds the same
build-time detection mechanisms we use for AVX and others for ADX, and
then makes sure that the curve25519 library dispatcher calls the right
functions.
Reported-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/Makefile | 5 +++--
arch/x86/crypto/Makefile | 7 ++++++-
include/crypto/curve25519.h | 6 ++++--
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94df0868804b..513a55562d75 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b69e00bf20b8..8c2e9eadee8a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+
+# These modules require the assembler to support ADX.
+ifeq ($(adx_supported),yes)
+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+endif
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
index 4e6dc840b159..9ecb3c1f0f15 100644
--- a/include/crypto/curve25519.h
+++ b/include/crypto/curve25519.h
@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_arch(mypublic, secret, basepoint);
else
curve25519_generic(mypublic, secret, basepoint);
@@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
CURVE25519_KEY_SIZE)))
return false;
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_base_arch(pub, secret);
else
curve25519_generic(pub, secret, curve25519_base_point);
--
2.18.2
From 232561dc8a8eb5139d8124cf5011e6db8fbb9e3f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 18 Mar 2020 20:27:32 -0600
Subject: [PATCH 053/100] crypto: arm64/chacha - correctly walk through blocks
commit c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 upstream.
Prior, passing in chunks of 2, 3, or 4, followed by any additional
chunks would result in the chacha state counter getting out of sync,
resulting in incorrect encryption/decryption, which is a pretty nasty
crypto vuln: "why do images look weird on webpages?" WireGuard users
never experienced this prior, because we have always, out of tree, used
a different crypto library, until the recent Frankenzinc addition. This
commit fixes the issue by advancing the pointers and state counter by
the actual size processed. It also fixes up a bug in the (optional,
costly) stride test that prevented it from running on arm64.
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Reported-and-tested-by: Emil Renner Berthing <kernel@esmil.dk>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: stable@vger.kernel.org # v5.5+
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm64/crypto/chacha-neon-glue.c | 8 ++++----
lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++---
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 71c11d2e9fcd..218943612261 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
break;
}
chacha_4block_xor_neon(state, dst, src, nrounds, l);
- bytes -= CHACHA_BLOCK_SIZE * 5;
- src += CHACHA_BLOCK_SIZE * 5;
- dst += CHACHA_BLOCK_SIZE * 5;
- state[12] += 5;
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
}
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index c391a91364e9..fa43deda2660 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void)
&& total_len <= 1 << 10; ++total_len) {
for (i = 0; i <= total_len; ++i) {
for (j = i; j <= total_len; ++j) {
+ k = 0;
sg_init_table(sg_src, 3);
- sg_set_buf(&sg_src[0], input, i);
- sg_set_buf(&sg_src[1], input + i, j - i);
- sg_set_buf(&sg_src[2], input + j, total_len - j);
+ if (i)
+ sg_set_buf(&sg_src[k++], input, i);
+ if (j - i)
+ sg_set_buf(&sg_src[k++], input + i, j - i);
+ if (total_len - j)
+ sg_set_buf(&sg_src[k++], input + j, total_len - j);
+ sg_init_marker(sg_src, k);
memset(computed_output, 0, total_len);
memset(input, 0, total_len);
--
2.18.2
From e0f9507b756594951d5d4ea1591387aa62d4caa0 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 20 Jan 2020 18:18:15 +0100
Subject: [PATCH 054/100] crypto: x86/curve25519 - replace with formally
verified implementation
commit 07b586fe06625b0b610dc3d3a969c51913d143d4 upstream.
This comes from INRIA's HACL*/Vale. It implements the same algorithm and
implementation strategy as the code it replaces, only this code has been
formally verified, sans the base point multiplication, which uses code
similar to prior, only it uses the formally verified field arithmetic
alongside reproducable ladder generation steps. This doesn't have a
pure-bmi2 version, which means haswell no longer benefits, but the
increased (doubled) code complexity is not worth it for a single
generation of chips that's already old.
Performance-wise, this is around 1% slower on older microarchitectures,
and slightly faster on newer microarchitectures, mainly 10nm ones or
backports of 10nm to 14nm. This implementation is "everest" below:
Xeon E5-2680 v4 (Broadwell)
armfazh: 133340 cycles per call
everest: 133436 cycles per call
Xeon Gold 5120 (Sky Lake Server)
armfazh: 112636 cycles per call
everest: 113906 cycles per call
Core i5-6300U (Sky Lake Client)
armfazh: 116810 cycles per call
everest: 117916 cycles per call
Core i7-7600U (Kaby Lake)
armfazh: 119523 cycles per call
everest: 119040 cycles per call
Core i7-8750H (Coffee Lake)
armfazh: 113914 cycles per call
everest: 113650 cycles per call
Core i9-9880H (Coffee Lake Refresh)
armfazh: 112616 cycles per call
everest: 114082 cycles per call
Core i3-8121U (Cannon Lake)
armfazh: 113202 cycles per call
everest: 111382 cycles per call
Core i7-8265U (Whiskey Lake)
armfazh: 127307 cycles per call
everest: 127697 cycles per call
Core i7-8550U (Kaby Lake Refresh)
armfazh: 127522 cycles per call
everest: 127083 cycles per call
Xeon Platinum 8275CL (Cascade Lake)
armfazh: 114380 cycles per call
everest: 114656 cycles per call
Achieving these kind of results with formally verified code is quite
remarkable, especialy considering that performance is favorable for
newer chips.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/curve25519-x86_64.c | 3546 ++++++++++-----------------
1 file changed, 1292 insertions(+), 2254 deletions(-)
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index eec7d2d24239..e4e58b8e9afe 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -1,8 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
- * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
*/
#include <crypto/curve25519.h>
@@ -16,2337 +15,1378 @@
#include <asm/cpufeature.h>
#include <asm/processor.h>
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
-
-enum { NUM_WORDS_ELTFP25519 = 4 };
-typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
-typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
-
-#define mul_eltfp25519_1w_adx(c, a, b) do { \
- mul_256x256_integer_adx(m.buffer, a, b); \
- red_eltfp25519_1w_adx(c, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
- mul_256x256_integer_bmi2(m.buffer, a, b); \
- red_eltfp25519_1w_bmi2(c, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_1w_adx(a) do { \
- sqr_256x256_integer_adx(m.buffer, a); \
- red_eltfp25519_1w_adx(a, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_1w_bmi2(a) do { \
- sqr_256x256_integer_bmi2(m.buffer, a); \
- red_eltfp25519_1w_bmi2(a, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_2w_adx(c, a, b) do { \
- mul2_256x256_integer_adx(m.buffer, a, b); \
- red_eltfp25519_2w_adx(c, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
- mul2_256x256_integer_bmi2(m.buffer, a, b); \
- red_eltfp25519_2w_bmi2(c, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_2w_adx(a) do { \
- sqr2_256x256_integer_adx(m.buffer, a); \
- red_eltfp25519_2w_adx(a, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_2w_bmi2(a) do { \
- sqr2_256x256_integer_bmi2(m.buffer, a); \
- red_eltfp25519_2w_bmi2(a, m.buffer); \
-} while (0)
-
-#define sqrn_eltfp25519_1w_adx(a, times) do { \
- int ____counter = (times); \
- while (____counter-- > 0) \
- sqr_eltfp25519_1w_adx(a); \
-} while (0)
-
-#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
- int ____counter = (times); \
- while (____counter-- > 0) \
- sqr_eltfp25519_1w_bmi2(a); \
-} while (0)
-
-#define copy_eltfp25519_1w(C, A) do { \
- (C)[0] = (A)[0]; \
- (C)[1] = (A)[1]; \
- (C)[2] = (A)[2]; \
- (C)[3] = (A)[3]; \
-} while (0)
-
-#define setzero_eltfp25519_1w(C) do { \
- (C)[0] = 0; \
- (C)[1] = 0; \
- (C)[2] = 0; \
- (C)[3] = 0; \
-} while (0)
-
-__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
- /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
- 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
- /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
- 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
- /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
- 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
- /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
- 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
- /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
- 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
- /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
- 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
- /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
- 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
- /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
- 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
- /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
- 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
- /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
- 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
- /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
- 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
- /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
- 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
- /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
- 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
- /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
- 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
- /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
- 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
- /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
- 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
- /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
- 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
- /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
- 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
- /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
- 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
- /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
- 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
- /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
- 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
- /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
- 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
- /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
- 0x23758739f630a257UL, 0x295a407a01a78580UL,
- /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
- 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
- /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
- 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
- /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
- 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
- /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
- 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
- /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
- 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
- /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
- 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
- /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
- 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
- /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
- 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
- /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
- 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
- /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
- 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
- /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
- 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
- /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
- 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
- /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
- 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
- /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
- 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
- /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
- 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
- /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
- 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
- /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
- 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
- /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
- 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
- /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
- 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
- /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
- 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
- /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
- 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
- /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
- 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
- /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
- 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
- /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
- 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
- /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
- 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
- /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
- 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
- /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
- 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
- /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
- 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
- /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
- 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
- /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
- 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
- /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
- 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
- /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
- 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
- /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
- 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
- /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
- 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
- /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
- 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
- /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
- 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
- /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
- 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
- /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
- 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
- /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
- 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
- /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
- 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
- /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
- 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
- /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
- 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
- /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
- 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
- /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
- 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
- /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
- 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
- /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
- 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
- /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
- 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
- /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
- 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
- /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
- 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
- /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
- 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
- /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
- 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
- /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
- 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
- /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
- 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
- /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
- 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
- /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
- 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
- /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
- 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
- /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
- 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
- /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
- 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
- /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
- 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
- /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
- 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
- /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
- 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
- /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
- 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
- /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
- 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
- /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
- 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
- /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
- 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
- /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
- 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
- /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
- 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
- /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
- 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
- /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
- 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
- /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
- 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
- /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
- 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
- /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
- 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
- /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
- 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
- /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
- 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
- /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
- 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
- /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
- 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
- /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
- 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
- /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
- 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
- /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
- 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
- /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
- 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
- /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
- 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
- /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
- 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
- /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
- 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
- /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
- 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
- /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
- 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
- /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
- 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
- /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
- 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
- /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
- 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
- /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
- 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
- /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
- 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
- /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
- 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
- /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
- 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
- /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
- 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
- /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
- 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
- /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
- 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
- /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
- 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
- /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
- 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
- /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
- 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
- /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
- 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
- /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
- 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
- /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
- 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
- /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
- 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
- /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
- 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
- /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
- 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
- /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
- 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
- /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
- 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
- /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
- 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
- /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
- 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
- /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
- 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
- /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
- 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
- /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
- 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
- /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
- 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
- /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
- 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
- /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
- 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
- /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
- 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
- /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
- 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
- /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
- 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
- /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
- 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
- /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
- 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
- /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
- 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
- /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
- 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
- /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
- 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
- /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
- 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
- /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
- 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
- /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
- 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
- /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
- 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
- /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
- 0x52d17436309d4253UL, 0x356f97e13efae576UL,
- /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
- 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
- /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
- 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
- /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
- 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
- /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
- 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
- /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
- 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
- /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
- 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
- /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
- 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
- /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
- 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
- /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
- 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
- /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
- 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
- /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
- 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
- /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
- 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
- /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
- 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
- /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
- 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
- /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
- 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
- /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
- 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
- /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
- 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
- /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
- 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
- /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
- 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
- /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
- 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
- /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
- 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
- /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
- 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
- /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
- 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
- /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
- 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
- /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
- 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
- /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
- 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
- /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
- 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
- /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
- 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
- /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
- 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
- /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
- 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
- /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
- 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
- /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
- 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
- /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
- 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
- /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
- 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
- /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
- 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
- /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
- 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
- /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
- 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
- /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
- 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
- /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
- 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
- /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
- 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
- /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
- 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
- /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
- 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
- /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
- 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
- /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
- 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
- /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
- 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
- /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
- 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
- /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
- 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
- /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
- 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
- /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
- 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
- /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
- 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
- /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
- 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
- /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
- 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
- /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
- 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
- /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
- 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
- /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
- 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
- /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
- 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
- /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
- 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
- /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
- 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
- /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
- 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
- /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
- 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
- /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
- 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
- /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
- 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
- /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
- 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
- /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
- 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
- /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
- 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
- /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
- 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
- /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
- 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
- /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
- 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
- /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
- 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
- /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
- 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
- /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
- 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
- /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
- 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
- /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
- 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
- /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
- 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
- /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
- 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
- /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
- 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
- /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
- 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
- /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
- 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
- /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
- 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
- /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
- 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
- /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
- 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
- /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
- 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
- /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
- 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
- /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
- 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
- /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
- 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
- /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
- 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
- /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
- 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
- /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
- 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
- /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
- 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
- /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
- 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
- /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
- 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
- /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
- 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
- /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
- 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
- /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
- 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
- /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
- 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
- /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
- 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
- /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
- 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
- /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
- 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
- /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
- 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
- /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
- 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
- /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
- 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
- /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
- 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
-};
-
-/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
- * a is two 256-bit integers: a0[0:3] and a1[4:7]
- * b is two 256-bit integers: b0[0:3] and b1[4:7]
- */
-static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
- const u64 *const b)
-{
- asm volatile(
- "xorl %%r14d, %%r14d ;"
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "adox %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adox %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adox %%r10, %%rbx ;"
- /******************************************/
- "adox %%r14, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "adox %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rax ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rbx ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rcx ;"
- /******************************************/
- "adox %%r14, %%r15 ;"
- "adcx %%r14, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "xorl %%r10d, %%r10d ;"
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "adox %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rbx ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rcx ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%r15 ;"
- /******************************************/
- "adox %%r14, %%rax ;"
- "adcx %%r14, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "xorl %%r10d, %%r10d ;"
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "adox %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- /******************************************/
- "adox %%r14, %%rbx ;"
- "adcx %%r14, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
-
- "movq 32(%1), %%rdx; " /* C[0] */
- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, 64(%0);"
- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
- "adox %%r10, %%r15 ;"
- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
- "adox %%r8, %%rax ;"
- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
- "adox %%r10, %%rbx ;"
- /******************************************/
- "adox %%r14, %%rcx ;"
-
- "movq 40(%1), %%rdx; " /* C[1] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
- "adox %%r15, %%r8 ;"
- "movq %%r8, 72(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rax ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rbx ;"
- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rcx ;"
- /******************************************/
- "adox %%r14, %%r15 ;"
- "adcx %%r14, %%r15 ;"
-
- "movq 48(%1), %%rdx; " /* C[2] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
- "adox %%rax, %%r8 ;"
- "movq %%r8, 80(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rbx ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rcx ;"
- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%r15 ;"
- /******************************************/
- "adox %%r14, %%rax ;"
- "adcx %%r14, %%rax ;"
-
- "movq 56(%1), %%rdx; " /* C[3] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
- "adox %%rbx, %%r8 ;"
- "movq %%r8, 88(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rcx ;"
- "movq %%rcx, 96(%0) ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rax ;"
- "movq %%rax, 112(%0) ;"
- /******************************************/
- "adox %%r14, %%rbx ;"
- "adcx %%r14, %%rbx ;"
- "movq %%rbx, 120(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
-}
-
-static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
- const u64 *const b)
+static __always_inline u64 eq_mask(u64 a, u64 b)
{
- asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "addq %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adcq %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
-
- "movq 32(%1), %%rdx; " /* C[0] */
- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
- "movq %%r8, 64(%0) ;"
- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
- "addq %%r10, %%r15 ;"
- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
- "adcq %%r8, %%rax ;"
- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 40(%1), %%rdx; " /* C[1] */
- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 48(%1), %%rdx; " /* C[2] */
- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 80(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 56(%1), %%rdx; " /* C[3] */
- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 88(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 96(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 112(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 120(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r15");
+ u64 x = a ^ b;
+ u64 minus_x = ~x + (u64)1U;
+ u64 x_or_minus_x = x | minus_x;
+ u64 xnx = x_or_minus_x >> (u32)63U;
+ return xnx - (u64)1U;
}
-static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+static __always_inline u64 gte_mask(u64 a, u64 b)
{
- asm volatile(
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
- "adcx %%rax, %%r10 ;"
- "movq 24(%1), %%rdx ;" /* A[3] */
- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
- "adcx %%rax, %%rbx ;"
- "movq 8(%1), %%rdx ;" /* A[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq (%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
-
-
- "movq 32(%1), %%rdx ;" /* B[0] */
- "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
- "adcx %%rax, %%r10 ;"
- "movq 56(%1), %%rdx ;" /* B[3] */
- "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
- "adcx %%rax, %%rbx ;"
- "movq 40(%1), %%rdx ;" /* B[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq 32(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
- /*******************/
- "movq %%rax, 64(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "movq 40(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 80(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 88(%0) ;"
- "movq 48(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 96(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 104(%0) ;"
- "movq 56(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 112(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 120(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
+ u64 x = a;
+ u64 y = b;
+ u64 x_xor_y = x ^ y;
+ u64 x_sub_y = x - y;
+ u64 x_sub_y_xor_y = x_sub_y ^ y;
+ u64 q = x_xor_y | x_sub_y_xor_y;
+ u64 x_xor_q = x ^ q;
+ u64 x_xor_q_ = x_xor_q >> (u32)63U;
+ return x_xor_q_ - (u64)1U;
}
-static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+/* Computes the addition of four-element f1 with value in f2
+ * and returns the carry (if any) */
+static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
{
- asm volatile(
- "movq 8(%1), %%rdx ;" /* A[1] */
- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
-
- "movq 16(%1), %%rdx ;" /* A[2] */
- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
-
- "movq 40(%1), %%rdx ;" /* B[1] */
- "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
- "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
- "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
-
- "movq 48(%1), %%rdx ;" /* B[2] */
- "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
- "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq 32(%1), %%rdx ;" /* B[0] */
- "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
- /*******************/
- "movq %%rax, 64(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "movq 40(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 80(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 88(%0) ;"
- "movq 48(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 96(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "movq 56(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 112(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 120(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11", "%r13", "%r14", "%r15");
-}
+ u64 carry_r;
-static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
-{
asm volatile(
- "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox (%1), %%r8 ;"
- "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 8(%1), %%r9 ;"
- "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 16(%1), %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 24(%1), %%r11 ;"
- /***************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
-
- "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox 64(%1), %%r8 ;"
- "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 72(%1), %%r9 ;"
- "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 80(%1), %%r10 ;"
- "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 88(%1), %%r11 ;"
- /****************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 40(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 48(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 56(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 32(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11");
-}
+ /* Clear registers to propagate the carry bit */
+ " xor %%r8, %%r8;"
+ " xor %%r9, %%r9;"
+ " xor %%r10, %%r10;"
+ " xor %%r11, %%r11;"
+ " xor %1, %1;"
+
+ /* Begin addition chain */
+ " addq 0(%3), %0;"
+ " movq %0, 0(%2);"
+ " adcxq 8(%3), %%r8;"
+ " movq %%r8, 8(%2);"
+ " adcxq 16(%3), %%r9;"
+ " movq %%r9, 16(%2);"
+ " adcxq 24(%3), %%r10;"
+ " movq %%r10, 24(%2);"
+
+ /* Return the carry bit in a register */
+ " adcx %%r11, %1;"
+ : "+&r" (f2), "=&r" (carry_r)
+ : "r" (out), "r" (f1)
+ : "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
-static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
-{
- asm volatile(
- "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /***************************************/
- "adcq $0, %%rcx ;"
- "addq (%1), %%r8 ;"
- "adcq 8(%1), %%r9 ;"
- "adcq 16(%1), %%r10 ;"
- "adcq 24(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
-
- "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /****************************************/
- "adcq $0, %%rcx ;"
- "addq 64(%1), %%r8 ;"
- "adcq 72(%1), %%r9 ;"
- "adcq 80(%1), %%r10 ;"
- "adcq 88(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 40(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 48(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 56(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 32(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ return carry_r;
}
-static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
- const u64 *const b)
+/* Computes the field addition of two field elements */
+static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
{
asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
- "adox %%r9, %%r10 ;"
- "movq %%r10, 8(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
- "adox %%r11, %%r15 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
- "adox %%r13, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 8(%0), %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 16(%0), %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 24(%0), %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 32(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq %%r14, 48(%0) ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
- "movq %%rax, 56(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
- "%r13", "%r14", "%r15");
+ /* Compute the raw addition of f1 + f2 */
+ " movq 0(%0), %%r8;"
+ " addq 0(%2), %%r8;"
+ " movq 8(%0), %%r9;"
+ " adcxq 8(%2), %%r9;"
+ " movq 16(%0), %%r10;"
+ " adcxq 16(%2), %%r10;"
+ " movq 24(%0), %%r11;"
+ " adcxq 24(%2), %%r11;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $0, %%rax;"
+ " mov $38, %0;"
+ " cmovc %0, %%rax;"
+
+ /* Step 2: Add carry*38 to the original sum */
+ " xor %%rcx, %%rcx;"
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %0, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r" (f2)
+ : "r" (out), "r" (f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
}
-static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
- const u64 *const b)
+/* Computes the field substraction of two field elements */
+static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "addq %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adcq %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r15");
+ /* Compute the raw substraction of f1-f2 */
+ " movq 0(%1), %%r8;"
+ " subq 0(%2), %%r8;"
+ " movq 8(%1), %%r9;"
+ " sbbq 8(%2), %%r9;"
+ " movq 16(%1), %%r10;"
+ " sbbq 16(%2), %%r10;"
+ " movq 24(%1), %%r11;"
+ " sbbq 24(%2), %%r11;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $0, %%rax;"
+ " mov $38, %%rcx;"
+ " cmovc %%rcx, %%rax;"
+
+ /* Step 2: Substract carry*38 from the original difference */
+ " sub %%rax, %%r8;"
+ " sbb $0, %%r9;"
+ " sbb $0, %%r10;"
+ " sbb $0, %%r11;"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rcx, %%rax;"
+ " sub %%rax, %%r8;"
+
+ /* Store the result */
+ " movq %%r8, 0(%0);"
+ " movq %%r9, 8(%0);"
+ " movq %%r10, 16(%0);"
+ " movq %%r11, 24(%0);"
+ :
+ : "r" (out), "r" (f1), "r" (f2)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
}
-static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+/* Computes a field multiplication: out <- f1 * f2
+ * Uses the 8-element buffer tmp for intermediate results */
+static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
- "adcx %%rax, %%r10 ;"
- "movq 24(%1), %%rdx ;" /* A[3] */
- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
- "adcx %%rax, %%rbx ;"
- "movq 8(%1), %%rdx ;" /* A[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq (%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
+ /* Compute the raw multiplication: tmp <- src1 * src2 */
+
+ /* Compute src1[0] * src2 */
+ " movq 0(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 8(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 16(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 24(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
+ :
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
+ );
}
-static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+/* Computes two field multiplications:
+ * out[0] <- f1[0] * f2[0]
+ * out[1] <- f1[1] * f2[1]
+ * Uses the 16-element buffer tmp for intermediate results. */
+static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
- "movq 8(%1), %%rdx ;" /* A[1] */
- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
-
- "movq 16(%1), %%rdx ;" /* A[2] */
- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11", "%r13", "%r14", "%r15");
+ /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
+
+ /* Compute src1[0] * src2 */
+ " movq 0(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 8(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 16(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 24(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+
+ /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
+
+ /* Compute src1[0] * src2 */
+ " movq 32(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 40(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 48(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 56(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the results back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 96(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 64(%1), %%r8;"
+ " mulxq 104(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%1), %%r9;"
+ " mulxq 112(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 80(%1), %%r10;"
+ " mulxq 120(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 40(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 48(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 56(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%0);"
+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
+ :
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
+ );
}
-static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+/* Computes the field multiplication of four-element f1 with value in f2 */
+static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
- asm volatile(
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox (%1), %%r8 ;"
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 8(%1), %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 16(%1), %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 24(%1), %%r11 ;"
- /***************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11");
-}
+ register u64 f2_r asm("rdx") = f2;
-static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
-{
asm volatile(
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /***************************************/
- "adcq $0, %%rcx ;"
- "addq (%1), %%r8 ;"
- "adcq 8(%1), %%r9 ;"
- "adcq 16(%1), %%r10 ;"
- "adcq 24(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ /* Compute the raw multiplication of f1*f2 */
+ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
+ " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
+ " add %%rcx, %%r9;"
+ " mov $0, %%rcx;"
+ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
+ " adcx %%r12, %%r10;"
+ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
+ " adcx %%r13, %%r11;"
+ " adcx %%rcx, %%rax;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $38, %%rdx;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r" (f2_r)
+ : "r" (out), "r" (f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
+ );
}
-static __always_inline void
-add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes p1 <- bit ? p2 : p1 in constant time */
+static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
{
asm volatile(
- "mov $38, %%eax ;"
- "xorl %%ecx, %%ecx ;"
- "movq (%2), %%r8 ;"
- "adcx (%1), %%r8 ;"
- "movq 8(%2), %%r9 ;"
- "adcx 8(%1), %%r9 ;"
- "movq 16(%2), %%r10 ;"
- "adcx 16(%1), %%r10 ;"
- "movq 24(%2), %%r11 ;"
- "adcx 24(%1), %%r11 ;"
- "cmovc %%eax, %%ecx ;"
- "xorl %%eax, %%eax ;"
- "adcx %%rcx, %%r8 ;"
- "adcx %%rax, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rax, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rax, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $38, %%ecx ;"
- "cmovc %%ecx, %%eax ;"
- "addq %%rax, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Invert the polarity of bit to match cmov expectations */
+ " add $18446744073709551615, %0;"
+
+ /* cswap p1[0], p2[0] */
+ " movq 0(%1), %%r8;"
+ " movq 0(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 0(%1);"
+ " movq %%r9, 0(%2);"
+
+ /* cswap p1[1], p2[1] */
+ " movq 8(%1), %%r8;"
+ " movq 8(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 8(%1);"
+ " movq %%r9, 8(%2);"
+
+ /* cswap p1[2], p2[2] */
+ " movq 16(%1), %%r8;"
+ " movq 16(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 16(%1);"
+ " movq %%r9, 16(%2);"
+
+ /* cswap p1[3], p2[3] */
+ " movq 24(%1), %%r8;"
+ " movq 24(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 24(%1);"
+ " movq %%r9, 24(%2);"
+
+ /* cswap p1[4], p2[4] */
+ " movq 32(%1), %%r8;"
+ " movq 32(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 32(%1);"
+ " movq %%r9, 32(%2);"
+
+ /* cswap p1[5], p2[5] */
+ " movq 40(%1), %%r8;"
+ " movq 40(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 40(%1);"
+ " movq %%r9, 40(%2);"
+
+ /* cswap p1[6], p2[6] */
+ " movq 48(%1), %%r8;"
+ " movq 48(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 48(%1);"
+ " movq %%r9, 48(%2);"
+
+ /* cswap p1[7], p2[7] */
+ " movq 56(%1), %%r8;"
+ " movq 56(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 56(%1);"
+ " movq %%r9, 56(%2);"
+ : "+&r" (bit)
+ : "r" (p1), "r" (p2)
+ : "%r8", "%r9", "%r10", "memory", "cc"
+ );
}
-static __always_inline void
-add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes the square of a field element: out <- f * f
+ * Uses the 8-element buffer tmp for intermediate results */
+static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
{
asm volatile(
- "mov $38, %%eax ;"
- "movq (%2), %%r8 ;"
- "addq (%1), %%r8 ;"
- "movq 8(%2), %%r9 ;"
- "adcq 8(%1), %%r9 ;"
- "movq 16(%2), %%r10 ;"
- "adcq 16(%1), %%r10 ;"
- "movq 24(%2), %%r11 ;"
- "adcq 24(%1), %%r11 ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Compute the raw multiplication: tmp <- f * f */
+
+ /* Step 1: Compute all partial products */
+ " movq 0(%1), %%rdx;" /* f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%1), %%rdx;" /* f[3] */
+ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%r12;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%r12, %%r12;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
+ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
+ :
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
+ );
}
-static __always_inline void
-sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes two field squarings:
+ * out[0] <- f[0] * f[0]
+ * out[1] <- f[1] * f[1]
+ * Uses the 16-element buffer tmp for intermediate results */
+static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
{
asm volatile(
- "mov $38, %%eax ;"
- "movq (%1), %%r8 ;"
- "subq (%2), %%r8 ;"
- "movq 8(%1), %%r9 ;"
- "sbbq 8(%2), %%r9 ;"
- "movq 16(%1), %%r10 ;"
- "sbbq 16(%2), %%r10 ;"
- "movq 24(%1), %%r11 ;"
- "sbbq 24(%2), %%r11 ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "subq %%rcx, %%r8 ;"
- "sbbq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "sbbq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "sbbq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "subq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Step 1: Compute all partial products */
+ " movq 0(%1), %%rdx;" /* f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%1), %%rdx;" /* f[3] */
+ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%r12;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%r12, %%r12;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
+ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+
+ /* Step 1: Compute all partial products */
+ " movq 32(%1), %%rdx;" /* f[0] */
+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 56(%1), %%rdx;" /* f[3] */
+ " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%r12;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%r12, %%r12;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 64(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 72(%0);"
+ " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
+ " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
+ " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
+ " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
+
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 96(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 64(%1), %%r8;"
+ " mulxq 104(%1), %%r9, %%r12;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%1), %%r9;"
+ " mulxq 112(%1), %%r10, %%r13;"
+ " adcx %%r12, %%r10;"
+ " adoxq 80(%1), %%r10;"
+ " mulxq 120(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 40(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 48(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 56(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%0);"
+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
+ :
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
+ );
}
-/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
-static __always_inline void
-mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
{
- const u64 a24 = 121666;
- asm volatile(
- "movq %2, %%rdx ;"
- "mulx (%1), %%r8, %%r10 ;"
- "mulx 8(%1), %%r9, %%r11 ;"
- "addq %%r10, %%r9 ;"
- "mulx 16(%1), %%r10, %%rax ;"
- "adcq %%r11, %%r10 ;"
- "mulx 24(%1), %%r11, %%rcx ;"
- "adcq %%rax, %%r11 ;"
- /**************************/
- "adcq $0, %%rcx ;"
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
- "imul %%rdx, %%rcx ;"
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(a24)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ u64 *nq = p01_tmp1;
+ u64 *nq_p1 = p01_tmp1 + (u32)8U;
+ u64 *tmp1 = p01_tmp1 + (u32)16U;
+ u64 *x1 = q;
+ u64 *x2 = nq;
+ u64 *z2 = nq + (u32)4U;
+ u64 *z3 = nq_p1 + (u32)4U;
+ u64 *a = tmp1;
+ u64 *b = tmp1 + (u32)4U;
+ u64 *ab = tmp1;
+ u64 *dc = tmp1 + (u32)8U;
+ u64 *x3;
+ u64 *z31;
+ u64 *d0;
+ u64 *c0;
+ u64 *a1;
+ u64 *b1;
+ u64 *d;
+ u64 *c;
+ u64 *ab1;
+ u64 *dc1;
+ fadd(a, x2, z2);
+ fsub(b, x2, z2);
+ x3 = nq_p1;
+ z31 = nq_p1 + (u32)4U;
+ d0 = dc;
+ c0 = dc + (u32)4U;
+ fadd(c0, x3, z31);
+ fsub(d0, x3, z31);
+ fmul2(dc, dc, ab, tmp2);
+ fadd(x3, d0, c0);
+ fsub(z31, d0, c0);
+ a1 = tmp1;
+ b1 = tmp1 + (u32)4U;
+ d = tmp1 + (u32)8U;
+ c = tmp1 + (u32)12U;
+ ab1 = tmp1;
+ dc1 = tmp1 + (u32)8U;
+ fsqr2(dc1, ab1, tmp2);
+ fsqr2(nq_p1, nq_p1, tmp2);
+ a1[0U] = c[0U];
+ a1[1U] = c[1U];
+ a1[2U] = c[2U];
+ a1[3U] = c[3U];
+ fsub(c, d, c);
+ fmul_scalar(b1, c, (u64)121665U);
+ fadd(b1, b1, d);
+ fmul2(nq, dc1, ab1, tmp2);
+ fmul(z3, z3, x1, tmp2);
}
-static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
{
- struct {
- eltfp25519_1w_buffer buffer;
- eltfp25519_1w x0, x1, x2;
- } __aligned(32) m;
- u64 *T[4];
-
- T[0] = m.x0;
- T[1] = c; /* x^(-1) */
- T[2] = m.x1;
- T[3] = m.x2;
-
- copy_eltfp25519_1w(T[1], a);
- sqrn_eltfp25519_1w_adx(T[1], 1);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_adx(T[2], 2);
- mul_eltfp25519_1w_adx(T[0], a, T[2]);
- mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_adx(T[2], 1);
- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 5);
- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 10);
- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
- copy_eltfp25519_1w(T[3], T[2]);
- sqrn_eltfp25519_1w_adx(T[3], 20);
- mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
- sqrn_eltfp25519_1w_adx(T[3], 10);
- mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
- copy_eltfp25519_1w(T[0], T[3]);
- sqrn_eltfp25519_1w_adx(T[0], 50);
- mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 100);
- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 50);
- mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
- sqrn_eltfp25519_1w_adx(T[2], 5);
- mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
-
- memzero_explicit(&m, sizeof(m));
+ u64 *x2 = nq;
+ u64 *z2 = nq + (u32)4U;
+ u64 *a = tmp1;
+ u64 *b = tmp1 + (u32)4U;
+ u64 *d = tmp1 + (u32)8U;
+ u64 *c = tmp1 + (u32)12U;
+ u64 *ab = tmp1;
+ u64 *dc = tmp1 + (u32)8U;
+ fadd(a, x2, z2);
+ fsub(b, x2, z2);
+ fsqr2(dc, ab, tmp2);
+ a[0U] = c[0U];
+ a[1U] = c[1U];
+ a[2U] = c[2U];
+ a[3U] = c[3U];
+ fsub(c, d, c);
+ fmul_scalar(b, c, (u64)121665U);
+ fadd(b, b, d);
+ fmul2(nq, dc, ab, tmp2);
}
-static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
{
- struct {
- eltfp25519_1w_buffer buffer;
- eltfp25519_1w x0, x1, x2;
- } __aligned(32) m;
- u64 *T[5];
-
- T[0] = m.x0;
- T[1] = c; /* x^(-1) */
- T[2] = m.x1;
- T[3] = m.x2;
-
- copy_eltfp25519_1w(T[1], a);
- sqrn_eltfp25519_1w_bmi2(T[1], 1);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_bmi2(T[2], 2);
- mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
- mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_bmi2(T[2], 1);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 5);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 10);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
- copy_eltfp25519_1w(T[3], T[2]);
- sqrn_eltfp25519_1w_bmi2(T[3], 20);
- mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
- sqrn_eltfp25519_1w_bmi2(T[3], 10);
- mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
- copy_eltfp25519_1w(T[0], T[3]);
- sqrn_eltfp25519_1w_bmi2(T[0], 50);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 100);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 50);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
- sqrn_eltfp25519_1w_bmi2(T[2], 5);
- mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
-
- memzero_explicit(&m, sizeof(m));
+ u64 tmp2[16U] = { 0U };
+ u64 p01_tmp1_swap[33U] = { 0U };
+ u64 *p0 = p01_tmp1_swap;
+ u64 *p01 = p01_tmp1_swap;
+ u64 *p03 = p01;
+ u64 *p11 = p01 + (u32)8U;
+ u64 *x0;
+ u64 *z0;
+ u64 *p01_tmp1;
+ u64 *p01_tmp11;
+ u64 *nq10;
+ u64 *nq_p11;
+ u64 *swap1;
+ u64 sw0;
+ u64 *nq1;
+ u64 *tmp1;
+ memcpy(p11, init1, (u32)8U * sizeof(init1[0U]));
+ x0 = p03;
+ z0 = p03 + (u32)4U;
+ x0[0U] = (u64)1U;
+ x0[1U] = (u64)0U;
+ x0[2U] = (u64)0U;
+ x0[3U] = (u64)0U;
+ z0[0U] = (u64)0U;
+ z0[1U] = (u64)0U;
+ z0[2U] = (u64)0U;
+ z0[3U] = (u64)0U;
+ p01_tmp1 = p01_tmp1_swap;
+ p01_tmp11 = p01_tmp1_swap;
+ nq10 = p01_tmp1_swap;
+ nq_p11 = p01_tmp1_swap + (u32)8U;
+ swap1 = p01_tmp1_swap + (u32)32U;
+ cswap2((u64)1U, nq10, nq_p11);
+ point_add_and_double(init1, p01_tmp11, tmp2);
+ swap1[0U] = (u64)1U;
+ {
+ u32 i;
+ for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) {
+ u64 *p01_tmp12 = p01_tmp1_swap;
+ u64 *swap2 = p01_tmp1_swap + (u32)32U;
+ u64 *nq2 = p01_tmp12;
+ u64 *nq_p12 = p01_tmp12 + (u32)8U;
+ u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U);
+ u64 sw = swap2[0U] ^ bit;
+ cswap2(sw, nq2, nq_p12);
+ point_add_and_double(init1, p01_tmp12, tmp2);
+ swap2[0U] = bit;
+ }
+ }
+ sw0 = swap1[0U];
+ cswap2(sw0, nq10, nq_p11);
+ nq1 = p01_tmp1;
+ tmp1 = p01_tmp1 + (u32)16U;
+ point_double(nq1, tmp1, tmp2);
+ point_double(nq1, tmp1, tmp2);
+ point_double(nq1, tmp1, tmp2);
+ memcpy(out, p0, (u32)8U * sizeof(p0[0U]));
+
+ memzero_explicit(tmp2, sizeof(tmp2));
+ memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
}
-/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
- * with a number such that 0 <= C < 2**255-19.
- */
-static __always_inline void fred_eltfp25519_1w(u64 *const c)
+static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
{
- u64 tmp0 = 38, tmp1 = 19;
- asm volatile(
- "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
- "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
-
- /* Add either 19 or 38 to c */
- "addq %4, %0 ;"
- "adcq $0, %1 ;"
- "adcq $0, %2 ;"
- "adcq $0, %3 ;"
-
- /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
- "movl $0, %k4 ;"
- "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
- "btrq $63, %3 ;" /* Clear bit 255 */
-
- /* Subtract 19 if necessary */
- "subq %4, %0 ;"
- "sbbq $0, %1 ;"
- "sbbq $0, %2 ;"
- "sbbq $0, %3 ;"
-
- : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
- "+r"(tmp1)
- :
- : "memory", "cc");
+ u32 i;
+ fsqr(o, inp, tmp);
+ for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U)
+ fsqr(o, o, tmp);
}
-static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+static void finv(u64 *o, const u64 *i, u64 *tmp)
{
- u64 temp;
- asm volatile(
- "test %9, %9 ;"
- "movq %0, %8 ;"
- "cmovnzq %4, %0 ;"
- "cmovnzq %8, %4 ;"
- "movq %1, %8 ;"
- "cmovnzq %5, %1 ;"
- "cmovnzq %8, %5 ;"
- "movq %2, %8 ;"
- "cmovnzq %6, %2 ;"
- "cmovnzq %8, %6 ;"
- "movq %3, %8 ;"
- "cmovnzq %7, %3 ;"
- "cmovnzq %8, %7 ;"
- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
- "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
- "=r"(temp)
- : "r"(bit)
- : "cc"
- );
+ u64 t1[16U] = { 0U };
+ u64 *a0 = t1;
+ u64 *b = t1 + (u32)4U;
+ u64 *c = t1 + (u32)8U;
+ u64 *t00 = t1 + (u32)12U;
+ u64 *tmp1 = tmp;
+ u64 *a;
+ u64 *t0;
+ fsquare_times(a0, i, tmp1, (u32)1U);
+ fsquare_times(t00, a0, tmp1, (u32)2U);
+ fmul(b, t00, i, tmp);
+ fmul(a0, b, a0, tmp);
+ fsquare_times(t00, a0, tmp1, (u32)1U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)5U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)10U);
+ fmul(c, t00, b, tmp);
+ fsquare_times(t00, c, tmp1, (u32)20U);
+ fmul(t00, t00, c, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)10U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)50U);
+ fmul(c, t00, b, tmp);
+ fsquare_times(t00, c, tmp1, (u32)100U);
+ fmul(t00, t00, c, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)50U);
+ fmul(t00, t00, b, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)5U);
+ a = t1;
+ t0 = t1 + (u32)12U;
+ fmul(o, t0, a, tmp);
}
-static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+static void store_felem(u64 *b, u64 *f)
{
- asm volatile(
- "test %4, %4 ;"
- "cmovnzq %5, %0 ;"
- "cmovnzq %6, %1 ;"
- "cmovnzq %7, %2 ;"
- "cmovnzq %8, %3 ;"
- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
- : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
- : "cc"
- );
+ u64 f30 = f[3U];
+ u64 top_bit0 = f30 >> (u32)63U;
+ u64 carry0;
+ u64 f31;
+ u64 top_bit;
+ u64 carry;
+ u64 f0;
+ u64 f1;
+ u64 f2;
+ u64 f3;
+ u64 m0;
+ u64 m1;
+ u64 m2;
+ u64 m3;
+ u64 mask;
+ u64 f0_;
+ u64 f1_;
+ u64 f2_;
+ u64 f3_;
+ u64 o0;
+ u64 o1;
+ u64 o2;
+ u64 o3;
+ f[3U] = f30 & (u64)0x7fffffffffffffffU;
+ carry0 = add_scalar(f, f, (u64)19U * top_bit0);
+ f31 = f[3U];
+ top_bit = f31 >> (u32)63U;
+ f[3U] = f31 & (u64)0x7fffffffffffffffU;
+ carry = add_scalar(f, f, (u64)19U * top_bit);
+ f0 = f[0U];
+ f1 = f[1U];
+ f2 = f[2U];
+ f3 = f[3U];
+ m0 = gte_mask(f0, (u64)0xffffffffffffffedU);
+ m1 = eq_mask(f1, (u64)0xffffffffffffffffU);
+ m2 = eq_mask(f2, (u64)0xffffffffffffffffU);
+ m3 = eq_mask(f3, (u64)0x7fffffffffffffffU);
+ mask = ((m0 & m1) & m2) & m3;
+ f0_ = f0 - (mask & (u64)0xffffffffffffffedU);
+ f1_ = f1 - (mask & (u64)0xffffffffffffffffU);
+ f2_ = f2 - (mask & (u64)0xffffffffffffffffU);
+ f3_ = f3 - (mask & (u64)0x7fffffffffffffffU);
+ o0 = f0_;
+ o1 = f1_;
+ o2 = f2_;
+ o3 = f3_;
+ b[0U] = o0;
+ b[1U] = o1;
+ b[2U] = o2;
+ b[3U] = o3;
}
-static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE],
- const u8 session_key[CURVE25519_KEY_SIZE])
+static void encode_point(u8 *o, const u64 *i)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[6 * NUM_WORDS_ELTFP25519];
- u8 session[CURVE25519_KEY_SIZE];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- int i = 0, j = 0;
- u64 prev = 0;
- u64 *const X1 = (u64 *)m.session;
- u64 *const key = (u64 *)m.private;
- u64 *const Px = m.coordinates + 0;
- u64 *const Pz = m.coordinates + 4;
- u64 *const Qx = m.coordinates + 8;
- u64 *const Qz = m.coordinates + 12;
- u64 *const X2 = Qx;
- u64 *const Z2 = Qz;
- u64 *const X3 = Px;
- u64 *const Z3 = Pz;
- u64 *const X2Z2 = Qx;
- u64 *const X3Z3 = Px;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const D = m.workspace + 8;
- u64 *const C = m.workspace + 12;
- u64 *const DA = m.workspace + 16;
- u64 *const CB = m.workspace + 20;
- u64 *const AB = A;
- u64 *const DC = D;
- u64 *const DACB = DA;
-
- memcpy(m.private, private_key, sizeof(m.private));
- memcpy(m.session, session_key, sizeof(m.session));
-
- curve25519_clamp_secret(m.private);
-
- /* As in the draft:
- * When receiving such an array, implementations of curve25519
- * MUST mask the most-significant bit in the final byte. This
- * is done to preserve compatibility with point formats which
- * reserve the sign bit for use in other protocols and to
- * increase resistance to implementation fingerprinting
- */
- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
-
- copy_eltfp25519_1w(Px, X1);
- setzero_eltfp25519_1w(Pz);
- setzero_eltfp25519_1w(Qx);
- setzero_eltfp25519_1w(Qz);
-
- Pz[0] = 1;
- Qx[0] = 1;
-
- /* main-loop */
- prev = 0;
- j = 62;
- for (i = 3; i >= 0; --i) {
- while (j >= 0) {
- u64 bit = (key[i] >> j) & 0x1;
- u64 swap = bit ^ prev;
- prev = bit;
-
- add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
- add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
- mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
-
- cselect(swap, A, C);
- cselect(swap, B, D);
-
- sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
- add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
- sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
-
- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
-
- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
- add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
- mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
- mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
- --j;
- }
- j = 63;
- }
-
- inv_eltfp25519_1w_adx(A, Qz);
- mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
- fred_eltfp25519_1w((u64 *)shared);
-
- memzero_explicit(&m, sizeof(m));
+ const u64 *x = i;
+ const u64 *z = i + (u32)4U;
+ u64 tmp[4U] = { 0U };
+ u64 tmp_w[16U] = { 0U };
+ finv(tmp, z, tmp_w);
+ fmul(tmp, tmp, x, tmp_w);
+ store_felem((u64 *)o, tmp);
}
-static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE])
+static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[4 * NUM_WORDS_ELTFP25519];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- const int ite[4] = { 64, 64, 64, 63 };
- const int q = 3;
- u64 swap = 1;
-
- int i = 0, j = 0, k = 0;
- u64 *const key = (u64 *)m.private;
- u64 *const Ur1 = m.coordinates + 0;
- u64 *const Zr1 = m.coordinates + 4;
- u64 *const Ur2 = m.coordinates + 8;
- u64 *const Zr2 = m.coordinates + 12;
-
- u64 *const UZr1 = m.coordinates + 0;
- u64 *const ZUr2 = m.coordinates + 8;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const C = m.workspace + 8;
- u64 *const D = m.workspace + 12;
-
- u64 *const AB = m.workspace + 0;
- u64 *const CD = m.workspace + 8;
-
- const u64 *const P = table_ladder_8k;
-
- memcpy(m.private, private_key, sizeof(m.private));
-
- curve25519_clamp_secret(m.private);
-
- setzero_eltfp25519_1w(Ur1);
- setzero_eltfp25519_1w(Zr1);
- setzero_eltfp25519_1w(Zr2);
- Ur1[0] = 1;
- Zr1[0] = 1;
- Zr2[0] = 1;
-
- /* G-S */
- Ur2[3] = 0x1eaecdeee27cab34UL;
- Ur2[2] = 0xadc7a0b9235d48e2UL;
- Ur2[1] = 0xbbf095ae14b2edf8UL;
- Ur2[0] = 0x7e94e1fec82faabdUL;
-
- /* main-loop */
- j = q;
- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
- while (j < ite[i]) {
- u64 bit = (key[i] >> j) & 0x1;
- k = (64 * i + j - q);
- swap = swap ^ bit;
- cswap(swap, Ur1, Ur2);
- cswap(swap, Zr1, Zr2);
- swap = bit;
- /* Addition */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
- add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
- sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
- mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
- ++j;
+ u64 init1[8U] = { 0U };
+ u64 tmp[4U] = { 0U };
+ u64 tmp3;
+ u64 *x;
+ u64 *z;
+ {
+ u32 i;
+ for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) {
+ u64 *os = tmp;
+ const u8 *bj = pub + i * (u32)8U;
+ u64 u = *(u64 *)bj;
+ u64 r = u;
+ u64 x0 = r;
+ os[i] = x0;
}
- j = 0;
}
-
- /* Doubling */
- for (i = 0; i < q; ++i) {
- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
- copy_eltfp25519_1w(C, B); /* C = B */
- sub_eltfp25519_1w(B, A, B); /* B = A-B */
- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
- add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
- mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
- }
-
- /* Convert to affine coordinates */
- inv_eltfp25519_1w_adx(A, Zr1);
- mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
- fred_eltfp25519_1w((u64 *)session_key);
-
- memzero_explicit(&m, sizeof(m));
+ tmp3 = tmp[3U];
+ tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU;
+ x = init1;
+ z = init1 + (u32)4U;
+ z[0U] = (u64)1U;
+ z[1U] = (u64)0U;
+ z[2U] = (u64)0U;
+ z[3U] = (u64)0U;
+ x[0U] = tmp[0U];
+ x[1U] = tmp[1U];
+ x[2U] = tmp[2U];
+ x[3U] = tmp[3U];
+ montgomery_ladder(init1, priv, init1);
+ encode_point(out, init1);
}
-static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE],
- const u8 session_key[CURVE25519_KEY_SIZE])
-{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[6 * NUM_WORDS_ELTFP25519];
- u8 session[CURVE25519_KEY_SIZE];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- int i = 0, j = 0;
- u64 prev = 0;
- u64 *const X1 = (u64 *)m.session;
- u64 *const key = (u64 *)m.private;
- u64 *const Px = m.coordinates + 0;
- u64 *const Pz = m.coordinates + 4;
- u64 *const Qx = m.coordinates + 8;
- u64 *const Qz = m.coordinates + 12;
- u64 *const X2 = Qx;
- u64 *const Z2 = Qz;
- u64 *const X3 = Px;
- u64 *const Z3 = Pz;
- u64 *const X2Z2 = Qx;
- u64 *const X3Z3 = Px;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const D = m.workspace + 8;
- u64 *const C = m.workspace + 12;
- u64 *const DA = m.workspace + 16;
- u64 *const CB = m.workspace + 20;
- u64 *const AB = A;
- u64 *const DC = D;
- u64 *const DACB = DA;
-
- memcpy(m.private, private_key, sizeof(m.private));
- memcpy(m.session, session_key, sizeof(m.session));
-
- curve25519_clamp_secret(m.private);
-
- /* As in the draft:
- * When receiving such an array, implementations of curve25519
- * MUST mask the most-significant bit in the final byte. This
- * is done to preserve compatibility with point formats which
- * reserve the sign bit for use in other protocols and to
- * increase resistance to implementation fingerprinting
- */
- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
-
- copy_eltfp25519_1w(Px, X1);
- setzero_eltfp25519_1w(Pz);
- setzero_eltfp25519_1w(Qx);
- setzero_eltfp25519_1w(Qz);
-
- Pz[0] = 1;
- Qx[0] = 1;
-
- /* main-loop */
- prev = 0;
- j = 62;
- for (i = 3; i >= 0; --i) {
- while (j >= 0) {
- u64 bit = (key[i] >> j) & 0x1;
- u64 swap = bit ^ prev;
- prev = bit;
-
- add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
- add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
- mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
-
- cselect(swap, A, C);
- cselect(swap, B, D);
-
- sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
- add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
- sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
-
- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
-
- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
- add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
- mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
- mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
- --j;
- }
- j = 63;
- }
-
- inv_eltfp25519_1w_bmi2(A, Qz);
- mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
- fred_eltfp25519_1w((u64 *)shared);
+/* The below constants were generated using this sage script:
+ *
+ * #!/usr/bin/env sage
+ * import sys
+ * from sage.all import *
+ * def limbs(n):
+ * n = int(n)
+ * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
+ * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
+ * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
+ * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
+ * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
+ * print("static const u64 table_ladder[] = {")
+ * p = ec.lift_x(9)
+ * for i in range(252):
+ * l = (p[0] + p[2]) / (p[0] - p[2])
+ * print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
+ * p = p * 2
+ * print("};")
+ *
+ */
- memzero_explicit(&m, sizeof(m));
-}
+static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL };
+
+static const u64 table_ladder[] = {
+ 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
+ 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
+ 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
+ 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
+ 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
+ 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
+ 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
+ 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
+ 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
+ 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
+ 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
+ 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
+ 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
+ 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
+ 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
+ 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
+ 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
+ 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
+ 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
+ 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
+ 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
+ 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
+ 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
+ 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
+ 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
+ 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
+ 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
+ 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
+ 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
+ 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
+ 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
+ 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
+ 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
+ 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
+ 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
+ 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
+ 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
+ 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
+ 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
+ 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
+ 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
+ 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
+ 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
+ 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
+ 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
+ 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
+ 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
+ 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
+ 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
+ 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
+ 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
+ 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
+ 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
+ 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
+ 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
+ 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
+ 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
+ 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
+ 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
+ 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
+ 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
+ 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
+ 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
+ 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
+ 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
+ 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
+ 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
+ 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
+ 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
+ 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
+ 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
+ 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
+ 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
+ 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
+ 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
+ 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
+ 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
+ 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
+ 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
+ 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
+ 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
+ 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
+ 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
+ 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
+ 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
+ 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
+ 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
+ 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
+ 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
+ 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
+ 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
+ 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
+ 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
+ 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
+ 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
+ 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
+ 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
+ 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
+ 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
+ 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
+ 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
+ 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
+ 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
+ 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
+ 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
+ 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
+ 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
+ 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
+ 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
+ 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
+ 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
+ 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
+ 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
+ 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
+ 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
+ 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
+ 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
+ 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
+ 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
+ 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
+ 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
+ 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
+ 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
+ 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
+ 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
+ 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
+ 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
+ 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
+ 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
+ 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
+ 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
+ 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
+ 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
+ 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
+ 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
+ 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
+ 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
+ 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
+ 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
+ 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
+ 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
+ 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
+ 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
+ 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
+ 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
+ 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
+ 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
+ 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
+ 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
+ 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
+ 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
+ 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
+ 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
+ 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
+ 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
+ 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
+ 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
+ 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
+ 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
+ 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
+ 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
+ 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
+ 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
+ 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
+ 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
+ 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
+ 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
+ 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
+ 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
+ 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
+ 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
+ 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
+ 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
+ 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
+ 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
+ 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
+ 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
+ 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
+ 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
+ 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
+ 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
+ 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
+ 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
+ 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
+ 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
+ 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
+ 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
+ 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
+ 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
+ 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
+ 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
+ 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
+ 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
+ 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
+ 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
+ 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
+ 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
+ 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
+ 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
+ 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
+ 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
+ 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
+ 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
+ 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
+ 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
+ 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
+ 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
+ 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
+ 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
+ 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
+ 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
+ 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
+ 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
+ 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
+ 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
+ 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
+ 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
+ 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
+ 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
+ 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
+ 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
+ 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
+ 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
+ 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
+ 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
+ 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
+ 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
+ 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
+ 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
+ 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
+ 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
+ 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
+ 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
+ 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
+ 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
+ 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
+ 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
+ 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
+ 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
+ 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
+ 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
+ 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
+ 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
+ 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
+ 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
+ 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
+ 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
+ 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
+ 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
+ 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
+ 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
+ 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
+};
-static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE])
+static void curve25519_ever64_base(u8 *out, const u8 *priv)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[4 * NUM_WORDS_ELTFP25519];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- const int ite[4] = { 64, 64, 64, 63 };
- const int q = 3;
u64 swap = 1;
-
- int i = 0, j = 0, k = 0;
- u64 *const key = (u64 *)m.private;
- u64 *const Ur1 = m.coordinates + 0;
- u64 *const Zr1 = m.coordinates + 4;
- u64 *const Ur2 = m.coordinates + 8;
- u64 *const Zr2 = m.coordinates + 12;
-
- u64 *const UZr1 = m.coordinates + 0;
- u64 *const ZUr2 = m.coordinates + 8;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const C = m.workspace + 8;
- u64 *const D = m.workspace + 12;
-
- u64 *const AB = m.workspace + 0;
- u64 *const CD = m.workspace + 8;
-
- const u64 *const P = table_ladder_8k;
-
- memcpy(m.private, private_key, sizeof(m.private));
-
- curve25519_clamp_secret(m.private);
-
- setzero_eltfp25519_1w(Ur1);
- setzero_eltfp25519_1w(Zr1);
- setzero_eltfp25519_1w(Zr2);
- Ur1[0] = 1;
- Zr1[0] = 1;
- Zr2[0] = 1;
-
- /* G-S */
- Ur2[3] = 0x1eaecdeee27cab34UL;
- Ur2[2] = 0xadc7a0b9235d48e2UL;
- Ur2[1] = 0xbbf095ae14b2edf8UL;
- Ur2[0] = 0x7e94e1fec82faabdUL;
-
- /* main-loop */
- j = q;
- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
- while (j < ite[i]) {
- u64 bit = (key[i] >> j) & 0x1;
- k = (64 * i + j - q);
+ int i, j, k;
+ u64 tmp[16 + 32 + 4];
+ u64 *x1 = &tmp[0];
+ u64 *z1 = &tmp[4];
+ u64 *x2 = &tmp[8];
+ u64 *z2 = &tmp[12];
+ u64 *xz1 = &tmp[0];
+ u64 *xz2 = &tmp[8];
+ u64 *a = &tmp[0 + 16];
+ u64 *b = &tmp[4 + 16];
+ u64 *c = &tmp[8 + 16];
+ u64 *ab = &tmp[0 + 16];
+ u64 *abcd = &tmp[0 + 16];
+ u64 *ef = &tmp[16 + 16];
+ u64 *efgh = &tmp[16 + 16];
+ u64 *key = &tmp[0 + 16 + 32];
+
+ memcpy(key, priv, 32);
+ ((u8 *)key)[0] &= 248;
+ ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;
+
+ x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
+ z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
+ z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
+ memcpy(x2, p_minus_s, sizeof(p_minus_s));
+
+ j = 3;
+ for (i = 0; i < 4; ++i) {
+ while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
+ u64 bit = (key[i] >> j) & 1;
+ k = (64 * i + j - 3);
swap = swap ^ bit;
- cswap(swap, Ur1, Ur2);
- cswap(swap, Zr1, Zr2);
+ cswap2(swap, xz1, xz2);
swap = bit;
- /* Addition */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
- add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
- sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
- mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ fsub(b, x1, z1);
+ fadd(a, x1, z1);
+ fmul(c, &table_ladder[4 * k], b, ef);
+ fsub(b, a, c);
+ fadd(a, a, c);
+ fsqr2(ab, ab, efgh);
+ fmul2(xz1, xz2, ab, efgh);
++j;
}
j = 0;
}
- /* Doubling */
- for (i = 0; i < q; ++i) {
- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
- copy_eltfp25519_1w(C, B); /* C = B */
- sub_eltfp25519_1w(B, A, B); /* B = A-B */
- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
- add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
- mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
- }
+ point_double(xz1, abcd, efgh);
+ point_double(xz1, abcd, efgh);
+ point_double(xz1, abcd, efgh);
+ encode_point(out, xz1);
- /* Convert to affine coordinates */
- inv_eltfp25519_1w_bmi2(A, Zr1);
- mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
- fred_eltfp25519_1w((u64 *)session_key);
-
- memzero_explicit(&m, sizeof(m));
+ memzero_explicit(tmp, sizeof(tmp));
}
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
+
void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
- if (static_branch_likely(&curve25519_use_adx))
- curve25519_adx(mypublic, secret, basepoint);
- else if (static_branch_likely(&curve25519_use_bmi2))
- curve25519_bmi2(mypublic, secret, basepoint);
+ if (static_branch_likely(&curve25519_use_bmi2_adx))
+ curve25519_ever64(mypublic, secret, basepoint);
else
curve25519_generic(mypublic, secret, basepoint);
}
@@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch);
void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
- if (static_branch_likely(&curve25519_use_adx))
- curve25519_adx_base(pub, secret);
- else if (static_branch_likely(&curve25519_use_bmi2))
- curve25519_bmi2_base(pub, secret);
+ if (static_branch_likely(&curve25519_use_bmi2_adx))
+ curve25519_ever64_base(pub, secret);
else
curve25519_generic(pub, secret, curve25519_base_point);
}
@@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
+
static int __init curve25519_mod_init(void)
{
- if (boot_cpu_has(X86_FEATURE_BMI2))
- static_branch_enable(&curve25519_use_bmi2);
- else if (boot_cpu_has(X86_FEATURE_ADX))
- static_branch_enable(&curve25519_use_adx);
+ if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
+ static_branch_enable(&curve25519_use_bmi2_adx);
else
return 0;
return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
@@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-x86");
MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
--
2.18.2
From 1d46b9410c99a51d869bd4b7cd41ea32f2fea63d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 1 Mar 2020 16:06:56 +0800
Subject: [PATCH 055/100] crypto: x86/curve25519 - leave r12 as spare register
commit dc7fc3a53ae158263196b1892b672aedf67796c5 upstream.
This updates to the newer register selection proved by HACL*, which
leads to a more compact instruction encoding, and saves around 100
cycles.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++--------------
1 file changed, 55 insertions(+), 55 deletions(-)
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index e4e58b8e9afe..8a17621f7d3a 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" movq 0(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
/* Line up pointers */
@@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" mulxq 32(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%r12;"
+ " mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" movq %%r8, 0(%0);"
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
:
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
);
}
@@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" movq 0(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
@@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" movq 32(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 40(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 48(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 56(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
/* Line up pointers */
@@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" mulxq 32(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%r12;"
+ " mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" mulxq 96(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 64(%1), %%r8;"
- " mulxq 104(%1), %%r9, %%r12;"
+ " mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 72(%1), %%r9;"
" mulxq 112(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 80(%1), %%r10;"
" mulxq 120(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
" movq %%r8, 32(%0);"
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
:
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
);
}
@@ -388,11 +388,11 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
asm volatile(
/* Compute the raw multiplication of f1*f2 */
" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
- " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
" add %%rcx, %%r9;"
" mov $0, %%rcx;"
" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
" adcx %%r13, %%r11;"
" adcx %%rcx, %%rax;"
@@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
" movq %%r8, 0(%1);"
: "+&r" (f2_r)
: "r" (out), "r" (f1)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
);
}
@@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
@@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
- " adox %%r15, %%r12;"
+ " adox %%r15, %%rbx;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
- " adcx %%r12, %%r12;"
+ " adcx %%rbx, %%rbx;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
@@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
@@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" mulxq 32(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%r12;"
+ " mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" movq %%r8, 0(%0);"
: "+&r" (tmp), "+&r" (f), "+&r" (out)
:
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
);
}
@@ -611,8 +611,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
@@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
- " adox %%r15, %%r12;"
+ " adox %%r15, %%rbx;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
- " adcx %%r12, %%r12;"
+ " adcx %%rbx, %%rbx;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
@@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
@@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 56(%1), %%rdx;" /* f[3] */
- " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
+ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
" movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
@@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
- " adox %%r15, %%r12;"
+ " adox %%r15, %%rbx;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
- " adcx %%r12, %%r12;"
+ " adcx %%rbx, %%rbx;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
@@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
" movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
- " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
" movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
@@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 32(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%r12;"
+ " mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 96(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 64(%1), %%r8;"
- " mulxq 104(%1), %%r9, %%r12;"
+ " mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
" adoxq 72(%1), %%r9;"
" mulxq 112(%1), %%r10, %%r13;"
- " adcx %%r12, %%r10;"
+ " adcx %%rbx, %%r10;"
" adoxq 80(%1), %%r10;"
" mulxq 120(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
@@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" movq %%r8, 32(%0);"
: "+&r" (tmp), "+&r" (f), "+&r" (out)
:
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
);
}
--
2.18.2
From 1b621c6bdc78e16880d8aaae55dceade812c56fd Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 19 Mar 2020 11:56:17 -0600
Subject: [PATCH 056/100] crypto: arm[64]/poly1305 - add artifact to .gitignore
files
commit 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 upstream.
The .S_shipped yields a .S, and the pattern in these directories is to
add that to .gitignore so that git-status doesn't raise a fuss.
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Reported-by: Emil Renner Berthing <kernel@esmil.dk>
Cc: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/.gitignore | 1 +
arch/arm64/crypto/.gitignore | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
index 31e1f538df7d..a3c7ad52a469 100644
--- a/arch/arm/crypto/.gitignore
+++ b/arch/arm/crypto/.gitignore
@@ -1,3 +1,4 @@
aesbs-core.S
sha256-core.S
sha512-core.S
+poly1305-core.S
diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore
index 879df8781ed5..e403b1343328 100644
--- a/arch/arm64/crypto/.gitignore
+++ b/arch/arm64/crypto/.gitignore
@@ -1,2 +1,3 @@
sha256-core.S
sha512-core.S
+poly1305-core.S
--
2.18.2
From 04d656cc3b94e33050abc7df5cdc9b1e245aae84 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 23 Apr 2020 15:54:04 -0600
Subject: [PATCH 057/100] crypto: arch/lib - limit simd usage to 4k chunks
commit 706024a52c614b478b63f7728d202532ce6591a9 upstream.
The initial Zinc patchset, after some mailing list discussion, contained
code to ensure that kernel_fpu_enable would not be kept on for more than
a 4k chunk, since it disables preemption. The choice of 4k isn't totally
scientific, but it's not a bad guess either, and it's what's used in
both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
former two).
Ard did some back of the envelope calculations and found that
at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k
means we have a maximum preemption disabling of 20us, which Sebastian
confirmed was probably a good limit.
Unfortunately the chunking appears to have been left out of the final
patchset that added the glue code. So, this commit adds it back in.
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
arch/arm/crypto/chacha-glue.c | 14 +++++++++++---
arch/arm/crypto/poly1305-glue.c | 15 +++++++++++----
arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++---
arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++----
arch/x86/crypto/blake2s-glue.c | 10 ++++------
arch/x86/crypto/chacha_glue.c | 14 +++++++++++---
arch/x86/crypto/poly1305_glue.c | 13 ++++++-------
7 files changed, 65 insertions(+), 30 deletions(-)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 893692ed12b7..cd131b454c2e 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
return;
}
- kernel_neon_begin();
- chacha_doneon(state, dst, src, bytes, nrounds);
- kernel_neon_end();
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, todo, nrounds);
+ kernel_neon_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index ceec04ec2f40..13cfef4ae22e 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && do_neon) {
- kernel_neon_begin();
- poly1305_blocks_neon(&dctx->h, src, len, 1);
- kernel_neon_end();
+ do {
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, todo, 1);
+ kernel_neon_end();
+
+ len -= todo;
+ src += todo;
+ } while (len);
} else {
poly1305_blocks_arm(&dctx->h, src, len, 1);
+ src += len;
}
- src += len;
nbytes %= POLY1305_BLOCK_SIZE;
}
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 218943612261..1d9824c4ae43 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
!crypto_simd_usable())
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
- kernel_neon_begin();
- chacha_doneon(state, dst, src, bytes, nrounds);
- kernel_neon_end();
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, todo, nrounds);
+ kernel_neon_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index e97b092f56b8..f33ada70c4ed 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
- kernel_neon_begin();
- poly1305_blocks_neon(&dctx->h, src, len, 1);
- kernel_neon_end();
+ do {
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, todo, 1);
+ kernel_neon_end();
+
+ len -= todo;
+ src += todo;
+ } while (len);
} else {
poly1305_blocks(&dctx->h, src, len, 1);
+ src += len;
}
- src += len;
nbytes %= POLY1305_BLOCK_SIZE;
}
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 1d9ff8a45e1f..94ac5bdd9f6f 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}
- for (;;) {
+ do {
const size_t blocks = min_t(size_t, nblocks,
- PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+ SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) &&
@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
kernel_fpu_end();
nblocks -= blocks;
- if (!nblocks)
- break;
block += blocks * BLAKE2S_BLOCK_SIZE;
- }
+ } while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress_arch);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 1bebe11b9ec9..f3bfce21bc0d 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
- kernel_fpu_begin();
- chacha_dosimd(state, dst, src, bytes, nrounds);
- kernel_fpu_end();
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, todo, nrounds);
+ kernel_fpu_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 79bb58737d52..61b2bc8b6986 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
- PAGE_SIZE % POLY1305_BLOCK_SIZE);
+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
+ SZ_4K % POLY1305_BLOCK_SIZE);
if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
- for (;;) {
- const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+ do {
+ const size_t bytes = min_t(size_t, len, SZ_4K);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
kernel_fpu_end();
+
len -= bytes;
- if (!len)
- break;
inp += bytes;
- }
+ } while (len);
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
--
2.18.2
From af068e66b2ae1e5fbef3ff78dcd10867ad5af096 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 11 Feb 2020 20:47:05 +0100
Subject: [PATCH 058/100] icmp: introduce helper for nat'd source address in
network device context
commit 0b41713b606694257b90d61ba7e2712d8457648b upstream.
This introduces a helper function to be called only by network drivers
that wraps calls to icmp[v6]_send in a conntrack transformation, in case
NAT has been used. We don't want to pollute the non-driver path, though,
so we introduce this as a helper to be called by places that actually
make use of this, as suggested by Florian.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/linux/icmpv6.h | 10 ++++++++++
include/net/icmp.h | 6 ++++++
net/ipv4/icmp.c | 33 +++++++++++++++++++++++++++++++++
net/ipv6/ip6_icmp.c | 34 ++++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+)
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index a8f888976137..024b7a4cd98e 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -22,12 +22,22 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
unsigned int data_len);
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
+#else
+#define icmpv6_ndo_send icmpv6_send
+#endif
+
#else
static inline void icmpv6_send(struct sk_buff *skb,
u8 type, u8 code, __u32 info)
{
+}
+static inline void icmpv6_ndo_send(struct sk_buff *skb,
+ u8 type, u8 code, __u32 info)
+{
}
#endif
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5d4bfdba9adf..9ac2d2672a93 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
}
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+#else
+#define icmp_ndo_send icmp_send
+#endif
+
int icmp_rcv(struct sk_buff *skb);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac95ba78b903..9eb05a8139cb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -747,6 +747,39 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmp_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ icmp_send(skb_in, type, code, info);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..e0086758b6ee 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
--
2.18.2
From 93ca3bf7fc729df26023968388d2c063b177e828 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 9 Dec 2019 00:27:34 +0100
Subject: [PATCH 059/100] net: WireGuard secure network tunnel
commit e7096c131e5161fa3b8e52a650d7719d2857adfd upstream.
WireGuard is a layer 3 secure networking tunnel made specifically for
the kernel, that aims to be much simpler and easier to audit than IPsec.
Extensive documentation and description of the protocol and
considerations, along with formal proofs of the cryptography, are
available at:
* https://www.wireguard.com/
* https://www.wireguard.com/papers/wireguard.pdf
This commit implements WireGuard as a simple network device driver,
accessible in the usual RTNL way used by virtual network drivers. It
makes use of the udp_tunnel APIs, GRO, GSO, NAPI, and the usual set of
networking subsystem APIs. It has a somewhat novel multicore queueing
system designed for maximum throughput and minimal latency of encryption
operations, but it is implemented modestly using workqueues and NAPI.
Configuration is done via generic Netlink, and following a review from
the Netlink maintainer a year ago, several high profile userspace tools
have already implemented the API.
This commit also comes with several different tests, both in-kernel
tests and out-of-kernel tests based on network namespaces, taking profit
of the fact that sockets used by WireGuard intentionally stay in the
namespace the WireGuard interface was originally created, exactly like
the semantics of userspace tun devices. See wireguard.com/netns/ for
pictures and examples.
The source code is fairly short, but rather than combining everything
into a single file, WireGuard is developed as cleanly separable files,
making auditing and comprehension easier. Things are laid out as
follows:
* noise.[ch], cookie.[ch], messages.h: These implement the bulk of the
cryptographic aspects of the protocol, and are mostly data-only in
nature, taking in buffers of bytes and spitting out buffers of
bytes. They also handle reference counting for their various shared
pieces of data, like keys and key lists.
* ratelimiter.[ch]: Used as an integral part of cookie.[ch] for
ratelimiting certain types of cryptographic operations in accordance
with particular WireGuard semantics.
* allowedips.[ch], peerlookup.[ch]: The main lookup structures of
WireGuard, the former being trie-like with particular semantics, an
integral part of the design of the protocol, and the latter just
being nice helper functions around the various hashtables we use.
* device.[ch]: Implementation of functions for the netdevice and for
rtnl, responsible for maintaining the life of a given interface and
wiring it up to the rest of WireGuard.
* peer.[ch]: Each interface has a list of peers, with helper functions
available here for creation, destruction, and reference counting.
* socket.[ch]: Implementation of functions related to udp_socket and
the general set of kernel socket APIs, for sending and receiving
ciphertext UDP packets, and taking care of WireGuard-specific sticky
socket routing semantics for the automatic roaming.
* netlink.[ch]: Userspace API entry point for configuring WireGuard
peers and devices. The API has been implemented by several userspace
tools and network management utility, and the WireGuard project
distributes the basic wg(8) tool.
* queueing.[ch]: Shared function on the rx and tx path for handling
the various queues used in the multicore algorithms.
* send.c: Handles encrypting outgoing packets in parallel on
multiple cores, before sending them in order on a single core, via
workqueues and ring buffers. Also handles sending handshake and cookie
messages as part of the protocol, in parallel.
* receive.c: Handles decrypting incoming packets in parallel on
multiple cores, before passing them off in order to be ingested via
the rest of the networking subsystem with GRO via the typical NAPI
poll function. Also handles receiving handshake and cookie messages
as part of the protocol, in parallel.
* timers.[ch]: Uses the timer wheel to implement protocol particular
event timeouts, and gives a set of very simple event-driven entry
point functions for callers.
* main.c, version.h: Initialization and deinitialization of the module.
* selftest/*.h: Runtime unit tests for some of the most security
sensitive functions.
* tools/testing/selftests/wireguard/netns.sh: Aforementioned testing
script using network namespaces.
This commit aims to be as self-contained as possible, implementing
WireGuard as a standalone module not needing much special handling or
coordination from the network subsystem. I expect for future
optimizations to the network stack to positively improve WireGuard, and
vice-versa, but for the time being, this exists as intentionally
standalone.
We introduce a menu option for CONFIG_WIREGUARD, as well as providing a
verbose debug log and self-tests via CONFIG_WIREGUARD_DEBUG.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: David Miller <davem@davemloft.net>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
[Jason: ported to 5.4 by doing the following:
- wg_get_device_start uses genl_family_attrbuf
- trival skb_redirect_reset change from 2c64605b590e is folded in]
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
MAINTAINERS | 8 +
drivers/net/Kconfig | 41 +
drivers/net/Makefile | 1 +
drivers/net/wireguard/Makefile | 18 +
drivers/net/wireguard/allowedips.c | 381 +++++++++
drivers/net/wireguard/allowedips.h | 59 ++
drivers/net/wireguard/cookie.c | 236 ++++++
drivers/net/wireguard/cookie.h | 59 ++
drivers/net/wireguard/device.c | 458 ++++++++++
drivers/net/wireguard/device.h | 73 ++
drivers/net/wireguard/main.c | 64 ++
drivers/net/wireguard/messages.h | 128 +++
drivers/net/wireguard/netlink.c | 648 +++++++++++++++
drivers/net/wireguard/netlink.h | 12 +
drivers/net/wireguard/noise.c | 828 +++++++++++++++++++
drivers/net/wireguard/noise.h | 137 +++
drivers/net/wireguard/peer.c | 240 ++++++
drivers/net/wireguard/peer.h | 83 ++
drivers/net/wireguard/peerlookup.c | 221 +++++
drivers/net/wireguard/peerlookup.h | 64 ++
drivers/net/wireguard/queueing.c | 53 ++
drivers/net/wireguard/queueing.h | 197 +++++
drivers/net/wireguard/ratelimiter.c | 223 +++++
drivers/net/wireguard/ratelimiter.h | 19 +
drivers/net/wireguard/receive.c | 595 +++++++++++++
drivers/net/wireguard/selftest/allowedips.c | 683 +++++++++++++++
drivers/net/wireguard/selftest/counter.c | 104 +++
drivers/net/wireguard/selftest/ratelimiter.c | 226 +++++
drivers/net/wireguard/send.c | 413 +++++++++
drivers/net/wireguard/socket.c | 437 ++++++++++
drivers/net/wireguard/socket.h | 44 +
drivers/net/wireguard/timers.c | 243 ++++++
drivers/net/wireguard/timers.h | 31 +
drivers/net/wireguard/version.h | 1 +
include/uapi/linux/wireguard.h | 196 +++++
tools/testing/selftests/wireguard/netns.sh | 537 ++++++++++++
36 files changed, 7761 insertions(+)
create mode 100644 drivers/net/wireguard/Makefile
create mode 100644 drivers/net/wireguard/allowedips.c
create mode 100644 drivers/net/wireguard/allowedips.h
create mode 100644 drivers/net/wireguard/cookie.c
create mode 100644 drivers/net/wireguard/cookie.h
create mode 100644 drivers/net/wireguard/device.c
create mode 100644 drivers/net/wireguard/device.h
create mode 100644 drivers/net/wireguard/main.c
create mode 100644 drivers/net/wireguard/messages.h
create mode 100644 drivers/net/wireguard/netlink.c
create mode 100644 drivers/net/wireguard/netlink.h
create mode 100644 drivers/net/wireguard/noise.c
create mode 100644 drivers/net/wireguard/noise.h
create mode 100644 drivers/net/wireguard/peer.c
create mode 100644 drivers/net/wireguard/peer.h
create mode 100644 drivers/net/wireguard/peerlookup.c
create mode 100644 drivers/net/wireguard/peerlookup.h
create mode 100644 drivers/net/wireguard/queueing.c
create mode 100644 drivers/net/wireguard/queueing.h
create mode 100644 drivers/net/wireguard/ratelimiter.c
create mode 100644 drivers/net/wireguard/ratelimiter.h
create mode 100644 drivers/net/wireguard/receive.c
create mode 100644 drivers/net/wireguard/selftest/allowedips.c
create mode 100644 drivers/net/wireguard/selftest/counter.c
create mode 100644 drivers/net/wireguard/selftest/ratelimiter.c
create mode 100644 drivers/net/wireguard/send.c
create mode 100644 drivers/net/wireguard/socket.c
create mode 100644 drivers/net/wireguard/socket.h
create mode 100644 drivers/net/wireguard/timers.c
create mode 100644 drivers/net/wireguard/timers.h
create mode 100644 drivers/net/wireguard/version.h
create mode 100644 include/uapi/linux/wireguard.h
create mode 100755 tools/testing/selftests/wireguard/netns.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index fe6fa5d3a63e..d05f78261f33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17583,6 +17583,14 @@ L: linux-gpio@vger.kernel.org
S: Maintained
F: drivers/gpio/gpio-ws16c48.c
+WIREGUARD SECURE NETWORK TUNNEL
+M: Jason A. Donenfeld <Jason@zx2c4.com>
+S: Maintained
+F: drivers/net/wireguard/
+F: tools/testing/selftests/wireguard/
+L: wireguard@lists.zx2c4.com
+L: netdev@vger.kernel.org
+
WISTRON LAPTOP BUTTON DRIVER
M: Miloslav Trmac <mitr@volny.cz>
S: Maintained
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index df3cd2589bcf..16ad145e22c9 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -71,6 +71,47 @@ config DUMMY
To compile this driver as a module, choose M here: the module
will be called dummy.
+config WIREGUARD
+ tristate "WireGuard secure network tunnel"
+ depends on NET && INET
+ depends on IPV6 || !IPV6
+ select NET_UDP_TUNNEL
+ select DST_CACHE
+ select CRYPTO
+ select CRYPTO_LIB_CURVE25519
+ select CRYPTO_LIB_CHACHA20POLY1305
+ select CRYPTO_LIB_BLAKE2S
+ select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT
+ select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
+ select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
+ select CRYPTO_CURVE25519_X86 if X86 && 64BIT
+ select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
+ select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_POLY1305_ARM if ARM
+ select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
+ select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
+ select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+ help
+ WireGuard is a secure, fast, and easy to use replacement for IPSec
+ that uses modern cryptography and clever networking tricks. It's
+ designed to be fairly general purpose and abstract enough to fit most
+ use cases, while at the same time remaining extremely simple to
+ configure. See www.wireguard.com for more info.
+
+ It's safe to say Y or M here, as the driver is very lightweight and
+ is only in use when an administrator chooses to add an interface.
+
+config WIREGUARD_DEBUG
+ bool "Debugging checks and verbose messages"
+ depends on WIREGUARD
+ help
+ This will write log messages for handshake and other events
+ that occur for a WireGuard interface. It will also perform some
+ extra validation checks and unit tests at various points. This is
+ only useful for debugging.
+
+ Say N here unless you know what you're doing.
+
config EQUALIZER
tristate "EQL (serial line load balancing) support"
---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0d3ba056cda3..953b7c12f0b0 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/
obj-$(CONFIG_IPVLAN) += ipvlan/
obj-$(CONFIG_IPVTAP) += ipvlan/
obj-$(CONFIG_DUMMY) += dummy.o
+obj-$(CONFIG_WIREGUARD) += wireguard/
obj-$(CONFIG_EQUALIZER) += eql.o
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_MACSEC) += macsec.o
diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile
new file mode 100644
index 000000000000..fc52b2cb500b
--- /dev/null
+++ b/drivers/net/wireguard/Makefile
@@ -0,0 +1,18 @@
+ccflags-y := -O3
+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
+wireguard-y := main.o
+wireguard-y += noise.o
+wireguard-y += device.o
+wireguard-y += peer.o
+wireguard-y += timers.o
+wireguard-y += queueing.o
+wireguard-y += send.o
+wireguard-y += receive.o
+wireguard-y += socket.o
+wireguard-y += peerlookup.o
+wireguard-y += allowedips.o
+wireguard-y += ratelimiter.o
+wireguard-y += cookie.o
+wireguard-y += netlink.o
+obj-$(CONFIG_WIREGUARD) := wireguard.o
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
new file mode 100644
index 000000000000..72667d5399c3
--- /dev/null
+++ b/drivers/net/wireguard/allowedips.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "allowedips.h"
+#include "peer.h"
+
+static void swap_endian(u8 *dst, const u8 *src, u8 bits)
+{
+ if (bits == 32) {
+ *(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
+ } else if (bits == 128) {
+ ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
+ ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
+ }
+}
+
+static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
+ u8 cidr, u8 bits)
+{
+ node->cidr = cidr;
+ node->bit_at_a = cidr / 8U;
+#ifdef __LITTLE_ENDIAN
+ node->bit_at_a ^= (bits / 8U - 1U) % 8U;
+#endif
+ node->bit_at_b = 7U - (cidr % 8U);
+ node->bitlen = bits;
+ memcpy(node->bits, src, bits / 8U);
+}
+#define CHOOSE_NODE(parent, key) \
+ parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static void node_free_rcu(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct allowedips_node, rcu));
+}
+
+static void push_rcu(struct allowedips_node **stack,
+ struct allowedips_node __rcu *p, unsigned int *len)
+{
+ if (rcu_access_pointer(p)) {
+ WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
+ stack[(*len)++] = rcu_dereference_raw(p);
+ }
+}
+
+static void root_free_rcu(struct rcu_head *rcu)
+{
+ struct allowedips_node *node, *stack[128] = {
+ container_of(rcu, struct allowedips_node, rcu) };
+ unsigned int len = 1;
+
+ while (len > 0 && (node = stack[--len])) {
+ push_rcu(stack, node->bit[0], &len);
+ push_rcu(stack, node->bit[1], &len);
+ kfree(node);
+ }
+}
+
+static void root_remove_peer_lists(struct allowedips_node *root)
+{
+ struct allowedips_node *node, *stack[128] = { root };
+ unsigned int len = 1;
+
+ while (len > 0 && (node = stack[--len])) {
+ push_rcu(stack, node->bit[0], &len);
+ push_rcu(stack, node->bit[1], &len);
+ if (rcu_access_pointer(node->peer))
+ list_del(&node->peer_list);
+ }
+}
+
+static void walk_remove_by_peer(struct allowedips_node __rcu **top,
+ struct wg_peer *peer, struct mutex *lock)
+{
+#define REF(p) rcu_access_pointer(p)
+#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
+#define PUSH(p) ({ \
+ WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
+ stack[len++] = p; \
+ })
+
+ struct allowedips_node __rcu **stack[128], **nptr;
+ struct allowedips_node *node, *prev;
+ unsigned int len;
+
+ if (unlikely(!peer || !REF(*top)))
+ return;
+
+ for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
+ nptr = stack[len - 1];
+ node = DEREF(nptr);
+ if (!node) {
+ --len;
+ continue;
+ }
+ if (!prev || REF(prev->bit[0]) == node ||
+ REF(prev->bit[1]) == node) {
+ if (REF(node->bit[0]))
+ PUSH(&node->bit[0]);
+ else if (REF(node->bit[1]))
+ PUSH(&node->bit[1]);
+ } else if (REF(node->bit[0]) == prev) {
+ if (REF(node->bit[1]))
+ PUSH(&node->bit[1]);
+ } else {
+ if (rcu_dereference_protected(node->peer,
+ lockdep_is_held(lock)) == peer) {
+ RCU_INIT_POINTER(node->peer, NULL);
+ list_del_init(&node->peer_list);
+ if (!node->bit[0] || !node->bit[1]) {
+ rcu_assign_pointer(*nptr, DEREF(
+ &node->bit[!REF(node->bit[0])]));
+ call_rcu(&node->rcu, node_free_rcu);
+ node = DEREF(nptr);
+ }
+ }
+ --len;
+ }
+ }
+
+#undef REF
+#undef DEREF
+#undef PUSH
+}
+
+static unsigned int fls128(u64 a, u64 b)
+{
+ return a ? fls64(a) + 64U : fls64(b);
+}
+
+static u8 common_bits(const struct allowedips_node *node, const u8 *key,
+ u8 bits)
+{
+ if (bits == 32)
+ return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
+ else if (bits == 128)
+ return 128U - fls128(
+ *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
+ *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
+ return 0;
+}
+
+static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
+ u8 bits)
+{
+ /* This could be much faster if it actually just compared the common
+ * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
+ * the rest, but it turns out that common_bits is already super fast on
+ * modern processors, even taking into account the unfortunate bswap.
+ * So, we just inline it like this instead.
+ */
+ return common_bits(node, key, bits) >= node->cidr;
+}
+
+static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
+ const u8 *key)
+{
+ struct allowedips_node *node = trie, *found = NULL;
+
+ while (node && prefix_matches(node, key, bits)) {
+ if (rcu_access_pointer(node->peer))
+ found = node;
+ if (node->cidr == bits)
+ break;
+ node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+ }
+ return found;
+}
+
+/* Returns a strong reference to a peer */
+static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
+ const void *be_ip)
+{
+ /* Aligned so it can be passed to fls/fls64 */
+ u8 ip[16] __aligned(__alignof(u64));
+ struct allowedips_node *node;
+ struct wg_peer *peer = NULL;
+
+ swap_endian(ip, be_ip, bits);
+
+ rcu_read_lock_bh();
+retry:
+ node = find_node(rcu_dereference_bh(root), bits, ip);
+ if (node) {
+ peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
+ if (!peer)
+ goto retry;
+ }
+ rcu_read_unlock_bh();
+ return peer;
+}
+
+static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
+ u8 cidr, u8 bits, struct allowedips_node **rnode,
+ struct mutex *lock)
+{
+ struct allowedips_node *node = rcu_dereference_protected(trie,
+ lockdep_is_held(lock));
+ struct allowedips_node *parent = NULL;
+ bool exact = false;
+
+ while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
+ parent = node;
+ if (parent->cidr == cidr) {
+ exact = true;
+ break;
+ }
+ node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
+ lockdep_is_held(lock));
+ }
+ *rnode = parent;
+ return exact;
+}
+
+static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ struct allowedips_node *node, *parent, *down, *newnode;
+
+ if (unlikely(cidr > bits || !peer))
+ return -EINVAL;
+
+ if (!rcu_access_pointer(*trie)) {
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (unlikely(!node))
+ return -ENOMEM;
+ RCU_INIT_POINTER(node->peer, peer);
+ list_add_tail(&node->peer_list, &peer->allowedips_list);
+ copy_and_assign_cidr(node, key, cidr, bits);
+ rcu_assign_pointer(*trie, node);
+ return 0;
+ }
+ if (node_placement(*trie, key, cidr, bits, &node, lock)) {
+ rcu_assign_pointer(node->peer, peer);
+ list_move_tail(&node->peer_list, &peer->allowedips_list);
+ return 0;
+ }
+
+ newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+ if (unlikely(!newnode))
+ return -ENOMEM;
+ RCU_INIT_POINTER(newnode->peer, peer);
+ list_add_tail(&newnode->peer_list, &peer->allowedips_list);
+ copy_and_assign_cidr(newnode, key, cidr, bits);
+
+ if (!node) {
+ down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
+ } else {
+ down = rcu_dereference_protected(CHOOSE_NODE(node, key),
+ lockdep_is_held(lock));
+ if (!down) {
+ rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+ return 0;
+ }
+ }
+ cidr = min(cidr, common_bits(down, key, bits));
+ parent = node;
+
+ if (newnode->cidr == cidr) {
+ rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
+ if (!parent)
+ rcu_assign_pointer(*trie, newnode);
+ else
+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
+ newnode);
+ } else {
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (unlikely(!node)) {
+ kfree(newnode);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&node->peer_list);
+ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+
+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
+ if (!parent)
+ rcu_assign_pointer(*trie, node);
+ else
+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
+ node);
+ }
+ return 0;
+}
+
+void wg_allowedips_init(struct allowedips *table)
+{
+ table->root4 = table->root6 = NULL;
+ table->seq = 1;
+}
+
+void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
+{
+ struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;
+
+ ++table->seq;
+ RCU_INIT_POINTER(table->root4, NULL);
+ RCU_INIT_POINTER(table->root6, NULL);
+ if (rcu_access_pointer(old4)) {
+ struct allowedips_node *node = rcu_dereference_protected(old4,
+ lockdep_is_held(lock));
+
+ root_remove_peer_lists(node);
+ call_rcu(&node->rcu, root_free_rcu);
+ }
+ if (rcu_access_pointer(old6)) {
+ struct allowedips_node *node = rcu_dereference_protected(old6,
+ lockdep_is_held(lock));
+
+ root_remove_peer_lists(node);
+ call_rcu(&node->rcu, root_free_rcu);
+ }
+}
+
+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ /* Aligned so it can be passed to fls */
+ u8 key[4] __aligned(__alignof(u32));
+
+ ++table->seq;
+ swap_endian(key, (const u8 *)ip, 32);
+ return add(&table->root4, 32, key, cidr, peer, lock);
+}
+
+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ /* Aligned so it can be passed to fls64 */
+ u8 key[16] __aligned(__alignof(u64));
+
+ ++table->seq;
+ swap_endian(key, (const u8 *)ip, 128);
+ return add(&table->root6, 128, key, cidr, peer, lock);
+}
+
+void wg_allowedips_remove_by_peer(struct allowedips *table,
+ struct wg_peer *peer, struct mutex *lock)
+{
+ ++table->seq;
+ walk_remove_by_peer(&table->root4, peer, lock);
+ walk_remove_by_peer(&table->root6, peer, lock);
+}
+
+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
+{
+ const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);
+ swap_endian(ip, node->bits, node->bitlen);
+ memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
+ if (node->cidr)
+ ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);
+
+ *cidr = node->cidr;
+ return node->bitlen == 32 ? AF_INET : AF_INET6;
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
+ struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
+ return NULL;
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
+ struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
+ return NULL;
+}
+
+#include "selftest/allowedips.c"
diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
new file mode 100644
index 000000000000..e5c83cafcef4
--- /dev/null
+++ b/drivers/net/wireguard/allowedips.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_ALLOWEDIPS_H
+#define _WG_ALLOWEDIPS_H
+
+#include <linux/mutex.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wg_peer;
+
+struct allowedips_node {
+ struct wg_peer __rcu *peer;
+ struct allowedips_node __rcu *bit[2];
+ /* While it may seem scandalous that we waste space for v4,
+ * we're alloc'ing to the nearest power of 2 anyway, so this
+ * doesn't actually make a difference.
+ */
+ u8 bits[16] __aligned(__alignof(u64));
+ u8 cidr, bit_at_a, bit_at_b, bitlen;
+
+ /* Keep rarely used list at bottom to be beyond cache line. */
+ union {
+ struct list_head peer_list;
+ struct rcu_head rcu;
+ };
+};
+
+struct allowedips {
+ struct allowedips_node __rcu *root4;
+ struct allowedips_node __rcu *root6;
+ u64 seq;
+};
+
+void wg_allowedips_init(struct allowedips *table);
+void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock);
+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock);
+void wg_allowedips_remove_by_peer(struct allowedips *table,
+ struct wg_peer *peer, struct mutex *lock);
+/* The ip input pointer should be __aligned(__alignof(u64))) */
+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);
+
+/* These return a strong reference to a peer: */
+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
+ struct sk_buff *skb);
+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
+ struct sk_buff *skb);
+
+#ifdef DEBUG
+bool wg_allowedips_selftest(void);
+#endif
+
+#endif /* _WG_ALLOWEDIPS_H */
diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c
new file mode 100644
index 000000000000..4956f0499c19
--- /dev/null
+++ b/drivers/net/wireguard/cookie.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "cookie.h"
+#include "peer.h"
+#include "device.h"
+#include "messages.h"
+#include "ratelimiter.h"
+#include "timers.h"
+
+#include <crypto/blake2s.h>
+#include <crypto/chacha20poly1305.h>
+
+#include <net/ipv6.h>
+#include <crypto/algapi.h>
+
+void wg_cookie_checker_init(struct cookie_checker *checker,
+ struct wg_device *wg)
+{
+ init_rwsem(&checker->secret_lock);
+ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
+ get_random_bytes(checker->secret, NOISE_HASH_LEN);
+ checker->device = wg;
+}
+
+enum { COOKIE_KEY_LABEL_LEN = 8 };
+static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
+static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
+
+static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
+ const u8 label[COOKIE_KEY_LABEL_LEN])
+{
+ struct blake2s_state blake;
+
+ blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
+ blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
+ blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
+ blake2s_final(&blake, key);
+}
+
+/* Must hold peer->handshake.static_identity->lock */
+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
+{
+ if (likely(checker->device->static_identity.has_identity)) {
+ precompute_key(checker->cookie_encryption_key,
+ checker->device->static_identity.static_public,
+ cookie_key_label);
+ precompute_key(checker->message_mac1_key,
+ checker->device->static_identity.static_public,
+ mac1_key_label);
+ } else {
+ memset(checker->cookie_encryption_key, 0,
+ NOISE_SYMMETRIC_KEY_LEN);
+ memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
+ }
+}
+
+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
+{
+ precompute_key(peer->latest_cookie.cookie_decryption_key,
+ peer->handshake.remote_static, cookie_key_label);
+ precompute_key(peer->latest_cookie.message_mac1_key,
+ peer->handshake.remote_static, mac1_key_label);
+}
+
+void wg_cookie_init(struct cookie *cookie)
+{
+ memset(cookie, 0, sizeof(*cookie));
+ init_rwsem(&cookie->lock);
+}
+
+static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
+ const u8 key[NOISE_SYMMETRIC_KEY_LEN])
+{
+ len = len - sizeof(struct message_macs) +
+ offsetof(struct message_macs, mac1);
+ blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
+}
+
+static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
+ const u8 cookie[COOKIE_LEN])
+{
+ len = len - sizeof(struct message_macs) +
+ offsetof(struct message_macs, mac2);
+ blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
+}
+
+static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
+ struct cookie_checker *checker)
+{
+ struct blake2s_state state;
+
+ if (wg_birthdate_has_expired(checker->secret_birthdate,
+ COOKIE_SECRET_MAX_AGE)) {
+ down_write(&checker->secret_lock);
+ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
+ get_random_bytes(checker->secret, NOISE_HASH_LEN);
+ up_write(&checker->secret_lock);
+ }
+
+ down_read(&checker->secret_lock);
+
+ blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
+ if (skb->protocol == htons(ETH_P_IP))
+ blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
+ sizeof(struct in_addr));
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
+ sizeof(struct in6_addr));
+ blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
+ blake2s_final(&state, cookie);
+
+ up_read(&checker->secret_lock);
+}
+
+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
+ struct sk_buff *skb,
+ bool check_cookie)
+{
+ struct message_macs *macs = (struct message_macs *)
+ (skb->data + skb->len - sizeof(*macs));
+ enum cookie_mac_state ret;
+ u8 computed_mac[COOKIE_LEN];
+ u8 cookie[COOKIE_LEN];
+
+ ret = INVALID_MAC;
+ compute_mac1(computed_mac, skb->data, skb->len,
+ checker->message_mac1_key);
+ if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
+ goto out;
+
+ ret = VALID_MAC_BUT_NO_COOKIE;
+
+ if (!check_cookie)
+ goto out;
+
+ make_cookie(cookie, skb, checker);
+
+ compute_mac2(computed_mac, skb->data, skb->len, cookie);
+ if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
+ goto out;
+
+ ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
+ if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
+ goto out;
+
+ ret = VALID_MAC_WITH_COOKIE;
+
+out:
+ return ret;
+}
+
+void wg_cookie_add_mac_to_packet(void *message, size_t len,
+ struct wg_peer *peer)
+{
+ struct message_macs *macs = (struct message_macs *)
+ ((u8 *)message + len - sizeof(*macs));
+
+ down_write(&peer->latest_cookie.lock);
+ compute_mac1(macs->mac1, message, len,
+ peer->latest_cookie.message_mac1_key);
+ memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
+ peer->latest_cookie.have_sent_mac1 = true;
+ up_write(&peer->latest_cookie.lock);
+
+ down_read(&peer->latest_cookie.lock);
+ if (peer->latest_cookie.is_valid &&
+ !wg_birthdate_has_expired(peer->latest_cookie.birthdate,
+ COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
+ compute_mac2(macs->mac2, message, len,
+ peer->latest_cookie.cookie);
+ else
+ memset(macs->mac2, 0, COOKIE_LEN);
+ up_read(&peer->latest_cookie.lock);
+}
+
+void wg_cookie_message_create(struct message_handshake_cookie *dst,
+ struct sk_buff *skb, __le32 index,
+ struct cookie_checker *checker)
+{
+ struct message_macs *macs = (struct message_macs *)
+ ((u8 *)skb->data + skb->len - sizeof(*macs));
+ u8 cookie[COOKIE_LEN];
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
+ dst->receiver_index = index;
+ get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
+
+ make_cookie(cookie, skb, checker);
+ xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
+ macs->mac1, COOKIE_LEN, dst->nonce,
+ checker->cookie_encryption_key);
+}
+
+void wg_cookie_message_consume(struct message_handshake_cookie *src,
+ struct wg_device *wg)
+{
+ struct wg_peer *peer = NULL;
+ u8 cookie[COOKIE_LEN];
+ bool ret;
+
+ if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
+ INDEX_HASHTABLE_HANDSHAKE |
+ INDEX_HASHTABLE_KEYPAIR,
+ src->receiver_index, &peer)))
+ return;
+
+ down_read(&peer->latest_cookie.lock);
+ if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
+ up_read(&peer->latest_cookie.lock);
+ goto out;
+ }
+ ret = xchacha20poly1305_decrypt(
+ cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
+ peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
+ peer->latest_cookie.cookie_decryption_key);
+ up_read(&peer->latest_cookie.lock);
+
+ if (ret) {
+ down_write(&peer->latest_cookie.lock);
+ memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
+ peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
+ peer->latest_cookie.is_valid = true;
+ peer->latest_cookie.have_sent_mac1 = false;
+ up_write(&peer->latest_cookie.lock);
+ } else {
+ net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
+ wg->dev->name);
+ }
+
+out:
+ wg_peer_put(peer);
+}
diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h
new file mode 100644
index 000000000000..c4bd61ca03f2
--- /dev/null
+++ b/drivers/net/wireguard/cookie.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_COOKIE_H
+#define _WG_COOKIE_H
+
+#include "messages.h"
+#include <linux/rwsem.h>
+
+struct wg_peer;
+
+struct cookie_checker {
+ u8 secret[NOISE_HASH_LEN];
+ u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ u64 secret_birthdate;
+ struct rw_semaphore secret_lock;
+ struct wg_device *device;
+};
+
+struct cookie {
+ u64 birthdate;
+ bool is_valid;
+ u8 cookie[COOKIE_LEN];
+ bool have_sent_mac1;
+ u8 last_mac1_sent[COOKIE_LEN];
+ u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ struct rw_semaphore lock;
+};
+
+enum cookie_mac_state {
+ INVALID_MAC,
+ VALID_MAC_BUT_NO_COOKIE,
+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
+ VALID_MAC_WITH_COOKIE
+};
+
+void wg_cookie_checker_init(struct cookie_checker *checker,
+ struct wg_device *wg);
+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
+void wg_cookie_init(struct cookie *cookie);
+
+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
+ struct sk_buff *skb,
+ bool check_cookie);
+void wg_cookie_add_mac_to_packet(void *message, size_t len,
+ struct wg_peer *peer);
+
+void wg_cookie_message_create(struct message_handshake_cookie *src,
+ struct sk_buff *skb, __le32 index,
+ struct cookie_checker *checker);
+void wg_cookie_message_consume(struct message_handshake_cookie *src,
+ struct wg_device *wg);
+
+#endif /* _WG_COOKIE_H */
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
new file mode 100644
index 000000000000..16b19824b9ad
--- /dev/null
+++ b/drivers/net/wireguard/device.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "socket.h"
+#include "timers.h"
+#include "device.h"
+#include "ratelimiter.h"
+#include "peer.h"
+#include "messages.h"
+
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/suspend.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/ip_tunnels.h>
+#include <net/addrconf.h>
+
+static LIST_HEAD(device_list);
+
+static int wg_open(struct net_device *dev)
+{
+ struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
+ struct inet6_dev *dev_v6 = __in6_dev_get(dev);
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
+ int ret;
+
+ if (dev_v4) {
+ /* At some point we might put this check near the ip_rt_send_
+ * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
+ * to the current secpath check.
+ */
+ IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+ IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
+ }
+ if (dev_v6)
+ dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
+
+ ret = wg_socket_init(wg, wg->incoming_port);
+ if (ret < 0)
+ return ret;
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ wg_packet_send_staged_packets(peer);
+ if (peer->persistent_keepalive_interval)
+ wg_packet_send_keepalive(peer);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct wg_device *wg;
+ struct wg_peer *peer;
+
+ /* If the machine is constantly suspending and resuming, as part of
+ * its normal operation rather than as a somewhat rare event, then we
+ * don't actually want to clear keys.
+ */
+ if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
+ return 0;
+
+ if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
+ return 0;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ del_timer(&peer->timer_zero_key_material);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ }
+ rtnl_unlock();
+ rcu_barrier();
+ return 0;
+}
+
+static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
+#endif
+
+static int wg_stop(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
+
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ wg_packet_purge_staged_packets(peer);
+ wg_timers_stop(peer);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ skb_queue_purge(&wg->incoming_handshakes);
+ wg_socket_reinit(wg, NULL, NULL);
+ return 0;
+}
+
+static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ struct sk_buff_head packets;
+ struct wg_peer *peer;
+ struct sk_buff *next;
+ sa_family_t family;
+ u32 mtu;
+ int ret;
+
+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+ ret = -EPROTONOSUPPORT;
+ net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
+ goto err;
+ }
+
+ peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
+ if (unlikely(!peer)) {
+ ret = -ENOKEY;
+ if (skb->protocol == htons(ETH_P_IP))
+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
+ dev->name, &ip_hdr(skb)->daddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
+ dev->name, &ipv6_hdr(skb)->daddr);
+ goto err;
+ }
+
+ family = READ_ONCE(peer->endpoint.addr.sa_family);
+ if (unlikely(family != AF_INET && family != AF_INET6)) {
+ ret = -EDESTADDRREQ;
+ net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
+ dev->name, peer->internal_id);
+ goto err_peer;
+ }
+
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ __skb_queue_head_init(&packets);
+ if (!skb_is_gso(skb)) {
+ skb_mark_not_on_list(skb);
+ } else {
+ struct sk_buff *segs = skb_gso_segment(skb, 0);
+
+ if (unlikely(IS_ERR(segs))) {
+ ret = PTR_ERR(segs);
+ goto err_peer;
+ }
+ dev_kfree_skb(skb);
+ skb = segs;
+ }
+
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ continue;
+
+ /* We only need to keep the original dst around for icmp,
+ * so at this point we're in a position to drop it.
+ */
+ skb_dst_drop(skb);
+
+ PACKET_CB(skb)->mtu = mtu;
+
+ __skb_queue_tail(&packets, skb);
+ }
+
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ /* If the queue is getting too big, we start removing the oldest packets
+ * until it's small again. We do this before adding the new packet, so
+ * we don't remove GSO segments that are in excess.
+ */
+ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
+ dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
+ ++dev->stats.tx_dropped;
+ }
+ skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+ wg_packet_send_staged_packets(peer);
+
+ wg_peer_put(peer);
+ return NETDEV_TX_OK;
+
+err_peer:
+ wg_peer_put(peer);
+err:
+ ++dev->stats.tx_errors;
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+ kfree_skb(skb);
+ return ret;
+}
+
+static const struct net_device_ops netdev_ops = {
+ .ndo_open = wg_open,
+ .ndo_stop = wg_stop,
+ .ndo_start_xmit = wg_xmit,
+ .ndo_get_stats64 = ip_tunnel_get_stats64
+};
+
+static void wg_destruct(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+
+ rtnl_lock();
+ list_del(&wg->device_list);
+ rtnl_unlock();
+ mutex_lock(&wg->device_update_lock);
+ wg->incoming_port = 0;
+ wg_socket_reinit(wg, NULL, NULL);
+ /* The final references are cleared in the below calls to destroy_workqueue. */
+ wg_peer_remove_all(wg);
+ destroy_workqueue(wg->handshake_receive_wq);
+ destroy_workqueue(wg->handshake_send_wq);
+ destroy_workqueue(wg->packet_crypt_wq);
+ wg_packet_queue_free(&wg->decrypt_queue, true);
+ wg_packet_queue_free(&wg->encrypt_queue, true);
+ rcu_barrier(); /* Wait for all the peers to be actually freed. */
+ wg_ratelimiter_uninit();
+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
+ skb_queue_purge(&wg->incoming_handshakes);
+ free_percpu(dev->tstats);
+ free_percpu(wg->incoming_handshakes_worker);
+ if (wg->have_creating_net_ref)
+ put_net(wg->creating_net);
+ kvfree(wg->index_hashtable);
+ kvfree(wg->peer_hashtable);
+ mutex_unlock(&wg->device_update_lock);
+
+ pr_debug("%s: Interface deleted\n", dev->name);
+ free_netdev(dev);
+}
+
+static const struct device_type device_type = { .name = KBUILD_MODNAME };
+
+static void wg_setup(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+ NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+
+ dev->netdev_ops = &netdev_ops;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
+ dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
+ dev->type = ARPHRD_NONE;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ dev->priv_flags |= IFF_NO_QUEUE;
+ dev->features |= NETIF_F_LLTX;
+ dev->features |= WG_NETDEV_FEATURES;
+ dev->hw_features |= WG_NETDEV_FEATURES;
+ dev->hw_enc_features |= WG_NETDEV_FEATURES;
+ dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
+ sizeof(struct udphdr) -
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+
+ SET_NETDEV_DEVTYPE(dev, &device_type);
+
+ /* We need to keep the dst around in case of icmp replies. */
+ netif_keep_dst(dev);
+
+ memset(wg, 0, sizeof(*wg));
+ wg->dev = dev;
+}
+
+static int wg_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ int ret = -ENOMEM;
+
+ wg->creating_net = src_net;
+ init_rwsem(&wg->static_identity.lock);
+ mutex_init(&wg->socket_update_lock);
+ mutex_init(&wg->device_update_lock);
+ skb_queue_head_init(&wg->incoming_handshakes);
+ wg_allowedips_init(&wg->peer_allowedips);
+ wg_cookie_checker_init(&wg->cookie_checker, wg);
+ INIT_LIST_HEAD(&wg->peer_list);
+ wg->device_update_gen = 1;
+
+ wg->peer_hashtable = wg_pubkey_hashtable_alloc();
+ if (!wg->peer_hashtable)
+ return ret;
+
+ wg->index_hashtable = wg_index_hashtable_alloc();
+ if (!wg->index_hashtable)
+ goto err_free_peer_hashtable;
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ goto err_free_index_hashtable;
+
+ wg->incoming_handshakes_worker =
+ wg_packet_percpu_multicore_worker_alloc(
+ wg_packet_handshake_receive_worker, wg);
+ if (!wg->incoming_handshakes_worker)
+ goto err_free_tstats;
+
+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_receive_wq)
+ goto err_free_incoming_handshakes;
+
+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_send_wq)
+ goto err_destroy_handshake_receive;
+
+ wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
+ if (!wg->packet_crypt_wq)
+ goto err_destroy_handshake_send;
+
+ ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
+ true, MAX_QUEUED_PACKETS);
+ if (ret < 0)
+ goto err_destroy_packet_crypt;
+
+ ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
+ true, MAX_QUEUED_PACKETS);
+ if (ret < 0)
+ goto err_free_encrypt_queue;
+
+ ret = wg_ratelimiter_init();
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
+
+ list_add(&wg->device_list, &device_list);
+
+ /* We wait until the end to assign priv_destructor, so that
+ * register_netdevice doesn't call it for us if it fails.
+ */
+ dev->priv_destructor = wg_destruct;
+
+ pr_debug("%s: Interface created\n", dev->name);
+ return ret;
+
+err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
+err_free_decrypt_queue:
+ wg_packet_queue_free(&wg->decrypt_queue, true);
+err_free_encrypt_queue:
+ wg_packet_queue_free(&wg->encrypt_queue, true);
+err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
+err_free_incoming_handshakes:
+ free_percpu(wg->incoming_handshakes_worker);
+err_free_tstats:
+ free_percpu(dev->tstats);
+err_free_index_hashtable:
+ kvfree(wg->index_hashtable);
+err_free_peer_hashtable:
+ kvfree(wg->peer_hashtable);
+ return ret;
+}
+
+static struct rtnl_link_ops link_ops __read_mostly = {
+ .kind = KBUILD_MODNAME,
+ .priv_size = sizeof(struct wg_device),
+ .setup = wg_setup,
+ .newlink = wg_newlink,
+};
+
+static int wg_netdevice_notification(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
+ struct wg_device *wg = netdev_priv(dev);
+
+ ASSERT_RTNL();
+
+ if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
+ return 0;
+
+ if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
+ put_net(wg->creating_net);
+ wg->have_creating_net_ref = false;
+ } else if (dev_net(dev) != wg->creating_net &&
+ !wg->have_creating_net_ref) {
+ wg->have_creating_net_ref = true;
+ get_net(wg->creating_net);
+ }
+ return 0;
+}
+
+static struct notifier_block netdevice_notifier = {
+ .notifier_call = wg_netdevice_notification
+};
+
+int __init wg_device_init(void)
+{
+ int ret;
+
+#ifdef CONFIG_PM_SLEEP
+ ret = register_pm_notifier(&pm_notifier);
+ if (ret)
+ return ret;
+#endif
+
+ ret = register_netdevice_notifier(&netdevice_notifier);
+ if (ret)
+ goto error_pm;
+
+ ret = rtnl_link_register(&link_ops);
+ if (ret)
+ goto error_netdevice;
+
+ return 0;
+
+error_netdevice:
+ unregister_netdevice_notifier(&netdevice_notifier);
+error_pm:
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ return ret;
+}
+
+void wg_device_uninit(void)
+{
+ rtnl_link_unregister(&link_ops);
+ unregister_netdevice_notifier(&netdevice_notifier);
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ rcu_barrier();
+}
diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
new file mode 100644
index 000000000000..c91f3051c5c7
--- /dev/null
+++ b/drivers/net/wireguard/device.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_DEVICE_H
+#define _WG_DEVICE_H
+
+#include "noise.h"
+#include "allowedips.h"
+#include "peerlookup.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/ptr_ring.h>
+
+struct wg_device;
+
+struct multicore_worker {
+ void *ptr;
+ struct work_struct work;
+};
+
+struct crypt_queue {
+ struct ptr_ring ring;
+ union {
+ struct {
+ struct multicore_worker __percpu *worker;
+ int last_cpu;
+ };
+ struct work_struct work;
+ };
+};
+
+struct wg_device {
+ struct net_device *dev;
+ struct crypt_queue encrypt_queue, decrypt_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net *creating_net;
+ struct noise_static_identity static_identity;
+ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
+ struct workqueue_struct *packet_crypt_wq;
+ struct sk_buff_head incoming_handshakes;
+ int incoming_handshake_cpu;
+ struct multicore_worker __percpu *incoming_handshakes_worker;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
+ bool have_creating_net_ref;
+};
+
+int wg_device_init(void);
+void wg_device_uninit(void);
+
+/* Later after the dust settles, this can be moved into include/linux/skbuff.h,
+ * where virtually all code that deals with GSO segs can benefit, around ~30
+ * drivers as of writing.
+ */
+#define skb_list_walk_safe(first, skb, next) \
+ for (skb = first, next = skb->next; skb; \
+ skb = next, next = skb ? skb->next : NULL)
+
+#endif /* _WG_DEVICE_H */
diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
new file mode 100644
index 000000000000..10c0a40f6a9e
--- /dev/null
+++ b/drivers/net/wireguard/main.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "version.h"
+#include "device.h"
+#include "noise.h"
+#include "queueing.h"
+#include "ratelimiter.h"
+#include "netlink.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/genetlink.h>
+#include <net/rtnetlink.h>
+
+static int __init mod_init(void)
+{
+ int ret;
+
+#ifdef DEBUG
+ if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
+ !wg_ratelimiter_selftest())
+ return -ENOTRECOVERABLE;
+#endif
+ wg_noise_init();
+
+ ret = wg_device_init();
+ if (ret < 0)
+ goto err_device;
+
+ ret = wg_genetlink_init();
+ if (ret < 0)
+ goto err_netlink;
+
+ pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
+ pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
+
+ return 0;
+
+err_netlink:
+ wg_device_uninit();
+err_device:
+ return ret;
+}
+
+static void __exit mod_exit(void)
+{
+ wg_genetlink_uninit();
+ wg_device_uninit();
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("WireGuard secure network tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_VERSION(WIREGUARD_VERSION);
+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
+MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
new file mode 100644
index 000000000000..b8a7b9ce32ba
--- /dev/null
+++ b/drivers/net/wireguard/messages.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_MESSAGES_H
+#define _WG_MESSAGES_H
+
+#include <crypto/curve25519.h>
+#include <crypto/chacha20poly1305.h>
+#include <crypto/blake2s.h>
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/skbuff.h>
+
+enum noise_lengths {
+ NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
+ NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
+ NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
+ NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
+ NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
+};
+
+#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
+
+enum cookie_values {
+ COOKIE_SECRET_MAX_AGE = 2 * 60,
+ COOKIE_SECRET_LATENCY = 5,
+ COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
+ COOKIE_LEN = 16
+};
+
+enum counter_values {
+ COUNTER_BITS_TOTAL = 2048,
+ COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
+ COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
+};
+
+enum limits {
+ REKEY_AFTER_MESSAGES = 1ULL << 60,
+ REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
+ REKEY_TIMEOUT = 5,
+ REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
+ REKEY_AFTER_TIME = 120,
+ REJECT_AFTER_TIME = 180,
+ INITIATIONS_PER_SECOND = 50,
+ MAX_PEERS_PER_DEVICE = 1U << 20,
+ KEEPALIVE_TIMEOUT = 10,
+ MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
+ MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
+ MAX_STAGED_PACKETS = 128,
+ MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
+};
+
+enum message_type {
+ MESSAGE_INVALID = 0,
+ MESSAGE_HANDSHAKE_INITIATION = 1,
+ MESSAGE_HANDSHAKE_RESPONSE = 2,
+ MESSAGE_HANDSHAKE_COOKIE = 3,
+ MESSAGE_DATA = 4
+};
+
+struct message_header {
+ /* The actual layout of this that we want is:
+ * u8 type
+ * u8 reserved_zero[3]
+ *
+ * But it turns out that by encoding this as little endian,
+ * we achieve the same thing, and it makes checking faster.
+ */
+ __le32 type;
+};
+
+struct message_macs {
+ u8 mac1[COOKIE_LEN];
+ u8 mac2[COOKIE_LEN];
+};
+
+struct message_handshake_initiation {
+ struct message_header header;
+ __le32 sender_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
+ u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
+ struct message_macs macs;
+};
+
+struct message_handshake_response {
+ struct message_header header;
+ __le32 sender_index;
+ __le32 receiver_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_nothing[noise_encrypted_len(0)];
+ struct message_macs macs;
+};
+
+struct message_handshake_cookie {
+ struct message_header header;
+ __le32 receiver_index;
+ u8 nonce[COOKIE_NONCE_LEN];
+ u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
+};
+
+struct message_data {
+ struct message_header header;
+ __le32 key_idx;
+ __le64 counter;
+ u8 encrypted_data[];
+};
+
+#define message_data_len(plain_len) \
+ (noise_encrypted_len(plain_len) + sizeof(struct message_data))
+
+enum message_alignments {
+ MESSAGE_PADDING_MULTIPLE = 16,
+ MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+};
+
+#define SKB_HEADER_LEN \
+ (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
+ sizeof(struct udphdr) + NET_SKB_PAD)
+#define DATA_PACKET_HEAD_ROOM \
+ ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
+
+enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
+
+#endif /* _WG_MESSAGES_H */
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
new file mode 100644
index 000000000000..0739a2cd1920
--- /dev/null
+++ b/drivers/net/wireguard/netlink.c
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "netlink.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/if.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include <crypto/algapi.h>
+
+static struct genl_family genl_family;
+
+static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
+ [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
+ [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
+ [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
+ [WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
+ [WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
+};
+
+static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
+ [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
+ [WGPEER_A_FLAGS] = { .type = NLA_U32 },
+ [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
+ [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
+ [WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
+ [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
+};
+
+static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
+ [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
+ [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
+};
+
+static struct wg_device *lookup_interface(struct nlattr **attrs,
+ struct sk_buff *skb)
+{
+ struct net_device *dev = NULL;
+
+ if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
+ return ERR_PTR(-EBADR);
+ if (attrs[WGDEVICE_A_IFINDEX])
+ dev = dev_get_by_index(sock_net(skb->sk),
+ nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
+ else if (attrs[WGDEVICE_A_IFNAME])
+ dev = dev_get_by_name(sock_net(skb->sk),
+ nla_data(attrs[WGDEVICE_A_IFNAME]));
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+ if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
+ strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
+ dev_put(dev);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ return netdev_priv(dev);
+}
+
+static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
+ int family)
+{
+ struct nlattr *allowedip_nest;
+
+ allowedip_nest = nla_nest_start(skb, 0);
+ if (!allowedip_nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
+ nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
+ nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
+ sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
+ nla_nest_cancel(skb, allowedip_nest);
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, allowedip_nest);
+ return 0;
+}
+
+struct dump_ctx {
+ struct wg_device *wg;
+ struct wg_peer *next_peer;
+ u64 allowedips_seq;
+ struct allowedips_node *next_allowedip;
+};
+
+#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)
+
+static int
+get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
+{
+
+ struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
+ struct allowedips_node *allowedips_node = ctx->next_allowedip;
+ bool fail;
+
+ if (!peer_nest)
+ return -EMSGSIZE;
+
+ down_read(&peer->handshake.lock);
+ fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
+ peer->handshake.remote_static);
+ up_read(&peer->handshake.lock);
+ if (fail)
+ goto err;
+
+ if (!allowedips_node) {
+ const struct __kernel_timespec last_handshake = {
+ .tv_sec = peer->walltime_last_handshake.tv_sec,
+ .tv_nsec = peer->walltime_last_handshake.tv_nsec
+ };
+
+ down_read(&peer->handshake.lock);
+ fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
+ NOISE_SYMMETRIC_KEY_LEN,
+ peer->handshake.preshared_key);
+ up_read(&peer->handshake.lock);
+ if (fail)
+ goto err;
+
+ if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
+ sizeof(last_handshake), &last_handshake) ||
+ nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ peer->persistent_keepalive_interval) ||
+ nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
+ WGPEER_A_UNSPEC) ||
+ nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
+ WGPEER_A_UNSPEC) ||
+ nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
+ goto err;
+
+ read_lock_bh(&peer->endpoint_lock);
+ if (peer->endpoint.addr.sa_family == AF_INET)
+ fail = nla_put(skb, WGPEER_A_ENDPOINT,
+ sizeof(peer->endpoint.addr4),
+ &peer->endpoint.addr4);
+ else if (peer->endpoint.addr.sa_family == AF_INET6)
+ fail = nla_put(skb, WGPEER_A_ENDPOINT,
+ sizeof(peer->endpoint.addr6),
+ &peer->endpoint.addr6);
+ read_unlock_bh(&peer->endpoint_lock);
+ if (fail)
+ goto err;
+ allowedips_node =
+ list_first_entry_or_null(&peer->allowedips_list,
+ struct allowedips_node, peer_list);
+ }
+ if (!allowedips_node)
+ goto no_allowedips;
+ if (!ctx->allowedips_seq)
+ ctx->allowedips_seq = peer->device->peer_allowedips.seq;
+ else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
+ goto no_allowedips;
+
+ allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
+ if (!allowedips_nest)
+ goto err;
+
+ list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
+ peer_list) {
+ u8 cidr, ip[16] __aligned(__alignof(u64));
+ int family;
+
+ family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
+ if (get_allowedips(skb, ip, cidr, family)) {
+ nla_nest_end(skb, allowedips_nest);
+ nla_nest_end(skb, peer_nest);
+ ctx->next_allowedip = allowedips_node;
+ return -EMSGSIZE;
+ }
+ }
+ nla_nest_end(skb, allowedips_nest);
+no_allowedips:
+ nla_nest_end(skb, peer_nest);
+ ctx->next_allowedip = NULL;
+ ctx->allowedips_seq = 0;
+ return 0;
+err:
+ nla_nest_cancel(skb, peer_nest);
+ return -EMSGSIZE;
+}
+
+static int wg_get_device_start(struct netlink_callback *cb)
+{
+ struct nlattr **attrs = genl_family_attrbuf(&genl_family);
+ struct wg_device *wg;
+ int ret;
+
+ ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, attrs,
+ genl_family.maxattr, device_policy, NULL);
+ if (ret < 0)
+ return ret;
+ wg = lookup_interface(attrs, cb->skb);
+ if (IS_ERR(wg))
+ return PTR_ERR(wg);
+ DUMP_CTX(cb)->wg = wg;
+ return 0;
+}
+
+static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct wg_peer *peer, *next_peer_cursor;
+ struct dump_ctx *ctx = DUMP_CTX(cb);
+ struct wg_device *wg = ctx->wg;
+ struct nlattr *peers_nest;
+ int ret = -EMSGSIZE;
+ bool done = true;
+ void *hdr;
+
+ rtnl_lock();
+ mutex_lock(&wg->device_update_lock);
+ cb->seq = wg->device_update_gen;
+ next_peer_cursor = ctx->next_peer;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
+ if (!hdr)
+ goto out;
+ genl_dump_check_consistent(cb, hdr);
+
+ if (!ctx->next_peer) {
+ if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
+ wg->incoming_port) ||
+ nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
+ nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
+ nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
+ goto out;
+
+ down_read(&wg->static_identity.lock);
+ if (wg->static_identity.has_identity) {
+ if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
+ NOISE_PUBLIC_KEY_LEN,
+ wg->static_identity.static_private) ||
+ nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
+ NOISE_PUBLIC_KEY_LEN,
+ wg->static_identity.static_public)) {
+ up_read(&wg->static_identity.lock);
+ goto out;
+ }
+ }
+ up_read(&wg->static_identity.lock);
+ }
+
+ peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
+ if (!peers_nest)
+ goto out;
+ ret = 0;
+ /* If the last cursor was removed via list_del_init in peer_remove, then
+ * we just treat this the same as there being no more peers left. The
+ * reason is that seq_nr should indicate to userspace that this isn't a
+ * coherent dump anyway, so they'll try again.
+ */
+ if (list_empty(&wg->peer_list) ||
+ (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
+ nla_nest_cancel(skb, peers_nest);
+ goto out;
+ }
+ lockdep_assert_held(&wg->device_update_lock);
+ peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
+ list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
+ if (get_peer(peer, skb, ctx)) {
+ done = false;
+ break;
+ }
+ next_peer_cursor = peer;
+ }
+ nla_nest_end(skb, peers_nest);
+
+out:
+ if (!ret && !done && next_peer_cursor)
+ wg_peer_get(next_peer_cursor);
+ wg_peer_put(ctx->next_peer);
+ mutex_unlock(&wg->device_update_lock);
+ rtnl_unlock();
+
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ return ret;
+ }
+ genlmsg_end(skb, hdr);
+ if (done) {
+ ctx->next_peer = NULL;
+ return 0;
+ }
+ ctx->next_peer = next_peer_cursor;
+ return skb->len;
+
+ /* At this point, we can't really deal ourselves with safely zeroing out
+ * the private key material after usage. This will need an additional API
+ * in the kernel for marking skbs as zero_on_free.
+ */
+}
+
+static int wg_get_device_done(struct netlink_callback *cb)
+{
+ struct dump_ctx *ctx = DUMP_CTX(cb);
+
+ if (ctx->wg)
+ dev_put(ctx->wg->dev);
+ wg_peer_put(ctx->next_peer);
+ return 0;
+}
+
+static int set_port(struct wg_device *wg, u16 port)
+{
+ struct wg_peer *peer;
+
+ if (wg->incoming_port == port)
+ return 0;
+ list_for_each_entry(peer, &wg->peer_list, peer_list)
+ wg_socket_clear_peer_endpoint_src(peer);
+ if (!netif_running(wg->dev)) {
+ wg->incoming_port = port;
+ return 0;
+ }
+ return wg_socket_init(wg, port);
+}
+
+static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
+{
+ int ret = -EINVAL;
+ u16 family;
+ u8 cidr;
+
+ if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
+ !attrs[WGALLOWEDIP_A_CIDR_MASK])
+ return ret;
+ family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
+ cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
+
+ if (family == AF_INET && cidr <= 32 &&
+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
+ ret = wg_allowedips_insert_v4(
+ &peer->device->peer_allowedips,
+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
+ &peer->device->device_update_lock);
+ else if (family == AF_INET6 && cidr <= 128 &&
+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
+ ret = wg_allowedips_insert_v6(
+ &peer->device->peer_allowedips,
+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
+ &peer->device->device_update_lock);
+
+ return ret;
+}
+
+static int set_peer(struct wg_device *wg, struct nlattr **attrs)
+{
+ u8 *public_key = NULL, *preshared_key = NULL;
+ struct wg_peer *peer = NULL;
+ u32 flags = 0;
+ int ret;
+
+ ret = -EINVAL;
+ if (attrs[WGPEER_A_PUBLIC_KEY] &&
+ nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
+ public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
+ else
+ goto out;
+ if (attrs[WGPEER_A_PRESHARED_KEY] &&
+ nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
+ preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);
+
+ if (attrs[WGPEER_A_FLAGS])
+ flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
+ ret = -EOPNOTSUPP;
+ if (flags & ~__WGPEER_F_ALL)
+ goto out;
+
+ ret = -EPFNOSUPPORT;
+ if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
+ if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
+ goto out;
+ }
+
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
+ nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
+ ret = 0;
+ if (!peer) { /* Peer doesn't exist yet. Add a new one. */
+ if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
+ goto out;
+
+ /* The peer is new, so there aren't allowed IPs to remove. */
+ flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;
+
+ down_read(&wg->static_identity.lock);
+ if (wg->static_identity.has_identity &&
+ !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
+ wg->static_identity.static_public,
+ NOISE_PUBLIC_KEY_LEN)) {
+ /* We silently ignore peers that have the same public
+ * key as the device. The reason we do it silently is
+ * that we'd like for people to be able to reuse the
+ * same set of API calls across peers.
+ */
+ up_read(&wg->static_identity.lock);
+ ret = 0;
+ goto out;
+ }
+ up_read(&wg->static_identity.lock);
+
+ peer = wg_peer_create(wg, public_key, preshared_key);
+ if (IS_ERR(peer)) {
+ /* Similar to the above, if the key is invalid, we skip
+ * it without fanfare, so that services don't need to
+ * worry about doing key validation themselves.
+ */
+ ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
+ peer = NULL;
+ goto out;
+ }
+ /* Take additional reference, as though we've just been
+ * looked up.
+ */
+ wg_peer_get(peer);
+ }
+
+ if (flags & WGPEER_F_REMOVE_ME) {
+ wg_peer_remove(peer);
+ goto out;
+ }
+
+ if (preshared_key) {
+ down_write(&peer->handshake.lock);
+ memcpy(&peer->handshake.preshared_key, preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
+ up_write(&peer->handshake.lock);
+ }
+
+ if (attrs[WGPEER_A_ENDPOINT]) {
+ struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
+ size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
+
+ if ((len == sizeof(struct sockaddr_in) &&
+ addr->sa_family == AF_INET) ||
+ (len == sizeof(struct sockaddr_in6) &&
+ addr->sa_family == AF_INET6)) {
+ struct endpoint endpoint = { { { 0 } } };
+
+ memcpy(&endpoint.addr, addr, len);
+ wg_socket_set_peer_endpoint(peer, &endpoint);
+ }
+ }
+
+ if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
+ wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
+ &wg->device_update_lock);
+
+ if (attrs[WGPEER_A_ALLOWEDIPS]) {
+ struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
+ int rem;
+
+ nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
+ ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
+ attr, allowedip_policy, NULL);
+ if (ret < 0)
+ goto out;
+ ret = set_allowedip(peer, allowedip);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
+ const u16 persistent_keepalive_interval = nla_get_u16(
+ attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
+ const bool send_keepalive =
+ !peer->persistent_keepalive_interval &&
+ persistent_keepalive_interval &&
+ netif_running(wg->dev);
+
+ peer->persistent_keepalive_interval = persistent_keepalive_interval;
+ if (send_keepalive)
+ wg_packet_send_keepalive(peer);
+ }
+
+ if (netif_running(wg->dev))
+ wg_packet_send_staged_packets(peer);
+
+out:
+ wg_peer_put(peer);
+ if (attrs[WGPEER_A_PRESHARED_KEY])
+ memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
+ nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
+ return ret;
+}
+
+static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
+{
+ struct wg_device *wg = lookup_interface(info->attrs, skb);
+ u32 flags = 0;
+ int ret;
+
+ if (IS_ERR(wg)) {
+ ret = PTR_ERR(wg);
+ goto out_nodev;
+ }
+
+ rtnl_lock();
+ mutex_lock(&wg->device_update_lock);
+
+ if (info->attrs[WGDEVICE_A_FLAGS])
+ flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
+ ret = -EOPNOTSUPP;
+ if (flags & ~__WGDEVICE_F_ALL)
+ goto out;
+
+ ret = -EPERM;
+ if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
+ info->attrs[WGDEVICE_A_FWMARK]) &&
+ !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+
+ ++wg->device_update_gen;
+
+ if (info->attrs[WGDEVICE_A_FWMARK]) {
+ struct wg_peer *peer;
+
+ wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
+ list_for_each_entry(peer, &wg->peer_list, peer_list)
+ wg_socket_clear_peer_endpoint_src(peer);
+ }
+
+ if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
+ ret = set_port(wg,
+ nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
+ if (ret)
+ goto out;
+ }
+
+ if (flags & WGDEVICE_F_REPLACE_PEERS)
+ wg_peer_remove_all(wg);
+
+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
+ NOISE_PUBLIC_KEY_LEN) {
+ u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
+ u8 public_key[NOISE_PUBLIC_KEY_LEN];
+ struct wg_peer *peer, *temp;
+
+ if (!crypto_memneq(wg->static_identity.static_private,
+ private_key, NOISE_PUBLIC_KEY_LEN))
+ goto skip_set_private_key;
+
+ /* We remove before setting, to prevent race, which means doing
+ * two 25519-genpub ops.
+ */
+ if (curve25519_generate_public(public_key, private_key)) {
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
+ public_key);
+ if (peer) {
+ wg_peer_put(peer);
+ wg_peer_remove(peer);
+ }
+ }
+
+ down_write(&wg->static_identity.lock);
+ wg_noise_set_static_identity_private_key(&wg->static_identity,
+ private_key);
+ list_for_each_entry_safe(peer, temp, &wg->peer_list,
+ peer_list) {
+ if (wg_noise_precompute_static_static(peer))
+ wg_noise_expire_current_peer_keypairs(peer);
+ else
+ wg_peer_remove(peer);
+ }
+ wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
+ up_write(&wg->static_identity.lock);
+ }
+skip_set_private_key:
+
+ if (info->attrs[WGDEVICE_A_PEERS]) {
+ struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
+ ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
+ peer_policy, NULL);
+ if (ret < 0)
+ goto out;
+ ret = set_peer(wg, peer);
+ if (ret < 0)
+ goto out;
+ }
+ }
+ ret = 0;
+
+out:
+ mutex_unlock(&wg->device_update_lock);
+ rtnl_unlock();
+ dev_put(wg->dev);
+out_nodev:
+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
+ memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
+ return ret;
+}
+
+static const struct genl_ops genl_ops[] = {
+ {
+ .cmd = WG_CMD_GET_DEVICE,
+ .start = wg_get_device_start,
+ .dumpit = wg_get_device_dump,
+ .done = wg_get_device_done,
+ .flags = GENL_UNS_ADMIN_PERM
+ }, {
+ .cmd = WG_CMD_SET_DEVICE,
+ .doit = wg_set_device,
+ .flags = GENL_UNS_ADMIN_PERM
+ }
+};
+
+static struct genl_family genl_family __ro_after_init = {
+ .ops = genl_ops,
+ .n_ops = ARRAY_SIZE(genl_ops),
+ .name = WG_GENL_NAME,
+ .version = WG_GENL_VERSION,
+ .maxattr = WGDEVICE_A_MAX,
+ .module = THIS_MODULE,
+ .policy = device_policy,
+ .netnsok = true
+};
+
+int __init wg_genetlink_init(void)
+{
+ return genl_register_family(&genl_family);
+}
+
+void __exit wg_genetlink_uninit(void)
+{
+ genl_unregister_family(&genl_family);
+}
diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h
new file mode 100644
index 000000000000..15100d92e2e3
--- /dev/null
+++ b/drivers/net/wireguard/netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_NETLINK_H
+#define _WG_NETLINK_H
+
+int wg_genetlink_init(void);
+void wg_genetlink_uninit(void);
+
+#endif /* _WG_NETLINK_H */
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
new file mode 100644
index 000000000000..d71c8db68a8c
--- /dev/null
+++ b/drivers/net/wireguard/noise.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "noise.h"
+#include "device.h"
+#include "peer.h"
+#include "messages.h"
+#include "queueing.h"
+#include "peerlookup.h"
+
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <crypto/algapi.h>
+
+/* This implements Noise_IKpsk2:
+ *
+ * <- s
+ * ******
+ * -> e, es, s, ss, {t}
+ * <- e, ee, se, psk, {}
+ */
+
+static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
+static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
+static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
+static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
+static atomic64_t keypair_counter = ATOMIC64_INIT(0);
+
+void __init wg_noise_init(void)
+{
+ struct blake2s_state blake;
+
+ blake2s(handshake_init_chaining_key, handshake_name, NULL,
+ NOISE_HASH_LEN, sizeof(handshake_name), 0);
+ blake2s_init(&blake, NOISE_HASH_LEN);
+ blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
+ blake2s_update(&blake, identifier_name, sizeof(identifier_name));
+ blake2s_final(&blake, handshake_init_hash);
+}
+
+/* Must hold peer->handshake.static_identity->lock */
+bool wg_noise_precompute_static_static(struct wg_peer *peer)
+{
+ bool ret = true;
+
+ down_write(&peer->handshake.lock);
+ if (peer->handshake.static_identity->has_identity)
+ ret = curve25519(
+ peer->handshake.precomputed_static_static,
+ peer->handshake.static_identity->static_private,
+ peer->handshake.remote_static);
+ else
+ memset(peer->handshake.precomputed_static_static, 0,
+ NOISE_PUBLIC_KEY_LEN);
+ up_write(&peer->handshake.lock);
+ return ret;
+}
+
+bool wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
+{
+ memset(handshake, 0, sizeof(*handshake));
+ init_rwsem(&handshake->lock);
+ handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
+ handshake->entry.peer = peer;
+ memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
+ if (peer_preshared_key)
+ memcpy(handshake->preshared_key, peer_preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
+ handshake->static_identity = static_identity;
+ handshake->state = HANDSHAKE_ZEROED;
+ return wg_noise_precompute_static_static(peer);
+}
+
+static void handshake_zero(struct noise_handshake *handshake)
+{
+ memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->hash, 0, NOISE_HASH_LEN);
+ memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
+ handshake->remote_index = 0;
+ handshake->state = HANDSHAKE_ZEROED;
+}
+
+void wg_noise_handshake_clear(struct noise_handshake *handshake)
+{
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+ down_write(&handshake->lock);
+ handshake_zero(handshake);
+ up_write(&handshake->lock);
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+}
+
+static struct noise_keypair *keypair_create(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
+
+ if (unlikely(!keypair))
+ return NULL;
+ keypair->internal_id = atomic64_inc_return(&keypair_counter);
+ keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
+ keypair->entry.peer = peer;
+ kref_init(&keypair->refcount);
+ return keypair;
+}
+
+static void keypair_free_rcu(struct rcu_head *rcu)
+{
+ kzfree(container_of(rcu, struct noise_keypair, rcu));
+}
+
+static void keypair_free_kref(struct kref *kref)
+{
+ struct noise_keypair *keypair =
+ container_of(kref, struct noise_keypair, refcount);
+
+ net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
+ keypair->entry.peer->device->dev->name,
+ keypair->internal_id,
+ keypair->entry.peer->internal_id);
+ wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
+ &keypair->entry);
+ call_rcu(&keypair->rcu, keypair_free_rcu);
+}
+
+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
+{
+ if (unlikely(!keypair))
+ return;
+ if (unlikely(unreference_now))
+ wg_index_hashtable_remove(
+ keypair->entry.peer->device->index_hashtable,
+ &keypair->entry);
+ kref_put(&keypair->refcount, keypair_free_kref);
+}
+
+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
+ "Taking noise keypair reference without holding the RCU BH read lock");
+ if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
+ return NULL;
+ return keypair;
+}
+
+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
+{
+ struct noise_keypair *old;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+
+ /* We zero the next_keypair before zeroing the others, so that
+ * wg_noise_received_with_keypair returns early before subsequent ones
+ * are zeroed.
+ */
+ old = rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ old = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ old = rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->current_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+
+ spin_lock_bh(&peer->keypairs.keypair_update_lock);
+ keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
+ lockdep_is_held(&peer->keypairs.keypair_update_lock));
+ if (keypair)
+ keypair->sending.is_valid = false;
+ keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
+ lockdep_is_held(&peer->keypairs.keypair_update_lock));
+ if (keypair)
+ keypair->sending.is_valid = false;
+ spin_unlock_bh(&peer->keypairs.keypair_update_lock);
+}
+
+static void add_new_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *new_keypair)
+{
+ struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+ previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ next_keypair = rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ current_keypair = rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ if (new_keypair->i_am_the_initiator) {
+ /* If we're the initiator, it means we've sent a handshake, and
+ * received a confirmation response, which means this new
+ * keypair can now be used.
+ */
+ if (next_keypair) {
+ /* If there already was a next keypair pending, we
+ * demote it to be the previous keypair, and free the
+ * existing current. Note that this means KCI can result
+ * in this transition. It would perhaps be more sound to
+ * always just get rid of the unused next keypair
+ * instead of putting it in the previous slot, but this
+ * might be a bit less robust. Something to think about
+ * for the future.
+ */
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+ rcu_assign_pointer(keypairs->previous_keypair,
+ next_keypair);
+ wg_noise_keypair_put(current_keypair, true);
+ } else /* If there wasn't an existing next keypair, we replace
+ * the previous with the current one.
+ */
+ rcu_assign_pointer(keypairs->previous_keypair,
+ current_keypair);
+ /* At this point we can get rid of the old previous keypair, and
+ * set up the new keypair.
+ */
+ wg_noise_keypair_put(previous_keypair, true);
+ rcu_assign_pointer(keypairs->current_keypair, new_keypair);
+ } else {
+ /* If we're the responder, it means we can't use the new keypair
+ * until we receive confirmation via the first data packet, so
+ * we get rid of the existing previous one, the possibly
+ * existing next one, and slide in the new next one.
+ */
+ rcu_assign_pointer(keypairs->next_keypair, new_keypair);
+ wg_noise_keypair_put(next_keypair, true);
+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
+ wg_noise_keypair_put(previous_keypair, true);
+ }
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *received_keypair)
+{
+ struct noise_keypair *old_keypair;
+ bool key_is_new;
+
+ /* We first check without taking the spinlock. */
+ key_is_new = received_keypair ==
+ rcu_access_pointer(keypairs->next_keypair);
+ if (likely(!key_is_new))
+ return false;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+ /* After locking, we double check that things didn't change from
+ * beneath us.
+ */
+ if (unlikely(received_keypair !=
+ rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock)))) {
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+ return false;
+ }
+
+ /* When we've finally received the confirmation, we slide the next
+ * into the current, the current into the previous, and get rid of
+ * the old previous.
+ */
+ old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ rcu_assign_pointer(keypairs->previous_keypair,
+ rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock)));
+ wg_noise_keypair_put(old_keypair, true);
+ rcu_assign_pointer(keypairs->current_keypair, received_keypair);
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+ return true;
+}
+
+/* Must hold static_identity->lock */
+void wg_noise_set_static_identity_private_key(
+ struct noise_static_identity *static_identity,
+ const u8 private_key[NOISE_PUBLIC_KEY_LEN])
+{
+ memcpy(static_identity->static_private, private_key,
+ NOISE_PUBLIC_KEY_LEN);
+ curve25519_clamp_secret(static_identity->static_private);
+ static_identity->has_identity = curve25519_generate_public(
+ static_identity->static_public, private_key);
+}
+
+/* This is Hugo Krawczyk's HKDF:
+ * - https://eprint.iacr.org/2010/264.pdf
+ * - https://tools.ietf.org/html/rfc5869
+ */
+static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
+ size_t first_len, size_t second_len, size_t third_len,
+ size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
+{
+ u8 output[BLAKE2S_HASH_SIZE + 1];
+ u8 secret[BLAKE2S_HASH_SIZE];
+
+ WARN_ON(IS_ENABLED(DEBUG) &&
+ (first_len > BLAKE2S_HASH_SIZE ||
+ second_len > BLAKE2S_HASH_SIZE ||
+ third_len > BLAKE2S_HASH_SIZE ||
+ ((second_len || second_dst || third_len || third_dst) &&
+ (!first_len || !first_dst)) ||
+ ((third_len || third_dst) && (!second_len || !second_dst))));
+
+ /* Extract entropy from data into secret */
+ blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
+
+ if (!first_dst || !first_len)
+ goto out;
+
+ /* Expand first key: key = secret, data = 0x1 */
+ output[0] = 1;
+ blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
+ memcpy(first_dst, output, first_len);
+
+ if (!second_dst || !second_len)
+ goto out;
+
+ /* Expand second key: key = secret, data = first-key || 0x2 */
+ output[BLAKE2S_HASH_SIZE] = 2;
+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+ BLAKE2S_HASH_SIZE);
+ memcpy(second_dst, output, second_len);
+
+ if (!third_dst || !third_len)
+ goto out;
+
+ /* Expand third key: key = secret, data = second-key || 0x3 */
+ output[BLAKE2S_HASH_SIZE] = 3;
+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+ BLAKE2S_HASH_SIZE);
+ memcpy(third_dst, output, third_len);
+
+out:
+ /* Clear sensitive data from stack */
+ memzero_explicit(secret, BLAKE2S_HASH_SIZE);
+ memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
+}
+
+static void symmetric_key_init(struct noise_symmetric_key *key)
+{
+ spin_lock_init(&key->counter.receive.lock);
+ atomic64_set(&key->counter.counter, 0);
+ memset(key->counter.receive.backtrack, 0,
+ sizeof(key->counter.receive.backtrack));
+ key->birthdate = ktime_get_coarse_boottime_ns();
+ key->is_valid = true;
+}
+
+static void derive_keys(struct noise_symmetric_key *first_dst,
+ struct noise_symmetric_key *second_dst,
+ const u8 chaining_key[NOISE_HASH_LEN])
+{
+ kdf(first_dst->key, second_dst->key, NULL, NULL,
+ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
+ chaining_key);
+ symmetric_key_init(first_dst);
+ symmetric_key_init(second_dst);
+}
+
+static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 private[NOISE_PUBLIC_KEY_LEN],
+ const u8 public[NOISE_PUBLIC_KEY_LEN])
+{
+ u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
+
+ if (unlikely(!curve25519(dh_calculation, private, public)))
+ return false;
+ kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
+ memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
+ return true;
+}
+
+static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
+{
+ struct blake2s_state blake;
+
+ blake2s_init(&blake, NOISE_HASH_LEN);
+ blake2s_update(&blake, hash, NOISE_HASH_LEN);
+ blake2s_update(&blake, src, src_len);
+ blake2s_final(&blake, hash);
+}
+
+static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
+{
+ u8 temp_hash[NOISE_HASH_LEN];
+
+ kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
+ mix_hash(hash, temp_hash, NOISE_HASH_LEN);
+ memzero_explicit(temp_hash, NOISE_HASH_LEN);
+}
+
+static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
+ u8 hash[NOISE_HASH_LEN],
+ const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
+{
+ memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
+ memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
+ mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
+}
+
+static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
+ NOISE_HASH_LEN,
+ 0 /* Always zero for Noise_IK */, key);
+ mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
+}
+
+static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
+ hash, NOISE_HASH_LEN,
+ 0 /* Always zero for Noise_IK */, key))
+ return false;
+ mix_hash(hash, src_ciphertext, src_len);
+ return true;
+}
+
+static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
+ const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
+ u8 chaining_key[NOISE_HASH_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ if (ephemeral_dst != ephemeral_src)
+ memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+ mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+ kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0,
+ NOISE_PUBLIC_KEY_LEN, chaining_key);
+}
+
+static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
+{
+ struct timespec64 now;
+
+ ktime_get_real_ts64(&now);
+
+ /* In order to prevent some sort of infoleak from precise timers, we
+ * round down the nanoseconds part to the closest rounded-down power of
+ * two to the maximum initiations per second allowed anyway by the
+ * implementation.
+ */
+ now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
+ rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));
+
+ /* https://cr.yp.to/libtai/tai64.html */
+ *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec);
+ *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
+}
+
+bool
+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
+ struct noise_handshake *handshake)
+{
+ u8 timestamp[NOISE_TIMESTAMP_LEN];
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ bool ret = false;
+
+ /* We need to wait for crng _before_ taking any locks, since
+ * curve25519_generate_secret uses get_random_bytes_wait.
+ */
+ wait_for_random_bytes();
+
+ down_read(&handshake->static_identity->lock);
+ down_write(&handshake->lock);
+
+ if (unlikely(!handshake->static_identity->has_identity))
+ goto out;
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
+
+ handshake_init(handshake->chaining_key, handshake->hash,
+ handshake->remote_static);
+
+ /* e */
+ curve25519_generate_secret(handshake->ephemeral_private);
+ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
+ handshake->ephemeral_private))
+ goto out;
+ message_ephemeral(dst->unencrypted_ephemeral,
+ dst->unencrypted_ephemeral, handshake->chaining_key,
+ handshake->hash);
+
+ /* es */
+ if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
+ handshake->remote_static))
+ goto out;
+
+ /* s */
+ message_encrypt(dst->encrypted_static,
+ handshake->static_identity->static_public,
+ NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
+
+ /* ss */
+ kdf(handshake->chaining_key, key, NULL,
+ handshake->precomputed_static_static, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ handshake->chaining_key);
+
+ /* {t} */
+ tai64n_now(timestamp);
+ message_encrypt(dst->encrypted_timestamp, timestamp,
+ NOISE_TIMESTAMP_LEN, key, handshake->hash);
+
+ dst->sender_index = wg_index_hashtable_insert(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+
+ handshake->state = HANDSHAKE_CREATED_INITIATION;
+ ret = true;
+
+out:
+ up_write(&handshake->lock);
+ up_read(&handshake->static_identity->lock);
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ return ret;
+}
+
+struct wg_peer *
+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
+ struct wg_device *wg)
+{
+ struct wg_peer *peer = NULL, *ret_peer = NULL;
+ struct noise_handshake *handshake;
+ bool replay_attack, flood_attack;
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+ u8 hash[NOISE_HASH_LEN];
+ u8 s[NOISE_PUBLIC_KEY_LEN];
+ u8 e[NOISE_PUBLIC_KEY_LEN];
+ u8 t[NOISE_TIMESTAMP_LEN];
+ u64 initiation_consumption;
+
+ down_read(&wg->static_identity.lock);
+ if (unlikely(!wg->static_identity.has_identity))
+ goto out;
+
+ handshake_init(chaining_key, hash, wg->static_identity.static_public);
+
+ /* e */
+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+ /* es */
+ if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
+ goto out;
+
+ /* s */
+ if (!message_decrypt(s, src->encrypted_static,
+ sizeof(src->encrypted_static), key, hash))
+ goto out;
+
+ /* Lookup which peer we're actually talking to */
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s);
+ if (!peer)
+ goto out;
+ handshake = &peer->handshake;
+
+ /* ss */
+ kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
+ NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ chaining_key);
+
+ /* {t} */
+ if (!message_decrypt(t, src->encrypted_timestamp,
+ sizeof(src->encrypted_timestamp), key, hash))
+ goto out;
+
+ down_read(&handshake->lock);
+ replay_attack = memcmp(t, handshake->latest_timestamp,
+ NOISE_TIMESTAMP_LEN) <= 0;
+ flood_attack = (s64)handshake->last_initiation_consumption +
+ NSEC_PER_SEC / INITIATIONS_PER_SECOND >
+ (s64)ktime_get_coarse_boottime_ns();
+ up_read(&handshake->lock);
+ if (replay_attack || flood_attack)
+ goto out;
+
+ /* Success! Copy everything to peer */
+ down_write(&handshake->lock);
+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+ if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0)
+ memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
+ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+ handshake->remote_index = src->sender_index;
+ if ((s64)(handshake->last_initiation_consumption -
+ (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
+ handshake->last_initiation_consumption = initiation_consumption;
+ handshake->state = HANDSHAKE_CONSUMED_INITIATION;
+ up_write(&handshake->lock);
+ ret_peer = peer;
+
+out:
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ memzero_explicit(hash, NOISE_HASH_LEN);
+ memzero_explicit(chaining_key, NOISE_HASH_LEN);
+ up_read(&wg->static_identity.lock);
+ if (!ret_peer)
+ wg_peer_put(peer);
+ return ret_peer;
+}
+
+bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
+ struct noise_handshake *handshake)
+{
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ bool ret = false;
+
+ /* We need to wait for crng _before_ taking any locks, since
+ * curve25519_generate_secret uses get_random_bytes_wait.
+ */
+ wait_for_random_bytes();
+
+ down_read(&handshake->static_identity->lock);
+ down_write(&handshake->lock);
+
+ if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
+ goto out;
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
+ dst->receiver_index = handshake->remote_index;
+
+ /* e */
+ curve25519_generate_secret(handshake->ephemeral_private);
+ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
+ handshake->ephemeral_private))
+ goto out;
+ message_ephemeral(dst->unencrypted_ephemeral,
+ dst->unencrypted_ephemeral, handshake->chaining_key,
+ handshake->hash);
+
+ /* ee */
+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
+ handshake->remote_ephemeral))
+ goto out;
+
+ /* se */
+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
+ handshake->remote_static))
+ goto out;
+
+ /* psk */
+ mix_psk(handshake->chaining_key, handshake->hash, key,
+ handshake->preshared_key);
+
+ /* {} */
+ message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
+
+ dst->sender_index = wg_index_hashtable_insert(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+
+ handshake->state = HANDSHAKE_CREATED_RESPONSE;
+ ret = true;
+
+out:
+ up_write(&handshake->lock);
+ up_read(&handshake->static_identity->lock);
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ return ret;
+}
+
+struct wg_peer *
+wg_noise_handshake_consume_response(struct message_handshake_response *src,
+ struct wg_device *wg)
+{
+ enum noise_handshake_state state = HANDSHAKE_ZEROED;
+ struct wg_peer *peer = NULL, *ret_peer = NULL;
+ struct noise_handshake *handshake;
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 hash[NOISE_HASH_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+ u8 e[NOISE_PUBLIC_KEY_LEN];
+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+ u8 static_private[NOISE_PUBLIC_KEY_LEN];
+
+ down_read(&wg->static_identity.lock);
+
+ if (unlikely(!wg->static_identity.has_identity))
+ goto out;
+
+ handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
+ wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
+ src->receiver_index, &peer);
+ if (unlikely(!handshake))
+ goto out;
+
+ down_read(&handshake->lock);
+ state = handshake->state;
+ memcpy(hash, handshake->hash, NOISE_HASH_LEN);
+ memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
+ memcpy(ephemeral_private, handshake->ephemeral_private,
+ NOISE_PUBLIC_KEY_LEN);
+ up_read(&handshake->lock);
+
+ if (state != HANDSHAKE_CREATED_INITIATION)
+ goto fail;
+
+ /* e */
+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+ /* ee */
+ if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
+ goto fail;
+
+ /* se */
+ if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
+ goto fail;
+
+ /* psk */
+ mix_psk(chaining_key, hash, key, handshake->preshared_key);
+
+ /* {} */
+ if (!message_decrypt(NULL, src->encrypted_nothing,
+ sizeof(src->encrypted_nothing), key, hash))
+ goto fail;
+
+ /* Success! Copy everything to peer */
+ down_write(&handshake->lock);
+ /* It's important to check that the state is still the same, while we
+ * have an exclusive lock.
+ */
+ if (handshake->state != state) {
+ up_write(&handshake->lock);
+ goto fail;
+ }
+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+ handshake->remote_index = src->sender_index;
+ handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
+ up_write(&handshake->lock);
+ ret_peer = peer;
+ goto out;
+
+fail:
+ wg_peer_put(peer);
+out:
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ memzero_explicit(hash, NOISE_HASH_LEN);
+ memzero_explicit(chaining_key, NOISE_HASH_LEN);
+ memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+ memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
+ up_read(&wg->static_identity.lock);
+ return ret_peer;
+}
+
+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
+ struct noise_keypairs *keypairs)
+{
+ struct noise_keypair *new_keypair;
+ bool ret = false;
+
+ down_write(&handshake->lock);
+ if (handshake->state != HANDSHAKE_CREATED_RESPONSE &&
+ handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
+ goto out;
+
+ new_keypair = keypair_create(handshake->entry.peer);
+ if (!new_keypair)
+ goto out;
+ new_keypair->i_am_the_initiator = handshake->state ==
+ HANDSHAKE_CONSUMED_RESPONSE;
+ new_keypair->remote_index = handshake->remote_index;
+
+ if (new_keypair->i_am_the_initiator)
+ derive_keys(&new_keypair->sending, &new_keypair->receiving,
+ handshake->chaining_key);
+ else
+ derive_keys(&new_keypair->receiving, &new_keypair->sending,
+ handshake->chaining_key);
+
+ handshake_zero(handshake);
+ rcu_read_lock_bh();
+ if (likely(!READ_ONCE(container_of(handshake, struct wg_peer,
+ handshake)->is_dead))) {
+ add_new_keypair(keypairs, new_keypair);
+ net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
+ handshake->entry.peer->device->dev->name,
+ new_keypair->internal_id,
+ handshake->entry.peer->internal_id);
+ ret = wg_index_hashtable_replace(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry, &new_keypair->entry);
+ } else {
+ kzfree(new_keypair);
+ }
+ rcu_read_unlock_bh();
+
+out:
+ up_write(&handshake->lock);
+ return ret;
+}
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
new file mode 100644
index 000000000000..138a07bb817c
--- /dev/null
+++ b/drivers/net/wireguard/noise.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef _WG_NOISE_H
+#define _WG_NOISE_H
+
+#include "messages.h"
+#include "peerlookup.h"
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+
+union noise_counter {
+ struct {
+ u64 counter;
+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
+ spinlock_t lock;
+ } receive;
+ atomic64_t counter;
+};
+
+struct noise_symmetric_key {
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ union noise_counter counter;
+ u64 birthdate;
+ bool is_valid;
+};
+
+struct noise_keypair {
+ struct index_hashtable_entry entry;
+ struct noise_symmetric_key sending;
+ struct noise_symmetric_key receiving;
+ __le32 remote_index;
+ bool i_am_the_initiator;
+ struct kref refcount;
+ struct rcu_head rcu;
+ u64 internal_id;
+};
+
+struct noise_keypairs {
+ struct noise_keypair __rcu *current_keypair;
+ struct noise_keypair __rcu *previous_keypair;
+ struct noise_keypair __rcu *next_keypair;
+ spinlock_t keypair_update_lock;
+};
+
+struct noise_static_identity {
+ u8 static_public[NOISE_PUBLIC_KEY_LEN];
+ u8 static_private[NOISE_PUBLIC_KEY_LEN];
+ struct rw_semaphore lock;
+ bool has_identity;
+};
+
+enum noise_handshake_state {
+ HANDSHAKE_ZEROED,
+ HANDSHAKE_CREATED_INITIATION,
+ HANDSHAKE_CONSUMED_INITIATION,
+ HANDSHAKE_CREATED_RESPONSE,
+ HANDSHAKE_CONSUMED_RESPONSE
+};
+
+struct noise_handshake {
+ struct index_hashtable_entry entry;
+
+ enum noise_handshake_state state;
+ u64 last_initiation_consumption;
+
+ struct noise_static_identity *static_identity;
+
+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_static[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
+
+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
+
+ u8 hash[NOISE_HASH_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+
+ u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
+ __le32 remote_index;
+
+ /* Protects all members except the immutable (after noise_handshake_
+ * init): remote_static, precomputed_static_static, static_identity.
+ */
+ struct rw_semaphore lock;
+};
+
+struct wg_device;
+
+void wg_noise_init(void);
+bool wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer);
+void wg_noise_handshake_clear(struct noise_handshake *handshake);
+static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
+{
+ atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() -
+ (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
+}
+
+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *received_keypair);
+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
+
+void wg_noise_set_static_identity_private_key(
+ struct noise_static_identity *static_identity,
+ const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
+bool wg_noise_precompute_static_static(struct wg_peer *peer);
+
+bool
+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
+ struct noise_handshake *handshake);
+struct wg_peer *
+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
+ struct wg_device *wg);
+
+bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
+ struct noise_handshake *handshake);
+struct wg_peer *
+wg_noise_handshake_consume_response(struct message_handshake_response *src,
+ struct wg_device *wg);
+
+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
+ struct noise_keypairs *keypairs);
+
+#endif /* _WG_NOISE_H */
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
new file mode 100644
index 000000000000..071eedf33f5a
--- /dev/null
+++ b/drivers/net/wireguard/peer.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "peer.h"
+#include "device.h"
+#include "queueing.h"
+#include "timers.h"
+#include "peerlookup.h"
+#include "noise.h"
+
+#include <linux/kref.h>
+#include <linux/lockdep.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+static atomic64_t peer_counter = ATOMIC64_INIT(0);
+
+struct wg_peer *wg_peer_create(struct wg_device *wg,
+ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
+{
+ struct wg_peer *peer;
+ int ret = -ENOMEM;
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
+ return ERR_PTR(ret);
+
+ peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+ if (unlikely(!peer))
+ return ERR_PTR(ret);
+ peer->device = wg;
+
+ if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer)) {
+ ret = -EKEYREJECTED;
+ goto err_1;
+ }
+ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+ goto err_1;
+ if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
+ MAX_QUEUED_PACKETS))
+ goto err_2;
+ if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
+ MAX_QUEUED_PACKETS))
+ goto err_3;
+
+ peer->internal_id = atomic64_inc_return(&peer_counter);
+ peer->serial_work_cpu = nr_cpumask_bits;
+ wg_cookie_init(&peer->latest_cookie);
+ wg_timers_init(peer);
+ wg_cookie_checker_precompute_peer_keys(peer);
+ spin_lock_init(&peer->keypairs.keypair_update_lock);
+ INIT_WORK(&peer->transmit_handshake_work,
+ wg_packet_handshake_send_worker);
+ rwlock_init(&peer->endpoint_lock);
+ kref_init(&peer->refcount);
+ skb_queue_head_init(&peer->staged_packet_queue);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
+ netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&peer->napi);
+ list_add_tail(&peer->peer_list, &wg->peer_list);
+ INIT_LIST_HEAD(&peer->allowedips_list);
+ wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
+ ++wg->num_peers;
+ pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
+ return peer;
+
+err_3:
+ wg_packet_queue_free(&peer->tx_queue, false);
+err_2:
+ dst_cache_destroy(&peer->endpoint_cache);
+err_1:
+ kfree(peer);
+ return ERR_PTR(ret);
+}
+
+struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
+ "Taking peer reference without holding the RCU read lock");
+ if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
+ return NULL;
+ return peer;
+}
+
+static void peer_make_dead(struct wg_peer *peer)
+{
+ /* Remove from configuration-time lookup structures. */
+ list_del_init(&peer->peer_list);
+ wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
+ &peer->device->device_update_lock);
+ wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
+
+ /* Mark as dead, so that we don't allow jumping contexts after. */
+ WRITE_ONCE(peer->is_dead, true);
+
+ /* The caller must now synchronize_rcu() for this to take effect. */
+}
+
+static void peer_remove_after_dead(struct wg_peer *peer)
+{
+ WARN_ON(!peer->is_dead);
+
+ /* No more keypairs can be created for this peer, since is_dead protects
+ * add_new_keypair, so we can now destroy existing ones.
+ */
+ wg_noise_keypairs_clear(&peer->keypairs);
+
+ /* Destroy all ongoing timers that were in-flight at the beginning of
+ * this function.
+ */
+ wg_timers_stop(peer);
+
+ /* The transition between packet encryption/decryption queues isn't
+ * guarded by is_dead, but each reference's life is strictly bounded by
+ * two generations: once for parallel crypto and once for serial
+ * ingestion, so we can simply flush twice, and be sure that we no
+ * longer have references inside these queues.
+ */
+
+ /* a) For encrypt/decrypt. */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.1) For send (but not receive, since that's napi). */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.2.1) For receive (but not send, since that's wq). */
+ napi_disable(&peer->napi);
+ /* b.2.1) It's now safe to remove the napi struct, which must be done
+ * here from process context.
+ */
+ netif_napi_del(&peer->napi);
+
+ /* Ensure any workstructs we own (like transmit_handshake_work or
+ * clear_peer_work) no longer are in use.
+ */
+ flush_workqueue(peer->device->handshake_send_wq);
+
+ /* After the above flushes, a peer might still be active in a few
+ * different contexts: 1) from xmit(), before hitting is_dead and
+ * returning, 2) from wg_packet_consume_data(), before hitting is_dead
+ * and returning, 3) from wg_receive_handshake_packet() after a point
+ * where it has processed an incoming handshake packet, but where
+ * all calls to pass it off to timers fails because of is_dead. We won't
+ * have new references in (1) eventually, because we're removed from
+ * allowedips; we won't have new references in (2) eventually, because
+ * wg_index_hashtable_lookup will always return NULL, since we removed
+ * all existing keypairs and no more can be created; we won't have new
+ * references in (3) eventually, because we're removed from the pubkey
+ * hash table, which allows for a maximum of one handshake response,
+ * via the still-uncleared index hashtable entry, but not more than one,
+ * and in wg_cookie_message_consume, the lookup eventually gets a peer
+ * with a refcount of zero, so no new reference is taken.
+ */
+
+ --peer->device->num_peers;
+ wg_peer_put(peer);
+}
+
+/* We have a separate "remove" function make sure that all active places where
+ * a peer is currently operating will eventually come to an end and not pass
+ * their reference onto another context.
+ */
+void wg_peer_remove(struct wg_peer *peer)
+{
+ if (unlikely(!peer))
+ return;
+ lockdep_assert_held(&peer->device->device_update_lock);
+
+ peer_make_dead(peer);
+ synchronize_rcu();
+ peer_remove_after_dead(peer);
+}
+
+void wg_peer_remove_all(struct wg_device *wg)
+{
+ struct wg_peer *peer, *temp;
+ LIST_HEAD(dead_peers);
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ /* Avoid having to traverse individually for each one. */
+ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
+
+ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
+ peer_make_dead(peer);
+ list_add_tail(&peer->peer_list, &dead_peers);
+ }
+ synchronize_rcu();
+ list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
+ peer_remove_after_dead(peer);
+}
+
+static void rcu_release(struct rcu_head *rcu)
+{
+ struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
+
+ dst_cache_destroy(&peer->endpoint_cache);
+ wg_packet_queue_free(&peer->rx_queue, false);
+ wg_packet_queue_free(&peer->tx_queue, false);
+
+ /* The final zeroing takes care of clearing any remaining handshake key
+ * material and other potentially sensitive information.
+ */
+ kzfree(peer);
+}
+
+static void kref_release(struct kref *refcount)
+{
+ struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
+
+ pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ /* Remove ourself from dynamic runtime lookup structures, now that the
+ * last reference is gone.
+ */
+ wg_index_hashtable_remove(peer->device->index_hashtable,
+ &peer->handshake.entry);
+
+ /* Remove any lingering packets that didn't have a chance to be
+ * transmitted.
+ */
+ wg_packet_purge_staged_packets(peer);
+
+ /* Free the memory used. */
+ call_rcu(&peer->rcu, rcu_release);
+}
+
+void wg_peer_put(struct wg_peer *peer)
+{
+ if (unlikely(!peer))
+ return;
+ kref_put(&peer->refcount, kref_release);
+}
diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h
new file mode 100644
index 000000000000..23af40922997
--- /dev/null
+++ b/drivers/net/wireguard/peer.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_PEER_H
+#define _WG_PEER_H
+
+#include "device.h"
+#include "noise.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <net/dst_cache.h>
+
+struct wg_device;
+
+struct endpoint {
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ };
+ union {
+ struct {
+ struct in_addr src4;
+ /* Essentially the same as addr6->scope_id */
+ int src_if4;
+ };
+ struct in6_addr src6;
+ };
+};
+
+struct wg_peer {
+ struct wg_device *device;
+ struct crypt_queue tx_queue, rx_queue;
+ struct sk_buff_head staged_packet_queue;
+ int serial_work_cpu;
+ struct noise_keypairs keypairs;
+ struct endpoint endpoint;
+ struct dst_cache endpoint_cache;
+ rwlock_t endpoint_lock;
+ struct noise_handshake handshake;
+ atomic64_t last_sent_handshake;
+ struct work_struct transmit_handshake_work, clear_peer_work;
+ struct cookie latest_cookie;
+ struct hlist_node pubkey_hash;
+ u64 rx_bytes, tx_bytes;
+ struct timer_list timer_retransmit_handshake, timer_send_keepalive;
+ struct timer_list timer_new_handshake, timer_zero_key_material;
+ struct timer_list timer_persistent_keepalive;
+ unsigned int timer_handshake_attempts;
+ u16 persistent_keepalive_interval;
+ bool timer_need_another_keepalive;
+ bool sent_lastminute_handshake;
+ struct timespec64 walltime_last_handshake;
+ struct kref refcount;
+ struct rcu_head rcu;
+ struct list_head peer_list;
+ struct list_head allowedips_list;
+ u64 internal_id;
+ struct napi_struct napi;
+ bool is_dead;
+};
+
+struct wg_peer *wg_peer_create(struct wg_device *wg,
+ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
+
+struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
+static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
+{
+ kref_get(&peer->refcount);
+ return peer;
+}
+void wg_peer_put(struct wg_peer *peer);
+void wg_peer_remove(struct wg_peer *peer);
+void wg_peer_remove_all(struct wg_device *wg);
+
+#endif /* _WG_PEER_H */
diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c
new file mode 100644
index 000000000000..e4deb331476b
--- /dev/null
+++ b/drivers/net/wireguard/peerlookup.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "peerlookup.h"
+#include "peer.h"
+#include "noise.h"
+
+static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+ /* siphash gives us a secure 64bit number based on a random key. Since
+ * the bits are uniformly distributed, we can then mask off to get the
+ * bits we need.
+ */
+ const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
+
+ return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
+{
+ struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
+
+ if (!table)
+ return NULL;
+
+ get_random_bytes(&table->key, sizeof(table->key));
+ hash_init(table->hashtable);
+ mutex_init(&table->lock);
+ return table;
+}
+
+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
+ struct wg_peer *peer)
+{
+ mutex_lock(&table->lock);
+ hlist_add_head_rcu(&peer->pubkey_hash,
+ pubkey_bucket(table, peer->handshake.remote_static));
+ mutex_unlock(&table->lock);
+}
+
+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
+ struct wg_peer *peer)
+{
+ mutex_lock(&table->lock);
+ hlist_del_init_rcu(&peer->pubkey_hash);
+ mutex_unlock(&table->lock);
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *
+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+ struct wg_peer *iter_peer, *peer = NULL;
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
+ pubkey_hash) {
+ if (!memcmp(pubkey, iter_peer->handshake.remote_static,
+ NOISE_PUBLIC_KEY_LEN)) {
+ peer = iter_peer;
+ break;
+ }
+ }
+ peer = wg_peer_get_maybe_zero(peer);
+ rcu_read_unlock_bh();
+ return peer;
+}
+
+static struct hlist_head *index_bucket(struct index_hashtable *table,
+ const __le32 index)
+{
+ /* Since the indices are random and thus all bits are uniformly
+ * distributed, we can find its bucket simply by masking.
+ */
+ return &table->hashtable[(__force u32)index &
+ (HASH_SIZE(table->hashtable) - 1)];
+}
+
+struct index_hashtable *wg_index_hashtable_alloc(void)
+{
+ struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
+
+ if (!table)
+ return NULL;
+
+ hash_init(table->hashtable);
+ spin_lock_init(&table->lock);
+ return table;
+}
+
+/* At the moment, we limit ourselves to 2^20 total peers, which generally might
+ * amount to 2^20*3 items in this hashtable. The algorithm below works by
+ * picking a random number and testing it. We can see that these limits mean we
+ * usually succeed pretty quickly:
+ *
+ * >>> def calculation(tries, size):
+ * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32))
+ * ...
+ * >>> calculation(1, 2**20 * 3)
+ * 0.999267578125
+ * >>> calculation(2, 2**20 * 3)
+ * 0.0007318854331970215
+ * >>> calculation(3, 2**20 * 3)
+ * 5.360489012673497e-07
+ * >>> calculation(4, 2**20 * 3)
+ * 3.9261394135792216e-10
+ *
+ * At the moment, we don't do any masking, so this algorithm isn't exactly
+ * constant time in either the random guessing or in the hash list lookup. We
+ * could require a minimum of 3 tries, which would successfully mask the
+ * guessing. this would not, however, help with the growing hash lengths, which
+ * is another thing to consider moving forward.
+ */
+
+__le32 wg_index_hashtable_insert(struct index_hashtable *table,
+ struct index_hashtable_entry *entry)
+{
+ struct index_hashtable_entry *existing_entry;
+
+ spin_lock_bh(&table->lock);
+ hlist_del_init_rcu(&entry->index_hash);
+ spin_unlock_bh(&table->lock);
+
+ rcu_read_lock_bh();
+
+search_unused_slot:
+ /* First we try to find an unused slot, randomly, while unlocked. */
+ entry->index = (__force __le32)get_random_u32();
+ hlist_for_each_entry_rcu_bh(existing_entry,
+ index_bucket(table, entry->index),
+ index_hash) {
+ if (existing_entry->index == entry->index)
+ /* If it's already in use, we continue searching. */
+ goto search_unused_slot;
+ }
+
+ /* Once we've found an unused slot, we lock it, and then double-check
+ * that nobody else stole it from us.
+ */
+ spin_lock_bh(&table->lock);
+ hlist_for_each_entry_rcu_bh(existing_entry,
+ index_bucket(table, entry->index),
+ index_hash) {
+ if (existing_entry->index == entry->index) {
+ spin_unlock_bh(&table->lock);
+ /* If it was stolen, we start over. */
+ goto search_unused_slot;
+ }
+ }
+ /* Otherwise, we know we have it exclusively (since we're locked),
+ * so we insert.
+ */
+ hlist_add_head_rcu(&entry->index_hash,
+ index_bucket(table, entry->index));
+ spin_unlock_bh(&table->lock);
+
+ rcu_read_unlock_bh();
+
+ return entry->index;
+}
+
+bool wg_index_hashtable_replace(struct index_hashtable *table,
+ struct index_hashtable_entry *old,
+ struct index_hashtable_entry *new)
+{
+ if (unlikely(hlist_unhashed(&old->index_hash)))
+ return false;
+ spin_lock_bh(&table->lock);
+ new->index = old->index;
+ hlist_replace_rcu(&old->index_hash, &new->index_hash);
+
+ /* Calling init here NULLs out index_hash, and in fact after this
+ * function returns, it's theoretically possible for this to get
+ * reinserted elsewhere. That means the RCU lookup below might either
+ * terminate early or jump between buckets, in which case the packet
+ * simply gets dropped, which isn't terrible.
+ */
+ INIT_HLIST_NODE(&old->index_hash);
+ spin_unlock_bh(&table->lock);
+ return true;
+}
+
+void wg_index_hashtable_remove(struct index_hashtable *table,
+ struct index_hashtable_entry *entry)
+{
+ spin_lock_bh(&table->lock);
+ hlist_del_init_rcu(&entry->index_hash);
+ spin_unlock_bh(&table->lock);
+}
+
+/* Returns a strong reference to a entry->peer */
+struct index_hashtable_entry *
+wg_index_hashtable_lookup(struct index_hashtable *table,
+ const enum index_hashtable_type type_mask,
+ const __le32 index, struct wg_peer **peer)
+{
+ struct index_hashtable_entry *iter_entry, *entry = NULL;
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
+ index_hash) {
+ if (iter_entry->index == index) {
+ if (likely(iter_entry->type & type_mask))
+ entry = iter_entry;
+ break;
+ }
+ }
+ if (likely(entry)) {
+ entry->peer = wg_peer_get_maybe_zero(entry->peer);
+ if (likely(entry->peer))
+ *peer = entry->peer;
+ else
+ entry = NULL;
+ }
+ rcu_read_unlock_bh();
+ return entry;
+}
diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h
new file mode 100644
index 000000000000..ced811797680
--- /dev/null
+++ b/drivers/net/wireguard/peerlookup.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_PEERLOOKUP_H
+#define _WG_PEERLOOKUP_H
+
+#include "messages.h"
+
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+#include <linux/siphash.h>
+
+struct wg_peer;
+
+struct pubkey_hashtable {
+ /* TODO: move to rhashtable */
+ DECLARE_HASHTABLE(hashtable, 11);
+ siphash_key_t key;
+ struct mutex lock;
+};
+
+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
+ struct wg_peer *peer);
+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
+ struct wg_peer *peer);
+struct wg_peer *
+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
+
+struct index_hashtable {
+ /* TODO: move to rhashtable */
+ DECLARE_HASHTABLE(hashtable, 13);
+ spinlock_t lock;
+};
+
+enum index_hashtable_type {
+ INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
+ INDEX_HASHTABLE_KEYPAIR = 1U << 1
+};
+
+struct index_hashtable_entry {
+ struct wg_peer *peer;
+ struct hlist_node index_hash;
+ enum index_hashtable_type type;
+ __le32 index;
+};
+
+struct index_hashtable *wg_index_hashtable_alloc(void);
+__le32 wg_index_hashtable_insert(struct index_hashtable *table,
+ struct index_hashtable_entry *entry);
+bool wg_index_hashtable_replace(struct index_hashtable *table,
+ struct index_hashtable_entry *old,
+ struct index_hashtable_entry *new);
+void wg_index_hashtable_remove(struct index_hashtable *table,
+ struct index_hashtable_entry *entry);
+struct index_hashtable_entry *
+wg_index_hashtable_lookup(struct index_hashtable *table,
+ const enum index_hashtable_type type_mask,
+ const __le32 index, struct wg_peer **peer);
+
+#endif /* _WG_PEERLOOKUP_H */
diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
new file mode 100644
index 000000000000..5c964fcb994e
--- /dev/null
+++ b/drivers/net/wireguard/queueing.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
+{
+ int cpu;
+ struct multicore_worker __percpu *worker =
+ alloc_percpu(struct multicore_worker);
+
+ if (!worker)
+ return NULL;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(worker, cpu)->ptr = ptr;
+ INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
+ }
+ return worker;
+}
+
+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ bool multicore, unsigned int len)
+{
+ int ret;
+
+ memset(queue, 0, sizeof(*queue));
+ ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
+ if (ret)
+ return ret;
+ if (function) {
+ if (multicore) {
+ queue->worker = wg_packet_percpu_multicore_worker_alloc(
+ function, queue);
+ if (!queue->worker)
+ return -ENOMEM;
+ } else {
+ INIT_WORK(&queue->work, function);
+ }
+ }
+ return 0;
+}
+
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
+{
+ if (multicore)
+ free_percpu(queue->worker);
+ WARN_ON(!__ptr_ring_empty(&queue->ring));
+ ptr_ring_cleanup(&queue->ring, NULL);
+}
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
new file mode 100644
index 000000000000..58fdd630b246
--- /dev/null
+++ b/drivers/net/wireguard/queueing.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_QUEUEING_H
+#define _WG_QUEUEING_H
+
+#include "peer.h"
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wg_device;
+struct wg_peer;
+struct multicore_worker;
+struct crypt_queue;
+struct sk_buff;
+
+/* queueing.c APIs: */
+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ bool multicore, unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
+
+/* receive.c APIs: */
+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
+void wg_packet_handshake_receive_worker(struct work_struct *work);
+/* NAPI poll function: */
+int wg_packet_rx_poll(struct napi_struct *napi, int budget);
+/* Workqueue worker: */
+void wg_packet_decrypt_worker(struct work_struct *work);
+
+/* send.c APIs: */
+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
+ bool is_retry);
+void wg_packet_send_handshake_response(struct wg_peer *peer);
+void wg_packet_send_handshake_cookie(struct wg_device *wg,
+ struct sk_buff *initiating_skb,
+ __le32 sender_index);
+void wg_packet_send_keepalive(struct wg_peer *peer);
+void wg_packet_purge_staged_packets(struct wg_peer *peer);
+void wg_packet_send_staged_packets(struct wg_peer *peer);
+/* Workqueue workers: */
+void wg_packet_handshake_send_worker(struct work_struct *work);
+void wg_packet_tx_worker(struct work_struct *work);
+void wg_packet_encrypt_worker(struct work_struct *work);
+
+enum packet_state {
+ PACKET_STATE_UNCRYPTED,
+ PACKET_STATE_CRYPTED,
+ PACKET_STATE_DEAD
+};
+
+struct packet_cb {
+ u64 nonce;
+ struct noise_keypair *keypair;
+ atomic_t state;
+ u32 mtu;
+ u8 ds;
+};
+
+#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
+#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+{
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct iphdr)) <=
+ skb_tail_pointer(skb) &&
+ ip_hdr(skb)->version == 4)
+ return htons(ETH_P_IP);
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
+ skb_tail_pointer(skb) &&
+ ipv6_hdr(skb)->version == 6)
+ return htons(ETH_P_IPV6);
+ return 0;
+}
+
+static inline void wg_reset_packet(struct sk_buff *skb)
+{
+ const int pfmemalloc = skb->pfmemalloc;
+
+ skb_scrub_packet(skb, true);
+ memset(&skb->headers_start, 0,
+ offsetof(struct sk_buff, headers_end) -
+ offsetof(struct sk_buff, headers_start));
+ skb->pfmemalloc = pfmemalloc;
+ skb->queue_mapping = 0;
+ skb->nohdr = 0;
+ skb->peeked = 0;
+ skb->mac_len = 0;
+ skb->dev = NULL;
+#ifdef CONFIG_NET_SCHED
+ skb->tc_index = 0;
+#endif
+ skb_reset_redirect(skb);
+ skb->hdr_len = skb_headroom(skb);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_probe_transport_header(skb);
+ skb_reset_inner_headers(skb);
+}
+
+static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
+{
+ unsigned int cpu = *stored_cpu, cpu_index, i;
+
+ if (unlikely(cpu == nr_cpumask_bits ||
+ !cpumask_test_cpu(cpu, cpu_online_mask))) {
+ cpu_index = id % cpumask_weight(cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
+ for (i = 0; i < cpu_index; ++i)
+ cpu = cpumask_next(cpu, cpu_online_mask);
+ *stored_cpu = cpu;
+ }
+ return cpu;
+}
+
+/* This function is racy, in the sense that next is unlocked, so it could return
+ * the same CPU twice. A race-free version of this would be to instead store an
+ * atomic sequence number, do an increment-and-return, and then iterate through
+ * every possible CPU until we get to that index -- choose_cpu. However that's
+ * a bit slower, and it doesn't seem like this potential race actually
+ * introduces any performance loss, so we live with it.
+ */
+static inline int wg_cpumask_next_online(int *next)
+{
+ int cpu = *next;
+
+ while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
+ cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+ *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+ return cpu;
+}
+
+static inline int wg_queue_enqueue_per_device_and_peer(
+ struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
+ struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+{
+ int cpu;
+
+ atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
+ /* We first queue this up for the peer ingestion, but the consumer
+ * will wait for the state to change to CRYPTED or DEAD before.
+ */
+ if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+ return -ENOSPC;
+ /* Then we queue it up in the device queue, which consumes the
+ * packet as soon as it can.
+ */
+ cpu = wg_cpumask_next_online(next_cpu);
+ if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+ return -EPIPE;
+ queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
+ return 0;
+}
+
+static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
+ struct sk_buff *skb,
+ enum packet_state state)
+{
+ /* We take a reference, because as soon as we call atomic_set, the
+ * peer can be freed from below us.
+ */
+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
+
+ atomic_set_release(&PACKET_CB(skb)->state, state);
+ queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
+ peer->internal_id),
+ peer->device->packet_crypt_wq, &queue->work);
+ wg_peer_put(peer);
+}
+
+static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
+ enum packet_state state)
+{
+ /* We take a reference, because as soon as we call atomic_set, the
+ * peer can be freed from below us.
+ */
+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
+
+ atomic_set_release(&PACKET_CB(skb)->state, state);
+ napi_schedule(&peer->napi);
+ wg_peer_put(peer);
+}
+
+#ifdef DEBUG
+bool wg_packet_counter_selftest(void);
+#endif
+
+#endif /* _WG_QUEUEING_H */
diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c
new file mode 100644
index 000000000000..3fedd1d21f5e
--- /dev/null
+++ b/drivers/net/wireguard/ratelimiter.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "ratelimiter.h"
+#include <linux/siphash.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+static struct kmem_cache *entry_cache;
+static hsiphash_key_t key;
+static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
+static DEFINE_MUTEX(init_lock);
+static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
+static atomic_t total_entries = ATOMIC_INIT(0);
+static unsigned int max_entries, table_size;
+static void wg_ratelimiter_gc_entries(struct work_struct *);
+static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
+static struct hlist_head *table_v4;
+#if IS_ENABLED(CONFIG_IPV6)
+static struct hlist_head *table_v6;
+#endif
+
+struct ratelimiter_entry {
+ u64 last_time_ns, tokens, ip;
+ void *net;
+ spinlock_t lock;
+ struct hlist_node hash;
+ struct rcu_head rcu;
+};
+
+enum {
+ PACKETS_PER_SECOND = 20,
+ PACKETS_BURSTABLE = 5,
+ PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
+ TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
+};
+
+static void entry_free(struct rcu_head *rcu)
+{
+ kmem_cache_free(entry_cache,
+ container_of(rcu, struct ratelimiter_entry, rcu));
+ atomic_dec(&total_entries);
+}
+
+static void entry_uninit(struct ratelimiter_entry *entry)
+{
+ hlist_del_rcu(&entry->hash);
+ call_rcu(&entry->rcu, entry_free);
+}
+
+/* Calling this function with a NULL work uninits all entries. */
+static void wg_ratelimiter_gc_entries(struct work_struct *work)
+{
+ const u64 now = ktime_get_coarse_boottime_ns();
+ struct ratelimiter_entry *entry;
+ struct hlist_node *temp;
+ unsigned int i;
+
+ for (i = 0; i < table_size; ++i) {
+ spin_lock(&table_lock);
+ hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
+ if (unlikely(!work) ||
+ now - entry->last_time_ns > NSEC_PER_SEC)
+ entry_uninit(entry);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
+ if (unlikely(!work) ||
+ now - entry->last_time_ns > NSEC_PER_SEC)
+ entry_uninit(entry);
+ }
+#endif
+ spin_unlock(&table_lock);
+ if (likely(work))
+ cond_resched();
+ }
+ if (likely(work))
+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+}
+
+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
+{
+ /* We only take the bottom half of the net pointer, so that we can hash
+ * 3 words in the end. This way, siphash's len param fits into the final
+ * u32, and we don't incur an extra round.
+ */
+ const u32 net_word = (unsigned long)net;
+ struct ratelimiter_entry *entry;
+ struct hlist_head *bucket;
+ u64 ip;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ip = (u64 __force)ip_hdr(skb)->saddr;
+ bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
+ (table_size - 1)];
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ /* Only use 64 bits, so as to ratelimit the whole /64. */
+ memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
+ bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
+ (table_size - 1)];
+ }
+#endif
+ else
+ return false;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, bucket, hash) {
+ if (entry->net == net && entry->ip == ip) {
+ u64 now, tokens;
+ bool ret;
+ /* Quasi-inspired by nft_limit.c, but this is actually a
+ * slightly different algorithm. Namely, we incorporate
+ * the burst as part of the maximum tokens, rather than
+ * as part of the rate.
+ */
+ spin_lock(&entry->lock);
+ now = ktime_get_coarse_boottime_ns();
+ tokens = min_t(u64, TOKEN_MAX,
+ entry->tokens + now -
+ entry->last_time_ns);
+ entry->last_time_ns = now;
+ ret = tokens >= PACKET_COST;
+ entry->tokens = ret ? tokens - PACKET_COST : tokens;
+ spin_unlock(&entry->lock);
+ rcu_read_unlock();
+ return ret;
+ }
+ }
+ rcu_read_unlock();
+
+ if (atomic_inc_return(&total_entries) > max_entries)
+ goto err_oom;
+
+ entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
+ if (unlikely(!entry))
+ goto err_oom;
+
+ entry->net = net;
+ entry->ip = ip;
+ INIT_HLIST_NODE(&entry->hash);
+ spin_lock_init(&entry->lock);
+ entry->last_time_ns = ktime_get_coarse_boottime_ns();
+ entry->tokens = TOKEN_MAX - PACKET_COST;
+ spin_lock(&table_lock);
+ hlist_add_head_rcu(&entry->hash, bucket);
+ spin_unlock(&table_lock);
+ return true;
+
+err_oom:
+ atomic_dec(&total_entries);
+ return false;
+}
+
+int wg_ratelimiter_init(void)
+{
+ mutex_lock(&init_lock);
+ if (++init_refcnt != 1)
+ goto out;
+
+ entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
+ if (!entry_cache)
+ goto err;
+
+ /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
+ * but what it shares in common is that it uses a massive hashtable. So,
+ * we borrow their wisdom about good table sizes on different systems
+ * dependent on RAM. This calculation here comes from there.
+ */
+ table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
+ max_t(unsigned long, 16, roundup_pow_of_two(
+ (totalram_pages() << PAGE_SHIFT) /
+ (1U << 14) / sizeof(struct hlist_head)));
+ max_entries = table_size * 8;
+
+ table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
+ if (unlikely(!table_v4))
+ goto err_kmemcache;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
+ if (unlikely(!table_v6)) {
+ kvfree(table_v4);
+ goto err_kmemcache;
+ }
+#endif
+
+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+ get_random_bytes(&key, sizeof(key));
+out:
+ mutex_unlock(&init_lock);
+ return 0;
+
+err_kmemcache:
+ kmem_cache_destroy(entry_cache);
+err:
+ --init_refcnt;
+ mutex_unlock(&init_lock);
+ return -ENOMEM;
+}
+
+void wg_ratelimiter_uninit(void)
+{
+ mutex_lock(&init_lock);
+ if (!init_refcnt || --init_refcnt)
+ goto out;
+
+ cancel_delayed_work_sync(&gc_work);
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+ kvfree(table_v4);
+#if IS_ENABLED(CONFIG_IPV6)
+ kvfree(table_v6);
+#endif
+ kmem_cache_destroy(entry_cache);
+out:
+ mutex_unlock(&init_lock);
+}
+
+#include "selftest/ratelimiter.c"
diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h
new file mode 100644
index 000000000000..83067f71ea99
--- /dev/null
+++ b/drivers/net/wireguard/ratelimiter.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_RATELIMITER_H
+#define _WG_RATELIMITER_H
+
+#include <linux/skbuff.h>
+
+int wg_ratelimiter_init(void);
+void wg_ratelimiter_uninit(void);
+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);
+
+#ifdef DEBUG
+bool wg_ratelimiter_selftest(void);
+#endif
+
+#endif /* _WG_RATELIMITER_H */
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
new file mode 100644
index 000000000000..7e675f541491
--- /dev/null
+++ b/drivers/net/wireguard/receive.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "device.h"
+#include "peer.h"
+#include "timers.h"
+#include "messages.h"
+#include "cookie.h"
+#include "socket.h"
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <net/ip_tunnels.h>
+
+/* Must be called with bh disabled. */
+static void update_rx_stats(struct wg_peer *peer, size_t len)
+{
+ struct pcpu_sw_netstats *tstats =
+ get_cpu_ptr(peer->device->dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ ++tstats->rx_packets;
+ tstats->rx_bytes += len;
+ peer->rx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
+}
+
+#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
+
+static size_t validate_header_len(struct sk_buff *skb)
+{
+ if (unlikely(skb->len < sizeof(struct message_header)))
+ return 0;
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
+ skb->len >= MESSAGE_MINIMUM_LENGTH)
+ return sizeof(struct message_data);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
+ skb->len == sizeof(struct message_handshake_initiation))
+ return sizeof(struct message_handshake_initiation);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
+ skb->len == sizeof(struct message_handshake_response))
+ return sizeof(struct message_handshake_response);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
+ skb->len == sizeof(struct message_handshake_cookie))
+ return sizeof(struct message_handshake_cookie);
+ return 0;
+}
+
+static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
+{
+ size_t data_offset, data_len, header_len;
+ struct udphdr *udp;
+
+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
+ skb_transport_header(skb) < skb->head ||
+ (skb_transport_header(skb) + sizeof(struct udphdr)) >
+ skb_tail_pointer(skb)))
+ return -EINVAL; /* Bogus IP header */
+ udp = udp_hdr(skb);
+ data_offset = (u8 *)udp - skb->data;
+ if (unlikely(data_offset > U16_MAX ||
+ data_offset + sizeof(struct udphdr) > skb->len))
+ /* Packet has offset at impossible location or isn't big enough
+ * to have UDP fields.
+ */
+ return -EINVAL;
+ data_len = ntohs(udp->len);
+ if (unlikely(data_len < sizeof(struct udphdr) ||
+ data_len > skb->len - data_offset))
+ /* UDP packet is reporting too small of a size or lying about
+ * its size.
+ */
+ return -EINVAL;
+ data_len -= sizeof(struct udphdr);
+ data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
+ if (unlikely(!pskb_may_pull(skb,
+ data_offset + sizeof(struct message_header)) ||
+ pskb_trim(skb, data_len + data_offset) < 0))
+ return -EINVAL;
+ skb_pull(skb, data_offset);
+ if (unlikely(skb->len != data_len))
+ /* Final len does not agree with calculated len */
+ return -EINVAL;
+ header_len = validate_header_len(skb);
+ if (unlikely(!header_len))
+ return -EINVAL;
+ __skb_push(skb, data_offset);
+ if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
+ return -EINVAL;
+ __skb_pull(skb, data_offset);
+ return 0;
+}
+
+static void wg_receive_handshake_packet(struct wg_device *wg,
+ struct sk_buff *skb)
+{
+ enum cookie_mac_state mac_state;
+ struct wg_peer *peer = NULL;
+ /* This is global, so that our load calculation applies to the whole
+ * system. We don't care about races with it at all.
+ */
+ static u64 last_under_load;
+ bool packet_needs_cookie;
+ bool under_load;
+
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
+ net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
+ wg->dev->name, skb);
+ wg_cookie_message_consume(
+ (struct message_handshake_cookie *)skb->data, wg);
+ return;
+ }
+
+ under_load = skb_queue_len(&wg->incoming_handshakes) >=
+ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+ if (under_load)
+ last_under_load = ktime_get_coarse_boottime_ns();
+ else if (last_under_load)
+ under_load = !wg_birthdate_has_expired(last_under_load, 1);
+ mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
+ under_load);
+ if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
+ (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
+ packet_needs_cookie = false;
+ } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
+ packet_needs_cookie = true;
+ } else {
+ net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+
+ switch (SKB_TYPE_LE32(skb)) {
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
+ struct message_handshake_initiation *message =
+ (struct message_handshake_initiation *)skb->data;
+
+ if (packet_needs_cookie) {
+ wg_packet_send_handshake_cookie(wg, skb,
+ message->sender_index);
+ return;
+ }
+ peer = wg_noise_handshake_consume_initiation(message, wg);
+ if (unlikely(!peer)) {
+ net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+ wg_socket_set_peer_endpoint_from_skb(peer, skb);
+ net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
+ wg->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ wg_packet_send_handshake_response(peer);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
+ struct message_handshake_response *message =
+ (struct message_handshake_response *)skb->data;
+
+ if (packet_needs_cookie) {
+ wg_packet_send_handshake_cookie(wg, skb,
+ message->sender_index);
+ return;
+ }
+ peer = wg_noise_handshake_consume_response(message, wg);
+ if (unlikely(!peer)) {
+ net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+ wg_socket_set_peer_endpoint_from_skb(peer, skb);
+ net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
+ wg->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ if (wg_noise_handshake_begin_session(&peer->handshake,
+ &peer->keypairs)) {
+ wg_timers_session_derived(peer);
+ wg_timers_handshake_complete(peer);
+ /* Calling this function will either send any existing
+ * packets in the queue and not send a keepalive, which
+ * is the best case, Or, if there's nothing in the
+ * queue, it will send a keepalive, in order to give
+ * immediate confirmation of the session.
+ */
+ wg_packet_send_keepalive(peer);
+ }
+ break;
+ }
+ }
+
+ if (unlikely(!peer)) {
+ WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
+ return;
+ }
+
+ local_bh_disable();
+ update_rx_stats(peer, skb->len);
+ local_bh_enable();
+
+ wg_timers_any_authenticated_packet_received(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_peer_put(peer);
+}
+
+void wg_packet_handshake_receive_worker(struct work_struct *work)
+{
+ struct wg_device *wg = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
+ wg_receive_handshake_packet(wg, skb);
+ dev_kfree_skb(skb);
+ cond_resched();
+ }
+}
+
+static void keep_key_fresh(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+ bool send = false;
+
+ if (peer->sent_lastminute_handshake)
+ return;
+
+ rcu_read_lock_bh();
+ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
+ keypair->i_am_the_initiator &&
+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
+ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
+ send = true;
+ rcu_read_unlock_bh();
+
+ if (send) {
+ peer->sent_lastminute_handshake = true;
+ wg_packet_send_queued_handshake_initiation(peer, false);
+ }
+}
+
+static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
+{
+ struct scatterlist sg[MAX_SKB_FRAGS + 8];
+ struct sk_buff *trailer;
+ unsigned int offset;
+ int num_frags;
+
+ if (unlikely(!key))
+ return false;
+
+ if (unlikely(!READ_ONCE(key->is_valid) ||
+ wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
+ key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
+ WRITE_ONCE(key->is_valid, false);
+ return false;
+ }
+
+ PACKET_CB(skb)->nonce =
+ le64_to_cpu(((struct message_data *)skb->data)->counter);
+
+ /* We ensure that the network header is part of the packet before we
+ * call skb_cow_data, so that there's no chance that data is removed
+ * from the skb, so that later we can extract the original endpoint.
+ */
+ offset = skb->data - skb_network_header(skb);
+ skb_push(skb, offset);
+ num_frags = skb_cow_data(skb, 0, &trailer);
+ offset += sizeof(struct message_data);
+ skb_pull(skb, offset);
+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+ return false;
+
+ sg_init_table(sg, num_frags);
+ if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
+ return false;
+
+ if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
+ PACKET_CB(skb)->nonce,
+ key->key))
+ return false;
+
+ /* Another ugly situation of pushing and pulling the header so as to
+ * keep endpoint information intact.
+ */
+ skb_push(skb, offset);
+ if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
+ return false;
+ skb_pull(skb, offset);
+
+ return true;
+}
+
+/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
+static bool counter_validate(union noise_counter *counter, u64 their_counter)
+{
+ unsigned long index, index_current, top, i;
+ bool ret = false;
+
+ spin_lock_bh(&counter->receive.lock);
+
+ if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
+ their_counter >= REJECT_AFTER_MESSAGES))
+ goto out;
+
+ ++their_counter;
+
+ if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
+ counter->receive.counter))
+ goto out;
+
+ index = their_counter >> ilog2(BITS_PER_LONG);
+
+ if (likely(their_counter > counter->receive.counter)) {
+ index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
+ top = min_t(unsigned long, index - index_current,
+ COUNTER_BITS_TOTAL / BITS_PER_LONG);
+ for (i = 1; i <= top; ++i)
+ counter->receive.backtrack[(i + index_current) &
+ ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
+ counter->receive.counter = their_counter;
+ }
+
+ index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
+ ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
+ &counter->receive.backtrack[index]);
+
+out:
+ spin_unlock_bh(&counter->receive.lock);
+ return ret;
+}
+
+#include "selftest/counter.c"
+
+static void wg_packet_consume_data_done(struct wg_peer *peer,
+ struct sk_buff *skb,
+ struct endpoint *endpoint)
+{
+ struct net_device *dev = peer->device->dev;
+ unsigned int len, len_before_trim;
+ struct wg_peer *routed_peer;
+
+ wg_socket_set_peer_endpoint(peer, endpoint);
+
+ if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
+ PACKET_CB(skb)->keypair))) {
+ wg_timers_handshake_complete(peer);
+ wg_packet_send_staged_packets(peer);
+ }
+
+ keep_key_fresh(peer);
+
+ wg_timers_any_authenticated_packet_received(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+
+ /* A packet with length 0 is a keepalive packet */
+ if (unlikely(!skb->len)) {
+ update_rx_stats(peer, message_data_len(0));
+ net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ goto packet_processed;
+ }
+
+ wg_timers_data_received(peer);
+
+ if (unlikely(skb_network_header(skb) < skb->head))
+ goto dishonest_packet_size;
+ if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
+ (ip_hdr(skb)->version == 4 ||
+ (ip_hdr(skb)->version == 6 &&
+ pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
+ goto dishonest_packet_type;
+
+ skb->dev = dev;
+ /* We've already verified the Poly1305 auth tag, which means this packet
+ * was not modified in transit. We can therefore tell the networking
+ * stack that all checksums of every layer of encapsulation have already
+ * been checked "by the hardware" and therefore is unneccessary to check
+ * again in software.
+ */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = ~0; /* All levels */
+ skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ len = ntohs(ip_hdr(skb)->tot_len);
+ if (unlikely(len < sizeof(struct iphdr)))
+ goto dishonest_packet_size;
+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+ IP_ECN_set_ce(ip_hdr(skb));
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ len = ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr);
+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+ IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+ } else {
+ goto dishonest_packet_type;
+ }
+
+ if (unlikely(len > skb->len))
+ goto dishonest_packet_size;
+ len_before_trim = skb->len;
+ if (unlikely(pskb_trim(skb, len)))
+ goto packet_processed;
+
+ routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
+ skb);
+ wg_peer_put(routed_peer); /* We don't need the extra reference. */
+
+ if (unlikely(routed_peer != peer))
+ goto dishonest_packet_peer;
+
+ if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
+ ++dev->stats.rx_dropped;
+ net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ } else {
+ update_rx_stats(peer, message_data_len(len_before_trim));
+ }
+ return;
+
+dishonest_packet_peer:
+ net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
+ dev->name, skb, peer->internal_id,
+ &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_frame_errors;
+ goto packet_processed;
+dishonest_packet_type:
+ net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id, &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_frame_errors;
+ goto packet_processed;
+dishonest_packet_size:
+ net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id, &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_length_errors;
+ goto packet_processed;
+packet_processed:
+ dev_kfree_skb(skb);
+}
+
+int wg_packet_rx_poll(struct napi_struct *napi, int budget)
+{
+ struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
+ struct crypt_queue *queue = &peer->rx_queue;
+ struct noise_keypair *keypair;
+ struct endpoint endpoint;
+ enum packet_state state;
+ struct sk_buff *skb;
+ int work_done = 0;
+ bool free;
+
+ if (unlikely(budget <= 0))
+ return 0;
+
+ while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
+ (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
+ PACKET_STATE_UNCRYPTED) {
+ __ptr_ring_discard_one(&queue->ring);
+ peer = PACKET_PEER(skb);
+ keypair = PACKET_CB(skb)->keypair;
+ free = true;
+
+ if (unlikely(state != PACKET_STATE_CRYPTED))
+ goto next;
+
+ if (unlikely(!counter_validate(&keypair->receiving.counter,
+ PACKET_CB(skb)->nonce))) {
+ net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
+ peer->device->dev->name,
+ PACKET_CB(skb)->nonce,
+ keypair->receiving.counter.receive.counter);
+ goto next;
+ }
+
+ if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
+ goto next;
+
+ wg_reset_packet(skb);
+ wg_packet_consume_data_done(peer, skb, &endpoint);
+ free = false;
+
+next:
+ wg_noise_keypair_put(keypair, false);
+ wg_peer_put(peer);
+ if (unlikely(free))
+ dev_kfree_skb(skb);
+
+ if (++work_done >= budget)
+ break;
+ }
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+void wg_packet_decrypt_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *skb;
+
+ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ enum packet_state state = likely(decrypt_packet(skb,
+ &PACKET_CB(skb)->keypair->receiving)) ?
+ PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
+ wg_queue_enqueue_per_peer_napi(skb, state);
+ }
+}
+
+static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
+{
+ __le32 idx = ((struct message_data *)skb->data)->key_idx;
+ struct wg_peer *peer = NULL;
+ int ret;
+
+ rcu_read_lock_bh();
+ PACKET_CB(skb)->keypair =
+ (struct noise_keypair *)wg_index_hashtable_lookup(
+ wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
+ &peer);
+ if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
+ goto err_keypair;
+
+ if (unlikely(READ_ONCE(peer->is_dead)))
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
+ &peer->rx_queue, skb,
+ wg->packet_crypt_wq,
+ &wg->decrypt_queue.last_cpu);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
+ if (likely(!ret || ret == -EPIPE)) {
+ rcu_read_unlock_bh();
+ return;
+ }
+err:
+ wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
+err_keypair:
+ rcu_read_unlock_bh();
+ wg_peer_put(peer);
+ dev_kfree_skb(skb);
+}
+
+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
+{
+ if (unlikely(prepare_skb_header(skb, wg) < 0))
+ goto err;
+ switch (SKB_TYPE_LE32(skb)) {
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+ int cpu;
+
+ if (skb_queue_len(&wg->incoming_handshakes) >
+ MAX_QUEUED_INCOMING_HANDSHAKES ||
+ unlikely(!rng_is_initialized())) {
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+ skb_queue_tail(&wg->incoming_handshakes, skb);
+ /* Queues up a call to packet_process_queued_handshake_
+ * packets(skb):
+ */
+ cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
+ queue_work_on(cpu, wg->handshake_receive_wq,
+ &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_DATA):
+ PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+ wg_packet_consume_data(wg, skb);
+ break;
+ default:
+ net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+ return;
+
+err:
+ dev_kfree_skb(skb);
+}
diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
new file mode 100644
index 000000000000..846db14cb046
--- /dev/null
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This contains some basic static unit tests for the allowedips data structure.
+ * It also has two additional modes that are disabled and meant to be used by
+ * folks directly playing with this file. If you define the macro
+ * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
+ * memory, it will be printed out as KERN_DEBUG in a format that can be passed
+ * to graphviz (the dot command) to visualize it. If you define the macro
+ * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
+ * randomized tests done against a trivial implementation, which may take
+ * upwards of a half-hour to complete. There's no set of users who should be
+ * enabling these, and the only developers that should go anywhere near these
+ * nobs are the ones who are reading this comment.
+ */
+
+#ifdef DEBUG
+
+#include <linux/siphash.h>
+
+static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
+ u8 cidr)
+{
+ swap_endian(dst, src, bits);
+ memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
+ if (cidr)
+ dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
+}
+
+static __init void print_node(struct allowedips_node *node, u8 bits)
+{
+ char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
+ char *fmt_declaration = KERN_DEBUG
+ "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+ char *style = "dotted";
+ u8 ip1[16], ip2[16];
+ u32 color = 0;
+
+ if (bits == 32) {
+ fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
+ fmt_declaration = KERN_DEBUG
+ "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+ } else if (bits == 128) {
+ fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
+ fmt_declaration = KERN_DEBUG
+ "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+ }
+ if (node->peer) {
+ hsiphash_key_t key = { { 0 } };
+
+ memcpy(&key, &node->peer, sizeof(node->peer));
+ color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
+ hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
+ hsiphash_1u32(0xabad1dea, &key) % 200;
+ style = "bold";
+ }
+ swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
+ printk(fmt_declaration, ip1, node->cidr, style, color);
+ if (node->bit[0]) {
+ swap_endian_and_apply_cidr(ip2,
+ rcu_dereference_raw(node->bit[0])->bits, bits,
+ node->cidr);
+ printk(fmt_connection, ip1, node->cidr, ip2,
+ rcu_dereference_raw(node->bit[0])->cidr);
+ print_node(rcu_dereference_raw(node->bit[0]), bits);
+ }
+ if (node->bit[1]) {
+ swap_endian_and_apply_cidr(ip2,
+ rcu_dereference_raw(node->bit[1])->bits,
+ bits, node->cidr);
+ printk(fmt_connection, ip1, node->cidr, ip2,
+ rcu_dereference_raw(node->bit[1])->cidr);
+ print_node(rcu_dereference_raw(node->bit[1]), bits);
+ }
+}
+
+static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
+{
+ printk(KERN_DEBUG "digraph trie {\n");
+ print_node(rcu_dereference_raw(top), bits);
+ printk(KERN_DEBUG "}\n");
+}
+
+enum {
+ NUM_PEERS = 2000,
+ NUM_RAND_ROUTES = 400,
+ NUM_MUTATED_ROUTES = 100,
+ NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
+};
+
+struct horrible_allowedips {
+ struct hlist_head head;
+};
+
+struct horrible_allowedips_node {
+ struct hlist_node table;
+ union nf_inet_addr ip;
+ union nf_inet_addr mask;
+ u8 ip_version;
+ void *value;
+};
+
+static __init void horrible_allowedips_init(struct horrible_allowedips *table)
+{
+ INIT_HLIST_HEAD(&table->head);
+}
+
+static __init void horrible_allowedips_free(struct horrible_allowedips *table)
+{
+ struct horrible_allowedips_node *node;
+ struct hlist_node *h;
+
+ hlist_for_each_entry_safe(node, h, &table->head, table) {
+ hlist_del(&node->table);
+ kfree(node);
+ }
+}
+
+static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
+{
+ union nf_inet_addr mask;
+
+ memset(&mask, 0x00, 128 / 8);
+ memset(&mask, 0xff, cidr / 8);
+ if (cidr % 32)
+ mask.all[cidr / 32] = (__force u32)htonl(
+ (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
+ return mask;
+}
+
+static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
+{
+ return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
+ hweight32(subnet.all[2]) + hweight32(subnet.all[3]);
+}
+
+static __init inline void
+horrible_mask_self(struct horrible_allowedips_node *node)
+{
+ if (node->ip_version == 4) {
+ node->ip.ip &= node->mask.ip;
+ } else if (node->ip_version == 6) {
+ node->ip.ip6[0] &= node->mask.ip6[0];
+ node->ip.ip6[1] &= node->mask.ip6[1];
+ node->ip.ip6[2] &= node->mask.ip6[2];
+ node->ip.ip6[3] &= node->mask.ip6[3];
+ }
+}
+
+static __init inline bool
+horrible_match_v4(const struct horrible_allowedips_node *node,
+ struct in_addr *ip)
+{
+ return (ip->s_addr & node->mask.ip) == node->ip.ip;
+}
+
+static __init inline bool
+horrible_match_v6(const struct horrible_allowedips_node *node,
+ struct in6_addr *ip)
+{
+ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
+ node->ip.ip6[0] &&
+ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
+ node->ip.ip6[1] &&
+ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
+ node->ip.ip6[2] &&
+ (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
+}
+
+static __init void
+horrible_insert_ordered(struct horrible_allowedips *table,
+ struct horrible_allowedips_node *node)
+{
+ struct horrible_allowedips_node *other = NULL, *where = NULL;
+ u8 my_cidr = horrible_mask_to_cidr(node->mask);
+
+ hlist_for_each_entry(other, &table->head, table) {
+ if (!memcmp(&other->mask, &node->mask,
+ sizeof(union nf_inet_addr)) &&
+ !memcmp(&other->ip, &node->ip,
+ sizeof(union nf_inet_addr)) &&
+ other->ip_version == node->ip_version) {
+ other->value = node->value;
+ kfree(node);
+ return;
+ }
+ where = other;
+ if (horrible_mask_to_cidr(other->mask) <= my_cidr)
+ break;
+ }
+ if (!other && !where)
+ hlist_add_head(&node->table, &table->head);
+ else if (!other)
+ hlist_add_behind(&node->table, &where->table);
+ else
+ hlist_add_before(&node->table, &where->table);
+}
+
+static __init int
+horrible_allowedips_insert_v4(struct horrible_allowedips *table,
+ struct in_addr *ip, u8 cidr, void *value)
+{
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
+ GFP_KERNEL);
+
+ if (unlikely(!node))
+ return -ENOMEM;
+ node->ip.in = *ip;
+ node->mask = horrible_cidr_to_mask(cidr);
+ node->ip_version = 4;
+ node->value = value;
+ horrible_mask_self(node);
+ horrible_insert_ordered(table, node);
+ return 0;
+}
+
+static __init int
+horrible_allowedips_insert_v6(struct horrible_allowedips *table,
+ struct in6_addr *ip, u8 cidr, void *value)
+{
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
+ GFP_KERNEL);
+
+ if (unlikely(!node))
+ return -ENOMEM;
+ node->ip.in6 = *ip;
+ node->mask = horrible_cidr_to_mask(cidr);
+ node->ip_version = 6;
+ node->value = value;
+ horrible_mask_self(node);
+ horrible_insert_ordered(table, node);
+ return 0;
+}
+
+static __init void *
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
+ struct in_addr *ip)
+{
+ struct horrible_allowedips_node *node;
+ void *ret = NULL;
+
+ hlist_for_each_entry(node, &table->head, table) {
+ if (node->ip_version != 4)
+ continue;
+ if (horrible_match_v4(node, ip)) {
+ ret = node->value;
+ break;
+ }
+ }
+ return ret;
+}
+
+static __init void *
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
+ struct in6_addr *ip)
+{
+ struct horrible_allowedips_node *node;
+ void *ret = NULL;
+
+ hlist_for_each_entry(node, &table->head, table) {
+ if (node->ip_version != 6)
+ continue;
+ if (horrible_match_v6(node, ip)) {
+ ret = node->value;
+ break;
+ }
+ }
+ return ret;
+}
+
+static __init bool randomized_test(void)
+{
+ unsigned int i, j, k, mutate_amount, cidr;
+ u8 ip[16], mutate_mask[16], mutated[16];
+ struct wg_peer **peers, *peer;
+ struct horrible_allowedips h;
+ DEFINE_MUTEX(mutex);
+ struct allowedips t;
+ bool ret = false;
+
+ mutex_init(&mutex);
+
+ wg_allowedips_init(&t);
+ horrible_allowedips_init(&h);
+
+ peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
+ if (unlikely(!peers)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free;
+ }
+ for (i = 0; i < NUM_PEERS; ++i) {
+ peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
+ if (unlikely(!peers[i])) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free;
+ }
+ kref_init(&peers[i]->refcount);
+ }
+
+ mutex_lock(&mutex);
+
+ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+ prandom_bytes(ip, 4);
+ cidr = prandom_u32_max(32) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
+ peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
+ cidr, peer) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+ memcpy(mutated, ip, 4);
+ prandom_bytes(mutate_mask, 4);
+ mutate_amount = prandom_u32_max(32);
+ for (k = 0; k < mutate_amount / 8; ++k)
+ mutate_mask[k] = 0xff;
+ mutate_mask[k] = 0xff
+ << ((8 - (mutate_amount % 8)) % 8);
+ for (; k < 4; ++k)
+ mutate_mask[k] = 0;
+ for (k = 0; k < 4; ++k)
+ mutated[k] = (mutated[k] & mutate_mask[k]) |
+ (~mutate_mask[k] &
+ prandom_u32_max(256));
+ cidr = prandom_u32_max(32) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v4(&t,
+ (struct in_addr *)mutated,
+ cidr, peer, &mutex) < 0) {
+ pr_err("allowedips random malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v4(&h,
+ (struct in_addr *)mutated, cidr, peer)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ }
+ }
+
+ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+ prandom_bytes(ip, 16);
+ cidr = prandom_u32_max(128) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
+ peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
+ cidr, peer) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+ memcpy(mutated, ip, 16);
+ prandom_bytes(mutate_mask, 16);
+ mutate_amount = prandom_u32_max(128);
+ for (k = 0; k < mutate_amount / 8; ++k)
+ mutate_mask[k] = 0xff;
+ mutate_mask[k] = 0xff
+ << ((8 - (mutate_amount % 8)) % 8);
+ for (; k < 4; ++k)
+ mutate_mask[k] = 0;
+ for (k = 0; k < 4; ++k)
+ mutated[k] = (mutated[k] & mutate_mask[k]) |
+ (~mutate_mask[k] &
+ prandom_u32_max(256));
+ cidr = prandom_u32_max(128) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v6(&t,
+ (struct in6_addr *)mutated,
+ cidr, peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v6(
+ &h, (struct in6_addr *)mutated, cidr,
+ peer)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ }
+ }
+
+ mutex_unlock(&mutex);
+
+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
+ print_tree(t.root4, 32);
+ print_tree(t.root6, 128);
+ }
+
+ for (i = 0; i < NUM_QUERIES; ++i) {
+ prandom_bytes(ip, 4);
+ if (lookup(t.root4, 32, ip) !=
+ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+ pr_err("allowedips random self-test: FAIL\n");
+ goto free;
+ }
+ }
+
+ for (i = 0; i < NUM_QUERIES; ++i) {
+ prandom_bytes(ip, 16);
+ if (lookup(t.root6, 128, ip) !=
+ horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+ pr_err("allowedips random self-test: FAIL\n");
+ goto free;
+ }
+ }
+ ret = true;
+
+free:
+ mutex_lock(&mutex);
+free_locked:
+ wg_allowedips_free(&t, &mutex);
+ mutex_unlock(&mutex);
+ horrible_allowedips_free(&h);
+ if (peers) {
+ for (i = 0; i < NUM_PEERS; ++i)
+ kfree(peers[i]);
+ }
+ kfree(peers);
+ return ret;
+}
+
+static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
+{
+ static struct in_addr ip;
+ u8 *split = (u8 *)&ip;
+
+ split[0] = a;
+ split[1] = b;
+ split[2] = c;
+ split[3] = d;
+ return &ip;
+}
+
+static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
+{
+ static struct in6_addr ip;
+ __be32 *split = (__be32 *)&ip;
+
+ split[0] = cpu_to_be32(a);
+ split[1] = cpu_to_be32(b);
+ split[2] = cpu_to_be32(c);
+ split[3] = cpu_to_be32(d);
+ return &ip;
+}
+
+static __init struct wg_peer *init_peer(void)
+{
+ struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+
+ if (!peer)
+ return NULL;
+ kref_init(&peer->refcount);
+ INIT_LIST_HEAD(&peer->allowedips_list);
+ return peer;
+}
+
+#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
+ wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
+ cidr, mem, &mutex)
+
+#define maybe_fail() do { \
+ ++i; \
+ if (!_s) { \
+ pr_info("allowedips self-test %zu: FAIL\n", i); \
+ success = false; \
+ } \
+ } while (0)
+
+#define test(version, mem, ipa, ipb, ipc, ipd) do { \
+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
+ ip##version(ipa, ipb, ipc, ipd)) == (mem); \
+ maybe_fail(); \
+ } while (0)
+
+#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
+ ip##version(ipa, ipb, ipc, ipd)) != (mem); \
+ maybe_fail(); \
+ } while (0)
+
+#define test_boolean(cond) do { \
+ bool _s = (cond); \
+ maybe_fail(); \
+ } while (0)
+
+bool __init wg_allowedips_selftest(void)
+{
+ bool found_a = false, found_b = false, found_c = false, found_d = false,
+ found_e = false, found_other = false;
+ struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
+ *d = init_peer(), *e = init_peer(), *f = init_peer(),
+ *g = init_peer(), *h = init_peer();
+ struct allowedips_node *iter_node;
+ bool success = false;
+ struct allowedips t;
+ DEFINE_MUTEX(mutex);
+ struct in6_addr ip;
+ size_t i = 0, count = 0;
+ __be64 part;
+
+ mutex_init(&mutex);
+ mutex_lock(&mutex);
+ wg_allowedips_init(&t);
+
+ if (!a || !b || !c || !d || !e || !f || !g || !h) {
+ pr_err("allowedips self-test malloc: FAIL\n");
+ goto free;
+ }
+
+ insert(4, a, 192, 168, 4, 0, 24);
+ insert(4, b, 192, 168, 4, 4, 32);
+ insert(4, c, 192, 168, 0, 0, 16);
+ insert(4, d, 192, 95, 5, 64, 27);
+ /* replaces previous entry, and maskself is required */
+ insert(4, c, 192, 95, 5, 65, 27);
+ insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
+ insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
+ insert(4, e, 0, 0, 0, 0, 0);
+ insert(6, e, 0, 0, 0, 0, 0);
+ /* replaces previous entry */
+ insert(6, f, 0, 0, 0, 0, 0);
+ insert(6, g, 0x24046800, 0, 0, 0, 32);
+ /* maskself is required */
+ insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
+ insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
+ insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
+ insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
+ insert(4, g, 64, 15, 112, 0, 20);
+ /* maskself is required */
+ insert(4, h, 64, 15, 123, 211, 25);
+ insert(4, a, 10, 0, 0, 0, 25);
+ insert(4, b, 10, 0, 0, 128, 25);
+ insert(4, a, 10, 1, 0, 0, 30);
+ insert(4, b, 10, 1, 0, 4, 30);
+ insert(4, c, 10, 1, 0, 8, 29);
+ insert(4, d, 10, 1, 0, 16, 29);
+
+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
+ print_tree(t.root4, 32);
+ print_tree(t.root6, 128);
+ }
+
+ success = true;
+
+ test(4, a, 192, 168, 4, 20);
+ test(4, a, 192, 168, 4, 0);
+ test(4, b, 192, 168, 4, 4);
+ test(4, c, 192, 168, 200, 182);
+ test(4, c, 192, 95, 5, 68);
+ test(4, e, 192, 95, 5, 96);
+ test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
+ test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
+ test(6, f, 0x26075300, 0x60006b01, 0, 0);
+ test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
+ test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
+ test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
+ test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
+ test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
+ test(6, h, 0x24046800, 0x40040800, 0, 0);
+ test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
+ test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
+ test(4, g, 64, 15, 116, 26);
+ test(4, g, 64, 15, 127, 3);
+ test(4, g, 64, 15, 123, 1);
+ test(4, h, 64, 15, 123, 128);
+ test(4, h, 64, 15, 123, 129);
+ test(4, a, 10, 0, 0, 52);
+ test(4, b, 10, 0, 0, 220);
+ test(4, a, 10, 1, 0, 2);
+ test(4, b, 10, 1, 0, 6);
+ test(4, c, 10, 1, 0, 10);
+ test(4, d, 10, 1, 0, 20);
+
+ insert(4, a, 1, 0, 0, 0, 32);
+ insert(4, a, 64, 0, 0, 0, 32);
+ insert(4, a, 128, 0, 0, 0, 32);
+ insert(4, a, 192, 0, 0, 0, 32);
+ insert(4, a, 255, 0, 0, 0, 32);
+ wg_allowedips_remove_by_peer(&t, a, &mutex);
+ test_negative(4, a, 1, 0, 0, 0);
+ test_negative(4, a, 64, 0, 0, 0);
+ test_negative(4, a, 128, 0, 0, 0);
+ test_negative(4, a, 192, 0, 0, 0);
+ test_negative(4, a, 255, 0, 0, 0);
+
+ wg_allowedips_free(&t, &mutex);
+ wg_allowedips_init(&t);
+ insert(4, a, 192, 168, 0, 0, 16);
+ insert(4, a, 192, 168, 0, 0, 24);
+ wg_allowedips_remove_by_peer(&t, a, &mutex);
+ test_negative(4, a, 192, 168, 0, 1);
+
+ /* These will hit the WARN_ON(len >= 128) in free_node if something
+ * goes wrong.
+ */
+ for (i = 0; i < 128; ++i) {
+ part = cpu_to_be64(~(1LLU << (i % 64)));
+ memset(&ip, 0xff, 16);
+ memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ }
+
+ wg_allowedips_free(&t, &mutex);
+
+ wg_allowedips_init(&t);
+ insert(4, a, 192, 95, 5, 93, 27);
+ insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
+ insert(4, a, 10, 1, 0, 20, 29);
+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
+ list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
+ u8 cidr, ip[16] __aligned(__alignof(u64));
+ int family = wg_allowedips_read_node(iter_node, ip, &cidr);
+
+ count++;
+
+ if (cidr == 27 && family == AF_INET &&
+ !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
+ found_a = true;
+ else if (cidr == 128 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
+ sizeof(struct in6_addr)))
+ found_b = true;
+ else if (cidr == 29 && family == AF_INET &&
+ !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
+ found_c = true;
+ else if (cidr == 83 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
+ sizeof(struct in6_addr)))
+ found_d = true;
+ else if (cidr == 21 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075000, 0, 0, 0),
+ sizeof(struct in6_addr)))
+ found_e = true;
+ else
+ found_other = true;
+ }
+ test_boolean(count == 5);
+ test_boolean(found_a);
+ test_boolean(found_b);
+ test_boolean(found_c);
+ test_boolean(found_d);
+ test_boolean(found_e);
+ test_boolean(!found_other);
+
+ if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
+ success = randomized_test();
+
+ if (success)
+ pr_info("allowedips self-tests: pass\n");
+
+free:
+ wg_allowedips_free(&t, &mutex);
+ kfree(a);
+ kfree(b);
+ kfree(c);
+ kfree(d);
+ kfree(e);
+ kfree(f);
+ kfree(g);
+ kfree(h);
+ mutex_unlock(&mutex);
+
+ return success;
+}
+
+#undef test_negative
+#undef test
+#undef remove
+#undef insert
+#undef init_peer
+
+#endif
diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c
new file mode 100644
index 000000000000..f4fbb9072ed7
--- /dev/null
+++ b/drivers/net/wireguard/selftest/counter.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+bool __init wg_packet_counter_selftest(void)
+{
+ unsigned int test_num = 0, i;
+ union noise_counter counter;
+ bool success = true;
+
+#define T_INIT do { \
+ memset(&counter, 0, sizeof(union noise_counter)); \
+ spin_lock_init(&counter.receive.lock); \
+ } while (0)
+#define T_LIM (COUNTER_WINDOW_SIZE + 1)
+#define T(n, v) do { \
+ ++test_num; \
+ if (counter_validate(&counter, n) != (v)) { \
+ pr_err("nonce counter self-test %u: FAIL\n", \
+ test_num); \
+ success = false; \
+ } \
+ } while (0)
+
+ T_INIT;
+ /* 1 */ T(0, true);
+ /* 2 */ T(1, true);
+ /* 3 */ T(1, false);
+ /* 4 */ T(9, true);
+ /* 5 */ T(8, true);
+ /* 6 */ T(7, true);
+ /* 7 */ T(7, false);
+ /* 8 */ T(T_LIM, true);
+ /* 9 */ T(T_LIM - 1, true);
+ /* 10 */ T(T_LIM - 1, false);
+ /* 11 */ T(T_LIM - 2, true);
+ /* 12 */ T(2, true);
+ /* 13 */ T(2, false);
+ /* 14 */ T(T_LIM + 16, true);
+ /* 15 */ T(3, false);
+ /* 16 */ T(T_LIM + 16, false);
+ /* 17 */ T(T_LIM * 4, true);
+ /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
+ /* 19 */ T(10, false);
+ /* 20 */ T(T_LIM * 4 - T_LIM, false);
+ /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
+ /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
+ /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
+ /* 24 */ T(0, false);
+ /* 25 */ T(REJECT_AFTER_MESSAGES, false);
+ /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
+ /* 27 */ T(REJECT_AFTER_MESSAGES, false);
+ /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
+ /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
+ /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
+ /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
+ /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
+ /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
+ /* 34 */ T(0, false);
+
+ T_INIT;
+ for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
+ T(i, true);
+ T(0, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
+ T(i, true);
+ T(1, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
+ T(i, true);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
+ T(i, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
+ T(i, true);
+ T(COUNTER_WINDOW_SIZE + 1, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
+ T(i, true);
+ T(0, true);
+ T(COUNTER_WINDOW_SIZE + 1, true);
+
+#undef T
+#undef T_LIM
+#undef T_INIT
+
+ if (success)
+ pr_info("nonce counter self-tests: pass\n");
+ return success;
+}
+#endif
diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
new file mode 100644
index 000000000000..bcd6462e4540
--- /dev/null
+++ b/drivers/net/wireguard/selftest/ratelimiter.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+
+#include <linux/jiffies.h>
+
+static const struct {
+ bool result;
+ unsigned int msec_to_sleep_before;
+} expected_results[] __initconst = {
+ [0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
+ [PACKETS_BURSTABLE] = { false, 0 },
+ [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
+ [PACKETS_BURSTABLE + 2] = { false, 0 },
+ [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
+ [PACKETS_BURSTABLE + 4] = { true, 0 },
+ [PACKETS_BURSTABLE + 5] = { false, 0 }
+};
+
+static __init unsigned int maximum_jiffies_at_index(int index)
+{
+ unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
+ int i;
+
+ for (i = 0; i <= index; ++i)
+ total_msecs += expected_results[i].msec_to_sleep_before;
+ return msecs_to_jiffies(total_msecs);
+}
+
+static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
+ struct sk_buff *skb6, struct ipv6hdr *hdr6,
+ int *test)
+{
+ unsigned long loop_start_time;
+ int i;
+
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+ loop_start_time = jiffies;
+
+ for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
+ if (expected_results[i].msec_to_sleep_before)
+ msleep(expected_results[i].msec_to_sleep_before);
+
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (wg_ratelimiter_allow(skb4, &init_net) !=
+ expected_results[i].result)
+ return -EXFULL;
+ ++(*test);
+
+ hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (!wg_ratelimiter_allow(skb4, &init_net))
+ return -EXFULL;
+ ++(*test);
+
+ hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
+ hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (wg_ratelimiter_allow(skb6, &init_net) !=
+ expected_results[i].result)
+ return -EXFULL;
+ ++(*test);
+
+ hdr6->saddr.in6_u.u6_addr32[0] =
+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (!wg_ratelimiter_allow(skb6, &init_net))
+ return -EXFULL;
+ ++(*test);
+
+ hdr6->saddr.in6_u.u6_addr32[0] =
+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
+
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+#endif
+ }
+ return 0;
+}
+
+static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
+ int *test)
+{
+ int i;
+
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+
+ if (atomic_read(&total_entries))
+ return -EXFULL;
+ ++(*test);
+
+ for (i = 0; i <= max_entries; ++i) {
+ hdr4->saddr = htonl(i);
+ if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries))
+ return -EXFULL;
+ ++(*test);
+ }
+ return 0;
+}
+
+bool __init wg_ratelimiter_selftest(void)
+{
+ enum { TRIALS_BEFORE_GIVING_UP = 5000 };
+ bool success = false;
+ int test = 0, trials;
+ struct sk_buff *skb4, *skb6;
+ struct iphdr *hdr4;
+ struct ipv6hdr *hdr6;
+
+ if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
+ return true;
+
+ BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
+
+ if (wg_ratelimiter_init())
+ goto out;
+ ++test;
+ if (wg_ratelimiter_init()) {
+ wg_ratelimiter_uninit();
+ goto out;
+ }
+ ++test;
+ if (wg_ratelimiter_init()) {
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ goto out;
+ }
+ ++test;
+
+ skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
+ if (unlikely(!skb4))
+ goto err_nofree;
+ skb4->protocol = htons(ETH_P_IP);
+ hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
+ hdr4->saddr = htonl(8182);
+ skb_reset_network_header(skb4);
+ ++test;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
+ if (unlikely(!skb6)) {
+ kfree_skb(skb4);
+ goto err_nofree;
+ }
+ skb6->protocol = htons(ETH_P_IPV6);
+ hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
+ hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
+ hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
+ skb_reset_network_header(skb6);
+ ++test;
+#endif
+
+ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
+ int test_count = 0, ret;
+
+ ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
+ if (ret == -ETIMEDOUT) {
+ if (!trials--) {
+ test += test_count;
+ goto err;
+ }
+ msleep(500);
+ continue;
+ } else if (ret < 0) {
+ test += test_count;
+ goto err;
+ } else {
+ test += test_count;
+ break;
+ }
+ }
+
+ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
+ int test_count = 0;
+
+ if (capacity_test(skb4, hdr4, &test_count) < 0) {
+ if (!trials--) {
+ test += test_count;
+ goto err;
+ }
+ msleep(50);
+ continue;
+ }
+ test += test_count;
+ break;
+ }
+
+ success = true;
+
+err:
+ kfree_skb(skb4);
+#if IS_ENABLED(CONFIG_IPV6)
+ kfree_skb(skb6);
+#endif
+err_nofree:
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ /* Uninit one extra time to check underflow detection. */
+ wg_ratelimiter_uninit();
+out:
+ if (success)
+ pr_info("ratelimiter self-tests: pass\n");
+ else
+ pr_err("ratelimiter self-test %d: FAIL\n", test);
+
+ return success;
+}
+#endif
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
new file mode 100644
index 000000000000..c13260563446
--- /dev/null
+++ b/drivers/net/wireguard/send.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "messages.h"
+#include "cookie.h"
+
+#include <linux/uio.h>
+#include <linux/inetdevice.h>
+#include <linux/socket.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+#include <net/sock.h>
+
+static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
+{
+ struct message_handshake_initiation packet;
+
+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
+ REKEY_TIMEOUT))
+ return; /* This function is rate limited. */
+
+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
+ net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ atomic64_set(&peer->last_sent_handshake,
+ ktime_get_coarse_boottime_ns());
+ wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
+ HANDSHAKE_DSCP);
+ wg_timers_handshake_initiated(peer);
+ }
+}
+
+void wg_packet_handshake_send_worker(struct work_struct *work)
+{
+ struct wg_peer *peer = container_of(work, struct wg_peer,
+ transmit_handshake_work);
+
+ wg_packet_send_handshake_initiation(peer);
+ wg_peer_put(peer);
+}
+
+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
+ bool is_retry)
+{
+ if (!is_retry)
+ peer->timer_handshake_attempts = 0;
+
+ rcu_read_lock_bh();
+ /* We check last_sent_handshake here in addition to the actual function
+ * we're queueing up, so that we don't queue things if not strictly
+ * necessary:
+ */
+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
+ REKEY_TIMEOUT) ||
+ unlikely(READ_ONCE(peer->is_dead)))
+ goto out;
+
+ wg_peer_get(peer);
+ /* Queues up calling packet_send_queued_handshakes(peer), where we do a
+ * peer_put(peer) after:
+ */
+ if (!queue_work(peer->device->handshake_send_wq,
+ &peer->transmit_handshake_work))
+ /* If the work was already queued, we want to drop the
+ * extra reference:
+ */
+ wg_peer_put(peer);
+out:
+ rcu_read_unlock_bh();
+}
+
+void wg_packet_send_handshake_response(struct wg_peer *peer)
+{
+ struct message_handshake_response packet;
+
+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
+ net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+ if (wg_noise_handshake_begin_session(&peer->handshake,
+ &peer->keypairs)) {
+ wg_timers_session_derived(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ atomic64_set(&peer->last_sent_handshake,
+ ktime_get_coarse_boottime_ns());
+ wg_socket_send_buffer_to_peer(peer, &packet,
+ sizeof(packet),
+ HANDSHAKE_DSCP);
+ }
+ }
+}
+
+void wg_packet_send_handshake_cookie(struct wg_device *wg,
+ struct sk_buff *initiating_skb,
+ __le32 sender_index)
+{
+ struct message_handshake_cookie packet;
+
+ net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
+ wg->dev->name, initiating_skb);
+ wg_cookie_message_create(&packet, initiating_skb, sender_index,
+ &wg->cookie_checker);
+ wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
+ sizeof(packet));
+}
+
+static void keep_key_fresh(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+ bool send = false;
+
+ rcu_read_lock_bh();
+ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
+ (unlikely(atomic64_read(&keypair->sending.counter.counter) >
+ REKEY_AFTER_MESSAGES) ||
+ (keypair->i_am_the_initiator &&
+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
+ REKEY_AFTER_TIME)))))
+ send = true;
+ rcu_read_unlock_bh();
+
+ if (send)
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
+
+static unsigned int calculate_skb_padding(struct sk_buff *skb)
+{
+ /* We do this modulo business with the MTU, just in case the networking
+ * layer gives us a packet that's bigger than the MTU. In that case, we
+ * wouldn't want the final subtraction to overflow in the case of the
+ * padded_size being clamped.
+ */
+ unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
+ unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
+
+ if (padded_size > PACKET_CB(skb)->mtu)
+ padded_size = PACKET_CB(skb)->mtu;
+ return padded_size - last_unit;
+}
+
+static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
+{
+ unsigned int padding_len, plaintext_len, trailer_len;
+ struct scatterlist sg[MAX_SKB_FRAGS + 8];
+ struct message_data *header;
+ struct sk_buff *trailer;
+ int num_frags;
+
+ /* Calculate lengths. */
+ padding_len = calculate_skb_padding(skb);
+ trailer_len = padding_len + noise_encrypted_len(0);
+ plaintext_len = skb->len + padding_len;
+
+ /* Expand data section to have room for padding and auth tag. */
+ num_frags = skb_cow_data(skb, trailer_len, &trailer);
+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+ return false;
+
+ /* Set the padding to zeros, and make sure it and the auth tag are part
+ * of the skb.
+ */
+ memset(skb_tail_pointer(trailer), 0, padding_len);
+
+ /* Expand head section to have room for our header and the network
+ * stack's headers.
+ */
+ if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
+ return false;
+
+ /* Finalize checksum calculation for the inner packet, if required. */
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb)))
+ return false;
+
+ /* Only after checksumming can we safely add on the padding at the end
+ * and the header.
+ */
+ skb_set_inner_network_header(skb, 0);
+ header = (struct message_data *)skb_push(skb, sizeof(*header));
+ header->header.type = cpu_to_le32(MESSAGE_DATA);
+ header->key_idx = keypair->remote_index;
+ header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
+ pskb_put(skb, trailer, trailer_len);
+
+ /* Now we can encrypt the scattergather segments */
+ sg_init_table(sg, num_frags);
+ if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
+ noise_encrypted_len(plaintext_len)) <= 0)
+ return false;
+ return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
+ PACKET_CB(skb)->nonce,
+ keypair->sending.key);
+}
+
+void wg_packet_send_keepalive(struct wg_peer *peer)
+{
+ struct sk_buff *skb;
+
+ if (skb_queue_empty(&peer->staged_packet_queue)) {
+ skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
+ GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+ skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
+ skb->dev = peer->device->dev;
+ PACKET_CB(skb)->mtu = skb->dev->mtu;
+ skb_queue_tail(&peer->staged_packet_queue, skb);
+ net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ }
+
+ wg_packet_send_staged_packets(peer);
+}
+
+static void wg_packet_create_data_done(struct sk_buff *first,
+ struct wg_peer *peer)
+{
+ struct sk_buff *skb, *next;
+ bool is_keepalive, data_sent = false;
+
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ skb_list_walk_safe(first, skb, next) {
+ is_keepalive = skb->len == message_data_len(0);
+ if (likely(!wg_socket_send_skb_to_peer(peer, skb,
+ PACKET_CB(skb)->ds) && !is_keepalive))
+ data_sent = true;
+ }
+
+ if (likely(data_sent))
+ wg_timers_data_sent(peer);
+
+ keep_key_fresh(peer);
+}
+
+void wg_packet_tx_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct crypt_queue,
+ work);
+ struct noise_keypair *keypair;
+ enum packet_state state;
+ struct sk_buff *first;
+ struct wg_peer *peer;
+
+ while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
+ (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
+ PACKET_STATE_UNCRYPTED) {
+ __ptr_ring_discard_one(&queue->ring);
+ peer = PACKET_PEER(first);
+ keypair = PACKET_CB(first)->keypair;
+
+ if (likely(state == PACKET_STATE_CRYPTED))
+ wg_packet_create_data_done(first, peer);
+ else
+ kfree_skb_list(first);
+
+ wg_noise_keypair_put(keypair, false);
+ wg_peer_put(peer);
+ }
+}
+
+void wg_packet_encrypt_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *first, *skb, *next;
+
+ while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ enum packet_state state = PACKET_STATE_CRYPTED;
+
+ skb_list_walk_safe(first, skb, next) {
+ if (likely(encrypt_packet(skb,
+ PACKET_CB(first)->keypair))) {
+ wg_reset_packet(skb);
+ } else {
+ state = PACKET_STATE_DEAD;
+ break;
+ }
+ }
+ wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
+ state);
+
+ }
+}
+
+static void wg_packet_create_data(struct sk_buff *first)
+{
+ struct wg_peer *peer = PACKET_PEER(first);
+ struct wg_device *wg = peer->device;
+ int ret = -EINVAL;
+
+ rcu_read_lock_bh();
+ if (unlikely(READ_ONCE(peer->is_dead)))
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
+ &peer->tx_queue, first,
+ wg->packet_crypt_wq,
+ &wg->encrypt_queue.last_cpu);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer(&peer->tx_queue, first,
+ PACKET_STATE_DEAD);
+err:
+ rcu_read_unlock_bh();
+ if (likely(!ret || ret == -EPIPE))
+ return;
+ wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
+ wg_peer_put(peer);
+ kfree_skb_list(first);
+}
+
+void wg_packet_purge_staged_packets(struct wg_peer *peer)
+{
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
+ __skb_queue_purge(&peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+}
+
+void wg_packet_send_staged_packets(struct wg_peer *peer)
+{
+ struct noise_symmetric_key *key;
+ struct noise_keypair *keypair;
+ struct sk_buff_head packets;
+ struct sk_buff *skb;
+
+ /* Steal the current queue into our local one. */
+ __skb_queue_head_init(&packets);
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ skb_queue_splice_init(&peer->staged_packet_queue, &packets);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+ if (unlikely(skb_queue_empty(&packets)))
+ return;
+
+ /* First we make sure we have a valid reference to a valid key. */
+ rcu_read_lock_bh();
+ keypair = wg_noise_keypair_get(
+ rcu_dereference_bh(peer->keypairs.current_keypair));
+ rcu_read_unlock_bh();
+ if (unlikely(!keypair))
+ goto out_nokey;
+ key = &keypair->sending;
+ if (unlikely(!READ_ONCE(key->is_valid)))
+ goto out_nokey;
+ if (unlikely(wg_birthdate_has_expired(key->birthdate,
+ REJECT_AFTER_TIME)))
+ goto out_invalid;
+
+ /* After we know we have a somewhat valid key, we now try to assign
+ * nonces to all of the packets in the queue. If we can't assign nonces
+ * for all of them, we just consider it a failure and wait for the next
+ * handshake.
+ */
+ skb_queue_walk(&packets, skb) {
+ /* 0 for no outer TOS: no leak. TODO: at some later point, we
+ * might consider using flowi->tos as outer instead.
+ */
+ PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
+ PACKET_CB(skb)->nonce =
+ atomic64_inc_return(&key->counter.counter) - 1;
+ if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
+ goto out_invalid;
+ }
+
+ packets.prev->next = NULL;
+ wg_peer_get(keypair->entry.peer);
+ PACKET_CB(packets.next)->keypair = keypair;
+ wg_packet_create_data(packets.next);
+ return;
+
+out_invalid:
+ WRITE_ONCE(key->is_valid, false);
+out_nokey:
+ wg_noise_keypair_put(keypair, false);
+
+ /* We orphan the packets if we're waiting on a handshake, so that they
+ * don't block a socket's pool.
+ */
+ skb_queue_walk(&packets, skb)
+ skb_orphan(skb);
+ /* Then we put them back on the top of the queue. We're not too
+ * concerned about accidentally getting things a little out of order if
+ * packets are being added really fast, because this queue is for before
+ * packets can even be sent and it's small anyway.
+ */
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ skb_queue_splice(&packets, &peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+ /* If we're exiting because there's something wrong with the key, it
+ * means we should initiate a new handshake.
+ */
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
new file mode 100644
index 000000000000..c46256d0d81c
--- /dev/null
+++ b/drivers/net/wireguard/socket.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <linux/ctype.h>
+#include <linux/net.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/inetdevice.h>
+#include <net/udp_tunnel.h>
+#include <net/ipv6.h>
+
+static int send4(struct wg_device *wg, struct sk_buff *skb,
+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+ struct flowi4 fl = {
+ .saddr = endpoint->src4.s_addr,
+ .daddr = endpoint->addr4.sin_addr.s_addr,
+ .fl4_dport = endpoint->addr4.sin_port,
+ .flowi4_mark = wg->fwmark,
+ .flowi4_proto = IPPROTO_UDP
+ };
+ struct rtable *rt = NULL;
+ struct sock *sock;
+ int ret = 0;
+
+ skb_mark_not_on_list(skb);
+ skb->dev = wg->dev;
+ skb->mark = wg->fwmark;
+
+ rcu_read_lock_bh();
+ sock = rcu_dereference_bh(wg->sock4);
+
+ if (unlikely(!sock)) {
+ ret = -ENONET;
+ goto err;
+ }
+
+ fl.fl4_sport = inet_sk(sock)->inet_sport;
+
+ if (cache)
+ rt = dst_cache_get_ip4(cache, &fl.saddr);
+
+ if (!rt) {
+ security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
+ if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
+ fl.saddr, RT_SCOPE_HOST))) {
+ endpoint->src4.s_addr = 0;
+ *(__force __be32 *)&endpoint->src_if4 = 0;
+ fl.saddr = 0;
+ if (cache)
+ dst_cache_reset(cache);
+ }
+ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+ if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
+ PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
+ rt->dst.dev->ifindex != endpoint->src_if4)))) {
+ endpoint->src4.s_addr = 0;
+ *(__force __be32 *)&endpoint->src_if4 = 0;
+ fl.saddr = 0;
+ if (cache)
+ dst_cache_reset(cache);
+ if (!IS_ERR(rt))
+ ip_rt_put(rt);
+ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+ }
+ if (unlikely(IS_ERR(rt))) {
+ ret = PTR_ERR(rt);
+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
+ wg->dev->name, &endpoint->addr, ret);
+ goto err;
+ } else if (unlikely(rt->dst.dev == skb->dev)) {
+ ip_rt_put(rt);
+ ret = -ELOOP;
+ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
+ wg->dev->name, &endpoint->addr);
+ goto err;
+ }
+ if (cache)
+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+ }
+
+ skb->ignore_df = 1;
+ udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
+ ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
+ fl.fl4_dport, false, false);
+ goto out;
+
+err:
+ kfree_skb(skb);
+out:
+ rcu_read_unlock_bh();
+ return ret;
+}
+
+static int send6(struct wg_device *wg, struct sk_buff *skb,
+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct flowi6 fl = {
+ .saddr = endpoint->src6,
+ .daddr = endpoint->addr6.sin6_addr,
+ .fl6_dport = endpoint->addr6.sin6_port,
+ .flowi6_mark = wg->fwmark,
+ .flowi6_oif = endpoint->addr6.sin6_scope_id,
+ .flowi6_proto = IPPROTO_UDP
+ /* TODO: addr->sin6_flowinfo */
+ };
+ struct dst_entry *dst = NULL;
+ struct sock *sock;
+ int ret = 0;
+
+ skb_mark_not_on_list(skb);
+ skb->dev = wg->dev;
+ skb->mark = wg->fwmark;
+
+ rcu_read_lock_bh();
+ sock = rcu_dereference_bh(wg->sock6);
+
+ if (unlikely(!sock)) {
+ ret = -ENONET;
+ goto err;
+ }
+
+ fl.fl6_sport = inet_sk(sock)->inet_sport;
+
+ if (cache)
+ dst = dst_cache_get_ip6(cache, &fl.saddr);
+
+ if (!dst) {
+ security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
+ if (unlikely(!ipv6_addr_any(&fl.saddr) &&
+ !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
+ endpoint->src6 = fl.saddr = in6addr_any;
+ if (cache)
+ dst_cache_reset(cache);
+ }
+ dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
+ NULL);
+ if (unlikely(IS_ERR(dst))) {
+ ret = PTR_ERR(dst);
+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
+ wg->dev->name, &endpoint->addr, ret);
+ goto err;
+ } else if (unlikely(dst->dev == skb->dev)) {
+ dst_release(dst);
+ ret = -ELOOP;
+ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
+ wg->dev->name, &endpoint->addr);
+ goto err;
+ }
+ if (cache)
+ dst_cache_set_ip6(cache, dst, &fl.saddr);
+ }
+
+ skb->ignore_df = 1;
+ udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
+ ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
+ fl.fl6_dport, false);
+ goto out;
+
+err:
+ kfree_skb(skb);
+out:
+ rcu_read_unlock_bh();
+ return ret;
+#else
+ return -EAFNOSUPPORT;
+#endif
+}
+
+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
+{
+ size_t skb_len = skb->len;
+ int ret = -EAFNOSUPPORT;
+
+ read_lock_bh(&peer->endpoint_lock);
+ if (peer->endpoint.addr.sa_family == AF_INET)
+ ret = send4(peer->device, skb, &peer->endpoint, ds,
+ &peer->endpoint_cache);
+ else if (peer->endpoint.addr.sa_family == AF_INET6)
+ ret = send6(peer->device, skb, &peer->endpoint, ds,
+ &peer->endpoint_cache);
+ else
+ dev_kfree_skb(skb);
+ if (likely(!ret))
+ peer->tx_bytes += skb_len;
+ read_unlock_bh(&peer->endpoint_lock);
+
+ return ret;
+}
+
+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
+ size_t len, u8 ds)
+{
+ struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ skb_reserve(skb, SKB_HEADER_LEN);
+ skb_set_inner_network_header(skb, 0);
+ skb_put_data(skb, buffer, len);
+ return wg_socket_send_skb_to_peer(peer, skb, ds);
+}
+
+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
+ struct sk_buff *in_skb, void *buffer,
+ size_t len)
+{
+ int ret = 0;
+ struct sk_buff *skb;
+ struct endpoint endpoint;
+
+ if (unlikely(!in_skb))
+ return -EINVAL;
+ ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
+ if (unlikely(ret < 0))
+ return ret;
+
+ skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return -ENOMEM;
+ skb_reserve(skb, SKB_HEADER_LEN);
+ skb_set_inner_network_header(skb, 0);
+ skb_put_data(skb, buffer, len);
+
+ if (endpoint.addr.sa_family == AF_INET)
+ ret = send4(wg, skb, &endpoint, 0, NULL);
+ else if (endpoint.addr.sa_family == AF_INET6)
+ ret = send6(wg, skb, &endpoint, 0, NULL);
+ /* No other possibilities if the endpoint is valid, which it is,
+ * as we checked above.
+ */
+
+ return ret;
+}
+
+int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
+ const struct sk_buff *skb)
+{
+ memset(endpoint, 0, sizeof(*endpoint));
+ if (skb->protocol == htons(ETH_P_IP)) {
+ endpoint->addr4.sin_family = AF_INET;
+ endpoint->addr4.sin_port = udp_hdr(skb)->source;
+ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+ endpoint->src_if4 = skb->skb_iif;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ endpoint->addr6.sin6_family = AF_INET6;
+ endpoint->addr6.sin6_port = udp_hdr(skb)->source;
+ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
+ endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
+ &ipv6_hdr(skb)->saddr, skb->skb_iif);
+ endpoint->src6 = ipv6_hdr(skb)->daddr;
+ } else {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
+{
+ return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
+ a->addr4.sin_port == b->addr4.sin_port &&
+ a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
+ a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
+ (a->addr.sa_family == AF_INET6 &&
+ b->addr.sa_family == AF_INET6 &&
+ a->addr6.sin6_port == b->addr6.sin6_port &&
+ ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
+ a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
+ ipv6_addr_equal(&a->src6, &b->src6)) ||
+ unlikely(!a->addr.sa_family && !b->addr.sa_family);
+}
+
+void wg_socket_set_peer_endpoint(struct wg_peer *peer,
+ const struct endpoint *endpoint)
+{
+ /* First we check unlocked, in order to optimize, since it's pretty rare
+ * that an endpoint will change. If we happen to be mid-write, and two
+ * CPUs wind up writing the same thing or something slightly different,
+ * it doesn't really matter much either.
+ */
+ if (endpoint_eq(endpoint, &peer->endpoint))
+ return;
+ write_lock_bh(&peer->endpoint_lock);
+ if (endpoint->addr.sa_family == AF_INET) {
+ peer->endpoint.addr4 = endpoint->addr4;
+ peer->endpoint.src4 = endpoint->src4;
+ peer->endpoint.src_if4 = endpoint->src_if4;
+ } else if (endpoint->addr.sa_family == AF_INET6) {
+ peer->endpoint.addr6 = endpoint->addr6;
+ peer->endpoint.src6 = endpoint->src6;
+ } else {
+ goto out;
+ }
+ dst_cache_reset(&peer->endpoint_cache);
+out:
+ write_unlock_bh(&peer->endpoint_lock);
+}
+
+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
+ const struct sk_buff *skb)
+{
+ struct endpoint endpoint;
+
+ if (!wg_socket_endpoint_from_skb(&endpoint, skb))
+ wg_socket_set_peer_endpoint(peer, &endpoint);
+}
+
+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
+{
+ write_lock_bh(&peer->endpoint_lock);
+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
+ dst_cache_reset(&peer->endpoint_cache);
+ write_unlock_bh(&peer->endpoint_lock);
+}
+
+static int wg_receive(struct sock *sk, struct sk_buff *skb)
+{
+ struct wg_device *wg;
+
+ if (unlikely(!sk))
+ goto err;
+ wg = sk->sk_user_data;
+ if (unlikely(!wg))
+ goto err;
+ wg_packet_receive(wg, skb);
+ return 0;
+
+err:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void sock_free(struct sock *sock)
+{
+ if (unlikely(!sock))
+ return;
+ sk_clear_memalloc(sock);
+ udp_tunnel_sock_release(sock->sk_socket);
+}
+
+static void set_sock_opts(struct socket *sock)
+{
+ sock->sk->sk_allocation = GFP_ATOMIC;
+ sock->sk->sk_sndbuf = INT_MAX;
+ sk_set_memalloc(sock->sk);
+}
+
+int wg_socket_init(struct wg_device *wg, u16 port)
+{
+ int ret;
+ struct udp_tunnel_sock_cfg cfg = {
+ .sk_user_data = wg,
+ .encap_type = 1,
+ .encap_rcv = wg_receive
+ };
+ struct socket *new4 = NULL, *new6 = NULL;
+ struct udp_port_cfg port4 = {
+ .family = AF_INET,
+ .local_ip.s_addr = htonl(INADDR_ANY),
+ .local_udp_port = htons(port),
+ .use_udp_checksums = true
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ int retries = 0;
+ struct udp_port_cfg port6 = {
+ .family = AF_INET6,
+ .local_ip6 = IN6ADDR_ANY_INIT,
+ .use_udp6_tx_checksums = true,
+ .use_udp6_rx_checksums = true,
+ .ipv6_v6only = true
+ };
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+retry:
+#endif
+
+ ret = udp_sock_create(wg->creating_net, &port4, &new4);
+ if (ret < 0) {
+ pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
+ return ret;
+ }
+ set_sock_opts(new4);
+ setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6_mod_enabled()) {
+ port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
+ ret = udp_sock_create(wg->creating_net, &port6, &new6);
+ if (ret < 0) {
+ udp_tunnel_sock_release(new4);
+ if (ret == -EADDRINUSE && !port && retries++ < 100)
+ goto retry;
+ pr_err("%s: Could not create IPv6 socket\n",
+ wg->dev->name);
+ return ret;
+ }
+ set_sock_opts(new6);
+ setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
+ }
+#endif
+
+ wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
+ return 0;
+}
+
+void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
+ struct sock *new6)
+{
+ struct sock *old4, *old6;
+
+ mutex_lock(&wg->socket_update_lock);
+ old4 = rcu_dereference_protected(wg->sock4,
+ lockdep_is_held(&wg->socket_update_lock));
+ old6 = rcu_dereference_protected(wg->sock6,
+ lockdep_is_held(&wg->socket_update_lock));
+ rcu_assign_pointer(wg->sock4, new4);
+ rcu_assign_pointer(wg->sock6, new6);
+ if (new4)
+ wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
+ mutex_unlock(&wg->socket_update_lock);
+ synchronize_rcu();
+ synchronize_net();
+ sock_free(old4);
+ sock_free(old6);
+}
diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h
new file mode 100644
index 000000000000..bab5848efbcd
--- /dev/null
+++ b/drivers/net/wireguard/socket.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_SOCKET_H
+#define _WG_SOCKET_H
+
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+
+int wg_socket_init(struct wg_device *wg, u16 port);
+void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
+ struct sock *new6);
+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data,
+ size_t len, u8 ds);
+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb,
+ u8 ds);
+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
+ struct sk_buff *in_skb,
+ void *out_buffer, size_t len);
+
+int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
+ const struct sk_buff *skb);
+void wg_socket_set_peer_endpoint(struct wg_peer *peer,
+ const struct endpoint *endpoint);
+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
+ const struct sk_buff *skb);
+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer);
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
+#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \
+ struct endpoint __endpoint; \
+ wg_socket_endpoint_from_skb(&__endpoint, skb); \
+ net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \
+ ##__VA_ARGS__); \
+ } while (0)
+#else
+#define net_dbg_skb_ratelimited(fmt, skb, ...)
+#endif
+
+#endif /* _WG_SOCKET_H */
diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c
new file mode 100644
index 000000000000..d54d32ac9bc4
--- /dev/null
+++ b/drivers/net/wireguard/timers.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "queueing.h"
+#include "socket.h"
+
+/*
+ * - Timer for retransmitting the handshake if we don't hear back after
+ * `REKEY_TIMEOUT + jitter` ms.
+ *
+ * - Timer for sending empty packet if we have received a packet but after have
+ * not sent one for `KEEPALIVE_TIMEOUT` ms.
+ *
+ * - Timer for initiating new handshake if we have sent a packet but after have
+ * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) +
+ * jitter` ms.
+ *
+ * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms
+ * if no new keys have been received.
+ *
+ * - Timer for, if enabled, sending an empty authenticated packet every user-
+ * specified seconds.
+ */
+
+static inline void mod_peer_timer(struct wg_peer *peer,
+ struct timer_list *timer,
+ unsigned long expires)
+{
+ rcu_read_lock_bh();
+ if (likely(netif_running(peer->device->dev) &&
+ !READ_ONCE(peer->is_dead)))
+ mod_timer(timer, expires);
+ rcu_read_unlock_bh();
+}
+
+static void wg_expired_retransmit_handshake(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer,
+ timer_retransmit_handshake);
+
+ if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
+
+ del_timer(&peer->timer_send_keepalive);
+ /* We drop all packets without a keypair and don't try again,
+ * if we try unsuccessfully for too long to make a handshake.
+ */
+ wg_packet_purge_staged_packets(peer);
+
+ /* We set a timer for destroying any residue that might be left
+ * of a partial exchange.
+ */
+ if (!timer_pending(&peer->timer_zero_key_material))
+ mod_peer_timer(peer, &peer->timer_zero_key_material,
+ jiffies + REJECT_AFTER_TIME * 3 * HZ);
+ } else {
+ ++peer->timer_handshake_attempts;
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, REKEY_TIMEOUT,
+ peer->timer_handshake_attempts + 1);
+
+ /* We clear the endpoint address src address, in case this is
+ * the cause of trouble.
+ */
+ wg_socket_clear_peer_endpoint_src(peer);
+
+ wg_packet_send_queued_handshake_initiation(peer, true);
+ }
+}
+
+static void wg_expired_send_keepalive(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive);
+
+ wg_packet_send_keepalive(peer);
+ if (peer->timer_need_another_keepalive) {
+ peer->timer_need_another_keepalive = false;
+ mod_peer_timer(peer, &peer->timer_send_keepalive,
+ jiffies + KEEPALIVE_TIMEOUT * HZ);
+ }
+}
+
+static void wg_expired_new_handshake(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake);
+
+ pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
+ /* We clear the endpoint address src address, in case this is the cause
+ * of trouble.
+ */
+ wg_socket_clear_peer_endpoint_src(peer);
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
+
+static void wg_expired_zero_key_material(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material);
+
+ rcu_read_lock_bh();
+ if (!READ_ONCE(peer->is_dead)) {
+ wg_peer_get(peer);
+ if (!queue_work(peer->device->handshake_send_wq,
+ &peer->clear_peer_work))
+ /* If the work was already on the queue, we want to drop
+ * the extra reference.
+ */
+ wg_peer_put(peer);
+ }
+ rcu_read_unlock_bh();
+}
+
+static void wg_queued_expired_zero_key_material(struct work_struct *work)
+{
+ struct wg_peer *peer = container_of(work, struct wg_peer,
+ clear_peer_work);
+
+ pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ wg_peer_put(peer);
+}
+
+static void wg_expired_send_persistent_keepalive(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer,
+ timer_persistent_keepalive);
+
+ if (likely(peer->persistent_keepalive_interval))
+ wg_packet_send_keepalive(peer);
+}
+
+/* Should be called after an authenticated data packet is sent. */
+void wg_timers_data_sent(struct wg_peer *peer)
+{
+ if (!timer_pending(&peer->timer_new_handshake))
+ mod_peer_timer(peer, &peer->timer_new_handshake,
+ jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ +
+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after an authenticated data packet is received. */
+void wg_timers_data_received(struct wg_peer *peer)
+{
+ if (likely(netif_running(peer->device->dev))) {
+ if (!timer_pending(&peer->timer_send_keepalive))
+ mod_peer_timer(peer, &peer->timer_send_keepalive,
+ jiffies + KEEPALIVE_TIMEOUT * HZ);
+ else
+ peer->timer_need_another_keepalive = true;
+ }
+}
+
+/* Should be called after any type of authenticated packet is sent, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_send_keepalive);
+}
+
+/* Should be called after any type of authenticated packet is received, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_new_handshake);
+}
+
+/* Should be called after a handshake initiation message is sent. */
+void wg_timers_handshake_initiated(struct wg_peer *peer)
+{
+ mod_peer_timer(peer, &peer->timer_retransmit_handshake,
+ jiffies + REKEY_TIMEOUT * HZ +
+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after a handshake response message is received and processed
+ * or when getting key confirmation via the first data message.
+ */
+void wg_timers_handshake_complete(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_retransmit_handshake);
+ peer->timer_handshake_attempts = 0;
+ peer->sent_lastminute_handshake = false;
+ ktime_get_real_ts64(&peer->walltime_last_handshake);
+}
+
+/* Should be called after an ephemeral key is created, which is before sending a
+ * handshake response or after receiving a handshake response.
+ */
+void wg_timers_session_derived(struct wg_peer *peer)
+{
+ mod_peer_timer(peer, &peer->timer_zero_key_material,
+ jiffies + REJECT_AFTER_TIME * 3 * HZ);
+}
+
+/* Should be called before a packet with authentication, whether
+ * keepalive, data, or handshakem is sent, or after one is received.
+ */
+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer)
+{
+ if (peer->persistent_keepalive_interval)
+ mod_peer_timer(peer, &peer->timer_persistent_keepalive,
+ jiffies + peer->persistent_keepalive_interval * HZ);
+}
+
+void wg_timers_init(struct wg_peer *peer)
+{
+ timer_setup(&peer->timer_retransmit_handshake,
+ wg_expired_retransmit_handshake, 0);
+ timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0);
+ timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0);
+ timer_setup(&peer->timer_zero_key_material,
+ wg_expired_zero_key_material, 0);
+ timer_setup(&peer->timer_persistent_keepalive,
+ wg_expired_send_persistent_keepalive, 0);
+ INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material);
+ peer->timer_handshake_attempts = 0;
+ peer->sent_lastminute_handshake = false;
+ peer->timer_need_another_keepalive = false;
+}
+
+void wg_timers_stop(struct wg_peer *peer)
+{
+ del_timer_sync(&peer->timer_retransmit_handshake);
+ del_timer_sync(&peer->timer_send_keepalive);
+ del_timer_sync(&peer->timer_new_handshake);
+ del_timer_sync(&peer->timer_zero_key_material);
+ del_timer_sync(&peer->timer_persistent_keepalive);
+ flush_work(&peer->clear_peer_work);
+}
diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h
new file mode 100644
index 000000000000..f0653dcb1326
--- /dev/null
+++ b/drivers/net/wireguard/timers.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_TIMERS_H
+#define _WG_TIMERS_H
+
+#include <linux/ktime.h>
+
+struct wg_peer;
+
+void wg_timers_init(struct wg_peer *peer);
+void wg_timers_stop(struct wg_peer *peer);
+void wg_timers_data_sent(struct wg_peer *peer);
+void wg_timers_data_received(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer);
+void wg_timers_handshake_initiated(struct wg_peer *peer);
+void wg_timers_handshake_complete(struct wg_peer *peer);
+void wg_timers_session_derived(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer);
+
+static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds,
+ u64 expiration_seconds)
+{
+ return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC)
+ <= (s64)ktime_get_coarse_boottime_ns();
+}
+
+#endif /* _WG_TIMERS_H */
diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h
new file mode 100644
index 000000000000..a1a269a11634
--- /dev/null
+++ b/drivers/net/wireguard/version.h
@@ -0,0 +1 @@
+#define WIREGUARD_VERSION "1.0.0"
diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h
new file mode 100644
index 000000000000..dd8a47c4ad11
--- /dev/null
+++ b/include/uapi/linux/wireguard.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes,
+ * these two functions actually accept a slightly different set of inputs and
+ * outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the
+ * following tree of nested items:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16
+ * WGDEVICE_A_FWMARK: NLA_U32
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec
+ * WGPEER_A_RX_BYTES: NLA_U64
+ * WGPEER_A_TX_BYTES: NLA_U64
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLA_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the
+ * following tree of nested items, containing one but not both of
+ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ * peers should be removed prior to adding the list below.
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the
+ * specified peer should not exist at the end of the
+ * operation, rather than added/updated and/or
+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed
+ * IPs of this peer should be removed prior to adding
+ * the list below and/or WGPEER_F_UPDATE_ONLY if the
+ * peer should only be set if it already exists.
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at
+ * all by most users of this API, as the
+ * most recent protocol will be used when
+ * this is unset. Otherwise, must be set
+ * to 1.
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case, several
+ * messages should be sent one after another, with each successive one
+ * filling in information not contained in the prior. Note that if
+ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
+ * should not be specified in fragments that come after, so that the list
+ * of peers is only cleared the first time but appened after. Likewise for
+ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
+ * of a peer, it likely should not be specified in subsequent fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#ifndef _WG_UAPI_WIREGUARD_H
+#define _WG_UAPI_WIREGUARD_H
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+enum wg_cmd {
+ WG_CMD_GET_DEVICE,
+ WG_CMD_SET_DEVICE,
+ __WG_CMD_MAX
+};
+#define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+enum wgdevice_flag {
+ WGDEVICE_F_REPLACE_PEERS = 1U << 0,
+ __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS
+};
+enum wgdevice_attribute {
+ WGDEVICE_A_UNSPEC,
+ WGDEVICE_A_IFINDEX,
+ WGDEVICE_A_IFNAME,
+ WGDEVICE_A_PRIVATE_KEY,
+ WGDEVICE_A_PUBLIC_KEY,
+ WGDEVICE_A_FLAGS,
+ WGDEVICE_A_LISTEN_PORT,
+ WGDEVICE_A_FWMARK,
+ WGDEVICE_A_PEERS,
+ __WGDEVICE_A_LAST
+};
+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+enum wgpeer_flag {
+ WGPEER_F_REMOVE_ME = 1U << 0,
+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1,
+ WGPEER_F_UPDATE_ONLY = 1U << 2,
+ __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS |
+ WGPEER_F_UPDATE_ONLY
+};
+enum wgpeer_attribute {
+ WGPEER_A_UNSPEC,
+ WGPEER_A_PUBLIC_KEY,
+ WGPEER_A_PRESHARED_KEY,
+ WGPEER_A_FLAGS,
+ WGPEER_A_ENDPOINT,
+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ WGPEER_A_LAST_HANDSHAKE_TIME,
+ WGPEER_A_RX_BYTES,
+ WGPEER_A_TX_BYTES,
+ WGPEER_A_ALLOWEDIPS,
+ WGPEER_A_PROTOCOL_VERSION,
+ __WGPEER_A_LAST
+};
+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+enum wgallowedip_attribute {
+ WGALLOWEDIP_A_UNSPEC,
+ WGALLOWEDIP_A_FAMILY,
+ WGALLOWEDIP_A_IPADDR,
+ WGALLOWEDIP_A_CIDR_MASK,
+ __WGALLOWEDIP_A_LAST
+};
+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
+
+#endif /* _WG_UAPI_WIREGUARD_H */
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
new file mode 100755
index 000000000000..e7310d9390f7
--- /dev/null
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -0,0 +1,537 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+#
+# This script tests the below topology:
+#
+# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│
+# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││
+# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│
+# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││
+# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││
+# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│
+# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘
+# └──────────────────────────────────┘
+#
+# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the
+# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0
+# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
+# details on how this is accomplished.
+set -e
+
+exec 3>&1
+export WG_HIDE_KEYS=never
+netns0="wg-test-$$-0"
+netns1="wg-test-$$-1"
+netns2="wg-test-$$-2"
+pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; }
+pp() { pretty "" "$*"; "$@"; }
+maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; }
+n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; }
+n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; }
+n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
+ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
+ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
+ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
+sleep() { read -t "$1" -N 0 || true; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
+waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
+waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
+waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
+
+cleanup() {
+ set +e
+ exec 2>/dev/null
+ printf "$orig_message_cost" > /proc/sys/net/core/message_cost
+ ip0 link del dev wg0
+ ip1 link del dev wg0
+ ip2 link del dev wg0
+ local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
+ [[ -n $to_kill ]] && kill $to_kill
+ pp ip netns del $netns1
+ pp ip netns del $netns2
+ pp ip netns del $netns0
+ exit
+}
+
+orig_message_cost="$(< /proc/sys/net/core/message_cost)"
+trap cleanup EXIT
+printf 0 > /proc/sys/net/core/message_cost
+
+ip netns del $netns0 2>/dev/null || true
+ip netns del $netns1 2>/dev/null || true
+ip netns del $netns2 2>/dev/null || true
+pp ip netns add $netns0
+pp ip netns add $netns1
+pp ip netns add $netns2
+ip0 link set up dev lo
+
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns1
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns2
+key1="$(pp wg genkey)"
+key2="$(pp wg genkey)"
+key3="$(pp wg genkey)"
+pub1="$(pp wg pubkey <<<"$key1")"
+pub2="$(pp wg pubkey <<<"$key2")"
+pub3="$(pp wg pubkey <<<"$key3")"
+psk="$(pp wg genpsk)"
+[[ -n $key1 && -n $key2 && -n $psk ]]
+
+configure_peers() {
+ ip1 addr add 192.168.241.1/24 dev wg0
+ ip1 addr add fd00::1/24 dev wg0
+
+ ip2 addr add 192.168.241.2/24 dev wg0
+ ip2 addr add fd00::2/24 dev wg0
+
+ n1 wg set wg0 \
+ private-key <(echo "$key1") \
+ listen-port 1 \
+ peer "$pub2" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.2/32,fd00::2/128
+ n2 wg set wg0 \
+ private-key <(echo "$key2") \
+ listen-port 2 \
+ peer "$pub1" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.1/32,fd00::1/128
+
+ ip1 link set up dev wg0
+ ip2 link set up dev wg0
+}
+configure_peers
+
+tests() {
+ # Ping over IPv4
+ n2 ping -c 10 -f -W 1 192.168.241.1
+ n1 ping -c 10 -f -W 1 192.168.241.2
+
+ # Ping over IPv6
+ n2 ping6 -c 10 -f -W 1 fd00::1
+ n1 ping6 -c 10 -f -W 1 fd00::2
+
+ # TCP over IPv4
+ n2 iperf3 -s -1 -B 192.168.241.2 &
+ waitiperf $netns2
+ n1 iperf3 -Z -t 3 -c 192.168.241.2
+
+ # TCP over IPv6
+ n1 iperf3 -s -1 -B fd00::1 &
+ waitiperf $netns1
+ n2 iperf3 -Z -t 3 -c fd00::1
+
+ # UDP over IPv4
+ n1 iperf3 -s -1 -B 192.168.241.1 &
+ waitiperf $netns1
+ n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
+
+ # UDP over IPv6
+ n2 iperf3 -s -1 -B fd00::2 &
+ waitiperf $netns2
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+}
+
+[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
+big_mtu=$(( 34816 - 1500 + $orig_mtu ))
+
+# Test using IPv4 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+# Before calling tests, we first make sure that the stats counters and timestamper are working
+n2 ping -c 10 -f -W 1 192.168.241.1
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ timestamp < <(n1 wg show wg0 latest-handshakes)
+(( timestamp != 0 ))
+
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv6 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n2 wg set wg0 peer "$pub1" endpoint [::1]:1
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+# Test that route MTUs work with the padding
+ip1 link set wg0 mtu 1300
+ip2 link set wg0 mtu 1300
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+n0 iptables -A INPUT -m length --length 1360 -j DROP
+n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299
+n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299
+n2 ping -c 1 -W 1 -s 1269 192.168.241.1
+n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299
+n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299
+n0 iptables -F INPUT
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv4 that roaming works
+ip0 -4 addr del 127.0.0.1/8 dev lo
+ip0 -4 addr add 127.212.121.99/8 dev lo
+n1 wg set wg0 listen-port 9999
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n1 ping6 -W 1 -c 1 fd00::2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]]
+
+# Test using IPv6 that roaming works
+n1 wg set wg0 listen-port 9998
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]]
+
+# Test that crypto-RP filter works
+n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
+kill $ncat_pid
+more_specific_key="$(pp wg genkey | pp wg pubkey)"
+n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
+n2 wg set wg0 listen-port 9997
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+! read -r -N 1 -t 1 out <&4 || false
+kill $ncat_pid
+n1 wg set wg0 peer "$more_specific_key" remove
+[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]]
+
+# Test that we can change private keys keys and immediately handshake
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 private-key <(echo "$key3")
+n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
+n1 ping -W 1 -c 1 192.168.241.2
+
+ip1 link del wg0
+ip2 link del wg0
+
+# Test using NAT. We now change the topology to this:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+
+ip0 link add vethrc type veth peer name vethc
+ip0 link add vethrs type veth peer name veths
+ip0 link set vethc netns $netns1
+ip0 link set veths netns $netns2
+ip0 link set vethrc up
+ip0 link set vethrs up
+ip0 addr add 192.168.1.1/24 dev vethrc
+ip0 addr add 10.0.0.1/24 dev vethrs
+ip1 addr add 192.168.1.100/24 dev vethc
+ip1 link set vethc up
+ip1 route add default via 192.168.1.1
+ip2 addr add 10.0.0.100/24 dev veths
+ip2 link set veths up
+waitiface $netns0 vethrc
+waitiface $netns0 vethrs
+waitiface $netns1 vethc
+waitiface $netns2 veths
+
+n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream'
+n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1
+
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1
+n1 ping -W 1 -c 1 192.168.241.2
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`).
+pp sleep 3
+n2 ping -W 1 -c 1 192.168.241.1
+n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+
+# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
+ip1 -6 addr add fc00::9/96 dev vethc
+ip1 -6 route add default via fc00::1
+ip2 -4 addr add 192.168.99.7/32 dev wg0
+ip2 -6 addr add abab::1111/128 dev wg0
+n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111
+ip1 -6 route add default dev wg0 table 51820
+ip1 -6 rule add not fwmark 51820 table 51820
+ip1 -6 rule add table main suppress_prefixlength 0
+ip1 -4 route add default dev wg0 table 51820
+ip1 -4 rule add not fwmark 51820 table 51820
+ip1 -4 rule add table main suppress_prefixlength 0
+# suppress_prefixlength only got added in 3.12, and we want to support 3.10+.
+if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then
+ # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
+ n1 ping -W 1 -c 100 -f 192.168.99.7
+ n1 ping -W 1 -c 100 -f abab::1111
+fi
+
+n0 iptables -t nat -F
+ip0 link del vethrc
+ip0 link del vethrs
+ip1 link del wg0
+ip2 link del wg0
+
+# Test that saddr routing is sticky but not too sticky, changing to this topology:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns2 namespace │
+# │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad'
+n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries'
+
+# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip2 addr add fd00:aa::2/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add 10.0.0.10/24 dev veth1
+ip1 addr del 10.0.0.1/24 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add fd00:aa::10/96 dev veth1
+ip1 addr del fd00:aa::1/96 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+
+# Now we show that we can successfully do reply to sender routing
+ip1 link set veth1 down
+ip2 link set veth2 down
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add 10.0.0.2/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip1 addr add fd00:aa::2/96 dev veth1
+ip2 addr add 10.0.0.3/24 dev veth2
+ip2 addr add fd00:aa::3/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]]
+
+# What happens if the inbound destination address belongs to a different interface as the default route?
+ip1 link add dummy0 type dummy
+ip1 addr add 10.50.0.1/24 dev dummy0
+ip1 link set dummy0 up
+ip2 route add 10.50.0.0/24 dev veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]]
+
+ip1 link del dummy0
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 route flush dev veth1
+ip2 route flush dev veth2
+
+# Now we see what happens if another interface route takes precedence over an ongoing one
+ip1 link add veth3 type veth peer name veth4
+ip1 link set veth4 netns $netns2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip1 addr add 10.0.0.3/24 dev veth3
+ip1 link set veth1 up
+ip2 link set veth2 up
+ip1 link set veth3 up
+ip2 link set veth4 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+waitiface $netns1 veth3
+waitiface $netns2 veth4
+ip1 route flush dev veth1
+ip1 route flush dev veth3
+ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
+
+ip1 link del veth1
+ip1 link del veth3
+ip1 link del wg0
+ip2 link del wg0
+
+# We test that Netlink/IPC is working properly by doing things that usually cause split responses
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" )
+for a in {1..255}; do
+ for b in {0..255}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+for ip in $(n0 wg show wg0 allowed-ips); do
+ ((++i))
+done
+((i == 255*256*2+1))
+ip0 link del wg0
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" )
+for a in {1..40}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+ for b in {1..52}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+while read -r line; do
+ j=0
+ for ip in $line; do
+ ((++j))
+ done
+ ((j == 53))
+ ((++i))
+done < <(n0 wg show wg0 allowed-ips)
+((i == 40))
+ip0 link del wg0
+ip0 link add wg0 type wireguard
+config=( )
+for i in {1..29}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+done
+config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" )
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+n0 wg showconf wg0 > /dev/null
+ip0 link del wg0
+
+allowedips=( )
+for i in {1..197}; do
+ allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1"
+n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
+{
+ read -r pub allowedips
+ [[ $pub == "$pub1" && $allowedips == "(none)" ]]
+ read -r pub allowedips
+ [[ $pub == "$pub2" ]]
+ i=0
+ for _ in $allowedips; do
+ ((++i))
+ done
+ ((i == 197))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
+! n0 wg show doesnotexist || false
+
+ip0 link add wg0 type wireguard
+n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk")
+[[ $(n0 wg show wg0 private-key) == "$key1" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]]
+n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null
+[[ $(n0 wg show wg0 private-key) == "(none)" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]]
+n0 wg set wg0 peer "$pub2"
+n0 wg set wg0 private-key <(echo "$key2")
+[[ $(n0 wg show wg0 public-key) == "$pub2" ]]
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 peer "$pub2"
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 private-key <(echo "$key1")
+n0 wg set wg0 peer "$pub2"
+[[ $(n0 wg show wg0 peers) == "$pub2" ]]
+n0 wg set wg0 private-key <(echo "/${key1:1}")
+[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]]
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0
+ip0 link del wg0
+
+declare -A objects
+while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
+ objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
+done < /dev/kmsg
+alldeleted=1
+for object in "${!objects[@]}"; do
+ if [[ ${objects["$object"]} != *createddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
+done
+[[ $alldeleted -eq 1 ]]
+pretty "" "Objects that were created were also destroyed."
--
2.18.2
From 514644d220829b4c1987cd451485d984cd88e6dd Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 15 Dec 2019 22:08:00 +0100
Subject: [PATCH 060/100] wireguard: selftests: import harness makefile for
test suite
commit 65d88d04114bca7d85faebd5fed61069cb2b632c upstream.
WireGuard has been using this on build.wireguard.com for the last
several years with considerable success. It allows for very quick and
iterative development cycles, and supports several platforms.
To run the test suite on your current platform in QEMU:
$ make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
To run it with KASAN and such turned on:
$ DEBUG_KERNEL=yes make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
To run it emulated for another platform in QEMU:
$ ARCH=arm make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
At the moment, we support aarch64_be, aarch64, arm, armeb, i686, m68k,
mips64, mips64el, mips, mipsel, powerpc64le, powerpc, and x86_64.
The system supports incremental rebuilding, so it should be very fast to
change a single file and then test it out and have immediate feedback.
This requires for the right toolchain and qemu to be installed prior.
I've had success with those from musl.cc.
This is tailored for WireGuard at the moment, though later projects
might generalize it for other network testing.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
.../selftests/wireguard/qemu/.gitignore | 2 +
.../testing/selftests/wireguard/qemu/Makefile | 385 ++++++++++++++++++
.../wireguard/qemu/arch/aarch64.config | 5 +
.../wireguard/qemu/arch/aarch64_be.config | 6 +
.../selftests/wireguard/qemu/arch/arm.config | 9 +
.../wireguard/qemu/arch/armeb.config | 10 +
.../selftests/wireguard/qemu/arch/i686.config | 5 +
.../selftests/wireguard/qemu/arch/m68k.config | 9 +
.../selftests/wireguard/qemu/arch/mips.config | 11 +
.../wireguard/qemu/arch/mips64.config | 14 +
.../wireguard/qemu/arch/mips64el.config | 15 +
.../wireguard/qemu/arch/mipsel.config | 12 +
.../wireguard/qemu/arch/powerpc.config | 10 +
.../wireguard/qemu/arch/powerpc64le.config | 12 +
.../wireguard/qemu/arch/x86_64.config | 5 +
.../selftests/wireguard/qemu/debug.config | 67 +++
tools/testing/selftests/wireguard/qemu/init.c | 284 +++++++++++++
.../selftests/wireguard/qemu/kernel.config | 86 ++++
18 files changed, 947 insertions(+)
create mode 100644 tools/testing/selftests/wireguard/qemu/.gitignore
create mode 100644 tools/testing/selftests/wireguard/qemu/Makefile
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/arm.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/armeb.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/i686.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/m68k.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64el.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mipsel.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
create mode 100644 tools/testing/selftests/wireguard/qemu/arch/x86_64.config
create mode 100644 tools/testing/selftests/wireguard/qemu/debug.config
create mode 100644 tools/testing/selftests/wireguard/qemu/init.c
create mode 100644 tools/testing/selftests/wireguard/qemu/kernel.config
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
new file mode 100644
index 000000000000..415b542a9d59
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -0,0 +1,2 @@
+build/
+distfiles/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
new file mode 100644
index 000000000000..6d51bf78eeff
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -0,0 +1,385 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+
+PWD := $(shell pwd)
+
+CHOST := $(shell gcc -dumpmachine)
+ifneq (,$(ARCH))
+CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
+ifeq (,$(CBUILD))
+$(error The toolchain for $(ARCH) is not installed)
+endif
+else
+CBUILD := $(CHOST)
+ARCH := $(firstword $(subst -, ,$(CBUILD)))
+endif
+
+# Set these from the environment to override
+KERNEL_PATH ?= $(PWD)/../../../../..
+BUILD_PATH ?= $(PWD)/build/$(ARCH)
+DISTFILES_PATH ?= $(PWD)/distfiles
+NR_CPUS ?= 4
+
+MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+
+default: qemu
+
+# variable name, tarball project name, version, tarball extension, default URI base
+define tar_download =
+$(1)_VERSION := $(3)
+$(1)_NAME := $(2)-$$($(1)_VERSION)
+$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4)
+$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME)
+$(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
+endef
+
+define file_download =
+$(DISTFILES_PATH)/$(1):
+ mkdir -p $(DISTFILES_PATH)
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
+ if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
+endef
+
+$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61))
+$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
+$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f))
+$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2))
+$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5))
+$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21))
+$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a))
+$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071))
+
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
+export CFLAGS ?= -O3 -pipe
+export LDFLAGS ?=
+export CPPFLAGS := -I$(BUILD_PATH)/include
+
+ifeq ($(CHOST),$(CBUILD))
+CROSS_COMPILE_FLAG := --host=$(CHOST)
+NOPIE_GCC := gcc -fno-PIE
+CFLAGS += -march=native
+STRIP := strip
+else
+$(info Cross compilation: building for $(CBUILD) using $(CHOST))
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+export CROSS_COMPILE=$(CBUILD)-
+NOPIE_GCC := $(CBUILD)-gcc -fno-PIE
+STRIP := $(CBUILD)-strip
+endif
+ifeq ($(ARCH),aarch64)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),aarch64_be)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),arm)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+endif
+else ifeq ($(ARCH),armeb)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+LDFLAGS += -Wl,--be8
+endif
+else ifeq ($(ARCH),x86_64)
+QEMU_ARCH := x86_64
+KERNEL_ARCH := x86_64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu Skylake-Server -machine q35
+CFLAGS += -march=skylake-avx512
+endif
+else ifeq ($(ARCH),i686)
+QEMU_ARCH := i386
+KERNEL_ARCH := x86
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu coreduo -machine q35
+CFLAGS += -march=prescott
+endif
+else ifeq ($(ARCH),mips64)
+QEMU_ARCH := mips64
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EB
+endif
+else ifeq ($(ARCH),mips64el)
+QEMU_ARCH := mips64el
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EL
+endif
+else ifeq ($(ARCH),mips)
+QEMU_ARCH := mips
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EB
+endif
+else ifeq ($(ARCH),mipsel)
+QEMU_ARCH := mipsel
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EL
+endif
+else ifeq ($(ARCH),powerpc64le)
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
+CFLAGS += -mcpu=powerpc64le -mlong-double-64
+else ifeq ($(ARCH),powerpc)
+QEMU_ARCH := ppc
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
+else
+QEMU_MACHINE := -machine ppce500
+endif
+CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
+else ifeq ($(ARCH),m68k)
+QEMU_ARCH := m68k
+KERNEL_ARCH := m68k
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(CHOST),$(CBUILD))
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800
+else
+QEMU_MACHINE := -machine q800
+endif
+else
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+endif
+
+REAL_CC := $(CBUILD)-gcc
+MUSL_CC := $(BUILD_PATH)/musl-gcc
+export CC := $(MUSL_CC)
+USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+
+build: $(KERNEL_BZIMAGE)
+qemu: $(KERNEL_BZIMAGE)
+ rm -f $(BUILD_PATH)/result
+ timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
+ -nodefaults \
+ -nographic \
+ -smp $(NR_CPUS) \
+ $(QEMU_MACHINE) \
+ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ -serial stdio \
+ -serial file:$(BUILD_PATH)/result \
+ -no-reboot \
+ -monitor none \
+ -kernel $<
+ grep -Fq success $(BUILD_PATH)/result
+
+$(BUILD_PATH)/init-cpio-spec.txt:
+ mkdir -p $(BUILD_PATH)
+ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
+ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
+ echo "dir /dev 755 0 0" >> $@
+ echo "nod /dev/console 644 0 0 c 5 1" >> $@
+ echo "dir /bin 755 0 0" >> $@
+ echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@
+ echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
+ echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
+ echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
+ echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
+ echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
+ echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@
+ echo "slink /bin/iptables xtables-multi 777 0 0" >> $@
+ echo "slink /bin/ping6 ping 777 0 0" >> $@
+ echo "dir /lib 755 0 0" >> $@
+ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
+ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+
+$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+ mkdir -p $(KERNEL_BUILD_PATH)
+ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
+ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
+ cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig
+ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
+ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
+
+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)"
+
+$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+ touch $@
+
+$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
+ $(MAKE) -C $(MUSL_PATH)
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
+ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ touch $@
+
+$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
+ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc
+ chmod +x $(BUILD_PATH)/musl-gcc
+
+$(IPERF_PATH)/.installed: $(IPERF_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h
+ sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile*
+ touch $@
+
+$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
+ $(MAKE) -C $(IPERF_PATH)
+ $(STRIP) -s $@
+
+$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
+ $(MAKE) -C $(LIBMNL_PATH)
+ sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
+
+$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
+ mkdir -p $(BUILD_PATH)
+ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(STRIP) -s $@
+
+$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping
+ $(STRIP) -s $@
+
+$(BASH_PATH)/.installed: $(BASH_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ $(MAKE) -C $(BASH_PATH)
+ $(STRIP) -s $@
+
+$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ touch $@
+
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+
+$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
+ touch $@
+
+$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+ $(MAKE) -C $(IPTABLES_PATH)
+ $(STRIP) -s $@
+
+$(NMAP_PATH)/.installed: $(NMAP_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux
+ $(MAKE) -C $(NMAP_PATH) build-ncat
+ $(STRIP) -s $@
+
+clean:
+ rm -rf $(BUILD_PATH)
+
+distclean: clean
+ rm -rf $(DISTFILES_PATH)
+
+menuconfig: $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig
+
+.PHONY: qemu build clean distclean menuconfig
+.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
new file mode 100644
index 000000000000..3d063bb247bb
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
new file mode 100644
index 000000000000..dbdc7e406a7b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -0,0 +1,6 @@
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
new file mode 100644
index 000000000000..148f49905418
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
new file mode 100644
index 000000000000..bd76b07d00a2
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -0,0 +1,10 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
new file mode 100644
index 000000000000..a85025d7206e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
new file mode 100644
index 000000000000..5381ea10896c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_M68040=y
+CONFIG_MAC=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
new file mode 100644
index 000000000000..df71d6b95546
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -0,0 +1,11 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
new file mode 100644
index 000000000000..90c783f725c4
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -0,0 +1,14 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
new file mode 100644
index 000000000000..435b0b43e00c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -0,0 +1,15 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
new file mode 100644
index 000000000000..62bb50c4a85f
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -0,0 +1,12 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
new file mode 100644
index 000000000000..57957093b71b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -0,0 +1,10 @@
+CONFIG_PPC_QEMU_E500=y
+CONFIG_FSL_SOC_BOOKE=y
+CONFIG_PPC_85xx=y
+CONFIG_PHYS_64BIT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_MATH_EMULATION=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
new file mode 100644
index 000000000000..990c510a9cfa
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -0,0 +1,12 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
new file mode 100644
index 000000000000..00a1ef4869d5
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
new file mode 100644
index 000000000000..b9c72706fe4d
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -0,0 +1,67 @@
+CONFIG_LOCALVERSION="-debug"
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_POINTER=y
+CONFIG_STACK_VALIDATION=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_POISONING=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_HAVE_ARCH_KMEMCHECK=y
+CONFIG_HAVE_ARCH_KASAN=y
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_SANITIZE_ALL=y
+CONFIG_UBSAN_NO_ALIGNMENT=y
+CONFIG_UBSAN_NULL=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHED_DEBUG=y
+CONFIG_SCHED_INFO=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_PROVE_RCU=y
+CONFIG_SPARSE_RCU_POINTER=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=21
+CONFIG_RCU_TRACE=y
+CONFIG_RCU_EQS_DEBUG=y
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DOUBLEFAULT=y
+CONFIG_X86_DEBUG_FPU=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
new file mode 100644
index 000000000000..51e5ddedee88
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/io.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/utsname.h>
+#include <sys/sendfile.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+__attribute__((noreturn)) static void poweroff(void)
+{
+ fflush(stdout);
+ fflush(stderr);
+ reboot(RB_AUTOBOOT);
+ sleep(30);
+ fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n");
+ exit(1);
+}
+
+static void panic(const char *what)
+{
+ fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno));
+ poweroff();
+}
+
+#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m")
+
+static void print_banner(void)
+{
+ struct utsname utsname;
+ int len;
+
+ if (uname(&utsname) < 0)
+ panic("uname");
+
+ len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine);
+ printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, "");
+}
+
+static void seed_rng(void)
+{
+ int fd;
+ struct {
+ int entropy_count;
+ int buffer_size;
+ unsigned char buffer[256];
+ } entropy = {
+ .entropy_count = sizeof(entropy.buffer) * 8,
+ .buffer_size = sizeof(entropy.buffer),
+ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
+ };
+
+ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
+ panic("mknod(/dev/urandom)");
+ fd = open("/dev/urandom", O_WRONLY);
+ if (fd < 0)
+ panic("open(urandom)");
+ for (int i = 0; i < 256; ++i) {
+ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
+ panic("ioctl(urandom)");
+ }
+ close(fd);
+}
+
+static void mount_filesystems(void)
+{
+ pretty_message("[+] Mounting filesystems...");
+ mkdir("/dev", 0755);
+ mkdir("/proc", 0755);
+ mkdir("/sys", 0755);
+ mkdir("/tmp", 0755);
+ mkdir("/run", 0755);
+ mkdir("/var", 0755);
+ if (mount("none", "/dev", "devtmpfs", 0, NULL))
+ panic("devtmpfs mount");
+ if (mount("none", "/proc", "proc", 0, NULL))
+ panic("procfs mount");
+ if (mount("none", "/sys", "sysfs", 0, NULL))
+ panic("sysfs mount");
+ if (mount("none", "/tmp", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/run", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL))
+ ; /* Not a problem if it fails.*/
+ if (symlink("/run", "/var/run"))
+ panic("run symlink");
+ if (symlink("/proc/self/fd", "/dev/fd"))
+ panic("fd symlink");
+}
+
+static void enable_logging(void)
+{
+ int fd;
+ pretty_message("[+] Enabling logging...");
+ fd = open("/proc/sys/kernel/printk", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "9\n", 2) != 2)
+ panic("write(printk)");
+ close(fd);
+ }
+ fd = open("/proc/sys/debug/exception-trace", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(exception-trace)");
+ close(fd);
+ }
+ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(panic_on_warn)");
+ close(fd);
+ }
+}
+
+static void kmod_selftests(void)
+{
+ FILE *file;
+ char line[2048], *start, *pass;
+ bool success = true;
+ pretty_message("[+] Module self-tests:");
+ file = fopen("/proc/kmsg", "r");
+ if (!file)
+ panic("fopen(kmsg)");
+ if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0)
+ panic("fcntl(kmsg, nonblock)");
+ while (fgets(line, sizeof(line), file)) {
+ start = strstr(line, "wireguard: ");
+ if (!start)
+ continue;
+ start += 11;
+ *strchrnul(start, '\n') = '\0';
+ if (strstr(start, "www.wireguard.com"))
+ break;
+ pass = strstr(start, ": pass");
+ if (!pass || pass[6] != '\0') {
+ success = false;
+ printf(" \x1b[31m* %s\x1b[0m\n", start);
+ } else
+ printf(" \x1b[32m* %s\x1b[0m\n", start);
+ }
+ fclose(file);
+ if (!success) {
+ puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m");
+ poweroff();
+ }
+}
+
+static void launch_tests(void)
+{
+ char cmdline[4096], *success_dev;
+ int status, fd;
+ pid_t pid;
+
+ pretty_message("[+] Launching tests...");
+ pid = fork();
+ if (pid == -1)
+ panic("fork");
+ else if (pid == 0) {
+ execl("/init.sh", "init", NULL);
+ panic("exec");
+ }
+ if (waitpid(pid, &status, 0) < 0)
+ panic("waitpid");
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ pretty_message("[+] Tests successful! :-)");
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0)
+ panic("open(/proc/cmdline)");
+ if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0)
+ panic("read(/proc/cmdline)");
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) {
+ if (strncmp(success_dev, "wg.success=", 11))
+ continue;
+ memcpy(success_dev + 11 - 5, "/dev/", 5);
+ success_dev += 11 - 5;
+ break;
+ }
+ if (!success_dev || !strlen(success_dev))
+ panic("Unable to find success device");
+
+ fd = open(success_dev, O_WRONLY);
+ if (fd < 0)
+ panic("open(success_dev)");
+ if (write(fd, "success\n", 8) != 8)
+ panic("write(success_dev)");
+ close(fd);
+ } else {
+ const char *why = "unknown cause";
+ int what = -1;
+
+ if (WIFEXITED(status)) {
+ why = "exit code";
+ what = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ why = "signal";
+ what = WTERMSIG(status);
+ }
+ printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what);
+ }
+}
+
+static void ensure_console(void)
+{
+ for (unsigned int i = 0; i < 1000; ++i) {
+ int fd = open("/dev/console", O_RDWR);
+ if (fd < 0) {
+ usleep(50000);
+ continue;
+ }
+ dup2(fd, 0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ close(fd);
+ if (write(1, "\0\0\0\0\n", 5) == 5)
+ return;
+ }
+ panic("Unable to open console device");
+}
+
+static void clear_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Starting memory leak detection...");
+ write(fd, "clear\n", 5);
+ close(fd);
+}
+
+static void check_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Scanning for memory leaks...");
+ sleep(2); /* Wait for any grace periods. */
+ write(fd, "scan\n", 5);
+ close(fd);
+
+ fd = open("/sys/kernel/debug/kmemleak", O_RDONLY);
+ if (fd < 0)
+ return;
+ if (sendfile(1, fd, NULL, 0x7ffff000) > 0)
+ panic("Memory leaks encountered");
+ close(fd);
+}
+
+int main(int argc, char *argv[])
+{
+ seed_rng();
+ ensure_console();
+ print_banner();
+ mount_filesystems();
+ kmod_selftests();
+ enable_logging();
+ clear_leaks();
+ launch_tests();
+ check_leaks();
+ poweroff();
+ return 1;
+}
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
new file mode 100644
index 000000000000..9cca30206014
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -0,0 +1,86 @@
+CONFIG_LOCALVERSION=""
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_CORE=y
+CONFIG_NET_IPIP=y
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+CONFIG_MULTIUSER=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_NS=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_NAT=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_NAT=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_NAT_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_TTY=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_VDSO=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_PRINTK=y
+CONFIG_KALLSYMS=y
+CONFIG_BUG=y
+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
+CONFIG_EMBEDDED=n
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_SHMEM=y
+CONFIG_SLUB=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_SMP=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+CONFIG_NUMA=y
+CONFIG_PREEMPT=y
+CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_ARCH_RANDOM=y
+CONFIG_FILE_LOCKING=y
+CONFIG_POSIX_TIMERS=y
+CONFIG_DEVTMPFS=y
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_PRINTK_TIME=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_KERNEL_GZIP=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_PANIC_TIMEOUT=-1
+CONFIG_STACKTRACE=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_WIREGUARD=y
+CONFIG_WIREGUARD_DEBUG=y
--
2.18.2
From 044e1033e6c7ee9f63c7bd2686f3a528c085befa Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 15 Dec 2019 22:08:01 +0100
Subject: [PATCH 061/100] wireguard: Kconfig: select parent dependency for
crypto
commit d7c68a38bb4f9b7c1a2e4a772872c752ee5c44a6 upstream.
This fixes the crypto selection submenu depenencies. Otherwise, we'd
wind up issuing warnings in which certain dependencies we also select
couldn't be satisfied. This condition was triggered by the addition of
the test suite autobuilder in the previous commit.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/Kconfig | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 16ad145e22c9..57f1ba924f4e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -85,6 +85,8 @@ config WIREGUARD
select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
select CRYPTO_CURVE25519_X86 if X86 && 64BIT
+ select ARM_CRYPTO if ARM
+ select ARM64_CRYPTO if ARM64
select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
select CRYPTO_POLY1305_ARM if ARM
--
2.18.2
From 2ff0156ba74c1330a1ae557b898879a45b7aea33 Mon Sep 17 00:00:00 2001
From: Josh Soref <jsoref@gmail.com>
Date: Sun, 15 Dec 2019 22:08:02 +0100
Subject: [PATCH 062/100] wireguard: global: fix spelling mistakes in comments
commit a2ec8b5706944d228181c8b91d815f41d6dd8e7b upstream.
This fixes two spelling errors in source code comments.
Signed-off-by: Josh Soref <jsoref@gmail.com>
[Jason: rewrote commit message]
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 2 +-
include/uapi/linux/wireguard.h | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 7e675f541491..9c6bab9c981f 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -380,7 +380,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
/* We've already verified the Poly1305 auth tag, which means this packet
* was not modified in transit. We can therefore tell the networking
* stack that all checksums of every layer of encapsulation have already
- * been checked "by the hardware" and therefore is unneccessary to check
+ * been checked "by the hardware" and therefore is unnecessary to check
* again in software.
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h
index dd8a47c4ad11..ae88be14c947 100644
--- a/include/uapi/linux/wireguard.h
+++ b/include/uapi/linux/wireguard.h
@@ -18,13 +18,13 @@
* one but not both of:
*
* WGDEVICE_A_IFINDEX: NLA_U32
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
*
* The kernel will then return several messages (NLM_F_MULTI) containing the
* following tree of nested items:
*
* WGDEVICE_A_IFINDEX: NLA_U32
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
* WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
* WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
* WGDEVICE_A_LISTEN_PORT: NLA_U16
@@ -77,7 +77,7 @@
* WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
*
* WGDEVICE_A_IFINDEX: NLA_U32
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
* WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
* peers should be removed prior to adding the list below.
* WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
@@ -121,7 +121,7 @@
* filling in information not contained in the prior. Note that if
* WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
* should not be specified in fragments that come after, so that the list
- * of peers is only cleared the first time but appened after. Likewise for
+ * of peers is only cleared the first time but appended after. Likewise for
* peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
* of a peer, it likely should not be specified in subsequent fragments.
*
--
2.18.2
From 84e89e9fd9f5264db5ca5c6b4601c3762bde0868 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 15 Dec 2019 22:08:03 +0100
Subject: [PATCH 063/100] wireguard: main: remove unused include
<linux/version.h>
commit 43967b6ff91e53bcce5ae08c16a0588a475b53a1 upstream.
Remove <linux/version.h> from the includes for main.c, which is unused.
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
[Jason: reworded commit message]
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/main.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
index 10c0a40f6a9e..7a7d5f1a80fc 100644
--- a/drivers/net/wireguard/main.c
+++ b/drivers/net/wireguard/main.c
@@ -12,7 +12,6 @@
#include <uapi/linux/wireguard.h>
-#include <linux/version.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/genetlink.h>
--
2.18.2
From 3d578c333903a03c4431ba9bf7715da6ffbfada5 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Sun, 15 Dec 2019 22:08:04 +0100
Subject: [PATCH 064/100] wireguard: allowedips: use kfree_rcu() instead of
call_rcu()
commit d89ee7d5c73af15c1c6f12b016cdf469742b5726 upstream.
The callback function of call_rcu() just calls a kfree(), so we
can use kfree_rcu() instead of call_rcu() + callback function.
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/allowedips.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 72667d5399c3..121d9ea0f135 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -31,11 +31,6 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
#define CHOOSE_NODE(parent, key) \
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
-static void node_free_rcu(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct allowedips_node, rcu));
-}
-
static void push_rcu(struct allowedips_node **stack,
struct allowedips_node __rcu *p, unsigned int *len)
{
@@ -112,7 +107,7 @@ static void walk_remove_by_peer(struct allowedips_node __rcu **top,
if (!node->bit[0] || !node->bit[1]) {
rcu_assign_pointer(*nptr, DEREF(
&node->bit[!REF(node->bit[0])]));
- call_rcu(&node->rcu, node_free_rcu);
+ kfree_rcu(node, rcu);
node = DEREF(nptr);
}
}
--
2.18.2
From cad9e6e6f5b2b10404d8873d0474a3c770f062be Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 2 Jan 2020 17:47:49 +0100
Subject: [PATCH 065/100] wireguard: selftests: remove ancient kernel
compatibility code
commit 9a69a4c8802adf642bc4a13d471b5a86b44ed434 upstream.
Quite a bit of the test suite was designed to work with ancient kernels.
Thankfully we no longer have to deal with this. This commit updates
things that we can finally update and removes things that we can finally
remove, to avoid the build-up of the last several years as a result of
having to support ancient kernels. We can finally rely on suppress_
prefixlength being available. On the build side of things, the no-PIE
hack is no longer required, and we can bump some of the tools, repair
our m68k and i686-kvm support, and get better coverage of the static
branches used in the crypto lib and in udp_tunnel.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/netns.sh | 11 +--
.../testing/selftests/wireguard/qemu/Makefile | 82 ++++++++++---------
.../selftests/wireguard/qemu/arch/m68k.config | 2 +-
tools/testing/selftests/wireguard/qemu/init.c | 1 +
.../selftests/wireguard/qemu/kernel.config | 2 +
5 files changed, 50 insertions(+), 48 deletions(-)
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index e7310d9390f7..d5c85c7494f2 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -37,7 +37,7 @@ n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
-sleep() { read -t "$1" -N 0 || true; }
+sleep() { read -t "$1" -N 1 || true; }
waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
@@ -294,12 +294,9 @@ ip1 -6 rule add table main suppress_prefixlength 0
ip1 -4 route add default dev wg0 table 51820
ip1 -4 rule add not fwmark 51820 table 51820
ip1 -4 rule add table main suppress_prefixlength 0
-# suppress_prefixlength only got added in 3.12, and we want to support 3.10+.
-if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then
- # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
- n1 ping -W 1 -c 100 -f 192.168.99.7
- n1 ping -W 1 -c 100 -f abab::1111
-fi
+# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
+n1 ping -W 1 -c 100 -f 192.168.99.7
+n1 ping -W 1 -c 100 -f abab::1111
n0 iptables -t nat -F
ip0 link del vethrc
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 6d51bf78eeff..f10aa3590adc 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -5,6 +5,7 @@
PWD := $(shell pwd)
CHOST := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
ifneq (,$(ARCH))
CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
ifeq (,$(CBUILD))
@@ -37,19 +38,19 @@ endef
define file_download =
$(DISTFILES_PATH)/$(1):
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
endef
-$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61))
+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
-$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f))
+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2))
-$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5))
-$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21))
-$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
@@ -59,23 +60,21 @@ export CFLAGS ?= -O3 -pipe
export LDFLAGS ?=
export CPPFLAGS := -I$(BUILD_PATH)/include
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
CROSS_COMPILE_FLAG := --host=$(CHOST)
-NOPIE_GCC := gcc -fno-PIE
CFLAGS += -march=native
STRIP := strip
else
$(info Cross compilation: building for $(CBUILD) using $(CHOST))
CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
export CROSS_COMPILE=$(CBUILD)-
-NOPIE_GCC := $(CBUILD)-gcc -fno-PIE
STRIP := $(CBUILD)-strip
endif
ifeq ($(ARCH),aarch64)
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
QEMU_MACHINE := -cpu cortex-a53 -machine virt
@@ -85,7 +84,7 @@ else ifeq ($(ARCH),aarch64_be)
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
QEMU_MACHINE := -cpu cortex-a53 -machine virt
@@ -95,7 +94,7 @@ else ifeq ($(ARCH),arm)
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
QEMU_MACHINE := -cpu cortex-a15 -machine virt
@@ -105,7 +104,7 @@ else ifeq ($(ARCH),armeb)
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
QEMU_MACHINE := -cpu cortex-a15 -machine virt
@@ -116,7 +115,7 @@ else ifeq ($(ARCH),x86_64)
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
else
QEMU_MACHINE := -cpu Skylake-Server -machine q35
@@ -126,7 +125,7 @@ else ifeq ($(ARCH),i686)
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
-ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST))
+ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
else
QEMU_MACHINE := -cpu coreduo -machine q35
@@ -136,7 +135,7 @@ else ifeq ($(ARCH),mips64)
QEMU_ARCH := mips64
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
CFLAGS += -EB
else
@@ -147,7 +146,7 @@ else ifeq ($(ARCH),mips64el)
QEMU_ARCH := mips64el
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
CFLAGS += -EL
else
@@ -158,7 +157,7 @@ else ifeq ($(ARCH),mips)
QEMU_ARCH := mips
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
CFLAGS += -EB
else
@@ -169,7 +168,7 @@ else ifeq ($(ARCH),mipsel)
QEMU_ARCH := mipsel
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
CFLAGS += -EL
else
@@ -180,7 +179,7 @@ else ifeq ($(ARCH),powerpc64le)
QEMU_ARCH := ppc64
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
QEMU_MACHINE := -machine pseries
@@ -190,7 +189,7 @@ else ifeq ($(ARCH),powerpc)
QEMU_ARCH := ppc
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
-ifeq ($(CHOST),$(CBUILD))
+ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
else
QEMU_MACHINE := -machine ppce500
@@ -200,10 +199,11 @@ else ifeq ($(ARCH),m68k)
QEMU_ARCH := m68k
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
-ifeq ($(CHOST),$(CBUILD))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
else
-QEMU_MACHINE := -machine q800
+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
endif
else
$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
@@ -238,14 +238,14 @@ $(BUILD_PATH)/init-cpio-spec.txt:
echo "nod /dev/console 644 0 0 c 5 1" >> $@
echo "dir /bin 755 0 0" >> $@
echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
- echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@
+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@
echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
- echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@
- echo "slink /bin/iptables xtables-multi 777 0 0" >> $@
+ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@
+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
echo "slink /bin/ping6 ping 777 0 0" >> $@
echo "dir /lib 755 0 0" >> $@
echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
@@ -260,8 +260,8 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)"
+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
@@ -280,7 +280,7 @@ $(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc
+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
chmod +x $(BUILD_PATH)/musl-gcc
$(IPERF_PATH)/.installed: $(IPERF_TAR)
@@ -291,7 +291,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
touch $@
$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
- cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
@@ -308,8 +308,8 @@ $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
-$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
$(STRIP) -s $@
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
@@ -323,7 +323,8 @@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
touch $@
$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
- $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping
+ sed -i /atexit/d $(IPUTILS_PATH)/ping.c
+ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS)
$(STRIP) -s $@
$(BASH_PATH)/.installed: $(BASH_TAR)
@@ -357,7 +358,7 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
touch $@
-$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
$(MAKE) -C $(IPTABLES_PATH)
$(STRIP) -s $@
@@ -368,8 +369,9 @@ $(NMAP_PATH)/.installed: $(NMAP_TAR)
touch $@
$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
- cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux
- $(MAKE) -C $(NMAP_PATH) build-ncat
+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh
+ $(MAKE) -C $(NMAP_PATH)/libpcap
+ $(MAKE) -C $(NMAP_PATH)/ncat
$(STRIP) -s $@
clean:
@@ -379,7 +381,7 @@ distclean: clean
rm -rf $(DISTFILES_PATH)
menuconfig: $(KERNEL_BUILD_PATH)/.config
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
.PHONY: qemu build clean distclean menuconfig
.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 5381ea10896c..62a15bdb877e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -1,9 +1,9 @@
CONFIG_MMU=y
+CONFIG_M68KCLASSIC=y
CONFIG_M68040=y
CONFIG_MAC=y
CONFIG_SERIAL_PMACZILOG=y
CONFIG_SERIAL_PMACZILOG_TTYS=y
CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 51e5ddedee88..90bc9813cadc 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -21,6 +21,7 @@
#include <sys/reboot.h>
#include <sys/utsname.h>
#include <sys/sendfile.h>
+#include <sys/sysmacros.h>
#include <linux/random.h>
#include <linux/version.h>
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 9cca30206014..af9323a0b6e0 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -39,6 +39,7 @@ CONFIG_PRINTK=y
CONFIG_KALLSYMS=y
CONFIG_BUG=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
+CONFIG_JUMP_LABEL=y
CONFIG_EMBEDDED=n
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
@@ -55,6 +56,7 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_HZ_PERIODIC=n
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_ARCH_RANDOM=y
CONFIG_FILE_LOCKING=y
CONFIG_POSIX_TIMERS=y
--
2.18.2
From 9666ac553d46a1d0ee0baa37d5880e41428d1038 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 2 Jan 2020 17:47:50 +0100
Subject: [PATCH 066/100] wireguard: queueing: do not account for pfmemalloc
when clearing skb header
commit 04d2ea92a18417619182cbb79063f154892b0150 upstream.
Before 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_
header()"), the pfmemalloc flag used to be between headers_start and
headers_end, which is a region we clear when preparing the packet for
encryption/decryption. This is a parameter we certainly want to
preserve, which is why 8b7008620b84 moved it out of there. The code here
was written in a world before 8b7008620b84, though, where we had to
manually account for it. This commit brings things up to speed.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/queueing.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 58fdd630b246..e62c714a548e 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -83,13 +83,10 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
static inline void wg_reset_packet(struct sk_buff *skb)
{
- const int pfmemalloc = skb->pfmemalloc;
-
skb_scrub_packet(skb, true);
memset(&skb->headers_start, 0,
offsetof(struct sk_buff, headers_end) -
offsetof(struct sk_buff, headers_start));
- skb->pfmemalloc = pfmemalloc;
skb->queue_mapping = 0;
skb->nohdr = 0;
skb->peeked = 0;
--
2.18.2
From 1f217f112b8681b030dd198a2c7a5c06102f20c5 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 2 Jan 2020 17:47:51 +0100
Subject: [PATCH 067/100] wireguard: socket: mark skbs as not on list when
receiving via gro
commit 736775d06bac60d7a353e405398b48b2bd8b1e54 upstream.
Certain drivers will pass gro skbs to udp, at which point the udp driver
simply iterates through them and passes them off to encap_rcv, which is
where we pick up. At the moment, we're not attempting to coalesce these
into bundles, but we also don't want to wind up having cascaded lists of
skbs treated separately. The right behavior here, then, is to just mark
each incoming one as not on a list. This can be seen in practice, for
example, with Qualcomm's rmnet_perf driver.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Yaroslav Furman <yaro330@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/socket.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index c46256d0d81c..262f3b5c819d 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -333,6 +333,7 @@ static int wg_receive(struct sock *sk, struct sk_buff *skb)
wg = sk->sk_user_data;
if (unlikely(!wg))
goto err;
+ skb_mark_not_on_list(skb);
wg_packet_receive(wg, skb);
return 0;
--
2.18.2
From bc67e71a23c2e84f1ae7b471a50a62c886d36d7b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 8 Jan 2020 16:59:02 -0500
Subject: [PATCH 068/100] net: introduce skb_list_walk_safe for skb segment
walking
commit dcfea72e79b0aa7a057c8f6024169d86a1bbc84b upstream.
As part of the continual effort to remove direct usage of skb->next and
skb->prev, this patch adds a helper for iterating through the
singly-linked variant of skb lists, which are used for lists of GSO
packet. The name "skb_list_..." has been chosen to match the existing
function, "kfree_skb_list, which also operates on these singly-linked
lists, and the "..._walk_safe" part is the same idiom as elsewhere in
the kernel.
This patch removes the helper from wireguard and puts it into
linux/skbuff.h, while making it a bit more robust for general usage. In
particular, parenthesis are added around the macro argument usage, and it
now accounts for trying to iterate through an already-null skb pointer,
which will simply run the iteration zero times. This latter enhancement
means it can be used to replace both do { ... } while and while (...)
open-coded idioms.
This should take care of these three possible usages, which match all
current methods of iterations.
skb_list_walk_safe(segs, skb, next) { ... }
skb_list_walk_safe(skb, skb, next) { ... }
skb_list_walk_safe(segs, skb, segs) { ... }
Gcc appears to generate efficient code for each of these.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.h | 8 --------
include/linux/skbuff.h | 5 +++++
2 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
index c91f3051c5c7..b15a8be9d816 100644
--- a/drivers/net/wireguard/device.h
+++ b/drivers/net/wireguard/device.h
@@ -62,12 +62,4 @@ struct wg_device {
int wg_device_init(void);
void wg_device_uninit(void);
-/* Later after the dust settles, this can be moved into include/linux/skbuff.h,
- * where virtually all code that deals with GSO segs can benefit, around ~30
- * drivers as of writing.
- */
-#define skb_list_walk_safe(first, skb, next) \
- for (skb = first, next = skb->next; skb; \
- skb = next, next = skb ? skb->next : NULL)
-
#endif /* _WG_DEVICE_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 955e1370f033..5e9fe508977f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1480,6 +1480,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
skb->next = NULL;
}
+/* Iterate through singly-linked GSO fragments of an skb. */
+#define skb_list_walk_safe(first, skb, next) \
+ for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
+ (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
+
static inline void skb_list_del_init(struct sk_buff *skb)
{
__list_del_entry(&skb->list);
--
2.18.2
From 8fe9a34f82f28628f6a553d11370f35422346240 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 13 Jan 2020 18:42:26 -0500
Subject: [PATCH 069/100] net: skbuff: disambiguate argument and member for
skb_list_walk_safe helper
commit 5eee7bd7e245914e4e050c413dfe864e31805207 upstream.
This worked before, because we made all callers name their next pointer
"next". But in trying to be more "drop-in" ready, the silliness here is
revealed. This commit fixes the problem by making the macro argument and
the member use different names.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/linux/skbuff.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5e9fe508977f..3c7755d29636 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1481,9 +1481,9 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
}
/* Iterate through singly-linked GSO fragments of an skb. */
-#define skb_list_walk_safe(first, skb, next) \
- for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
- (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
+#define skb_list_walk_safe(first, skb, next_skb) \
+ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
+ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL)
static inline void skb_list_del_init(struct sk_buff *skb)
{
--
2.18.2
From 814bebdf05ee13ea187fce3c52ae9d7914b92aa2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 4 Feb 2020 22:17:25 +0100
Subject: [PATCH 070/100] wireguard: allowedips: fix use-after-free in
root_remove_peer_lists
commit 9981159fc3b677b357f84e069a11de5a5ec8a2a8 upstream.
In the unlikely case a new node could not be allocated, we need to
remove @newnode from @peer->allowedips_list before freeing it.
syzbot reported:
BUG: KASAN: use-after-free in __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
Read of size 8 at addr ffff88809881a538 by task syz-executor.4/30133
CPU: 0 PID: 30133 Comm: syz-executor.4 Not tainted 5.5.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x197/0x210 lib/dump_stack.c:118
print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374
__kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506
kasan_report+0x12/0x20 mm/kasan/common.c:639
__asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
__list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
__list_del_entry include/linux/list.h:132 [inline]
list_del include/linux/list.h:146 [inline]
root_remove_peer_lists+0x24f/0x4b0 drivers/net/wireguard/allowedips.c:65
wg_allowedips_free+0x232/0x390 drivers/net/wireguard/allowedips.c:300
wg_peer_remove_all+0xd5/0x620 drivers/net/wireguard/peer.c:187
wg_set_device+0xd01/0x1350 drivers/net/wireguard/netlink.c:542
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:652 [inline]
sock_sendmsg+0xd7/0x130 net/socket.c:672
____sys_sendmsg+0x753/0x880 net/socket.c:2343
___sys_sendmsg+0x100/0x170 net/socket.c:2397
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
__do_sys_sendmsg net/socket.c:2439 [inline]
__se_sys_sendmsg net/socket.c:2437 [inline]
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x45b399
Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f99a9bcdc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f99a9bce6d4 RCX: 000000000045b399
RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003
RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004
R13: 00000000000009ba R14: 00000000004cb2b8 R15: 0000000000000009
Allocated by task 30103:
save_stack+0x23/0x90 mm/kasan/common.c:72
set_track mm/kasan/common.c:80 [inline]
__kasan_kmalloc mm/kasan/common.c:513 [inline]
__kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486
kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527
kmem_cache_alloc_trace+0x158/0x790 mm/slab.c:3551
kmalloc include/linux/slab.h:556 [inline]
kzalloc include/linux/slab.h:670 [inline]
add+0x70a/0x1970 drivers/net/wireguard/allowedips.c:236
wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:652 [inline]
sock_sendmsg+0xd7/0x130 net/socket.c:672
____sys_sendmsg+0x753/0x880 net/socket.c:2343
___sys_sendmsg+0x100/0x170 net/socket.c:2397
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
__do_sys_sendmsg net/socket.c:2439 [inline]
__se_sys_sendmsg net/socket.c:2437 [inline]
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Freed by task 30103:
save_stack+0x23/0x90 mm/kasan/common.c:72
set_track mm/kasan/common.c:80 [inline]
kasan_set_free_info mm/kasan/common.c:335 [inline]
__kasan_slab_free+0x102/0x150 mm/kasan/common.c:474
kasan_slab_free+0xe/0x10 mm/kasan/common.c:483
__cache_free mm/slab.c:3426 [inline]
kfree+0x10a/0x2c0 mm/slab.c:3757
add+0x12d2/0x1970 drivers/net/wireguard/allowedips.c:266
wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:652 [inline]
sock_sendmsg+0xd7/0x130 net/socket.c:672
____sys_sendmsg+0x753/0x880 net/socket.c:2343
___sys_sendmsg+0x100/0x170 net/socket.c:2397
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
__do_sys_sendmsg net/socket.c:2439 [inline]
__se_sys_sendmsg net/socket.c:2437 [inline]
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
entry_SYSCALL_64_after_hwframe+0x49/0xbe
The buggy address belongs to the object at ffff88809881a500
which belongs to the cache kmalloc-64 of size 64
The buggy address is located 56 bytes inside of
64-byte region [ffff88809881a500, ffff88809881a540)
The buggy address belongs to the page:
page:ffffea0002620680 refcount:1 mapcount:0 mapping:ffff8880aa400380 index:0x0
raw: 00fffe0000000200 ffffea000250b748 ffffea000254bac8 ffff8880aa400380
raw: 0000000000000000 ffff88809881a000 0000000100000020 0000000000000000
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffff88809881a400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
ffff88809881a480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
>ffff88809881a500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
^
ffff88809881a580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
ffff88809881a600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: wireguard@lists.zx2c4.com
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/allowedips.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 121d9ea0f135..3725e9cd85f4 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -263,6 +263,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
} else {
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (unlikely(!node)) {
+ list_del(&newnode->peer_list);
kfree(newnode);
return -ENOMEM;
}
--
2.18.2
From 865cb6d968679b607bb1a83b4d243eec411fc1c8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 4 Feb 2020 22:17:26 +0100
Subject: [PATCH 071/100] wireguard: noise: reject peers with low order public
keys
commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream.
Our static-static calculation returns a failure if the public key is of
low order. We check for this when peers are added, and don't allow them
to be added if they're low order, except in the case where we haven't
yet been given a private key. In that case, we would defer the removal
of the peer until we're given a private key, since at that point we're
doing new static-static calculations which incur failures we can act on.
This meant, however, that we wound up removing peers rather late in the
configuration flow.
Syzkaller points out that peer_remove calls flush_workqueue, which in
turn might then wait for sending a handshake initiation to complete.
Since handshake initiation needs the static identity lock, holding the
static identity lock while calling peer_remove can result in a rare
deadlock. We have precisely this case in this situation of late-stage
peer removal based on an invalid public key. We can't drop the lock when
removing, because then incoming handshakes might interact with a bogus
static-static calculation.
While the band-aid patch for this would involve breaking up the peer
removal into two steps like wg_peer_remove_all does, in order to solve
the locking issue, there's actually a much more elegant way of fixing
this:
If the static-static calculation succeeds with one private key, it
*must* succeed with all others, because all 32-byte strings map to valid
private keys, thanks to clamping. That means we can get rid of this
silly dance and locking headaches of removing peers late in the
configuration flow, and instead just reject them early on, regardless of
whether the device has yet been assigned a private key. For the case
where the device doesn't yet have a private key, we safely use zeros
just for the purposes of checking for low order points by way of
checking the output of the calculation.
The following PoC will trigger the deadlock:
ip link add wg0 type wireguard
ip addr add 10.0.0.1/24 dev wg0
ip link set wg0 up
ping -f 10.0.0.2 &
while true; do
wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234
wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=)
done
[ 0.949105] ======================================================
[ 0.949550] WARNING: possible circular locking dependency detected
[ 0.950143] 5.5.0-debug+ #18 Not tainted
[ 0.950431] ------------------------------------------------------
[ 0.950959] wg/89 is trying to acquire lock:
[ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0
[ 0.951865]
[ 0.951865] but task is already holding lock:
[ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
[ 0.953011]
[ 0.953011] which lock already depends on the new lock.
[ 0.953011]
[ 0.953651]
[ 0.953651] the existing dependency chain (in reverse order) is:
[ 0.954292]
[ 0.954292] -> #2 (&wg->static_identity.lock){++++}:
[ 0.954804] lock_acquire+0x127/0x350
[ 0.955133] down_read+0x83/0x410
[ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700
[ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280
[ 0.956401] wg_packet_handshake_send_worker+0x10/0x20
[ 0.956841] process_one_work+0x806/0x1500
[ 0.957167] worker_thread+0x8c/0xcb0
[ 0.957549] kthread+0x2ee/0x3b0
[ 0.957792] ret_from_fork+0x24/0x30
[ 0.958234]
[ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}:
[ 0.958808] lock_acquire+0x127/0x350
[ 0.959075] process_one_work+0x7ab/0x1500
[ 0.959369] worker_thread+0x8c/0xcb0
[ 0.959639] kthread+0x2ee/0x3b0
[ 0.959896] ret_from_fork+0x24/0x30
[ 0.960346]
[ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}:
[ 0.960945] check_prev_add+0x167/0x1e20
[ 0.961351] __lock_acquire+0x2012/0x3170
[ 0.961725] lock_acquire+0x127/0x350
[ 0.961990] flush_workqueue+0x106/0x12f0
[ 0.962280] peer_remove_after_dead+0x160/0x220
[ 0.962600] wg_set_device+0xa24/0xcc0
[ 0.962994] genl_rcv_msg+0x52f/0xe90
[ 0.963298] netlink_rcv_skb+0x111/0x320
[ 0.963618] genl_rcv+0x1f/0x30
[ 0.963853] netlink_unicast+0x3f6/0x610
[ 0.964245] netlink_sendmsg+0x700/0xb80
[ 0.964586] __sys_sendto+0x1dd/0x2c0
[ 0.964854] __x64_sys_sendto+0xd8/0x1b0
[ 0.965141] do_syscall_64+0x90/0xd9a
[ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 0.965769]
[ 0.965769] other info that might help us debug this:
[ 0.965769]
[ 0.966337] Chain exists of:
[ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock
[ 0.966337]
[ 0.967417] Possible unsafe locking scenario:
[ 0.967417]
[ 0.967836] CPU0 CPU1
[ 0.968155] ---- ----
[ 0.968497] lock(&wg->static_identity.lock);
[ 0.968779] lock((work_completion)(&peer->transmit_handshake_work));
[ 0.969345] lock(&wg->static_identity.lock);
[ 0.969809] lock((wq_completion)wg-kex-wg0);
[ 0.970146]
[ 0.970146] *** DEADLOCK ***
[ 0.970146]
[ 0.970531] 5 locks held by wg/89:
[ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30
[ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90
[ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0
[ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0
[ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
[ 0.973653]
[ 0.973653] stack backtrace:
[ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18
[ 0.974476] Call Trace:
[ 0.974638] dump_stack+0x97/0xe0
[ 0.974869] check_noncircular+0x312/0x3e0
[ 0.975132] ? print_circular_bug+0x1f0/0x1f0
[ 0.975410] ? __kernel_text_address+0x9/0x30
[ 0.975727] ? unwind_get_return_address+0x51/0x90
[ 0.976024] check_prev_add+0x167/0x1e20
[ 0.976367] ? graph_lock+0x70/0x160
[ 0.976682] __lock_acquire+0x2012/0x3170
[ 0.976998] ? register_lock_class+0x1140/0x1140
[ 0.977323] lock_acquire+0x127/0x350
[ 0.977627] ? flush_workqueue+0xe3/0x12f0
[ 0.977890] flush_workqueue+0x106/0x12f0
[ 0.978147] ? flush_workqueue+0xe3/0x12f0
[ 0.978410] ? find_held_lock+0x2c/0x110
[ 0.978662] ? lock_downgrade+0x6e0/0x6e0
[ 0.978919] ? queue_rcu_work+0x60/0x60
[ 0.979166] ? netif_napi_del+0x151/0x3b0
[ 0.979501] ? peer_remove_after_dead+0x160/0x220
[ 0.979871] peer_remove_after_dead+0x160/0x220
[ 0.980232] wg_set_device+0xa24/0xcc0
[ 0.980516] ? deref_stack_reg+0x8e/0xc0
[ 0.980801] ? set_peer+0xe10/0xe10
[ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150
[ 0.981430] ? __nla_validate_parse+0x163/0x270
[ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310
[ 0.982078] genl_rcv_msg+0x52f/0xe90
[ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
[ 0.982690] ? register_lock_class+0x1140/0x1140
[ 0.983049] netlink_rcv_skb+0x111/0x320
[ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
[ 0.983645] ? netlink_ack+0x880/0x880
[ 0.983888] genl_rcv+0x1f/0x30
[ 0.984168] netlink_unicast+0x3f6/0x610
[ 0.984443] ? netlink_detachskb+0x60/0x60
[ 0.984729] ? find_held_lock+0x2c/0x110
[ 0.984976] netlink_sendmsg+0x700/0xb80
[ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60
[ 0.985533] __sys_sendto+0x1dd/0x2c0
[ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0
[ 0.986039] ? sockfd_lookup_light+0x17/0x160
[ 0.986397] ? __sys_recvmsg+0x8c/0xf0
[ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0
[ 0.987018] __x64_sys_sendto+0xd8/0x1b0
[ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0
[ 0.987666] do_syscall_64+0x90/0xd9a
[ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 0.988223] RIP: 0033:0x7fe77c12003e
[ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4
[ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e
[ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004
[ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c
[ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c
[ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/netlink.c | 6 ++----
drivers/net/wireguard/noise.c | 10 +++++++---
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index 0739a2cd1920..45a631e79d7a 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
private_key);
list_for_each_entry_safe(peer, temp, &wg->peer_list,
peer_list) {
- if (wg_noise_precompute_static_static(peer))
- wg_noise_expire_current_peer_keypairs(peer);
- else
- wg_peer_remove(peer);
+ BUG_ON(!wg_noise_precompute_static_static(peer));
+ wg_noise_expire_current_peer_keypairs(peer);
}
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
up_write(&wg->static_identity.lock);
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index d71c8db68a8c..919d9d866446 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -46,17 +46,21 @@ void __init wg_noise_init(void)
/* Must hold peer->handshake.static_identity->lock */
bool wg_noise_precompute_static_static(struct wg_peer *peer)
{
- bool ret = true;
+ bool ret;
down_write(&peer->handshake.lock);
- if (peer->handshake.static_identity->has_identity)
+ if (peer->handshake.static_identity->has_identity) {
ret = curve25519(
peer->handshake.precomputed_static_static,
peer->handshake.static_identity->static_private,
peer->handshake.remote_static);
- else
+ } else {
+ u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
+
+ ret = curve25519(empty, empty, peer->handshake.remote_static);
memset(peer->handshake.precomputed_static_static, 0,
NOISE_PUBLIC_KEY_LEN);
+ }
up_write(&peer->handshake.lock);
return ret;
}
--
2.18.2
From dcfbac9ca10b50be17c38740df7f6c81c4840270 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 4 Feb 2020 22:17:27 +0100
Subject: [PATCH 072/100] wireguard: selftests: ensure non-addition of peers
with failed precomputation
commit f9398acba6a4ae9cb98bfe4d56414d376eff8d57 upstream.
Ensure that peers with low order points are ignored, both in the case
where we already have a device private key and in the case where we do
not. This adds points that naturally give a zero output.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/netns.sh | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index d5c85c7494f2..b03647d1bbf6 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -516,6 +516,12 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.
n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
+n0 wg set wg0 peer "$pub2" remove
+low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
+n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
+[[ -z $(n0 wg show wg0 peers) ]]
ip0 link del wg0
declare -A objects
--
2.18.2
From 428b6098ac8141b68397f7aaa85eb0f615ff237b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 4 Feb 2020 22:17:29 +0100
Subject: [PATCH 073/100] wireguard: selftests: tie socket waiting to target
pid
commit 88f404a9b1d75388225b1c67b6dd327cb2182777 upstream.
Without this, we wind up proceeding too early sometimes when the
previous process has just used the same listening port. So, we tie the
listening socket query to the specific pid we're interested in.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/netns.sh | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index b03647d1bbf6..f5ab1cda8bb5 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -38,9 +38,8 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
sleep() { read -t "$1" -N 1 || true; }
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
-waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
-waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
cleanup() {
@@ -119,22 +118,22 @@ tests() {
# TCP over IPv4
n2 iperf3 -s -1 -B 192.168.241.2 &
- waitiperf $netns2
+ waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -c 192.168.241.2
# TCP over IPv6
n1 iperf3 -s -1 -B fd00::1 &
- waitiperf $netns1
+ waitiperf $netns1 $!
n2 iperf3 -Z -t 3 -c fd00::1
# UDP over IPv4
n1 iperf3 -s -1 -B 192.168.241.1 &
- waitiperf $netns1
+ waitiperf $netns1 $!
n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
# UDP over IPv6
n2 iperf3 -s -1 -B fd00::2 &
- waitiperf $netns2
+ waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
}
@@ -207,7 +206,7 @@ n1 ping -W 1 -c 1 192.168.241.2
n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
exec 4< <(n1 ncat -l -u -p 1111)
ncat_pid=$!
-waitncatudp $netns1
+waitncatudp $netns1 $ncat_pid
n2 ncat -u 192.168.241.1 1111 <<<"X"
read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
kill $ncat_pid
@@ -216,7 +215,7 @@ n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
n2 wg set wg0 listen-port 9997
exec 4< <(n1 ncat -l -u -p 1111)
ncat_pid=$!
-waitncatudp $netns1
+waitncatudp $netns1 $ncat_pid
n2 ncat -u 192.168.241.1 1111 <<<"X"
! read -r -N 1 -t 1 out <&4 || false
kill $ncat_pid
--
2.18.2
From 02e7bebbc099bcec52dd85181009d61881627d77 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 11 Feb 2020 20:47:08 +0100
Subject: [PATCH 074/100] wireguard: device: use icmp_ndo_send helper
commit a12d7f3cbdc72c7625881c8dc2660fc2c979fdf2 upstream.
Because wireguard is calling icmp from network device context, it should
use the ndo helper so that the rate limiting applies correctly. This
commit adds a small test to the wireguard test suite to ensure that the
new functions continue doing the right thing in the context of
wireguard. It does this by setting up a condition that will definately
evoke an icmp error message from the driver, but along a nat'd path.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.c | 4 ++--
tools/testing/selftests/wireguard/netns.sh | 11 +++++++++++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 16b19824b9ad..43db442b1373 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -203,9 +203,9 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
err:
++dev->stats.tx_errors;
if (skb->protocol == htons(ETH_P_IP))
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
else if (skb->protocol == htons(ETH_P_IPV6))
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+ icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
kfree_skb(skb);
return ret;
}
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index f5ab1cda8bb5..138d46b3f330 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -24,6 +24,7 @@
set -e
exec 3>&1
+export LANG=C
export WG_HIDE_KEYS=never
netns0="wg-test-$$-0"
netns1="wg-test-$$-1"
@@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_prefixlength 0
n1 ping -W 1 -c 100 -f 192.168.99.7
n1 ping -W 1 -c 100 -f abab::1111
+# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route.
+n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2
+n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit.
+n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+ip0 -4 route add 192.168.241.1 via 10.0.0.100
+n2 wg set wg0 peer "$pub1" remove
+[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]]
+
n0 iptables -t nat -F
+n0 iptables -t filter -F
+n2 iptables -t nat -F
ip0 link del vethrc
ip0 link del vethrs
ip1 link del wg0
--
2.18.2
From 7750b809baf591ea2729e2d7e3c3c021e93ad8f0 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 14 Feb 2020 23:57:20 +0100
Subject: [PATCH 075/100] wireguard: selftests: reduce complexity and fix make
races
commit 04ddf1208f03e1dbc39a4619c40eba640051b950 upstream.
This gives us fewer dependencies and shortens build time, fixes up some
hash checking race conditions, and also fixes missing directory creation
that caused issues on massively parallel builds.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
.../testing/selftests/wireguard/qemu/Makefile | 38 +++++++------------
1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index f10aa3590adc..28d477683e8a 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -38,19 +38,17 @@ endef
define file_download =
$(DISTFILES_PATH)/$(1):
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
-$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
@@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
-$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
- flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- touch $@
-
-$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
- $(MAKE) -C $(LIBMNL_PATH)
- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
-
$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
-$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg
$(STRIP) -s $@
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
@@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
-$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $@
-$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $@
$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
mkdir -p $(BUILD_PATH)
@@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
touch $@
-$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include
$(MAKE) -C $(IPTABLES_PATH)
$(STRIP) -s $@
--
2.18.2
From 1296b940ccd927bbaa66765149ccb094c1b44fe2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 14 Feb 2020 23:57:21 +0100
Subject: [PATCH 076/100] wireguard: receive: reset last_under_load to zero
commit 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 upstream.
This is a small optimization that prevents more expensive comparisons
from happening when they are no longer necessary, by clearing the
last_under_load variable whenever we wind up in a state where we were
under load but we no longer are.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Suggested-by: Matt Dunwoodie <ncon@noconroy.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 9c6bab9c981f..4a153894cee2 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
under_load = skb_queue_len(&wg->incoming_handshakes) >=
MAX_QUEUED_INCOMING_HANDSHAKES / 8;
- if (under_load)
+ if (under_load) {
last_under_load = ktime_get_coarse_boottime_ns();
- else if (last_under_load)
+ } else if (last_under_load) {
under_load = !wg_birthdate_has_expired(last_under_load, 1);
+ if (!under_load)
+ last_under_load = 0;
+ }
mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
under_load);
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
--
2.18.2
From b2ce88b315ce1aeba00deecefe751925d8add692 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 14 Feb 2020 23:57:22 +0100
Subject: [PATCH 077/100] wireguard: send: account for mtu=0 devices
commit 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e upstream.
It turns out there's an easy way to get packets queued up while still
having an MTU of zero, and that's via persistent keep alive. This commit
makes sure that in whatever condition, we don't wind up dividing by
zero. Note that an MTU of zero for a wireguard interface is something
quasi-valid, so I don't think the correct fix is to limit it via
min_mtu. This can be reproduced easily with:
ip link add wg0 type wireguard
ip link add wg1 type wireguard
ip link set wg0 up mtu 0
ip link set wg1 up
wg set wg0 private-key <(wg genkey)
wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key)
wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1
However, while min_mtu=0 seems fine, it makes sense to restrict the
max_mtu. This commit also restricts the maximum MTU to the greatest
number for which rounding up to the padding multiple won't overflow a
signed integer. Packets this large were always rejected anyway
eventually, due to checks deeper in, but it seems more sound not to even
let the administrator configure something that won't work anyway.
We use this opportunity to clean up this function a bit so that it's
clear which paths we're expecting.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.c | 7 ++++---
drivers/net/wireguard/send.c | 16 +++++++++++-----
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 43db442b1373..cdc96968b0f4 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev)
enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
NETIF_F_SG | NETIF_F_GSO |
NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
dev->hard_header_len = 0;
@@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev)
dev->features |= WG_NETDEV_FEATURES;
dev->hw_features |= WG_NETDEV_FEATURES;
dev->hw_enc_features |= WG_NETDEV_FEATURES;
- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
- sizeof(struct udphdr) -
- max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+ dev->mtu = ETH_DATA_LEN - overhead;
+ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
SET_NETDEV_DEVTYPE(dev, &device_type);
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index c13260563446..7348c10cbae3 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer)
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
+ unsigned int padded_size, last_unit = skb->len;
+
+ if (unlikely(!PACKET_CB(skb)->mtu))
+ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
+
/* We do this modulo business with the MTU, just in case the networking
* layer gives us a packet that's bigger than the MTU. In that case, we
* wouldn't want the final subtraction to overflow in the case of the
- * padded_size being clamped.
+ * padded_size being clamped. Fortunately, that's very rarely the case,
+ * so we optimize for that not happening.
*/
- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
+ if (unlikely(last_unit > PACKET_CB(skb)->mtu))
+ last_unit %= PACKET_CB(skb)->mtu;
- if (padded_size > PACKET_CB(skb)->mtu)
- padded_size = PACKET_CB(skb)->mtu;
+ padded_size = min(PACKET_CB(skb)->mtu,
+ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
return padded_size - last_unit;
}
--
2.18.2
From 766b55937688166afcd08d168abbfa3cc675c3ef Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 14 Feb 2020 23:57:23 +0100
Subject: [PATCH 078/100] wireguard: socket: remove extra call to
synchronize_net
commit 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e upstream.
synchronize_net() is a wrapper around synchronize_rcu(), so there's no
point in having synchronize_net and synchronize_rcu back to back,
despite the documentation comment suggesting maybe it's somewhat useful,
"Wait for packets currently being received to be done." This commit
removes the extra call.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/socket.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 262f3b5c819d..b0d6541582d3 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
mutex_unlock(&wg->socket_update_lock);
synchronize_rcu();
- synchronize_net();
sock_free(old4);
sock_free(old6);
}
--
2.18.2
From 2f4ee5e5f2dad3d3ccfea4d16c2c54d9346f7cd1 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 18 Mar 2020 18:30:43 -0600
Subject: [PATCH 079/100] wireguard: selftests: remove duplicated include
<sys/types.h>
commit 166391159c5deb84795d2ff46e95f276177fa5fb upstream.
This commit removes a duplicated include.
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/qemu/init.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 90bc9813cadc..c9698120ac9d 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -13,7 +13,6 @@
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/mount.h>
-#include <sys/types.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/io.h>
--
2.18.2
From c7092cc3c5f766315eb5f1bd1717a2fe6348e977 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 18 Mar 2020 18:30:45 -0600
Subject: [PATCH 080/100] wireguard: queueing: account for skb->protocol==0
commit a5588604af448664e796daf3c1d5a4523c60667b upstream.
We carry out checks to the effect of:
if (skb->protocol != wg_examine_packet_protocol(skb))
goto err;
By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this
means that the check above still passes in the case where skb->protocol
is zero, which is possible to hit with AF_PACKET:
struct sockaddr_pkt saddr = { .spkt_device = "wg0" };
unsigned char buffer[5] = { 0 };
sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0),
buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
Additional checks mean that this isn't actually a problem in the code
base, but I could imagine it becoming a problem later if the function is
used more liberally.
I would prefer to fix this by having wg_examine_packet_protocol return a
32-bit ~0 value on failure, which will never match any value of
skb->protocol, which would simply change the generated code from a mov
to a movzx. However, sparse complains, and adding __force casts doesn't
seem like a good idea, so instead we just add a simple helper function
to check for the zero return value. Since wg_examine_packet_protocol
itself gets inlined, this winds up not adding an additional branch to
the generated code, since the 0 return value already happens in a
mergable branch.
Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.c | 2 +-
drivers/net/wireguard/queueing.h | 8 +++++++-
drivers/net/wireguard/receive.c | 4 ++--
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index cdc96968b0f4..3ac3f8570ca1 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
u32 mtu;
int ret;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+ if (unlikely(!wg_check_packet_protocol(skb))) {
ret = -EPROTONOSUPPORT;
net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
goto err;
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index e62c714a548e..3432232afe06 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -66,7 +66,7 @@ struct packet_cb {
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
{
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct iphdr)) <=
@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
return 0;
}
+static inline bool wg_check_packet_protocol(struct sk_buff *skb)
+{
+ __be16 real_protocol = wg_examine_packet_protocol(skb);
+ return real_protocol && skb->protocol == real_protocol;
+}
+
static inline void wg_reset_packet(struct sk_buff *skb)
{
skb_scrub_packet(skb, true);
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 4a153894cee2..243ed7172dd2 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
size_t data_offset, data_len, header_len;
struct udphdr *udp;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
+ if (unlikely(!wg_check_packet_protocol(skb) ||
skb_transport_header(skb) < skb->head ||
(skb_transport_header(skb) + sizeof(struct udphdr)) >
skb_tail_pointer(skb)))
@@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */
- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
+ skb->protocol = wg_examine_packet_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
--
2.18.2
From 9c31aa57f2861ba637fb5513444ad5b3139511c8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 18 Mar 2020 18:30:46 -0600
Subject: [PATCH 081/100] wireguard: receive: remove dead code from default
packet type case
commit 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 upstream.
The situation in which we wind up hitting the default case here
indicates a major bug in earlier parsing code. It is not a usual thing
that should ever happen, which means a "friendly" message for it doesn't
make sense. Rather, replace this with a WARN_ON, just like we do earlier
in the file for a similar situation, so that somebody sends us a bug
report and we can fix it.
Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 243ed7172dd2..da3b782ab7d3 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
wg_packet_consume_data(wg, skb);
break;
default:
- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
- wg->dev->name, skb);
+ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
goto err;
}
return;
--
2.18.2
From 72e3696fde1ec043a8e0bd2f193f9d56d3b46b6b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 18 Mar 2020 18:30:47 -0600
Subject: [PATCH 082/100] wireguard: noise: error out precomputed DH during
handshake rather than config
commit 11a7686aa99c7fe4b3f80f6dcccd54129817984d upstream.
We precompute the static-static ECDH during configuration time, in order
to save an expensive computation later when receiving network packets.
However, not all ECDH computations yield a contributory result. Prior,
we were just not letting those peers be added to the interface. However,
this creates a strange inconsistency, since it was still possible to add
other weird points, like a valid public key plus a low-order point, and,
like points that result in zeros, a handshake would not complete. In
order to make the behavior more uniform and less surprising, simply
allow all peers to be added. Then, we'll error out later when doing the
crypto if there's an issue. This also adds more separation between the
crypto layer and the configuration layer.
Discussed-with: Mathias Hall-Andersen <mathias@hall-andersen.dk>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/netlink.c | 8 +---
drivers/net/wireguard/noise.c | 55 ++++++++++++----------
drivers/net/wireguard/noise.h | 12 ++---
drivers/net/wireguard/peer.c | 7 +--
tools/testing/selftests/wireguard/netns.sh | 15 ++++--
5 files changed, 49 insertions(+), 48 deletions(-)
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index 45a631e79d7a..ab6cbe95a652 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -417,11 +417,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs)
peer = wg_peer_create(wg, public_key, preshared_key);
if (IS_ERR(peer)) {
- /* Similar to the above, if the key is invalid, we skip
- * it without fanfare, so that services don't need to
- * worry about doing key validation themselves.
- */
- ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
+ ret = PTR_ERR(peer);
peer = NULL;
goto out;
}
@@ -575,7 +571,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
private_key);
list_for_each_entry_safe(peer, temp, &wg->peer_list,
peer_list) {
- BUG_ON(!wg_noise_precompute_static_static(peer));
+ wg_noise_precompute_static_static(peer);
wg_noise_expire_current_peer_keypairs(peer);
}
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 919d9d866446..708dc61c974f 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -44,32 +44,23 @@ void __init wg_noise_init(void)
}
/* Must hold peer->handshake.static_identity->lock */
-bool wg_noise_precompute_static_static(struct wg_peer *peer)
+void wg_noise_precompute_static_static(struct wg_peer *peer)
{
- bool ret;
-
down_write(&peer->handshake.lock);
- if (peer->handshake.static_identity->has_identity) {
- ret = curve25519(
- peer->handshake.precomputed_static_static,
+ if (!peer->handshake.static_identity->has_identity ||
+ !curve25519(peer->handshake.precomputed_static_static,
peer->handshake.static_identity->static_private,
- peer->handshake.remote_static);
- } else {
- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
-
- ret = curve25519(empty, empty, peer->handshake.remote_static);
+ peer->handshake.remote_static))
memset(peer->handshake.precomputed_static_static, 0,
NOISE_PUBLIC_KEY_LEN);
- }
up_write(&peer->handshake.lock);
- return ret;
}
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer)
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
{
memset(handshake, 0, sizeof(*handshake));
init_rwsem(&handshake->lock);
@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake,
NOISE_SYMMETRIC_KEY_LEN);
handshake->static_identity = static_identity;
handshake->state = HANDSHAKE_ZEROED;
- return wg_noise_precompute_static_static(peer);
+ wg_noise_precompute_static_static(peer);
}
static void handshake_zero(struct noise_handshake *handshake)
@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
return true;
}
+static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
+{
+ static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
+ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
+ return false;
+ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ chaining_key);
+ return true;
+}
+
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
struct blake2s_state blake;
@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
/* ss */
- kdf(handshake->chaining_key, key, NULL,
- handshake->precomputed_static_static, NOISE_HASH_LEN,
- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- handshake->chaining_key);
+ if (!mix_precomputed_dh(handshake->chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
tai64n_now(timestamp);
@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
handshake = &peer->handshake;
/* ss */
- kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- chaining_key);
+ if (!mix_precomputed_dh(chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
if (!message_decrypt(t, src->encrypted_timestamp,
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
index 138a07bb817c..f532d59d3f19 100644
--- a/drivers/net/wireguard/noise.h
+++ b/drivers/net/wireguard/noise.h
@@ -94,11 +94,11 @@ struct noise_handshake {
struct wg_device;
void wg_noise_init(void);
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer);
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer);
void wg_noise_handshake_clear(struct noise_handshake *handshake);
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
{
@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
void wg_noise_set_static_identity_private_key(
struct noise_static_identity *static_identity,
const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
-bool wg_noise_precompute_static_static(struct wg_peer *peer);
+void wg_noise_precompute_static_static(struct wg_peer *peer);
bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index 071eedf33f5a..1d634bd3038f 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
return ERR_PTR(ret);
peer->device = wg;
- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
- public_key, preshared_key, peer)) {
- ret = -EKEYREJECTED;
- goto err_1;
- }
+ wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer);
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err_1;
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 138d46b3f330..936e1ca9410e 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
n0 wg set wg0 peer "$pub2" remove
-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
+ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
+done
+[[ -n $(n0 wg show wg0 peers) ]]
+exec 4< <(n0 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns0 $ncat_pid
+ip0 link set wg0 up
+! read -r -n 1 -t 2 <&4 || false
+kill $ncat_pid
ip0 link del wg0
declare -A objects
--
2.18.2
From d1ae8043584a1d3a3474c9e212463bc8876996e2 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 29 Apr 2020 14:59:20 -0600
Subject: [PATCH 083/100] wireguard: send: remove errant newline from
packet_encrypt_worker
commit d6833e42786e050e7522d6a91a9361e54085897d upstream.
This commit removes a useless newline at the end of a scope, which
doesn't add anything in the way of organization or readability.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/send.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 7348c10cbae3..3e030d614df5 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -304,7 +304,6 @@ void wg_packet_encrypt_worker(struct work_struct *work)
}
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
state);
-
}
}
--
2.18.2
From 1e92895e72c5c9354900200fbcfb6b2ab63d9908 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 29 Apr 2020 14:59:21 -0600
Subject: [PATCH 084/100] wireguard: queueing: cleanup ptr_ring in error path
of packet_queue_init
commit 130c58606171326c81841a49cc913cd354113dd9 upstream.
Prior, if the alloc_percpu of packet_percpu_multicore_worker_alloc
failed, the previously allocated ptr_ring wouldn't be freed. This commit
adds the missing call to ptr_ring_cleanup in the error case.
Reported-by: Sultan Alsawaf <sultan@kerneltoast.com>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/queueing.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
index 5c964fcb994e..71b8e80b58e1 100644
--- a/drivers/net/wireguard/queueing.c
+++ b/drivers/net/wireguard/queueing.c
@@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
if (multicore) {
queue->worker = wg_packet_percpu_multicore_worker_alloc(
function, queue);
- if (!queue->worker)
+ if (!queue->worker) {
+ ptr_ring_cleanup(&queue->ring, NULL);
return -ENOMEM;
+ }
} else {
INIT_WORK(&queue->work, function);
}
--
2.18.2
From 4c5e81cd3378da4d3045474fcc5514c4b2542e5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 29 Apr 2020 14:59:22 -0600
Subject: [PATCH 085/100] wireguard: receive: use tunnel helpers for
decapsulating ECN markings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit eebabcb26ea1e3295704477c6cd4e772c96a9559 upstream.
WireGuard currently only propagates ECN markings on tunnel decap according
to the old RFC3168 specification. However, the spec has since been updated
in RFC6040 to recommend slightly different decapsulation semantics. This
was implemented in the kernel as a set of common helpers for ECN
decapsulation, so let's just switch over WireGuard to using those, so it
can benefit from this enhancement and any future tweaks. We do not drop
packets with invalid ECN marking combinations, because WireGuard is
frequently used to work around broken ISPs, which could be doing that.
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Reported-by: Olivier Tilmans <olivier.tilmans@nokia-bell-labs.com>
Cc: Dave Taht <dave.taht@gmail.com>
Cc: Rodney W. Grimes <ietf@gndrsh.dnsmgr.net>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index da3b782ab7d3..267f202f1931 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -393,13 +393,11 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
goto dishonest_packet_size;
- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
- IP_ECN_set_ce(ip_hdr(skb));
+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
len = ntohs(ipv6_hdr(skb)->payload_len) +
sizeof(struct ipv6hdr);
- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
- IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
} else {
goto dishonest_packet_type;
}
--
2.18.2
From dea57992e63578e9e696da9ef0c99366f521e4a1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 6 May 2020 15:33:02 -0600
Subject: [PATCH 086/100] wireguard: selftests: use normal kernel stack size on
ppc64
commit a0fd7cc87a018df1a17f9d3f0bd994c1f22c6b34 upstream.
While at some point it might have made sense to be running these tests
on ppc64 with 4k stacks, the kernel hasn't actually used 4k stacks on
64-bit powerpc in a long time, and more interesting things that we test
don't really work when we deviate from the default (16k). So, we stop
pushing our luck in this commit, and return to the default instead of
the minimum.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index 990c510a9cfa..f52f1e2bc7f6 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
--
2.18.2
From 09699317fac248b183c945291dffa1a105a1836e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 6 May 2020 15:33:03 -0600
Subject: [PATCH 087/100] wireguard: socket: remove errant restriction on
looping to self
commit b673e24aad36981f327a6570412ffa7754de8911 upstream.
It's already possible to create two different interfaces and loop
packets between them. This has always been possible with tunnels in the
kernel, and isn't specific to wireguard. Therefore, the networking stack
already needs to deal with that. At the very least, the packet winds up
exceeding the MTU and is discarded at that point. So, since this is
already something that happens, there's no need to forbid the not very
exceptional case of routing a packet back to the same interface; this
loop is no different than others, and we shouldn't special case it, but
rather rely on generic handling of loops in general. This also makes it
easier to do interesting things with wireguard such as onion routing.
At the same time, we add a selftest for this, ensuring that both onion
routing works and infinite routing loops do not crash the kernel. We
also add a test case for wireguard interfaces nesting packets and
sending traffic between each other, as well as the loop in this case
too. We make sure to send some throughput-heavy traffic for this use
case, to stress out any possible recursion issues with the locks around
workqueues.
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/socket.c | 12 -----
tools/testing/selftests/wireguard/netns.sh | 54 ++++++++++++++++++++--
2 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index b0d6541582d3..f9018027fc13 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret);
goto err;
- } else if (unlikely(rt->dst.dev == skb->dev)) {
- ip_rt_put(rt);
- ret = -ELOOP;
- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
- wg->dev->name, &endpoint->addr);
- goto err;
}
if (cache)
dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
@@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret);
goto err;
- } else if (unlikely(dst->dev == skb->dev)) {
- dst_release(dst);
- ret = -ELOOP;
- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
- wg->dev->name, &endpoint->addr);
- goto err;
}
if (cache)
dst_cache_set_ip6(cache, dst, &fl.saddr);
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 936e1ca9410e..17a1f53ceba0 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -48,8 +48,11 @@ cleanup() {
exec 2>/dev/null
printf "$orig_message_cost" > /proc/sys/net/core/message_cost
ip0 link del dev wg0
+ ip0 link del dev wg1
ip1 link del dev wg0
+ ip1 link del dev wg1
ip2 link del dev wg0
+ ip2 link del dev wg1
local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
[[ -n $to_kill ]] && kill $to_kill
pp ip netns del $netns1
@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2
key1="$(pp wg genkey)"
key2="$(pp wg genkey)"
key3="$(pp wg genkey)"
+key4="$(pp wg genkey)"
pub1="$(pp wg pubkey <<<"$key1")"
pub2="$(pp wg pubkey <<<"$key2")"
pub3="$(pp wg pubkey <<<"$key3")"
+pub4="$(pp wg pubkey <<<"$key4")"
psk="$(pp wg genpsk)"
[[ -n $key1 && -n $key2 && -n $psk ]]
configure_peers() {
ip1 addr add 192.168.241.1/24 dev wg0
- ip1 addr add fd00::1/24 dev wg0
+ ip1 addr add fd00::1/112 dev wg0
ip2 addr add 192.168.241.2/24 dev wg0
- ip2 addr add fd00::2/24 dev wg0
+ ip2 addr add fd00::2/112 dev wg0
n1 wg set wg0 \
private-key <(echo "$key1") \
@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2
n1 wg set wg0 private-key <(echo "$key3")
n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
n1 ping -W 1 -c 1 192.168.241.2
+n2 wg set wg0 peer "$pub3" remove
+
+# Test that we can route wg through wg
+ip1 addr flush dev wg0
+ip2 addr flush dev wg0
+ip1 addr add fd00::5:1/112 dev wg0
+ip2 addr add fd00::5:2/112 dev wg0
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998
+ip1 link add wg1 type wireguard
+ip2 link add wg1 type wireguard
+ip1 addr add 192.168.241.1/24 dev wg1
+ip1 addr add fd00::1/112 dev wg1
+ip2 addr add 192.168.241.2/24 dev wg1
+ip2 addr add fd00::2/112 dev wg1
+ip1 link set mtu 1340 up dev wg1
+ip2 link set mtu 1340 up dev wg1
+n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5
+n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5
+tests
+# Try to set up a routing loop between the two namespaces
+ip1 link set netns $netns0 dev wg1
+ip0 addr add 192.168.241.1/24 dev wg1
+ip0 link set up dev wg1
+n0 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ip2 link del wg0
+ip2 link del wg1
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
+ip0 link del wg1
ip1 link del wg0
-ip2 link del wg0
# Test using NAT. We now change the topology to this:
# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
@@ -282,6 +316,20 @@ pp sleep 3
n2 ping -W 1 -c 1 192.168.241.1
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+# Test that onion routing works, even when it loops
+n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
+ip1 addr add 192.168.242.1/24 dev wg0
+ip2 link add wg1 type wireguard
+ip2 addr add 192.168.242.2/24 dev wg1
+n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32
+ip2 link set wg1 up
+n1 ping -W 1 -c 1 192.168.242.2
+ip2 link del wg1
+n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5
+! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel
+n1 wg set wg0 peer "$pub3" remove
+ip1 addr del 192.168.242.1/24 dev wg0
+
# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
ip1 -6 addr add fc00::9/96 dev vethc
ip1 -6 route add default via fc00::1
--
2.18.2
From 16a4e963577da1e2b33b4f99fe84e99ae73ed4e1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 6 May 2020 15:33:04 -0600
Subject: [PATCH 088/100] wireguard: send/receive: cond_resched() when
processing worker ringbuffers
commit 4005f5c3c9d006157ba716594e0d70c88a235c5e upstream.
Users with pathological hardware reported CPU stalls on CONFIG_
PREEMPT_VOLUNTARY=y, because the ringbuffers would stay full, meaning
these workers would never terminate. That turned out not to be okay on
systems without forced preemption, which Sultan observed. This commit
adds a cond_resched() to the bottom of each loop iteration, so that
these workers don't hog the core. Note that we don't need this on the
napi poll worker, since that terminates after its budget is expended.
Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com>
Reported-by: Wang Jian <larkwang@gmail.com>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 2 ++
drivers/net/wireguard/send.c | 4 ++++
2 files changed, 6 insertions(+)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 267f202f1931..2566e13a292d 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -516,6 +516,8 @@ void wg_packet_decrypt_worker(struct work_struct *work)
&PACKET_CB(skb)->keypair->receiving)) ?
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
wg_queue_enqueue_per_peer_napi(skb, state);
+ if (need_resched())
+ cond_resched();
}
}
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 3e030d614df5..dc3079e17c7f 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -281,6 +281,8 @@ void wg_packet_tx_worker(struct work_struct *work)
wg_noise_keypair_put(keypair, false);
wg_peer_put(peer);
+ if (need_resched())
+ cond_resched();
}
}
@@ -304,6 +306,8 @@ void wg_packet_encrypt_worker(struct work_struct *work)
}
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
state);
+ if (need_resched())
+ cond_resched();
}
}
--
2.18.2
From a2045eeb3476532960810da93c17f282e5360573 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 6 May 2020 15:33:05 -0600
Subject: [PATCH 089/100] wireguard: selftests: initalize ipv6 members to NULL
to squelch clang warning
commit 4fed818ef54b08d4b29200e416cce65546ad5312 upstream.
Without setting these to NULL, clang complains in certain
configurations that have CONFIG_IPV6=n:
In file included from drivers/net/wireguard/ratelimiter.c:223:
drivers/net/wireguard/selftest/ratelimiter.c:173:34: error: variable 'skb6' is uninitialized when used here [-Werror,-Wuninitialized]
ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
^~~~
drivers/net/wireguard/selftest/ratelimiter.c:123:29: note: initialize the variable 'skb6' to silence this warning
struct sk_buff *skb4, *skb6;
^
= NULL
drivers/net/wireguard/selftest/ratelimiter.c:173:40: error: variable 'hdr6' is uninitialized when used here [-Werror,-Wuninitialized]
ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
^~~~
drivers/net/wireguard/selftest/ratelimiter.c:125:22: note: initialize the variable 'hdr6' to silence this warning
struct ipv6hdr *hdr6;
^
We silence this warning by setting the variables to NULL as the warning
suggests.
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/selftest/ratelimiter.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
index bcd6462e4540..007cd4457c5f 100644
--- a/drivers/net/wireguard/selftest/ratelimiter.c
+++ b/drivers/net/wireguard/selftest/ratelimiter.c
@@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void)
enum { TRIALS_BEFORE_GIVING_UP = 5000 };
bool success = false;
int test = 0, trials;
- struct sk_buff *skb4, *skb6;
+ struct sk_buff *skb4, *skb6 = NULL;
struct iphdr *hdr4;
- struct ipv6hdr *hdr6;
+ struct ipv6hdr *hdr6 = NULL;
if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
return true;
--
2.18.2
From fe7815fa6ff6e3deb4a1b1e74e16759c0616b427 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 6 May 2020 15:33:06 -0600
Subject: [PATCH 090/100] wireguard: send/receive: use explicit unlikely branch
instead of implicit coalescing
commit 243f2148937adc72bcaaa590d482d599c936efde upstream.
It's very unlikely that send will become true. It's nearly always false
between 0 and 120 seconds of a session, and in most cases becomes true
only between 120 and 121 seconds before becoming false again. So,
unlikely(send) is clearly the right option here.
What happened before was that we had this complex boolean expression
with multiple likely and unlikely clauses nested. Since this is
evaluated left-to-right anyway, the whole thing got converted to
unlikely. So, we can clean this up to better represent what's going on.
The generated code is the same.
Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 13 ++++++-------
drivers/net/wireguard/send.c | 15 ++++++---------
2 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 2566e13a292d..3bb5b9ae7cd1 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -226,21 +226,20 @@ void wg_packet_handshake_receive_worker(struct work_struct *work)
static void keep_key_fresh(struct wg_peer *peer)
{
struct noise_keypair *keypair;
- bool send = false;
+ bool send;
if (peer->sent_lastminute_handshake)
return;
rcu_read_lock_bh();
keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
- keypair->i_am_the_initiator &&
- unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
- REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
- send = true;
+ send = keypair && READ_ONCE(keypair->sending.is_valid) &&
+ keypair->i_am_the_initiator &&
+ wg_birthdate_has_expired(keypair->sending.birthdate,
+ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT);
rcu_read_unlock_bh();
- if (send) {
+ if (unlikely(send)) {
peer->sent_lastminute_handshake = true;
wg_packet_send_queued_handshake_initiation(peer, false);
}
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index dc3079e17c7f..6687db699803 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -124,20 +124,17 @@ void wg_packet_send_handshake_cookie(struct wg_device *wg,
static void keep_key_fresh(struct wg_peer *peer)
{
struct noise_keypair *keypair;
- bool send = false;
+ bool send;
rcu_read_lock_bh();
keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
- (unlikely(atomic64_read(&keypair->sending.counter.counter) >
- REKEY_AFTER_MESSAGES) ||
- (keypair->i_am_the_initiator &&
- unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
- REKEY_AFTER_TIME)))))
- send = true;
+ send = keypair && READ_ONCE(keypair->sending.is_valid) &&
+ (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES ||
+ (keypair->i_am_the_initiator &&
+ wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
rcu_read_unlock_bh();
- if (send)
+ if (unlikely(send))
wg_packet_send_queued_handshake_initiation(peer, false);
}
--
2.18.2
From 9cfdaa1253b4ccb2b8a0fe2a2cabad4ae93ad0d0 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 19 May 2020 22:49:27 -0600
Subject: [PATCH 091/100] wireguard: selftests: use newer iproute2 for gcc-10
commit ee3c1aa3f34b7842c1557cfe5d8c3f7b8c692de8 upstream.
gcc-10 switched to defaulting to -fno-common, which broke iproute2-5.4.
This was fixed in iproute-5.6, so switch to that. Because we're after a
stable testing surface, we generally don't like to bump these
unnecessarily, but in this case, being able to actually build is a basic
necessity.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
tools/testing/selftests/wireguard/qemu/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 28d477683e8a..2dab4f57516d 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -44,7 +44,7 @@ endef
$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
--
2.18.2
From 6c01e455945ce254bab6ea2a62ccfd0881e2ea27 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 19 May 2020 22:49:28 -0600
Subject: [PATCH 092/100] wireguard: noise: read preshared key while taking
lock
commit bc67d371256f5c47d824e2eec51e46c8d62d022e upstream.
Prior we read the preshared key after dropping the handshake lock, which
isn't an actual crypto issue if it races, but it's still not quite
correct. So copy that part of the state into a temporary like we do with
the rest of the handshake state variables. Then we can release the lock,
operate on the temporary, and zero it out at the end of the function. In
performance tests, the impact of this was entirely unnoticable, probably
because those bytes are coming from the same cacheline as other things
that are being copied out in the same manner.
Reported-by: Matt Dunwoodie <ncon@noconroy.net>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/noise.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 708dc61c974f..07eb438a6dee 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -715,6 +715,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
u8 e[NOISE_PUBLIC_KEY_LEN];
u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
u8 static_private[NOISE_PUBLIC_KEY_LEN];
+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
down_read(&wg->static_identity.lock);
@@ -733,6 +734,8 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
memcpy(ephemeral_private, handshake->ephemeral_private,
NOISE_PUBLIC_KEY_LEN);
+ memcpy(preshared_key, handshake->preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
up_read(&handshake->lock);
if (state != HANDSHAKE_CREATED_INITIATION)
@@ -750,7 +753,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
goto fail;
/* psk */
- mix_psk(chaining_key, hash, key, handshake->preshared_key);
+ mix_psk(chaining_key, hash, key, preshared_key);
/* {} */
if (!message_decrypt(NULL, src->encrypted_nothing,
@@ -783,6 +786,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
memzero_explicit(chaining_key, NOISE_HASH_LEN);
memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
+ memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN);
up_read(&wg->static_identity.lock);
return ret_peer;
}
--
2.18.2
From 1d53452dea64c2136269ed52b70a21ad6fe351c9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 19 May 2020 22:49:29 -0600
Subject: [PATCH 093/100] wireguard: queueing: preserve flow hash across packet
scrubbing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit c78a0b4a78839d572d8a80f6a62221c0d7843135 upstream.
It's important that we clear most header fields during encapsulation and
decapsulation, because the packet is substantially changed, and we don't
want any info leak or logic bug due to an accidental correlation. But,
for encapsulation, it's wrong to clear skb->hash, since it's used by
fq_codel and flow dissection in general. Without it, classification does
not proceed as usual. This change might make it easier to estimate the
number of innerflows by examining clustering of out of order packets,
but this shouldn't open up anything that can't already be inferred
otherwise (e.g. syn packet size inference), and fq_codel can be disabled
anyway.
Furthermore, it might be the case that the hash isn't used or queried at
all until after wireguard transmits the encrypted UDP packet, which
means skb->hash might still be zero at this point, and thus no hash
taken over the inner packet data. In order to address this situation, we
force a calculation of skb->hash before encrypting packet data.
Of course this means that fq_codel might transmit packets slightly more
out of order than usual. Toke did some testing on beefy machines with
high quantities of parallel flows and found that increasing the
reply-attack counter to 8192 takes care of the most pathological cases
pretty well.
Reported-by: Dave Taht <dave.taht@gmail.com>
Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@toke.dk>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/messages.h | 2 +-
drivers/net/wireguard/queueing.h | 10 +++++++++-
drivers/net/wireguard/receive.c | 2 +-
drivers/net/wireguard/send.c | 7 ++++++-
4 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
index b8a7b9ce32ba..208da72673fc 100644
--- a/drivers/net/wireguard/messages.h
+++ b/drivers/net/wireguard/messages.h
@@ -32,7 +32,7 @@ enum cookie_values {
};
enum counter_values {
- COUNTER_BITS_TOTAL = 2048,
+ COUNTER_BITS_TOTAL = 8192,
COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
};
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 3432232afe06..c58df439dbbe 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -87,12 +87,20 @@ static inline bool wg_check_packet_protocol(struct sk_buff *skb)
return real_protocol && skb->protocol == real_protocol;
}
-static inline void wg_reset_packet(struct sk_buff *skb)
+static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
{
+ u8 l4_hash = skb->l4_hash;
+ u8 sw_hash = skb->sw_hash;
+ u32 hash = skb->hash;
skb_scrub_packet(skb, true);
memset(&skb->headers_start, 0,
offsetof(struct sk_buff, headers_end) -
offsetof(struct sk_buff, headers_start));
+ if (encapsulating) {
+ skb->l4_hash = l4_hash;
+ skb->sw_hash = sw_hash;
+ skb->hash = hash;
+ }
skb->queue_mapping = 0;
skb->nohdr = 0;
skb->peeked = 0;
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 3bb5b9ae7cd1..d0eebd90c9d5 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -484,7 +484,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
goto next;
- wg_reset_packet(skb);
+ wg_reset_packet(skb, false);
wg_packet_consume_data_done(peer, skb, &endpoint);
free = false;
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 6687db699803..2f5119ff93d8 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -167,6 +167,11 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
struct sk_buff *trailer;
int num_frags;
+ /* Force hash calculation before encryption so that flow analysis is
+ * consistent over the inner packet.
+ */
+ skb_get_hash(skb);
+
/* Calculate lengths. */
padding_len = calculate_skb_padding(skb);
trailer_len = padding_len + noise_encrypted_len(0);
@@ -295,7 +300,7 @@ void wg_packet_encrypt_worker(struct work_struct *work)
skb_list_walk_safe(first, skb, next) {
if (likely(encrypt_packet(skb,
PACKET_CB(first)->keypair))) {
- wg_reset_packet(skb);
+ wg_reset_packet(skb, true);
} else {
state = PACKET_STATE_DEAD;
break;
--
2.18.2
From 0271d514595e74b57a787e6eff78edbfb037037d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 19 May 2020 22:49:30 -0600
Subject: [PATCH 094/100] wireguard: noise: separate receive counter from send
counter
commit a9e90d9931f3a474f04bab782ccd9d77904941e9 upstream.
In "wireguard: queueing: preserve flow hash across packet scrubbing", we
were required to slightly increase the size of the receive replay
counter to something still fairly small, but an increase nonetheless.
It turns out that we can recoup some of the additional memory overhead
by splitting up the prior union type into two distinct types. Before, we
used the same "noise_counter" union for both sending and receiving, with
sending just using a simple atomic64_t, while receiving used the full
replay counter checker. This meant that most of the memory being
allocated for the sending counter was being wasted. Since the old
"noise_counter" type increased in size in the prior commit, now is a
good time to split up that union type into a distinct "noise_replay_
counter" for receiving and a boring atomic64_t for sending, each using
neither more nor less memory than required.
Also, since sometimes the replay counter is accessed without
necessitating additional accesses to the bitmap, we can reduce cache
misses by hoisting the always-necessary lock above the bitmap in the
struct layout. We also change a "noise_replay_counter" stack allocation
to kmalloc in a -DDEBUG selftest so that KASAN doesn't trigger a stack
frame warning.
All and all, removing a bit of abstraction in this commit makes the code
simpler and smaller, in addition to the motivating memory usage
recuperation. For example, passing around raw "noise_symmetric_key"
structs is something that really only makes sense within noise.c, in the
one place where the sending and receiving keys can safely be thought of
as the same type of object; subsequent to that, it's important that we
uniformly access these through keypair->{sending,receiving}, where their
distinct roles are always made explicit. So this patch allows us to draw
that distinction clearly as well.
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/noise.c | 16 +++------
drivers/net/wireguard/noise.h | 14 ++++----
drivers/net/wireguard/receive.c | 42 ++++++++++++------------
drivers/net/wireguard/selftest/counter.c | 17 +++++++---
drivers/net/wireguard/send.c | 12 +++----
5 files changed, 48 insertions(+), 53 deletions(-)
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 07eb438a6dee..626433690abb 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -104,6 +104,7 @@ static struct noise_keypair *keypair_create(struct wg_peer *peer)
if (unlikely(!keypair))
return NULL;
+ spin_lock_init(&keypair->receiving_counter.lock);
keypair->internal_id = atomic64_inc_return(&keypair_counter);
keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
keypair->entry.peer = peer;
@@ -358,25 +359,16 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
}
-static void symmetric_key_init(struct noise_symmetric_key *key)
-{
- spin_lock_init(&key->counter.receive.lock);
- atomic64_set(&key->counter.counter, 0);
- memset(key->counter.receive.backtrack, 0,
- sizeof(key->counter.receive.backtrack));
- key->birthdate = ktime_get_coarse_boottime_ns();
- key->is_valid = true;
-}
-
static void derive_keys(struct noise_symmetric_key *first_dst,
struct noise_symmetric_key *second_dst,
const u8 chaining_key[NOISE_HASH_LEN])
{
+ u64 birthdate = ktime_get_coarse_boottime_ns();
kdf(first_dst->key, second_dst->key, NULL, NULL,
NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
chaining_key);
- symmetric_key_init(first_dst);
- symmetric_key_init(second_dst);
+ first_dst->birthdate = second_dst->birthdate = birthdate;
+ first_dst->is_valid = second_dst->is_valid = true;
}
static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
index f532d59d3f19..c527253dba80 100644
--- a/drivers/net/wireguard/noise.h
+++ b/drivers/net/wireguard/noise.h
@@ -15,18 +15,14 @@
#include <linux/mutex.h>
#include <linux/kref.h>
-union noise_counter {
- struct {
- u64 counter;
- unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
- spinlock_t lock;
- } receive;
- atomic64_t counter;
+struct noise_replay_counter {
+ u64 counter;
+ spinlock_t lock;
+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
};
struct noise_symmetric_key {
u8 key[NOISE_SYMMETRIC_KEY_LEN];
- union noise_counter counter;
u64 birthdate;
bool is_valid;
};
@@ -34,7 +30,9 @@ struct noise_symmetric_key {
struct noise_keypair {
struct index_hashtable_entry entry;
struct noise_symmetric_key sending;
+ atomic64_t sending_counter;
struct noise_symmetric_key receiving;
+ struct noise_replay_counter receiving_counter;
__le32 remote_index;
bool i_am_the_initiator;
struct kref refcount;
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index d0eebd90c9d5..91438144e4f7 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -245,20 +245,20 @@ static void keep_key_fresh(struct wg_peer *peer)
}
}
-static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
+static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
{
struct scatterlist sg[MAX_SKB_FRAGS + 8];
struct sk_buff *trailer;
unsigned int offset;
int num_frags;
- if (unlikely(!key))
+ if (unlikely(!keypair))
return false;
- if (unlikely(!READ_ONCE(key->is_valid) ||
- wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
- key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
- WRITE_ONCE(key->is_valid, false);
+ if (unlikely(!READ_ONCE(keypair->receiving.is_valid) ||
+ wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) ||
+ keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) {
+ WRITE_ONCE(keypair->receiving.is_valid, false);
return false;
}
@@ -283,7 +283,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
PACKET_CB(skb)->nonce,
- key->key))
+ keypair->receiving.key))
return false;
/* Another ugly situation of pushing and pulling the header so as to
@@ -298,41 +298,41 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
}
/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
-static bool counter_validate(union noise_counter *counter, u64 their_counter)
+static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter)
{
unsigned long index, index_current, top, i;
bool ret = false;
- spin_lock_bh(&counter->receive.lock);
+ spin_lock_bh(&counter->lock);
- if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
+ if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 ||
their_counter >= REJECT_AFTER_MESSAGES))
goto out;
++their_counter;
if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
- counter->receive.counter))
+ counter->counter))
goto out;
index = their_counter >> ilog2(BITS_PER_LONG);
- if (likely(their_counter > counter->receive.counter)) {
- index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
+ if (likely(their_counter > counter->counter)) {
+ index_current = counter->counter >> ilog2(BITS_PER_LONG);
top = min_t(unsigned long, index - index_current,
COUNTER_BITS_TOTAL / BITS_PER_LONG);
for (i = 1; i <= top; ++i)
- counter->receive.backtrack[(i + index_current) &
+ counter->backtrack[(i + index_current) &
((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
- counter->receive.counter = their_counter;
+ counter->counter = their_counter;
}
index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
- &counter->receive.backtrack[index]);
+ &counter->backtrack[index]);
out:
- spin_unlock_bh(&counter->receive.lock);
+ spin_unlock_bh(&counter->lock);
return ret;
}
@@ -472,12 +472,12 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
if (unlikely(state != PACKET_STATE_CRYPTED))
goto next;
- if (unlikely(!counter_validate(&keypair->receiving.counter,
+ if (unlikely(!counter_validate(&keypair->receiving_counter,
PACKET_CB(skb)->nonce))) {
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
peer->device->dev->name,
PACKET_CB(skb)->nonce,
- keypair->receiving.counter.receive.counter);
+ keypair->receiving_counter.counter);
goto next;
}
@@ -511,8 +511,8 @@ void wg_packet_decrypt_worker(struct work_struct *work)
struct sk_buff *skb;
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
- enum packet_state state = likely(decrypt_packet(skb,
- &PACKET_CB(skb)->keypair->receiving)) ?
+ enum packet_state state =
+ likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ?
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
wg_queue_enqueue_per_peer_napi(skb, state);
if (need_resched())
diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c
index f4fbb9072ed7..ec3c156bf91b 100644
--- a/drivers/net/wireguard/selftest/counter.c
+++ b/drivers/net/wireguard/selftest/counter.c
@@ -6,18 +6,24 @@
#ifdef DEBUG
bool __init wg_packet_counter_selftest(void)
{
+ struct noise_replay_counter *counter;
unsigned int test_num = 0, i;
- union noise_counter counter;
bool success = true;
-#define T_INIT do { \
- memset(&counter, 0, sizeof(union noise_counter)); \
- spin_lock_init(&counter.receive.lock); \
+ counter = kmalloc(sizeof(*counter), GFP_KERNEL);
+ if (unlikely(!counter)) {
+ pr_err("nonce counter self-test malloc: FAIL\n");
+ return false;
+ }
+
+#define T_INIT do { \
+ memset(counter, 0, sizeof(*counter)); \
+ spin_lock_init(&counter->lock); \
} while (0)
#define T_LIM (COUNTER_WINDOW_SIZE + 1)
#define T(n, v) do { \
++test_num; \
- if (counter_validate(&counter, n) != (v)) { \
+ if (counter_validate(counter, n) != (v)) { \
pr_err("nonce counter self-test %u: FAIL\n", \
test_num); \
success = false; \
@@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(void)
if (success)
pr_info("nonce counter self-tests: pass\n");
+ kfree(counter);
return success;
}
#endif
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 2f5119ff93d8..f74b9341ab0f 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_peer *peer)
rcu_read_lock_bh();
keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
send = keypair && READ_ONCE(keypair->sending.is_valid) &&
- (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES ||
+ (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
(keypair->i_am_the_initiator &&
wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
rcu_read_unlock_bh();
@@ -349,7 +349,6 @@ void wg_packet_purge_staged_packets(struct wg_peer *peer)
void wg_packet_send_staged_packets(struct wg_peer *peer)
{
- struct noise_symmetric_key *key;
struct noise_keypair *keypair;
struct sk_buff_head packets;
struct sk_buff *skb;
@@ -369,10 +368,9 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
rcu_read_unlock_bh();
if (unlikely(!keypair))
goto out_nokey;
- key = &keypair->sending;
- if (unlikely(!READ_ONCE(key->is_valid)))
+ if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
goto out_nokey;
- if (unlikely(wg_birthdate_has_expired(key->birthdate,
+ if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
REJECT_AFTER_TIME)))
goto out_invalid;
@@ -387,7 +385,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
*/
PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
PACKET_CB(skb)->nonce =
- atomic64_inc_return(&key->counter.counter) - 1;
+ atomic64_inc_return(&keypair->sending_counter) - 1;
if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
goto out_invalid;
}
@@ -399,7 +397,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
return;
out_invalid:
- WRITE_ONCE(key->is_valid, false);
+ WRITE_ONCE(keypair->sending.is_valid, false);
out_nokey:
wg_noise_keypair_put(keypair, false);
--
2.18.2
From cab5c845e26d1d60febea5920bd88bda8f2a7ed4 Mon Sep 17 00:00:00 2001
From: Frank Werner-Krippendorf <mail@hb9fxq.ch>
Date: Tue, 23 Jun 2020 03:59:44 -0600
Subject: [PATCH 095/100] wireguard: noise: do not assign initiation time in if
condition
commit 558b353c9c2a717509f291c066c6bd8f5f5e21be upstream.
Fixes an error condition reported by checkpatch.pl which caused by
assigning a variable in an if condition in wg_noise_handshake_consume_
initiation().
Signed-off-by: Frank Werner-Krippendorf <mail@hb9fxq.ch>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/noise.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 626433690abb..201a22681945 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -617,8 +617,8 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
memcpy(handshake->hash, hash, NOISE_HASH_LEN);
memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
handshake->remote_index = src->sender_index;
- if ((s64)(handshake->last_initiation_consumption -
- (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
+ initiation_consumption = ktime_get_coarse_boottime_ns();
+ if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0)
handshake->last_initiation_consumption = initiation_consumption;
handshake->state = HANDSHAKE_CONSUMED_INITIATION;
up_write(&handshake->lock);
--
2.18.2
From 86f957cb5a047391d80b789f971a8f1ebd70b216 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 23 Jun 2020 03:59:45 -0600
Subject: [PATCH 096/100] wireguard: device: avoid circular netns references
commit 900575aa33a3eaaef802b31de187a85c4a4b4bd0 upstream.
Before, we took a reference to the creating netns if the new netns was
different. This caused issues with circular references, with two
wireguard interfaces swapping namespaces. The solution is to rather not
take any extra references at all, but instead simply invalidate the
creating netns pointer when that netns is deleted.
In order to prevent this from happening again, this commit improves the
rough object leak tracking by allowing it to account for created and
destroyed interfaces, aside from just peers and keys. That then makes it
possible to check for the object leak when having two interfaces take a
reference to each others' namespaces.
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.c | 58 ++++++++++------------
drivers/net/wireguard/device.h | 3 +-
drivers/net/wireguard/netlink.c | 14 ++++--
drivers/net/wireguard/socket.c | 25 +++++++---
tools/testing/selftests/wireguard/netns.sh | 13 ++++-
5 files changed, 67 insertions(+), 46 deletions(-)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 3ac3f8570ca1..a8f151b1b5fa 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -45,17 +45,18 @@ static int wg_open(struct net_device *dev)
if (dev_v6)
dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
+ mutex_lock(&wg->device_update_lock);
ret = wg_socket_init(wg, wg->incoming_port);
if (ret < 0)
- return ret;
- mutex_lock(&wg->device_update_lock);
+ goto out;
list_for_each_entry(peer, &wg->peer_list, peer_list) {
wg_packet_send_staged_packets(peer);
if (peer->persistent_keepalive_interval)
wg_packet_send_keepalive(peer);
}
+out:
mutex_unlock(&wg->device_update_lock);
- return 0;
+ return ret;
}
#ifdef CONFIG_PM_SLEEP
@@ -225,6 +226,7 @@ static void wg_destruct(struct net_device *dev)
list_del(&wg->device_list);
rtnl_unlock();
mutex_lock(&wg->device_update_lock);
+ rcu_assign_pointer(wg->creating_net, NULL);
wg->incoming_port = 0;
wg_socket_reinit(wg, NULL, NULL);
/* The final references are cleared in the below calls to destroy_workqueue. */
@@ -240,13 +242,11 @@ static void wg_destruct(struct net_device *dev)
skb_queue_purge(&wg->incoming_handshakes);
free_percpu(dev->tstats);
free_percpu(wg->incoming_handshakes_worker);
- if (wg->have_creating_net_ref)
- put_net(wg->creating_net);
kvfree(wg->index_hashtable);
kvfree(wg->peer_hashtable);
mutex_unlock(&wg->device_update_lock);
- pr_debug("%s: Interface deleted\n", dev->name);
+ pr_debug("%s: Interface destroyed\n", dev->name);
free_netdev(dev);
}
@@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
struct wg_device *wg = netdev_priv(dev);
int ret = -ENOMEM;
- wg->creating_net = src_net;
+ rcu_assign_pointer(wg->creating_net, src_net);
init_rwsem(&wg->static_identity.lock);
mutex_init(&wg->socket_update_lock);
mutex_init(&wg->device_update_lock);
@@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __read_mostly = {
.newlink = wg_newlink,
};
-static int wg_netdevice_notification(struct notifier_block *nb,
- unsigned long action, void *data)
+static void wg_netns_pre_exit(struct net *net)
{
- struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
- struct wg_device *wg = netdev_priv(dev);
-
- ASSERT_RTNL();
-
- if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
- return 0;
+ struct wg_device *wg;
- if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
- put_net(wg->creating_net);
- wg->have_creating_net_ref = false;
- } else if (dev_net(dev) != wg->creating_net &&
- !wg->have_creating_net_ref) {
- wg->have_creating_net_ref = true;
- get_net(wg->creating_net);
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+ if (rcu_access_pointer(wg->creating_net) == net) {
+ pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
+ netif_carrier_off(wg->dev);
+ mutex_lock(&wg->device_update_lock);
+ rcu_assign_pointer(wg->creating_net, NULL);
+ wg_socket_reinit(wg, NULL, NULL);
+ mutex_unlock(&wg->device_update_lock);
+ }
}
- return 0;
+ rtnl_unlock();
}
-static struct notifier_block netdevice_notifier = {
- .notifier_call = wg_netdevice_notification
+static struct pernet_operations pernet_ops = {
+ .pre_exit = wg_netns_pre_exit
};
int __init wg_device_init(void)
@@ -429,18 +425,18 @@ int __init wg_device_init(void)
return ret;
#endif
- ret = register_netdevice_notifier(&netdevice_notifier);
+ ret = register_pernet_device(&pernet_ops);
if (ret)
goto error_pm;
ret = rtnl_link_register(&link_ops);
if (ret)
- goto error_netdevice;
+ goto error_pernet;
return 0;
-error_netdevice:
- unregister_netdevice_notifier(&netdevice_notifier);
+error_pernet:
+ unregister_pernet_device(&pernet_ops);
error_pm:
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pm_notifier);
@@ -451,7 +447,7 @@ int __init wg_device_init(void)
void wg_device_uninit(void)
{
rtnl_link_unregister(&link_ops);
- unregister_netdevice_notifier(&netdevice_notifier);
+ unregister_pernet_device(&pernet_ops);
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pm_notifier);
#endif
diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
index b15a8be9d816..4d0144e16947 100644
--- a/drivers/net/wireguard/device.h
+++ b/drivers/net/wireguard/device.h
@@ -40,7 +40,7 @@ struct wg_device {
struct net_device *dev;
struct crypt_queue encrypt_queue, decrypt_queue;
struct sock __rcu *sock4, *sock6;
- struct net *creating_net;
+ struct net __rcu *creating_net;
struct noise_static_identity static_identity;
struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
struct workqueue_struct *packet_crypt_wq;
@@ -56,7 +56,6 @@ struct wg_device {
unsigned int num_peers, device_update_gen;
u32 fwmark;
u16 incoming_port;
- bool have_creating_net_ref;
};
int wg_device_init(void);
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index ab6cbe95a652..9756239416fd 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -517,11 +517,15 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
if (flags & ~__WGDEVICE_F_ALL)
goto out;
- ret = -EPERM;
- if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
- info->attrs[WGDEVICE_A_FWMARK]) &&
- !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
- goto out;
+ if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) {
+ struct net *net;
+ rcu_read_lock();
+ net = rcu_dereference(wg->creating_net);
+ ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0;
+ rcu_read_unlock();
+ if (ret)
+ goto out;
+ }
++wg->device_update_gen;
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index f9018027fc13..c33e2c81635f 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -347,6 +347,7 @@ static void set_sock_opts(struct socket *sock)
int wg_socket_init(struct wg_device *wg, u16 port)
{
+ struct net *net;
int ret;
struct udp_tunnel_sock_cfg cfg = {
.sk_user_data = wg,
@@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, u16 port)
};
#endif
+ rcu_read_lock();
+ net = rcu_dereference(wg->creating_net);
+ net = net ? maybe_get_net(net) : NULL;
+ rcu_read_unlock();
+ if (unlikely(!net))
+ return -ENONET;
+
#if IS_ENABLED(CONFIG_IPV6)
retry:
#endif
- ret = udp_sock_create(wg->creating_net, &port4, &new4);
+ ret = udp_sock_create(net, &port4, &new4);
if (ret < 0) {
pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
- return ret;
+ goto out;
}
set_sock_opts(new4);
- setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
+ setup_udp_tunnel_sock(net, new4, &cfg);
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6_mod_enabled()) {
port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
- ret = udp_sock_create(wg->creating_net, &port6, &new6);
+ ret = udp_sock_create(net, &port6, &new6);
if (ret < 0) {
udp_tunnel_sock_release(new4);
if (ret == -EADDRINUSE && !port && retries++ < 100)
goto retry;
pr_err("%s: Could not create IPv6 socket\n",
wg->dev->name);
- return ret;
+ goto out;
}
set_sock_opts(new6);
- setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
+ setup_udp_tunnel_sock(net, new6, &cfg);
}
#endif
wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
- return 0;
+ ret = 0;
+out:
+ put_net(net);
+ return ret;
}
void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 17a1f53ceba0..d77f4829f1e0 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -587,9 +587,20 @@ ip0 link set wg0 up
kill $ncat_pid
ip0 link del wg0
+# Ensure there aren't circular reference loops
+ip1 link add wg1 type wireguard
+ip2 link add wg2 type wireguard
+ip1 link set wg1 netns $netns2
+ip2 link set wg2 netns $netns1
+pp ip netns delete $netns1
+pp ip netns delete $netns2
+pp ip netns add $netns1
+pp ip netns add $netns2
+
+sleep 2 # Wait for cleanup and grace periods
declare -A objects
while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
- [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue
objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
done < /dev/kmsg
alldeleted=1
--
2.18.2
From b5ad616118347eb41cdf4723a47efc820eb3de72 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 24 Jun 2020 16:06:03 -0600
Subject: [PATCH 097/100] wireguard: receive: account for napi_gro_receive
never returning GRO_DROP
commit df08126e3833e9dca19e2407db5f5860a7c194fb upstream.
The napi_gro_receive function no longer returns GRO_DROP ever, making
handling GRO_DROP dead code. This commit removes that dead code.
Further, it's not even clear that device drivers have any business in
taking action after passing off received packets; that's arguably out of
their hands.
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/receive.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 91438144e4f7..9b2ab6fc91cd 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -414,14 +414,8 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
if (unlikely(routed_peer != peer))
goto dishonest_packet_peer;
- if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
- ++dev->stats.rx_dropped;
- net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
- dev->name, peer->internal_id,
- &peer->endpoint.addr);
- } else {
- update_rx_stats(peer, message_data_len(len_before_trim));
- }
+ napi_gro_receive(&peer->napi, skb);
+ update_rx_stats(peer, message_data_len(len_before_trim));
return;
dishonest_packet_peer:
--
2.18.2
From 65e2cc153a28545822075c4615807ffb848c634d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 29 Jun 2020 19:06:18 -0600
Subject: [PATCH 098/100] net: ip_tunnel: add header_ops for layer 3 devices
commit 2606aff916854b61234bf85001be9777bab2d5f8 upstream.
Some devices that take straight up layer 3 packets benefit from having a
shared header_ops so that AF_PACKET sockets can inject packets that are
recognized. This shared infrastructure will be used by other drivers
that currently can't inject packets using AF_PACKET. It also exposes the
parser function, as it is useful in standalone form too.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/net/ip_tunnels.h | 3 +++
net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index af645604f328..b0b03a9f7af9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -289,6 +289,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
+extern const struct header_ops ip_tunnel_header_ops;
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
+
struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 1452a97914a0..cfe21c3ddfc2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -446,3 +446,21 @@ void ip_tunnel_unneed_metadata(void)
static_branch_dec(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb)
+{
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) &&
+ ip_hdr(skb)->version == 4)
+ return htons(ETH_P_IP);
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) &&
+ ipv6_hdr(skb)->version == 6)
+ return htons(ETH_P_IPV6);
+ return 0;
+}
+EXPORT_SYMBOL(ip_tunnel_parse_protocol);
+
+const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
+EXPORT_SYMBOL(ip_tunnel_header_ops);
--
2.18.2
From 7aa05817924e993a55069d9b6304cd432f8800c3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 29 Jun 2020 19:06:20 -0600
Subject: [PATCH 099/100] wireguard: implement header_ops->parse_protocol for
AF_PACKET
commit 01a4967c71c004f8ecad4ab57021348636502fa9 upstream.
WireGuard uses skb->protocol to determine packet type, and bails out if
it's not set or set to something it's not expecting. For AF_PACKET
injection, we need to support its call chain of:
packet_sendmsg -> packet_snd -> packet_parse_headers ->
dev_parse_header_protocol -> parse_protocol
Without a valid parse_protocol, this returns zero, and wireguard then
rejects the skb. So, this wires up the ip_tunnel handler for layer 3
packets for that case.
Reported-by: Hans Wippel <ndev@hwipl.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/device.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index a8f151b1b5fa..c9f65e96ccb0 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev)
max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
--
2.18.2
From c1445fa819ec6f562548bdd9cb2d3c24cd654f81 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 29 Jun 2020 19:06:21 -0600
Subject: [PATCH 100/100] wireguard: queueing: make use of
ip_tunnel_parse_protocol
commit 1a574074ae7d1d745c16f7710655f38a53174c27 upstream.
Now that wg_examine_packet_protocol has been added for general
consumption as ip_tunnel_parse_protocol, it's possible to remove
wg_examine_packet_protocol and simply use the new
ip_tunnel_parse_protocol function directly.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
drivers/net/wireguard/queueing.h | 19 ++-----------------
drivers/net/wireguard/receive.c | 2 +-
2 files changed, 3 insertions(+), 18 deletions(-)
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index c58df439dbbe..dfb674e03076 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -11,6 +11,7 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/ip_tunnels.h>
struct wg_device;
struct wg_peer;
@@ -65,25 +66,9 @@ struct packet_cb {
#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
-/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
-{
- if (skb_network_header(skb) >= skb->head &&
- (skb_network_header(skb) + sizeof(struct iphdr)) <=
- skb_tail_pointer(skb) &&
- ip_hdr(skb)->version == 4)
- return htons(ETH_P_IP);
- if (skb_network_header(skb) >= skb->head &&
- (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
- skb_tail_pointer(skb) &&
- ipv6_hdr(skb)->version == 6)
- return htons(ETH_P_IPV6);
- return 0;
-}
-
static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{
- __be16 real_protocol = wg_examine_packet_protocol(skb);
+ __be16 real_protocol = ip_tunnel_parse_protocol(skb);
return real_protocol && skb->protocol == real_protocol;
}
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 9b2ab6fc91cd..2c9551ea6dc7 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */
- skb->protocol = wg_examine_packet_protocol(skb);
+ skb->protocol = ip_tunnel_parse_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
--
2.18.2