| From d5f9669c68f4d484fd9e035cff5f909413b10ccb Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:07 +0100 |
| Subject: [PATCH 001/100] crypto: lib - tidy up lib/crypto Kconfig and Makefile |
| |
| commit 746b2e024c67aa605ac12d135cd7085a49cf9dc4 upstream. |
| |
| In preparation for introducing a set of crypto library interfaces, tidy |
| up the Makefile and split off the Kconfig symbols into a separate file. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/Kconfig | 13 +------------ |
| lib/crypto/Kconfig | 15 +++++++++++++++ |
| lib/crypto/Makefile | 16 ++++++++-------- |
| 3 files changed, 24 insertions(+), 20 deletions(-) |
| create mode 100644 lib/crypto/Kconfig |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index b2cc0ad3792a..7d19b46a7ef7 100644 |
| |
| |
| @@ -878,9 +878,6 @@ config CRYPTO_SHA1_PPC_SPE |
| SHA-1 secure hash standard (DFIPS 180-4) implemented |
| using powerpc SPE SIMD instruction set. |
| |
| -config CRYPTO_LIB_SHA256 |
| - tristate |
| - |
| config CRYPTO_SHA256 |
| tristate "SHA224 and SHA256 digest algorithm" |
| select CRYPTO_HASH |
| @@ -1019,9 +1016,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL |
| |
| comment "Ciphers" |
| |
| -config CRYPTO_LIB_AES |
| - tristate |
| - |
| config CRYPTO_AES |
| tristate "AES cipher algorithms" |
| select CRYPTO_ALGAPI |
| @@ -1150,9 +1144,6 @@ config CRYPTO_ANUBIS |
| <https://www.cosic.esat.kuleuven.be/nessie/reports/> |
| <http://www.larc.usp.br/~pbarreto/AnubisPage.html> |
| |
| -config CRYPTO_LIB_ARC4 |
| - tristate |
| - |
| config CRYPTO_ARC4 |
| tristate "ARC4 cipher algorithm" |
| select CRYPTO_BLKCIPHER |
| @@ -1339,9 +1330,6 @@ config CRYPTO_CAST6_AVX_X86_64 |
| This module provides the Cast6 cipher algorithm that processes |
| eight blocks parallel using the AVX instruction set. |
| |
| -config CRYPTO_LIB_DES |
| - tristate |
| - |
| config CRYPTO_DES |
| tristate "DES and Triple DES EDE cipher algorithms" |
| select CRYPTO_ALGAPI |
| @@ -1845,6 +1833,7 @@ config CRYPTO_STATS |
| config CRYPTO_HASH_INFO |
| bool |
| |
| +source "lib/crypto/Kconfig" |
| source "drivers/crypto/Kconfig" |
| source "crypto/asymmetric_keys/Kconfig" |
| source "certs/Kconfig" |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| new file mode 100644 |
| index 000000000000..261430051595 |
| |
| |
| @@ -0,0 +1,15 @@ |
| +# SPDX-License-Identifier: GPL-2.0 |
| + |
| +comment "Crypto library routines" |
| + |
| +config CRYPTO_LIB_AES |
| + tristate |
| + |
| +config CRYPTO_LIB_ARC4 |
| + tristate |
| + |
| +config CRYPTO_LIB_DES |
| + tristate |
| + |
| +config CRYPTO_LIB_SHA256 |
| + tristate |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index cbe0b6a6450d..63de4cb3fcf8 100644 |
| |
| |
| @@ -1,13 +1,13 @@ |
| # SPDX-License-Identifier: GPL-2.0 |
| |
| -obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o |
| -libaes-y := aes.o |
| +obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o |
| +libaes-y := aes.o |
| |
| -obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o |
| -libarc4-y := arc4.o |
| +obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o |
| +libarc4-y := arc4.o |
| |
| -obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| -libdes-y := des.o |
| +obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| +libdes-y := des.o |
| |
| -obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o |
| -libsha256-y := sha256.o |
| +obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o |
| +libsha256-y := sha256.o |
| -- |
| 2.18.2 |
| |
| |
| From df871ee7dba8582ca3b6f414ae9b615df113ac99 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:08 +0100 |
| Subject: [PATCH 002/100] crypto: chacha - move existing library code into |
| lib/crypto |
| |
| commit 5fb8ef25803ef33e2eb60b626435828b937bed75 upstream. |
| |
| Currently, our generic ChaCha implementation consists of a permute |
| function in lib/chacha.c that operates on the 64-byte ChaCha state |
| directly [and which is always included into the core kernel since it |
| is used by the /dev/random driver], and the crypto API plumbing to |
| expose it as a skcipher. |
| |
| In order to support in-kernel users that need the ChaCha streamcipher |
| but have no need [or tolerance] for going through the abstractions of |
| the crypto API, let's expose the streamcipher bits via a library API |
| as well, in a way that permits the implementation to be superseded by |
| an architecture specific one if provided. |
| |
| So move the streamcipher code into a separate module in lib/crypto, |
| and expose the init() and crypt() routines to users of the library. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/chacha-neon-glue.c | 2 +- |
| arch/arm64/crypto/chacha-neon-glue.c | 2 +- |
| arch/x86/crypto/chacha_glue.c | 2 +- |
| crypto/Kconfig | 1 + |
| crypto/chacha_generic.c | 60 ++-------------------- |
| include/crypto/chacha.h | 77 ++++++++++++++++++++++------ |
| include/crypto/internal/chacha.h | 53 +++++++++++++++++++ |
| lib/Makefile | 3 +- |
| lib/crypto/Kconfig | 26 ++++++++++ |
| lib/crypto/Makefile | 4 ++ |
| lib/{ => crypto}/chacha.c | 20 ++++---- |
| lib/crypto/libchacha.c | 35 +++++++++++++ |
| 12 files changed, 199 insertions(+), 86 deletions(-) |
| create mode 100644 include/crypto/internal/chacha.h |
| rename lib/{ => crypto}/chacha.c (88%) |
| create mode 100644 lib/crypto/libchacha.c |
| |
| diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c |
| index a8e9b534c8da..26576772f18b 100644 |
| |
| |
| @@ -20,7 +20,7 @@ |
| */ |
| |
| #include <crypto/algapi.h> |
| -#include <crypto/chacha.h> |
| +#include <crypto/internal/chacha.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| #include <linux/kernel.h> |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index 1495d2b18518..d4cc61bfe79d 100644 |
| |
| |
| @@ -20,7 +20,7 @@ |
| */ |
| |
| #include <crypto/algapi.h> |
| -#include <crypto/chacha.h> |
| +#include <crypto/internal/chacha.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| #include <linux/kernel.h> |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index 388f95a4ec24..bc62daa8dafd 100644 |
| |
| |
| @@ -7,7 +7,7 @@ |
| */ |
| |
| #include <crypto/algapi.h> |
| -#include <crypto/chacha.h> |
| +#include <crypto/internal/chacha.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| #include <linux/kernel.h> |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 7d19b46a7ef7..f29bf10c0462 100644 |
| |
| |
| @@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20 |
| |
| config CRYPTO_CHACHA20 |
| tristate "ChaCha stream cipher algorithms" |
| + select CRYPTO_LIB_CHACHA_GENERIC |
| select CRYPTO_BLKCIPHER |
| help |
| The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. |
| diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c |
| index 085d8d219987..ebae6d9d9b32 100644 |
| |
| |
| @@ -8,29 +8,10 @@ |
| |
| #include <asm/unaligned.h> |
| #include <crypto/algapi.h> |
| -#include <crypto/chacha.h> |
| +#include <crypto/internal/chacha.h> |
| #include <crypto/internal/skcipher.h> |
| #include <linux/module.h> |
| |
| -static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src, |
| - unsigned int bytes, int nrounds) |
| -{ |
| - /* aligned to potentially speed up crypto_xor() */ |
| - u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); |
| - |
| - while (bytes >= CHACHA_BLOCK_SIZE) { |
| - chacha_block(state, stream, nrounds); |
| - crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); |
| - bytes -= CHACHA_BLOCK_SIZE; |
| - dst += CHACHA_BLOCK_SIZE; |
| - src += CHACHA_BLOCK_SIZE; |
| - } |
| - if (bytes) { |
| - chacha_block(state, stream, nrounds); |
| - crypto_xor_cpy(dst, src, stream, bytes); |
| - } |
| -} |
| - |
| static int chacha_stream_xor(struct skcipher_request *req, |
| const struct chacha_ctx *ctx, const u8 *iv) |
| { |
| @@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| if (nbytes < walk.total) |
| nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); |
| |
| - chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, |
| - nbytes, ctx->nrounds); |
| + chacha_crypt_generic(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, ctx->nrounds); |
| err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| } |
| |
| @@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| |
| void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv) |
| { |
| - state[0] = 0x61707865; /* "expa" */ |
| - state[1] = 0x3320646e; /* "nd 3" */ |
| - state[2] = 0x79622d32; /* "2-by" */ |
| - state[3] = 0x6b206574; /* "te k" */ |
| - state[4] = ctx->key[0]; |
| - state[5] = ctx->key[1]; |
| - state[6] = ctx->key[2]; |
| - state[7] = ctx->key[3]; |
| - state[8] = ctx->key[4]; |
| - state[9] = ctx->key[5]; |
| - state[10] = ctx->key[6]; |
| - state[11] = ctx->key[7]; |
| - state[12] = get_unaligned_le32(iv + 0); |
| - state[13] = get_unaligned_le32(iv + 4); |
| - state[14] = get_unaligned_le32(iv + 8); |
| - state[15] = get_unaligned_le32(iv + 12); |
| + chacha_init_generic(state, ctx->key, iv); |
| } |
| EXPORT_SYMBOL_GPL(crypto_chacha_init); |
| |
| -static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize, int nrounds) |
| -{ |
| - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| - int i; |
| - |
| - if (keysize != CHACHA_KEY_SIZE) |
| - return -EINVAL; |
| - |
| - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) |
| - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); |
| - |
| - ctx->nrounds = nrounds; |
| - return 0; |
| -} |
| - |
| int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| unsigned int keysize) |
| { |
| @@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req) |
| |
| /* Compute the subkey given the original key and first 128 nonce bits */ |
| crypto_chacha_init(state, ctx, req->iv); |
| - hchacha_block(state, subctx.key, ctx->nrounds); |
| + hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| subctx.nrounds = ctx->nrounds; |
| |
| /* Build the real IV */ |
| diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h |
| index d1e723c6a37d..5c662f8fecac 100644 |
| |
| |
| @@ -15,9 +15,8 @@ |
| #ifndef _CRYPTO_CHACHA_H |
| #define _CRYPTO_CHACHA_H |
| |
| -#include <crypto/skcipher.h> |
| +#include <asm/unaligned.h> |
| #include <linux/types.h> |
| -#include <linux/crypto.h> |
| |
| /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ |
| #define CHACHA_IV_SIZE 16 |
| @@ -29,26 +28,70 @@ |
| /* 192-bit nonce, then 64-bit stream position */ |
| #define XCHACHA_IV_SIZE 32 |
| |
| -struct chacha_ctx { |
| - u32 key[8]; |
| - int nrounds; |
| -}; |
| - |
| -void chacha_block(u32 *state, u8 *stream, int nrounds); |
| +void chacha_block_generic(u32 *state, u8 *stream, int nrounds); |
| static inline void chacha20_block(u32 *state, u8 *stream) |
| { |
| - chacha_block(state, stream, 20); |
| + chacha_block_generic(state, stream, 20); |
| } |
| -void hchacha_block(const u32 *in, u32 *out, int nrounds); |
| |
| -void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); |
| +void hchacha_block_arch(const u32 *state, u32 *out, int nrounds); |
| +void hchacha_block_generic(const u32 *state, u32 *out, int nrounds); |
| + |
| +static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) |
| + hchacha_block_arch(state, out, nrounds); |
| + else |
| + hchacha_block_generic(state, out, nrounds); |
| +} |
| |
| -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize); |
| -int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize); |
| +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); |
| +static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + state[0] = 0x61707865; /* "expa" */ |
| + state[1] = 0x3320646e; /* "nd 3" */ |
| + state[2] = 0x79622d32; /* "2-by" */ |
| + state[3] = 0x6b206574; /* "te k" */ |
| + state[4] = key[0]; |
| + state[5] = key[1]; |
| + state[6] = key[2]; |
| + state[7] = key[3]; |
| + state[8] = key[4]; |
| + state[9] = key[5]; |
| + state[10] = key[6]; |
| + state[11] = key[7]; |
| + state[12] = get_unaligned_le32(iv + 0); |
| + state[13] = get_unaligned_le32(iv + 4); |
| + state[14] = get_unaligned_le32(iv + 8); |
| + state[15] = get_unaligned_le32(iv + 12); |
| +} |
| |
| -int crypto_chacha_crypt(struct skcipher_request *req); |
| -int crypto_xchacha_crypt(struct skcipher_request *req); |
| +static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) |
| + chacha_init_arch(state, key, iv); |
| + else |
| + chacha_init_generic(state, key, iv); |
| +} |
| + |
| +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds); |
| +void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds); |
| + |
| +static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) |
| + chacha_crypt_arch(state, dst, src, bytes, nrounds); |
| + else |
| + chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| +} |
| + |
| +static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes) |
| +{ |
| + chacha_crypt(state, dst, src, bytes, 20); |
| +} |
| |
| #endif /* _CRYPTO_CHACHA_H */ |
| diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h |
| new file mode 100644 |
| index 000000000000..c0e40b245431 |
| |
| |
| @@ -0,0 +1,53 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| + |
| +#ifndef _CRYPTO_INTERNAL_CHACHA_H |
| +#define _CRYPTO_INTERNAL_CHACHA_H |
| + |
| +#include <crypto/chacha.h> |
| +#include <crypto/internal/skcipher.h> |
| +#include <linux/crypto.h> |
| + |
| +struct chacha_ctx { |
| + u32 key[8]; |
| + int nrounds; |
| +}; |
| + |
| +void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); |
| + |
| +static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize, int nrounds) |
| +{ |
| + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| + int i; |
| + |
| + if (keysize != CHACHA_KEY_SIZE) |
| + return -EINVAL; |
| + |
| + for (i = 0; i < ARRAY_SIZE(ctx->key); i++) |
| + ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); |
| + |
| + ctx->nrounds = nrounds; |
| + return 0; |
| +} |
| + |
| +static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize) |
| +{ |
| + return chacha_setkey(tfm, key, keysize, 20); |
| +} |
| + |
| +static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize) |
| +{ |
| + return chacha_setkey(tfm, key, keysize, 12); |
| +} |
| + |
| +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize); |
| +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize); |
| + |
| +int crypto_chacha_crypt(struct skcipher_request *req); |
| +int crypto_xchacha_crypt(struct skcipher_request *req); |
| + |
| +#endif /* _CRYPTO_CHACHA_H */ |
| diff --git a/lib/Makefile b/lib/Makefile |
| index c5892807e06f..5af38fd5cc60 100644 |
| |
| |
| @@ -26,8 +26,7 @@ endif |
| |
| lib-y := ctype.o string.o vsprintf.o cmdline.o \ |
| rbtree.o radix-tree.o timerqueue.o xarray.o \ |
| - idr.o extable.o \ |
| - sha1.o chacha.o irq_regs.o argv_split.o \ |
| + idr.o extable.o sha1.o irq_regs.o argv_split.o \ |
| flex_proportions.o ratelimit.o show_mem.o \ |
| is_single_threaded.o plist.o decompress.o kobject_uevent.o \ |
| earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 261430051595..6a11931ae105 100644 |
| |
| |
| @@ -8,6 +8,32 @@ config CRYPTO_LIB_AES |
| config CRYPTO_LIB_ARC4 |
| tristate |
| |
| +config CRYPTO_ARCH_HAVE_LIB_CHACHA |
| + tristate |
| + help |
| + Declares whether the architecture provides an arch-specific |
| + accelerated implementation of the ChaCha library interface, |
| + either builtin or as a module. |
| + |
| +config CRYPTO_LIB_CHACHA_GENERIC |
| + tristate |
| + select CRYPTO_ALGAPI |
| + help |
| + This symbol can be depended upon by arch implementations of the |
| + ChaCha library interface that require the generic code as a |
| + fallback, e.g., for SIMD implementations. If no arch specific |
| + implementation is enabled, this implementation serves the users |
| + of CRYPTO_LIB_CHACHA. |
| + |
| +config CRYPTO_LIB_CHACHA |
| + tristate "ChaCha library interface" |
| + depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA |
| + select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n |
| + help |
| + Enable the ChaCha library interface. This interface may be fulfilled |
| + by either the generic implementation or an arch-specific one, if one |
| + is available and enabled. |
| + |
| config CRYPTO_LIB_DES |
| tristate |
| |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 63de4cb3fcf8..0ce40604e104 100644 |
| |
| |
| @@ -1,5 +1,9 @@ |
| # SPDX-License-Identifier: GPL-2.0 |
| |
| +# chacha is used by the /dev/random driver which is always builtin |
| +obj-y += chacha.o |
| +obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o |
| + |
| obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o |
| libaes-y := aes.o |
| |
| diff --git a/lib/chacha.c b/lib/crypto/chacha.c |
| similarity index 88% |
| rename from lib/chacha.c |
| rename to lib/crypto/chacha.c |
| index c7c9826564d3..65ead6b0c7e0 100644 |
| |
| |
| @@ -5,9 +5,11 @@ |
| * Copyright (C) 2015 Martin Willi |
| */ |
| |
| +#include <linux/bug.h> |
| #include <linux/kernel.h> |
| #include <linux/export.h> |
| #include <linux/bitops.h> |
| +#include <linux/string.h> |
| #include <linux/cryptohash.h> |
| #include <asm/unaligned.h> |
| #include <crypto/chacha.h> |
| @@ -72,7 +74,7 @@ static void chacha_permute(u32 *x, int nrounds) |
| * The caller has already converted the endianness of the input. This function |
| * also handles incrementing the block counter in the input matrix. |
| */ |
| -void chacha_block(u32 *state, u8 *stream, int nrounds) |
| +void chacha_block_generic(u32 *state, u8 *stream, int nrounds) |
| { |
| u32 x[16]; |
| int i; |
| @@ -86,11 +88,11 @@ void chacha_block(u32 *state, u8 *stream, int nrounds) |
| |
| state[12]++; |
| } |
| -EXPORT_SYMBOL(chacha_block); |
| +EXPORT_SYMBOL(chacha_block_generic); |
| |
| /** |
| - * hchacha_block - abbreviated ChaCha core, for XChaCha |
| - * @in: input state matrix (16 32-bit words) |
| + * hchacha_block_generic - abbreviated ChaCha core, for XChaCha |
| + * @state: input state matrix (16 32-bit words) |
| * @out: output (8 32-bit words) |
| * @nrounds: number of rounds (20 or 12; 20 is recommended) |
| * |
| @@ -99,15 +101,15 @@ EXPORT_SYMBOL(chacha_block); |
| * skips the final addition of the initial state, and outputs only certain words |
| * of the state. It should not be used for streaming directly. |
| */ |
| -void hchacha_block(const u32 *in, u32 *out, int nrounds) |
| +void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds) |
| { |
| u32 x[16]; |
| |
| - memcpy(x, in, 64); |
| + memcpy(x, state, 64); |
| |
| chacha_permute(x, nrounds); |
| |
| - memcpy(&out[0], &x[0], 16); |
| - memcpy(&out[4], &x[12], 16); |
| + memcpy(&stream[0], &x[0], 16); |
| + memcpy(&stream[4], &x[12], 16); |
| } |
| -EXPORT_SYMBOL(hchacha_block); |
| +EXPORT_SYMBOL(hchacha_block_generic); |
| diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c |
| new file mode 100644 |
| index 000000000000..dabc3accae05 |
| |
| |
| @@ -0,0 +1,35 @@ |
| +// SPDX-License-Identifier: GPL-2.0-or-later |
| +/* |
| + * The ChaCha stream cipher (RFC7539) |
| + * |
| + * Copyright (C) 2015 Martin Willi |
| + */ |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/export.h> |
| +#include <linux/module.h> |
| + |
| +#include <crypto/algapi.h> // for crypto_xor_cpy |
| +#include <crypto/chacha.h> |
| + |
| +void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds) |
| +{ |
| + /* aligned to potentially speed up crypto_xor() */ |
| + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); |
| + |
| + while (bytes >= CHACHA_BLOCK_SIZE) { |
| + chacha_block_generic(state, stream, nrounds); |
| + crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); |
| + bytes -= CHACHA_BLOCK_SIZE; |
| + dst += CHACHA_BLOCK_SIZE; |
| + src += CHACHA_BLOCK_SIZE; |
| + } |
| + if (bytes) { |
| + chacha_block_generic(state, stream, nrounds); |
| + crypto_xor_cpy(dst, src, stream, bytes); |
| + } |
| +} |
| +EXPORT_SYMBOL(chacha_crypt_generic); |
| + |
| +MODULE_LICENSE("GPL"); |
| -- |
| 2.18.2 |
| |
| |
| From 23aa6c8d945743ff123207351ee2a27612c6486f Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:09 +0100 |
| Subject: [PATCH 003/100] crypto: x86/chacha - depend on generic chacha library |
| instead of crypto driver |
| |
| commit 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 upstream. |
| |
| In preparation for extending the x86 ChaCha driver to also expose the ChaCha |
| library interface, drop the dependency on the chacha_generic crypto driver |
| as a non-SIMD fallback, and depend on the generic ChaCha library directly. |
| This way, we only pull in the code we actually need, without registering |
| a set of ChaCha skciphers that we will never use. |
| |
| Since turning the FPU on and off is cheap these days, simplify the SIMD |
| routine by dropping the per-page yield, which makes for a cleaner switch |
| to the library API as well. This also allows us to invoke the skcipher |
| walk routines in non-atomic mode. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/chacha_glue.c | 90 ++++++++++++++--------------------- |
| crypto/Kconfig | 2 +- |
| 2 files changed, 36 insertions(+), 56 deletions(-) |
| |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index bc62daa8dafd..0aabb382edce 100644 |
| |
| |
| @@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| } |
| } |
| |
| -static int chacha_simd_stream_xor(struct skcipher_walk *walk, |
| +static int chacha_simd_stream_xor(struct skcipher_request *req, |
| const struct chacha_ctx *ctx, const u8 *iv) |
| { |
| u32 *state, state_buf[16 + 2] __aligned(8); |
| - int next_yield = 4096; /* bytes until next FPU yield */ |
| - int err = 0; |
| + struct skcipher_walk walk; |
| + int err; |
| + |
| + err = skcipher_walk_virt(&walk, req, false); |
| |
| BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); |
| state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); |
| |
| - crypto_chacha_init(state, ctx, iv); |
| + chacha_init_generic(state, ctx->key, iv); |
| |
| - while (walk->nbytes > 0) { |
| - unsigned int nbytes = walk->nbytes; |
| + while (walk.nbytes > 0) { |
| + unsigned int nbytes = walk.nbytes; |
| |
| - if (nbytes < walk->total) { |
| - nbytes = round_down(nbytes, walk->stride); |
| - next_yield -= nbytes; |
| - } |
| + if (nbytes < walk.total) |
| + nbytes = round_down(nbytes, walk.stride); |
| |
| - chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, |
| - nbytes, ctx->nrounds); |
| - |
| - if (next_yield <= 0) { |
| - /* temporarily allow preemption */ |
| - kernel_fpu_end(); |
| + if (!crypto_simd_usable()) { |
| + chacha_crypt_generic(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, |
| + ctx->nrounds); |
| + } else { |
| kernel_fpu_begin(); |
| - next_yield = 4096; |
| + chacha_dosimd(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, |
| + ctx->nrounds); |
| + kernel_fpu_end(); |
| } |
| - |
| - err = skcipher_walk_done(walk, walk->nbytes - nbytes); |
| + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| } |
| |
| return err; |
| @@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| - struct skcipher_walk walk; |
| - int err; |
| |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_chacha_crypt(req); |
| - |
| - err = skcipher_walk_virt(&walk, req, true); |
| - if (err) |
| - return err; |
| - |
| - kernel_fpu_begin(); |
| - err = chacha_simd_stream_xor(&walk, ctx, req->iv); |
| - kernel_fpu_end(); |
| - return err; |
| + return chacha_simd_stream_xor(req, ctx, req->iv); |
| } |
| |
| static int xchacha_simd(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| - struct skcipher_walk walk; |
| - struct chacha_ctx subctx; |
| u32 *state, state_buf[16 + 2] __aligned(8); |
| + struct chacha_ctx subctx; |
| u8 real_iv[16]; |
| - int err; |
| - |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_xchacha_crypt(req); |
| - |
| - err = skcipher_walk_virt(&walk, req, true); |
| - if (err) |
| - return err; |
| |
| BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); |
| state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); |
| - crypto_chacha_init(state, ctx, req->iv); |
| - |
| - kernel_fpu_begin(); |
| - |
| - hchacha_block_ssse3(state, subctx.key, ctx->nrounds); |
| + chacha_init_generic(state, ctx->key, req->iv); |
| + |
| + if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { |
| + kernel_fpu_begin(); |
| + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); |
| + kernel_fpu_end(); |
| + } else { |
| + hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| + } |
| subctx.nrounds = ctx->nrounds; |
| |
| memcpy(&real_iv[0], req->iv + 24, 8); |
| memcpy(&real_iv[8], req->iv + 16, 8); |
| - err = chacha_simd_stream_xor(&walk, &subctx, real_iv); |
| - |
| - kernel_fpu_end(); |
| - |
| - return err; |
| + return chacha_simd_stream_xor(req, &subctx, real_iv); |
| } |
| |
| static struct skcipher_alg algs[] = { |
| @@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = CHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = chacha_simd, |
| .decrypt = chacha_simd, |
| }, { |
| @@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = xchacha_simd, |
| .decrypt = xchacha_simd, |
| }, { |
| @@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha12_setkey, |
| + .setkey = chacha12_setkey, |
| .encrypt = xchacha_simd, |
| .decrypt = xchacha_simd, |
| }, |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index f29bf10c0462..564a3f7b40b8 100644 |
| |
| |
| @@ -1417,7 +1417,7 @@ config CRYPTO_CHACHA20_X86_64 |
| tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)" |
| depends on X86 && 64BIT |
| select CRYPTO_BLKCIPHER |
| - select CRYPTO_CHACHA20 |
| + select CRYPTO_LIB_CHACHA_GENERIC |
| help |
| SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, |
| XChaCha20, and XChaCha12 stream ciphers. |
| -- |
| 2.18.2 |
| |
| |
| From eae0a3dc41f16fa4829ea9fe4b5520402a54d0eb Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:10 +0100 |
| Subject: [PATCH 004/100] crypto: x86/chacha - expose SIMD ChaCha routine as |
| library function |
| |
| commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream. |
| |
| Wire the existing x86 SIMD ChaCha code into the new ChaCha library |
| interface, so that users of the library interface will get the |
| accelerated version when available. |
| |
| Given that calls into the library API will always go through the |
| routines in this module if it is enabled, switch to static keys |
| to select the optimal implementation available (which may be none |
| at all, in which case we defer to the generic implementation for |
| all invocations). |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++---------- |
| crypto/Kconfig | 1 + |
| include/crypto/chacha.h | 6 +++ |
| 3 files changed, 73 insertions(+), 25 deletions(-) |
| |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index 0aabb382edce..b391e13a9e41 100644 |
| |
| |
| @@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, |
| asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); |
| -#ifdef CONFIG_AS_AVX2 |
| + |
| asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| -static bool chacha_use_avx2; |
| -#ifdef CONFIG_AS_AVX512 |
| + |
| asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| unsigned int len, int nrounds); |
| -static bool chacha_use_avx512vl; |
| -#endif |
| -#endif |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); |
| |
| static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) |
| { |
| @@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) |
| static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| unsigned int bytes, int nrounds) |
| { |
| -#ifdef CONFIG_AS_AVX2 |
| -#ifdef CONFIG_AS_AVX512 |
| - if (chacha_use_avx512vl) { |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && |
| + static_branch_likely(&chacha_use_avx512vl)) { |
| while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
| chacha_8block_xor_avx512vl(state, dst, src, bytes, |
| nrounds); |
| @@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| return; |
| } |
| } |
| -#endif |
| - if (chacha_use_avx2) { |
| + |
| + if (IS_ENABLED(CONFIG_AS_AVX2) && |
| + static_branch_likely(&chacha_use_avx2)) { |
| while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
| chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); |
| bytes -= CHACHA_BLOCK_SIZE * 8; |
| @@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| return; |
| } |
| } |
| -#endif |
| + |
| while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); |
| bytes -= CHACHA_BLOCK_SIZE * 4; |
| @@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| } |
| } |
| |
| +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| +{ |
| + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); |
| + |
| + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { |
| + hchacha_block_generic(state, stream, nrounds); |
| + } else { |
| + kernel_fpu_begin(); |
| + hchacha_block_ssse3(state, stream, nrounds); |
| + kernel_fpu_end(); |
| + } |
| +} |
| +EXPORT_SYMBOL(hchacha_block_arch); |
| + |
| +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); |
| + |
| + chacha_init_generic(state, key, iv); |
| +} |
| +EXPORT_SYMBOL(chacha_init_arch); |
| + |
| +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| + int nrounds) |
| +{ |
| + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); |
| + |
| + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || |
| + bytes <= CHACHA_BLOCK_SIZE) |
| + return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| + |
| + kernel_fpu_begin(); |
| + chacha_dosimd(state, dst, src, bytes, nrounds); |
| + kernel_fpu_end(); |
| +} |
| +EXPORT_SYMBOL(chacha_crypt_arch); |
| + |
| static int chacha_simd_stream_xor(struct skcipher_request *req, |
| const struct chacha_ctx *ctx, const u8 *iv) |
| { |
| @@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req, |
| if (nbytes < walk.total) |
| nbytes = round_down(nbytes, walk.stride); |
| |
| - if (!crypto_simd_usable()) { |
| + if (!static_branch_likely(&chacha_use_simd) || |
| + !crypto_simd_usable()) { |
| chacha_crypt_generic(state, walk.dst.virt.addr, |
| walk.src.virt.addr, nbytes, |
| ctx->nrounds); |
| @@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = { |
| static int __init chacha_simd_mod_init(void) |
| { |
| if (!boot_cpu_has(X86_FEATURE_SSSE3)) |
| - return -ENODEV; |
| - |
| -#ifdef CONFIG_AS_AVX2 |
| - chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && |
| - boot_cpu_has(X86_FEATURE_AVX2) && |
| - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
| -#ifdef CONFIG_AS_AVX512 |
| - chacha_use_avx512vl = chacha_use_avx2 && |
| - boot_cpu_has(X86_FEATURE_AVX512VL) && |
| - boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ |
| -#endif |
| -#endif |
| + return 0; |
| + |
| + static_branch_enable(&chacha_use_simd); |
| + |
| + if (IS_ENABLED(CONFIG_AS_AVX2) && |
| + boot_cpu_has(X86_FEATURE_AVX) && |
| + boot_cpu_has(X86_FEATURE_AVX2) && |
| + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
| + static_branch_enable(&chacha_use_avx2); |
| + |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && |
| + boot_cpu_has(X86_FEATURE_AVX512VL) && |
| + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ |
| + static_branch_enable(&chacha_use_avx512vl); |
| + } |
| return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 564a3f7b40b8..649dc564f242 100644 |
| |
| |
| @@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64 |
| depends on X86 && 64BIT |
| select CRYPTO_BLKCIPHER |
| select CRYPTO_LIB_CHACHA_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| help |
| SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, |
| XChaCha20, and XChaCha12 stream ciphers. |
| diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h |
| index 5c662f8fecac..2676f4fbd4c1 100644 |
| |
| |
| @@ -25,6 +25,12 @@ |
| #define CHACHA_BLOCK_SIZE 64 |
| #define CHACHAPOLY_IV_SIZE 12 |
| |
| +#ifdef CONFIG_X86_64 |
| +#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32)) |
| +#else |
| +#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) |
| +#endif |
| + |
| /* 192-bit nonce, then 64-bit stream position */ |
| #define XCHACHA_IV_SIZE 32 |
| |
| -- |
| 2.18.2 |
| |
| |
| From 00be77bf975db1c79b616faa27f89a1625c8a8f8 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:11 +0100 |
| Subject: [PATCH 005/100] crypto: arm64/chacha - depend on generic chacha |
| library instead of crypto driver |
| |
| commit c77da4867cbb7841177275dbb250f5c09679fae4 upstream. |
| |
| Depend on the generic ChaCha library routines instead of pulling in the |
| generic ChaCha skcipher driver, which is more than we need, and makes |
| managing the dependencies between the generic library, generic driver, |
| accelerated library and driver more complicated. |
| |
| While at it, drop the logic to prefer the scalar code on short inputs. |
| Turning the NEON on and off is cheap these days, and one major use case |
| for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar |
| path upon every invocation (when doing the Poly1305 nonce generation). |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm64/crypto/Kconfig | 2 +- |
| arch/arm64/crypto/chacha-neon-glue.c | 40 +++++++++++++++------------- |
| 2 files changed, 23 insertions(+), 19 deletions(-) |
| |
| diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig |
| index 4922c4451e7c..fdf52d5f18f9 100644 |
| |
| |
| @@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON |
| tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" |
| depends on KERNEL_MODE_NEON |
| select CRYPTO_BLKCIPHER |
| - select CRYPTO_CHACHA20 |
| + select CRYPTO_LIB_CHACHA_GENERIC |
| |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index d4cc61bfe79d..cae2cb92eca8 100644 |
| |
| |
| @@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| - crypto_chacha_init(state, ctx, iv); |
| + chacha_init_generic(state, ctx->key, iv); |
| |
| while (walk.nbytes > 0) { |
| unsigned int nbytes = walk.nbytes; |
| @@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, |
| if (nbytes < walk.total) |
| nbytes = rounddown(nbytes, walk.stride); |
| |
| - kernel_neon_begin(); |
| - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, |
| - nbytes, ctx->nrounds); |
| - kernel_neon_end(); |
| + if (!crypto_simd_usable()) { |
| + chacha_crypt_generic(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, |
| + ctx->nrounds); |
| + } else { |
| + kernel_neon_begin(); |
| + chacha_doneon(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, ctx->nrounds); |
| + kernel_neon_end(); |
| + } |
| err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| } |
| |
| @@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_request *req) |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_chacha_crypt(req); |
| - |
| return chacha_neon_stream_xor(req, ctx, req->iv); |
| } |
| |
| @@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_request *req) |
| u32 state[16]; |
| u8 real_iv[16]; |
| |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_xchacha_crypt(req); |
| + chacha_init_generic(state, ctx->key, req->iv); |
| |
| - crypto_chacha_init(state, ctx, req->iv); |
| - |
| - kernel_neon_begin(); |
| - hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| - kernel_neon_end(); |
| + if (crypto_simd_usable()) { |
| + kernel_neon_begin(); |
| + hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| + kernel_neon_end(); |
| + } else { |
| + hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| + } |
| subctx.nrounds = ctx->nrounds; |
| |
| memcpy(&real_iv[0], req->iv + 24, 8); |
| @@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = { |
| .ivsize = CHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| .walksize = 5 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = chacha_neon, |
| .decrypt = chacha_neon, |
| }, { |
| @@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = { |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| .walksize = 5 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = xchacha_neon, |
| .decrypt = xchacha_neon, |
| }, { |
| @@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = { |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| .walksize = 5 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha12_setkey, |
| + .setkey = chacha12_setkey, |
| .encrypt = xchacha_neon, |
| .decrypt = xchacha_neon, |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 9612659d26c1a94ab65d28c6e95e8efabc44555c Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:12 +0100 |
| Subject: [PATCH 006/100] crypto: arm64/chacha - expose arm64 ChaCha routine as |
| library function |
| |
| commit b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c upstream. |
| |
| Expose the accelerated NEON ChaCha routine directly as a symbol |
| export so that users of the ChaCha library API can use it directly. |
| |
| Given that calls into the library API will always go through the |
| routines in this module if it is enabled, switch to static keys |
| to select the optimal implementation available (which may be none |
| at all, in which case we defer to the generic implementation for |
| all invocations). |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm64/crypto/Kconfig | 1 + |
| arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++------ |
| 2 files changed, 43 insertions(+), 11 deletions(-) |
| |
| diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig |
| index fdf52d5f18f9..17bada4b9dd2 100644 |
| |
| |
| @@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON |
| depends on KERNEL_MODE_NEON |
| select CRYPTO_BLKCIPHER |
| select CRYPTO_LIB_CHACHA_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index cae2cb92eca8..46cd4297761c 100644 |
| |
| |
| @@ -23,6 +23,7 @@ |
| #include <crypto/internal/chacha.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| +#include <linux/jump_label.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| |
| @@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, |
| int nrounds, int bytes); |
| asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
| + |
| static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| int bytes, int nrounds) |
| { |
| @@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| } |
| } |
| |
| +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| +{ |
| + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { |
| + hchacha_block_generic(state, stream, nrounds); |
| + } else { |
| + kernel_neon_begin(); |
| + hchacha_block_neon(state, stream, nrounds); |
| + kernel_neon_end(); |
| + } |
| +} |
| +EXPORT_SYMBOL(hchacha_block_arch); |
| + |
| +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + chacha_init_generic(state, key, iv); |
| +} |
| +EXPORT_SYMBOL(chacha_init_arch); |
| + |
| +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| + int nrounds) |
| +{ |
| + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || |
| + !crypto_simd_usable()) |
| + return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| + |
| + kernel_neon_begin(); |
| + chacha_doneon(state, dst, src, bytes, nrounds); |
| + kernel_neon_end(); |
| +} |
| +EXPORT_SYMBOL(chacha_crypt_arch); |
| + |
| static int chacha_neon_stream_xor(struct skcipher_request *req, |
| const struct chacha_ctx *ctx, const u8 *iv) |
| { |
| @@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, |
| if (nbytes < walk.total) |
| nbytes = rounddown(nbytes, walk.stride); |
| |
| - if (!crypto_simd_usable()) { |
| + if (!static_branch_likely(&have_neon) || |
| + !crypto_simd_usable()) { |
| chacha_crypt_generic(state, walk.dst.virt.addr, |
| walk.src.virt.addr, nbytes, |
| ctx->nrounds); |
| @@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_request *req) |
| u8 real_iv[16]; |
| |
| chacha_init_generic(state, ctx->key, req->iv); |
| - |
| - if (crypto_simd_usable()) { |
| - kernel_neon_begin(); |
| - hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| - kernel_neon_end(); |
| - } else { |
| - hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| - } |
| + hchacha_block_arch(state, subctx.key, ctx->nrounds); |
| subctx.nrounds = ctx->nrounds; |
| |
| memcpy(&real_iv[0], req->iv + 24, 8); |
| @@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = { |
| static int __init chacha_simd_mod_init(void) |
| { |
| if (!cpu_have_named_feature(ASIMD)) |
| - return -ENODEV; |
| + return 0; |
| + |
| + static_branch_enable(&have_neon); |
| |
| return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| + if (cpu_have_named_feature(ASIMD)) |
| + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| module_init(chacha_simd_mod_init); |
| -- |
| 2.18.2 |
| |
| |
| From eb04a17f747282ec72b45430b18595e44ace9fac Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:13 +0100 |
| Subject: [PATCH 007/100] crypto: arm/chacha - import Eric Biggers's scalar |
| accelerated ChaCha code |
| |
| commit 29621d099f9c642b22a69dc8e7e20c108473a392 upstream. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++ |
| 1 file changed, 461 insertions(+) |
| create mode 100644 arch/arm/crypto/chacha-scalar-core.S |
| |
| diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S |
| new file mode 100644 |
| index 000000000000..2140319b64a0 |
| |
| |
| @@ -0,0 +1,461 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2018 Google, Inc. |
| + */ |
| + |
| +#include <linux/linkage.h> |
| +#include <asm/assembler.h> |
| + |
| +/* |
| + * Design notes: |
| + * |
| + * 16 registers would be needed to hold the state matrix, but only 14 are |
| + * available because 'sp' and 'pc' cannot be used. So we spill the elements |
| + * (x8, x9) to the stack and swap them out with (x10, x11). This adds one |
| + * 'ldrd' and one 'strd' instruction per round. |
| + * |
| + * All rotates are performed using the implicit rotate operand accepted by the |
| + * 'add' and 'eor' instructions. This is faster than using explicit rotate |
| + * instructions. To make this work, we allow the values in the second and last |
| + * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the |
| + * wrong rotation amount. The rotation amount is then fixed up just in time |
| + * when the values are used. 'brot' is the number of bits the values in row 'b' |
| + * need to be rotated right to arrive at the correct values, and 'drot' |
| + * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such |
| + * that they end up as (25, 24) after every round. |
| + */ |
| + |
| + // ChaCha state registers |
| + X0 .req r0 |
| + X1 .req r1 |
| + X2 .req r2 |
| + X3 .req r3 |
| + X4 .req r4 |
| + X5 .req r5 |
| + X6 .req r6 |
| + X7 .req r7 |
| + X8_X10 .req r8 // shared by x8 and x10 |
| + X9_X11 .req r9 // shared by x9 and x11 |
| + X12 .req r10 |
| + X13 .req r11 |
| + X14 .req r12 |
| + X15 .req r14 |
| + |
| +.Lexpand_32byte_k: |
| + // "expand 32-byte k" |
| + .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 |
| + |
| +#ifdef __thumb2__ |
| +# define adrl adr |
| +#endif |
| + |
| +.macro __rev out, in, t0, t1, t2 |
| +.if __LINUX_ARM_ARCH__ >= 6 |
| + rev \out, \in |
| +.else |
| + lsl \t0, \in, #24 |
| + and \t1, \in, #0xff00 |
| + and \t2, \in, #0xff0000 |
| + orr \out, \t0, \in, lsr #24 |
| + orr \out, \out, \t1, lsl #8 |
| + orr \out, \out, \t2, lsr #8 |
| +.endif |
| +.endm |
| + |
| +.macro _le32_bswap x, t0, t1, t2 |
| +#ifdef __ARMEB__ |
| + __rev \x, \x, \t0, \t1, \t2 |
| +#endif |
| +.endm |
| + |
| +.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 |
| + _le32_bswap \a, \t0, \t1, \t2 |
| + _le32_bswap \b, \t0, \t1, \t2 |
| + _le32_bswap \c, \t0, \t1, \t2 |
| + _le32_bswap \d, \t0, \t1, \t2 |
| +.endm |
| + |
| +.macro __ldrd a, b, src, offset |
| +#if __LINUX_ARM_ARCH__ >= 6 |
| + ldrd \a, \b, [\src, #\offset] |
| +#else |
| + ldr \a, [\src, #\offset] |
| + ldr \b, [\src, #\offset + 4] |
| +#endif |
| +.endm |
| + |
| +.macro __strd a, b, dst, offset |
| +#if __LINUX_ARM_ARCH__ >= 6 |
| + strd \a, \b, [\dst, #\offset] |
| +#else |
| + str \a, [\dst, #\offset] |
| + str \b, [\dst, #\offset + 4] |
| +#endif |
| +.endm |
| + |
| +.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 |
| + |
| + // a += b; d ^= a; d = rol(d, 16); |
| + add \a1, \a1, \b1, ror #brot |
| + add \a2, \a2, \b2, ror #brot |
| + eor \d1, \a1, \d1, ror #drot |
| + eor \d2, \a2, \d2, ror #drot |
| + // drot == 32 - 16 == 16 |
| + |
| + // c += d; b ^= c; b = rol(b, 12); |
| + add \c1, \c1, \d1, ror #16 |
| + add \c2, \c2, \d2, ror #16 |
| + eor \b1, \c1, \b1, ror #brot |
| + eor \b2, \c2, \b2, ror #brot |
| + // brot == 32 - 12 == 20 |
| + |
| + // a += b; d ^= a; d = rol(d, 8); |
| + add \a1, \a1, \b1, ror #20 |
| + add \a2, \a2, \b2, ror #20 |
| + eor \d1, \a1, \d1, ror #16 |
| + eor \d2, \a2, \d2, ror #16 |
| + // drot == 32 - 8 == 24 |
| + |
| + // c += d; b ^= c; b = rol(b, 7); |
| + add \c1, \c1, \d1, ror #24 |
| + add \c2, \c2, \d2, ror #24 |
| + eor \b1, \c1, \b1, ror #20 |
| + eor \b2, \c2, \b2, ror #20 |
| + // brot == 32 - 7 == 25 |
| +.endm |
| + |
| +.macro _doubleround |
| + |
| + // column round |
| + |
| + // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) |
| + _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 |
| + |
| + // save (x8, x9); restore (x10, x11) |
| + __strd X8_X10, X9_X11, sp, 0 |
| + __ldrd X8_X10, X9_X11, sp, 8 |
| + |
| + // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) |
| + _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 |
| + |
| + .set brot, 25 |
| + .set drot, 24 |
| + |
| + // diagonal round |
| + |
| + // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) |
| + _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 |
| + |
| + // save (x10, x11); restore (x8, x9) |
| + __strd X8_X10, X9_X11, sp, 8 |
| + __ldrd X8_X10, X9_X11, sp, 0 |
| + |
| + // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) |
| + _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 |
| +.endm |
| + |
| +.macro _chacha_permute nrounds |
| + .set brot, 0 |
| + .set drot, 0 |
| + .rept \nrounds / 2 |
| + _doubleround |
| + .endr |
| +.endm |
| + |
| +.macro _chacha nrounds |
| + |
| +.Lnext_block\@: |
| + // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN |
| + // Registers contain x0-x9,x12-x15. |
| + |
| + // Do the core ChaCha permutation to update x0-x15. |
| + _chacha_permute \nrounds |
| + |
| + add sp, #8 |
| + // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN |
| + // Registers contain x0-x9,x12-x15. |
| + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. |
| + |
| + // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). |
| + push {X8_X10, X9_X11, X12, X13, X14, X15} |
| + |
| + // Load (OUT, IN, LEN). |
| + ldr r14, [sp, #96] |
| + ldr r12, [sp, #100] |
| + ldr r11, [sp, #104] |
| + |
| + orr r10, r14, r12 |
| + |
| + // Use slow path if fewer than 64 bytes remain. |
| + cmp r11, #64 |
| + blt .Lxor_slowpath\@ |
| + |
| + // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on |
| + // ARMv6+, since ldmia and stmia (used below) still require alignment. |
| + tst r10, #3 |
| + bne .Lxor_slowpath\@ |
| + |
| + // Fast path: XOR 64 bytes of aligned data. |
| + |
| + // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN |
| + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. |
| + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. |
| + |
| + // x0-x3 |
| + __ldrd r8, r9, sp, 32 |
| + __ldrd r10, r11, sp, 40 |
| + add X0, X0, r8 |
| + add X1, X1, r9 |
| + add X2, X2, r10 |
| + add X3, X3, r11 |
| + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 |
| + ldmia r12!, {r8-r11} |
| + eor X0, X0, r8 |
| + eor X1, X1, r9 |
| + eor X2, X2, r10 |
| + eor X3, X3, r11 |
| + stmia r14!, {X0-X3} |
| + |
| + // x4-x7 |
| + __ldrd r8, r9, sp, 48 |
| + __ldrd r10, r11, sp, 56 |
| + add X4, r8, X4, ror #brot |
| + add X5, r9, X5, ror #brot |
| + ldmia r12!, {X0-X3} |
| + add X6, r10, X6, ror #brot |
| + add X7, r11, X7, ror #brot |
| + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 |
| + eor X4, X4, X0 |
| + eor X5, X5, X1 |
| + eor X6, X6, X2 |
| + eor X7, X7, X3 |
| + stmia r14!, {X4-X7} |
| + |
| + // x8-x15 |
| + pop {r0-r7} // (x8-x9,x12-x15,x10-x11) |
| + __ldrd r8, r9, sp, 32 |
| + __ldrd r10, r11, sp, 40 |
| + add r0, r0, r8 // x8 |
| + add r1, r1, r9 // x9 |
| + add r6, r6, r10 // x10 |
| + add r7, r7, r11 // x11 |
| + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 |
| + ldmia r12!, {r8-r11} |
| + eor r0, r0, r8 // x8 |
| + eor r1, r1, r9 // x9 |
| + eor r6, r6, r10 // x10 |
| + eor r7, r7, r11 // x11 |
| + stmia r14!, {r0,r1,r6,r7} |
| + ldmia r12!, {r0,r1,r6,r7} |
| + __ldrd r8, r9, sp, 48 |
| + __ldrd r10, r11, sp, 56 |
| + add r2, r8, r2, ror #drot // x12 |
| + add r3, r9, r3, ror #drot // x13 |
| + add r4, r10, r4, ror #drot // x14 |
| + add r5, r11, r5, ror #drot // x15 |
| + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 |
| + ldr r9, [sp, #72] // load LEN |
| + eor r2, r2, r0 // x12 |
| + eor r3, r3, r1 // x13 |
| + eor r4, r4, r6 // x14 |
| + eor r5, r5, r7 // x15 |
| + subs r9, #64 // decrement and check LEN |
| + stmia r14!, {r2-r5} |
| + |
| + beq .Ldone\@ |
| + |
| +.Lprepare_for_next_block\@: |
| + |
| + // Stack: x0-x15 OUT IN LEN |
| + |
| + // Increment block counter (x12) |
| + add r8, #1 |
| + |
| + // Store updated (OUT, IN, LEN) |
| + str r14, [sp, #64] |
| + str r12, [sp, #68] |
| + str r9, [sp, #72] |
| + |
| + mov r14, sp |
| + |
| + // Store updated block counter (x12) |
| + str r8, [sp, #48] |
| + |
| + sub sp, #16 |
| + |
| + // Reload state and do next block |
| + ldmia r14!, {r0-r11} // load x0-x11 |
| + __strd r10, r11, sp, 8 // store x10-x11 before state |
| + ldmia r14, {r10-r12,r14} // load x12-x15 |
| + b .Lnext_block\@ |
| + |
| +.Lxor_slowpath\@: |
| + // Slow path: < 64 bytes remaining, or unaligned input or output buffer. |
| + // We handle it by storing the 64 bytes of keystream to the stack, then |
| + // XOR-ing the needed portion with the data. |
| + |
| + // Allocate keystream buffer |
| + sub sp, #64 |
| + mov r14, sp |
| + |
| + // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN |
| + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. |
| + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. |
| + |
| + // Save keystream for x0-x3 |
| + __ldrd r8, r9, sp, 96 |
| + __ldrd r10, r11, sp, 104 |
| + add X0, X0, r8 |
| + add X1, X1, r9 |
| + add X2, X2, r10 |
| + add X3, X3, r11 |
| + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 |
| + stmia r14!, {X0-X3} |
| + |
| + // Save keystream for x4-x7 |
| + __ldrd r8, r9, sp, 112 |
| + __ldrd r10, r11, sp, 120 |
| + add X4, r8, X4, ror #brot |
| + add X5, r9, X5, ror #brot |
| + add X6, r10, X6, ror #brot |
| + add X7, r11, X7, ror #brot |
| + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 |
| + add r8, sp, #64 |
| + stmia r14!, {X4-X7} |
| + |
| + // Save keystream for x8-x15 |
| + ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) |
| + __ldrd r8, r9, sp, 128 |
| + __ldrd r10, r11, sp, 136 |
| + add r0, r0, r8 // x8 |
| + add r1, r1, r9 // x9 |
| + add r6, r6, r10 // x10 |
| + add r7, r7, r11 // x11 |
| + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 |
| + stmia r14!, {r0,r1,r6,r7} |
| + __ldrd r8, r9, sp, 144 |
| + __ldrd r10, r11, sp, 152 |
| + add r2, r8, r2, ror #drot // x12 |
| + add r3, r9, r3, ror #drot // x13 |
| + add r4, r10, r4, ror #drot // x14 |
| + add r5, r11, r5, ror #drot // x15 |
| + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 |
| + stmia r14, {r2-r5} |
| + |
| + // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN |
| + // Registers: r8 is block counter, r12 is IN. |
| + |
| + ldr r9, [sp, #168] // LEN |
| + ldr r14, [sp, #160] // OUT |
| + cmp r9, #64 |
| + mov r0, sp |
| + movle r1, r9 |
| + movgt r1, #64 |
| + // r1 is number of bytes to XOR, in range [1, 64] |
| + |
| +.if __LINUX_ARM_ARCH__ < 6 |
| + orr r2, r12, r14 |
| + tst r2, #3 // IN or OUT misaligned? |
| + bne .Lxor_next_byte\@ |
| +.endif |
| + |
| + // XOR a word at a time |
| +.rept 16 |
| + subs r1, #4 |
| + blt .Lxor_words_done\@ |
| + ldr r2, [r12], #4 |
| + ldr r3, [r0], #4 |
| + eor r2, r2, r3 |
| + str r2, [r14], #4 |
| +.endr |
| + b .Lxor_slowpath_done\@ |
| +.Lxor_words_done\@: |
| + ands r1, r1, #3 |
| + beq .Lxor_slowpath_done\@ |
| + |
| + // XOR a byte at a time |
| +.Lxor_next_byte\@: |
| + ldrb r2, [r12], #1 |
| + ldrb r3, [r0], #1 |
| + eor r2, r2, r3 |
| + strb r2, [r14], #1 |
| + subs r1, #1 |
| + bne .Lxor_next_byte\@ |
| + |
| +.Lxor_slowpath_done\@: |
| + subs r9, #64 |
| + add sp, #96 |
| + bgt .Lprepare_for_next_block\@ |
| + |
| +.Ldone\@: |
| +.endm // _chacha |
| + |
| +/* |
| + * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], |
| + * const u32 iv[4]); |
| + */ |
| +ENTRY(chacha20_arm) |
| + cmp r2, #0 // len == 0? |
| + reteq lr |
| + |
| + push {r0-r2,r4-r11,lr} |
| + |
| + // Push state x0-x15 onto stack. |
| + // Also store an extra copy of x10-x11 just before the state. |
| + |
| + ldr r4, [sp, #48] // iv |
| + mov r0, sp |
| + sub sp, #80 |
| + |
| + // iv: x12-x15 |
| + ldm r4, {X12,X13,X14,X15} |
| + stmdb r0!, {X12,X13,X14,X15} |
| + |
| + // key: x4-x11 |
| + __ldrd X8_X10, X9_X11, r3, 24 |
| + __strd X8_X10, X9_X11, sp, 8 |
| + stmdb r0!, {X8_X10, X9_X11} |
| + ldm r3, {X4-X9_X11} |
| + stmdb r0!, {X4-X9_X11} |
| + |
| + // constants: x0-x3 |
| + adrl X3, .Lexpand_32byte_k |
| + ldm X3, {X0-X3} |
| + __strd X0, X1, sp, 16 |
| + __strd X2, X3, sp, 24 |
| + |
| + _chacha 20 |
| + |
| + add sp, #76 |
| + pop {r4-r11, pc} |
| +ENDPROC(chacha20_arm) |
| + |
| +/* |
| + * void hchacha20_arm(const u32 state[16], u32 out[8]); |
| + */ |
| +ENTRY(hchacha20_arm) |
| + push {r1,r4-r11,lr} |
| + |
| + mov r14, r0 |
| + ldmia r14!, {r0-r11} // load x0-x11 |
| + push {r10-r11} // store x10-x11 to stack |
| + ldm r14, {r10-r12,r14} // load x12-x15 |
| + sub sp, #8 |
| + |
| + _chacha_permute 20 |
| + |
| + // Skip over (unused0-unused1, x10-x11) |
| + add sp, #16 |
| + |
| + // Fix up rotations of x12-x15 |
| + ror X12, X12, #drot |
| + ror X13, X13, #drot |
| + pop {r4} // load 'out' |
| + ror X14, X14, #drot |
| + ror X15, X15, #drot |
| + |
| + // Store (x0-x3,x12-x15) to 'out' |
| + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} |
| + |
| + pop {r4-r11,pc} |
| +ENDPROC(hchacha20_arm) |
| -- |
| 2.18.2 |
| |
| |
| From 6240a7104c8ec4bf92a7c5b7554f2f504f6c8bfe Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:14 +0100 |
| Subject: [PATCH 008/100] crypto: arm/chacha - remove dependency on generic |
| ChaCha driver |
| |
| commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream. |
| |
| Instead of falling back to the generic ChaCha skcipher driver for |
| non-SIMD cases, use a fast scalar implementation for ARM authored |
| by Eric Biggers. This removes the module dependency on chacha-generic |
| altogether, which also simplifies things when we expose the ChaCha |
| library interface from this module. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/Kconfig | 4 +- |
| arch/arm/crypto/Makefile | 3 +- |
| arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++ |
| arch/arm/crypto/chacha-neon-glue.c | 202 ------------------ |
| arch/arm/crypto/chacha-scalar-core.S | 65 +++--- |
| arch/arm64/crypto/chacha-neon-glue.c | 2 +- |
| 6 files changed, 340 insertions(+), 240 deletions(-) |
| create mode 100644 arch/arm/crypto/chacha-glue.c |
| delete mode 100644 arch/arm/crypto/chacha-neon-glue.c |
| |
| diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig |
| index 043b0b18bf7e..cee414afeabc 100644 |
| |
| |
| @@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE |
| select CRYPTO_HASH |
| |
| config CRYPTO_CHACHA20_NEON |
| - tristate "NEON accelerated ChaCha stream cipher algorithms" |
| - depends on KERNEL_MODE_NEON |
| + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" |
| select CRYPTO_BLKCIPHER |
| - select CRYPTO_CHACHA20 |
| |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" |
| diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile |
| index 4180f3a13512..6b97dffcf90f 100644 |
| |
| |
| @@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o |
| ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o |
| crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o |
| crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o |
| -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o |
| +chacha-neon-y := chacha-scalar-core.o chacha-glue.o |
| +chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o |
| nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o |
| |
| ifdef REGENERATE_ARM_CRYPTO |
| diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c |
| new file mode 100644 |
| index 000000000000..eb40efb3eb34 |
| |
| |
| @@ -0,0 +1,304 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, |
| + * including ChaCha20 (RFC7539) |
| + * |
| + * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| + * Copyright (C) 2015 Martin Willi |
| + */ |
| + |
| +#include <crypto/algapi.h> |
| +#include <crypto/internal/chacha.h> |
| +#include <crypto/internal/simd.h> |
| +#include <crypto/internal/skcipher.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| + |
| +#include <asm/cputype.h> |
| +#include <asm/hwcap.h> |
| +#include <asm/neon.h> |
| +#include <asm/simd.h> |
| + |
| +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| + int nrounds); |
| +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| + int nrounds); |
| +asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); |
| +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| + |
| +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| + const u32 *state, int nrounds); |
| + |
| +static inline bool neon_usable(void) |
| +{ |
| + return crypto_simd_usable(); |
| +} |
| + |
| +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds) |
| +{ |
| + u8 buf[CHACHA_BLOCK_SIZE]; |
| + |
| + while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| + chacha_4block_xor_neon(state, dst, src, nrounds); |
| + bytes -= CHACHA_BLOCK_SIZE * 4; |
| + src += CHACHA_BLOCK_SIZE * 4; |
| + dst += CHACHA_BLOCK_SIZE * 4; |
| + state[12] += 4; |
| + } |
| + while (bytes >= CHACHA_BLOCK_SIZE) { |
| + chacha_block_xor_neon(state, dst, src, nrounds); |
| + bytes -= CHACHA_BLOCK_SIZE; |
| + src += CHACHA_BLOCK_SIZE; |
| + dst += CHACHA_BLOCK_SIZE; |
| + state[12]++; |
| + } |
| + if (bytes) { |
| + memcpy(buf, src, bytes); |
| + chacha_block_xor_neon(state, buf, buf, nrounds); |
| + memcpy(dst, buf, bytes); |
| + } |
| +} |
| + |
| +static int chacha_stream_xor(struct skcipher_request *req, |
| + const struct chacha_ctx *ctx, const u8 *iv, |
| + bool neon) |
| +{ |
| + struct skcipher_walk walk; |
| + u32 state[16]; |
| + int err; |
| + |
| + err = skcipher_walk_virt(&walk, req, false); |
| + |
| + chacha_init_generic(state, ctx->key, iv); |
| + |
| + while (walk.nbytes > 0) { |
| + unsigned int nbytes = walk.nbytes; |
| + |
| + if (nbytes < walk.total) |
| + nbytes = round_down(nbytes, walk.stride); |
| + |
| + if (!neon) { |
| + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
| + nbytes, state, ctx->nrounds); |
| + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); |
| + } else { |
| + kernel_neon_begin(); |
| + chacha_doneon(state, walk.dst.virt.addr, |
| + walk.src.virt.addr, nbytes, ctx->nrounds); |
| + kernel_neon_end(); |
| + } |
| + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static int do_chacha(struct skcipher_request *req, bool neon) |
| +{ |
| + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| + |
| + return chacha_stream_xor(req, ctx, req->iv, neon); |
| +} |
| + |
| +static int chacha_arm(struct skcipher_request *req) |
| +{ |
| + return do_chacha(req, false); |
| +} |
| + |
| +static int chacha_neon(struct skcipher_request *req) |
| +{ |
| + return do_chacha(req, neon_usable()); |
| +} |
| + |
| +static int do_xchacha(struct skcipher_request *req, bool neon) |
| +{ |
| + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| + struct chacha_ctx subctx; |
| + u32 state[16]; |
| + u8 real_iv[16]; |
| + |
| + chacha_init_generic(state, ctx->key, req->iv); |
| + |
| + if (!neon) { |
| + hchacha_block_arm(state, subctx.key, ctx->nrounds); |
| + } else { |
| + kernel_neon_begin(); |
| + hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| + kernel_neon_end(); |
| + } |
| + subctx.nrounds = ctx->nrounds; |
| + |
| + memcpy(&real_iv[0], req->iv + 24, 8); |
| + memcpy(&real_iv[8], req->iv + 16, 8); |
| + return chacha_stream_xor(req, &subctx, real_iv, neon); |
| +} |
| + |
| +static int xchacha_arm(struct skcipher_request *req) |
| +{ |
| + return do_xchacha(req, false); |
| +} |
| + |
| +static int xchacha_neon(struct skcipher_request *req) |
| +{ |
| + return do_xchacha(req, neon_usable()); |
| +} |
| + |
| +static struct skcipher_alg arm_algs[] = { |
| + { |
| + .base.cra_name = "chacha20", |
| + .base.cra_driver_name = "chacha20-arm", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = CHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = chacha_arm, |
| + .decrypt = chacha_arm, |
| + }, { |
| + .base.cra_name = "xchacha20", |
| + .base.cra_driver_name = "xchacha20-arm", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = xchacha_arm, |
| + .decrypt = xchacha_arm, |
| + }, { |
| + .base.cra_name = "xchacha12", |
| + .base.cra_driver_name = "xchacha12-arm", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha12_setkey, |
| + .encrypt = xchacha_arm, |
| + .decrypt = xchacha_arm, |
| + }, |
| +}; |
| + |
| +static struct skcipher_alg neon_algs[] = { |
| + { |
| + .base.cra_name = "chacha20", |
| + .base.cra_driver_name = "chacha20-neon", |
| + .base.cra_priority = 300, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = CHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = chacha_neon, |
| + .decrypt = chacha_neon, |
| + }, { |
| + .base.cra_name = "xchacha20", |
| + .base.cra_driver_name = "xchacha20-neon", |
| + .base.cra_priority = 300, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = xchacha_neon, |
| + .decrypt = xchacha_neon, |
| + }, { |
| + .base.cra_name = "xchacha12", |
| + .base.cra_driver_name = "xchacha12-neon", |
| + .base.cra_priority = 300, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| + .setkey = chacha12_setkey, |
| + .encrypt = xchacha_neon, |
| + .decrypt = xchacha_neon, |
| + } |
| +}; |
| + |
| +static int __init chacha_simd_mod_init(void) |
| +{ |
| + int err; |
| + |
| + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + if (err) |
| + return err; |
| + |
| + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { |
| + int i; |
| + |
| + switch (read_cpuid_part()) { |
| + case ARM_CPU_PART_CORTEX_A7: |
| + case ARM_CPU_PART_CORTEX_A5: |
| + /* |
| + * The Cortex-A7 and Cortex-A5 do not perform well with |
| + * the NEON implementation but do incredibly well with the |
| + * scalar one and use less power. |
| + */ |
| + for (i = 0; i < ARRAY_SIZE(neon_algs); i++) |
| + neon_algs[i].base.cra_priority = 0; |
| + break; |
| + } |
| + |
| + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| + if (err) |
| + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + } |
| + return err; |
| +} |
| + |
| +static void __exit chacha_simd_mod_fini(void) |
| +{ |
| + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| +} |
| + |
| +module_init(chacha_simd_mod_init); |
| +module_exit(chacha_simd_mod_fini); |
| + |
| +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); |
| +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_ALIAS_CRYPTO("chacha20"); |
| +MODULE_ALIAS_CRYPTO("chacha20-arm"); |
| +MODULE_ALIAS_CRYPTO("xchacha20"); |
| +MODULE_ALIAS_CRYPTO("xchacha20-arm"); |
| +MODULE_ALIAS_CRYPTO("xchacha12"); |
| +MODULE_ALIAS_CRYPTO("xchacha12-arm"); |
| +#ifdef CONFIG_KERNEL_MODE_NEON |
| +MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| +MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| +MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| +#endif |
| diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c |
| deleted file mode 100644 |
| index 26576772f18b..000000000000 |
| |
| |
| @@ -1,202 +0,0 @@ |
| -/* |
| - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, |
| - * including ChaCha20 (RFC7539) |
| - * |
| - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| - * |
| - * This program is free software; you can redistribute it and/or modify |
| - * it under the terms of the GNU General Public License version 2 as |
| - * published by the Free Software Foundation. |
| - * |
| - * Based on: |
| - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code |
| - * |
| - * Copyright (C) 2015 Martin Willi |
| - * |
| - * This program is free software; you can redistribute it and/or modify |
| - * it under the terms of the GNU General Public License as published by |
| - * the Free Software Foundation; either version 2 of the License, or |
| - * (at your option) any later version. |
| - */ |
| - |
| -#include <crypto/algapi.h> |
| -#include <crypto/internal/chacha.h> |
| -#include <crypto/internal/simd.h> |
| -#include <crypto/internal/skcipher.h> |
| -#include <linux/kernel.h> |
| -#include <linux/module.h> |
| - |
| -#include <asm/hwcap.h> |
| -#include <asm/neon.h> |
| -#include <asm/simd.h> |
| - |
| -asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| - int nrounds); |
| -asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| - int nrounds); |
| -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| - |
| -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| - unsigned int bytes, int nrounds) |
| -{ |
| - u8 buf[CHACHA_BLOCK_SIZE]; |
| - |
| - while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| - chacha_4block_xor_neon(state, dst, src, nrounds); |
| - bytes -= CHACHA_BLOCK_SIZE * 4; |
| - src += CHACHA_BLOCK_SIZE * 4; |
| - dst += CHACHA_BLOCK_SIZE * 4; |
| - state[12] += 4; |
| - } |
| - while (bytes >= CHACHA_BLOCK_SIZE) { |
| - chacha_block_xor_neon(state, dst, src, nrounds); |
| - bytes -= CHACHA_BLOCK_SIZE; |
| - src += CHACHA_BLOCK_SIZE; |
| - dst += CHACHA_BLOCK_SIZE; |
| - state[12]++; |
| - } |
| - if (bytes) { |
| - memcpy(buf, src, bytes); |
| - chacha_block_xor_neon(state, buf, buf, nrounds); |
| - memcpy(dst, buf, bytes); |
| - } |
| -} |
| - |
| -static int chacha_neon_stream_xor(struct skcipher_request *req, |
| - const struct chacha_ctx *ctx, const u8 *iv) |
| -{ |
| - struct skcipher_walk walk; |
| - u32 state[16]; |
| - int err; |
| - |
| - err = skcipher_walk_virt(&walk, req, false); |
| - |
| - crypto_chacha_init(state, ctx, iv); |
| - |
| - while (walk.nbytes > 0) { |
| - unsigned int nbytes = walk.nbytes; |
| - |
| - if (nbytes < walk.total) |
| - nbytes = round_down(nbytes, walk.stride); |
| - |
| - kernel_neon_begin(); |
| - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, |
| - nbytes, ctx->nrounds); |
| - kernel_neon_end(); |
| - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| - } |
| - |
| - return err; |
| -} |
| - |
| -static int chacha_neon(struct skcipher_request *req) |
| -{ |
| - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| - |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_chacha_crypt(req); |
| - |
| - return chacha_neon_stream_xor(req, ctx, req->iv); |
| -} |
| - |
| -static int xchacha_neon(struct skcipher_request *req) |
| -{ |
| - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| - struct chacha_ctx subctx; |
| - u32 state[16]; |
| - u8 real_iv[16]; |
| - |
| - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| - return crypto_xchacha_crypt(req); |
| - |
| - crypto_chacha_init(state, ctx, req->iv); |
| - |
| - kernel_neon_begin(); |
| - hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| - kernel_neon_end(); |
| - subctx.nrounds = ctx->nrounds; |
| - |
| - memcpy(&real_iv[0], req->iv + 24, 8); |
| - memcpy(&real_iv[8], req->iv + 16, 8); |
| - return chacha_neon_stream_xor(req, &subctx, real_iv); |
| -} |
| - |
| -static struct skcipher_alg algs[] = { |
| - { |
| - .base.cra_name = "chacha20", |
| - .base.cra_driver_name = "chacha20-neon", |
| - .base.cra_priority = 300, |
| - .base.cra_blocksize = 1, |
| - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| - .base.cra_module = THIS_MODULE, |
| - |
| - .min_keysize = CHACHA_KEY_SIZE, |
| - .max_keysize = CHACHA_KEY_SIZE, |
| - .ivsize = CHACHA_IV_SIZE, |
| - .chunksize = CHACHA_BLOCK_SIZE, |
| - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| - .encrypt = chacha_neon, |
| - .decrypt = chacha_neon, |
| - }, { |
| - .base.cra_name = "xchacha20", |
| - .base.cra_driver_name = "xchacha20-neon", |
| - .base.cra_priority = 300, |
| - .base.cra_blocksize = 1, |
| - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| - .base.cra_module = THIS_MODULE, |
| - |
| - .min_keysize = CHACHA_KEY_SIZE, |
| - .max_keysize = CHACHA_KEY_SIZE, |
| - .ivsize = XCHACHA_IV_SIZE, |
| - .chunksize = CHACHA_BLOCK_SIZE, |
| - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| - .encrypt = xchacha_neon, |
| - .decrypt = xchacha_neon, |
| - }, { |
| - .base.cra_name = "xchacha12", |
| - .base.cra_driver_name = "xchacha12-neon", |
| - .base.cra_priority = 300, |
| - .base.cra_blocksize = 1, |
| - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| - .base.cra_module = THIS_MODULE, |
| - |
| - .min_keysize = CHACHA_KEY_SIZE, |
| - .max_keysize = CHACHA_KEY_SIZE, |
| - .ivsize = XCHACHA_IV_SIZE, |
| - .chunksize = CHACHA_BLOCK_SIZE, |
| - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha12_setkey, |
| - .encrypt = xchacha_neon, |
| - .decrypt = xchacha_neon, |
| - } |
| -}; |
| - |
| -static int __init chacha_simd_mod_init(void) |
| -{ |
| - if (!(elf_hwcap & HWCAP_NEON)) |
| - return -ENODEV; |
| - |
| - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| -} |
| - |
| -static void __exit chacha_simd_mod_fini(void) |
| -{ |
| - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| -} |
| - |
| -module_init(chacha_simd_mod_init); |
| -module_exit(chacha_simd_mod_fini); |
| - |
| -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); |
| -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| -MODULE_LICENSE("GPL v2"); |
| -MODULE_ALIAS_CRYPTO("chacha20"); |
| -MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| -MODULE_ALIAS_CRYPTO("xchacha20"); |
| -MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| -MODULE_ALIAS_CRYPTO("xchacha12"); |
| -MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S |
| index 2140319b64a0..2985b80a45b5 100644 |
| |
| |
| @@ -41,14 +41,6 @@ |
| X14 .req r12 |
| X15 .req r14 |
| |
| -.Lexpand_32byte_k: |
| - // "expand 32-byte k" |
| - .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 |
| - |
| -#ifdef __thumb2__ |
| -# define adrl adr |
| -#endif |
| - |
| .macro __rev out, in, t0, t1, t2 |
| .if __LINUX_ARM_ARCH__ >= 6 |
| rev \out, \in |
| @@ -391,61 +383,65 @@ |
| .endm // _chacha |
| |
| /* |
| - * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], |
| - * const u32 iv[4]); |
| + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| + * const u32 *state, int nrounds); |
| */ |
| -ENTRY(chacha20_arm) |
| +ENTRY(chacha_doarm) |
| cmp r2, #0 // len == 0? |
| reteq lr |
| |
| + ldr ip, [sp] |
| + cmp ip, #12 |
| + |
| push {r0-r2,r4-r11,lr} |
| |
| // Push state x0-x15 onto stack. |
| // Also store an extra copy of x10-x11 just before the state. |
| |
| - ldr r4, [sp, #48] // iv |
| - mov r0, sp |
| - sub sp, #80 |
| - |
| - // iv: x12-x15 |
| - ldm r4, {X12,X13,X14,X15} |
| - stmdb r0!, {X12,X13,X14,X15} |
| + add X12, r3, #48 |
| + ldm X12, {X12,X13,X14,X15} |
| + push {X12,X13,X14,X15} |
| + sub sp, sp, #64 |
| |
| - // key: x4-x11 |
| - __ldrd X8_X10, X9_X11, r3, 24 |
| + __ldrd X8_X10, X9_X11, r3, 40 |
| __strd X8_X10, X9_X11, sp, 8 |
| - stmdb r0!, {X8_X10, X9_X11} |
| - ldm r3, {X4-X9_X11} |
| - stmdb r0!, {X4-X9_X11} |
| - |
| - // constants: x0-x3 |
| - adrl X3, .Lexpand_32byte_k |
| - ldm X3, {X0-X3} |
| + __strd X8_X10, X9_X11, sp, 56 |
| + ldm r3, {X0-X9_X11} |
| __strd X0, X1, sp, 16 |
| __strd X2, X3, sp, 24 |
| + __strd X4, X5, sp, 32 |
| + __strd X6, X7, sp, 40 |
| + __strd X8_X10, X9_X11, sp, 48 |
| |
| + beq 1f |
| _chacha 20 |
| |
| - add sp, #76 |
| +0: add sp, #76 |
| pop {r4-r11, pc} |
| -ENDPROC(chacha20_arm) |
| + |
| +1: _chacha 12 |
| + b 0b |
| +ENDPROC(chacha_doarm) |
| |
| /* |
| - * void hchacha20_arm(const u32 state[16], u32 out[8]); |
| + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); |
| */ |
| -ENTRY(hchacha20_arm) |
| +ENTRY(hchacha_block_arm) |
| push {r1,r4-r11,lr} |
| |
| + cmp r2, #12 // ChaCha12 ? |
| + |
| mov r14, r0 |
| ldmia r14!, {r0-r11} // load x0-x11 |
| push {r10-r11} // store x10-x11 to stack |
| ldm r14, {r10-r12,r14} // load x12-x15 |
| sub sp, #8 |
| |
| + beq 1f |
| _chacha_permute 20 |
| |
| // Skip over (unused0-unused1, x10-x11) |
| - add sp, #16 |
| +0: add sp, #16 |
| |
| // Fix up rotations of x12-x15 |
| ror X12, X12, #drot |
| @@ -458,4 +454,7 @@ ENTRY(hchacha20_arm) |
| stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} |
| |
| pop {r4-r11,pc} |
| -ENDPROC(hchacha20_arm) |
| + |
| +1: _chacha_permute 12 |
| + b 0b |
| +ENDPROC(hchacha_block_arm) |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index 46cd4297761c..b08029d7bde6 100644 |
| |
| |
| @@ -1,5 +1,5 @@ |
| /* |
| - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, |
| + * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, |
| * including ChaCha20 (RFC7539) |
| * |
| * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| -- |
| 2.18.2 |
| |
| |
| From d3944704ef3870fb15fc8801ef9a71f00b34babd Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:15 +0100 |
| Subject: [PATCH 009/100] crypto: arm/chacha - expose ARM ChaCha routine as |
| library function |
| |
| commit a44a3430d71bad4ee56788a59fff099b291ea54c upstream. |
| |
| Expose the accelerated NEON ChaCha routine directly as a symbol |
| export so that users of the ChaCha library API can use it directly. |
| |
| Given that calls into the library API will always go through the |
| routines in this module if it is enabled, switch to static keys |
| to select the optimal implementation available (which may be none |
| at all, in which case we defer to the generic implementation for |
| all invocations). |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/Kconfig | 1 + |
| arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++- |
| 2 files changed, 41 insertions(+), 1 deletion(-) |
| |
| diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig |
| index cee414afeabc..b25ffec04417 100644 |
| |
| |
| @@ -129,6 +129,7 @@ config CRYPTO_CRC32_ARM_CE |
| config CRYPTO_CHACHA20_NEON |
| tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" |
| select CRYPTO_BLKCIPHER |
| + select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" |
| diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c |
| index eb40efb3eb34..3f0c057aa050 100644 |
| |
| |
| @@ -11,6 +11,7 @@ |
| #include <crypto/internal/chacha.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| +#include <linux/jump_label.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| |
| @@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| const u32 *state, int nrounds); |
| |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); |
| + |
| static inline bool neon_usable(void) |
| { |
| - return crypto_simd_usable(); |
| + return static_branch_likely(&use_neon) && crypto_simd_usable(); |
| } |
| |
| static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| @@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| } |
| } |
| |
| +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| +{ |
| + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { |
| + hchacha_block_arm(state, stream, nrounds); |
| + } else { |
| + kernel_neon_begin(); |
| + hchacha_block_neon(state, stream, nrounds); |
| + kernel_neon_end(); |
| + } |
| +} |
| +EXPORT_SYMBOL(hchacha_block_arch); |
| + |
| +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + chacha_init_generic(state, key, iv); |
| +} |
| +EXPORT_SYMBOL(chacha_init_arch); |
| + |
| +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| + int nrounds) |
| +{ |
| + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || |
| + bytes <= CHACHA_BLOCK_SIZE) { |
| + chacha_doarm(dst, src, bytes, state, nrounds); |
| + state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); |
| + return; |
| + } |
| + |
| + kernel_neon_begin(); |
| + chacha_doneon(state, dst, src, bytes, nrounds); |
| + kernel_neon_end(); |
| +} |
| +EXPORT_SYMBOL(chacha_crypt_arch); |
| + |
| static int chacha_stream_xor(struct skcipher_request *req, |
| const struct chacha_ctx *ctx, const u8 *iv, |
| bool neon) |
| @@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(void) |
| for (i = 0; i < ARRAY_SIZE(neon_algs); i++) |
| neon_algs[i].base.cra_priority = 0; |
| break; |
| + default: |
| + static_branch_enable(&use_neon); |
| } |
| |
| err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| -- |
| 2.18.2 |
| |
| |
| From f2718bb79f639d39f1f2ec04b6746ed5e8e70af9 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:16 +0100 |
| Subject: [PATCH 010/100] crypto: mips/chacha - import 32r2 ChaCha code from |
| Zinc |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 upstream. |
| |
| This imports the accelerated MIPS 32r2 ChaCha20 implementation from the |
| Zinc patch set. |
| |
| Co-developed-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++ |
| 1 file changed, 424 insertions(+) |
| create mode 100644 arch/mips/crypto/chacha-core.S |
| |
| diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S |
| new file mode 100644 |
| index 000000000000..a81e02db95e7 |
| |
| |
| @@ -0,0 +1,424 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| +/* |
| + * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#define MASK_U32 0x3c |
| +#define CHACHA20_BLOCK_SIZE 64 |
| +#define STACK_SIZE 32 |
| + |
| +#define X0 $t0 |
| +#define X1 $t1 |
| +#define X2 $t2 |
| +#define X3 $t3 |
| +#define X4 $t4 |
| +#define X5 $t5 |
| +#define X6 $t6 |
| +#define X7 $t7 |
| +#define X8 $t8 |
| +#define X9 $t9 |
| +#define X10 $v1 |
| +#define X11 $s6 |
| +#define X12 $s5 |
| +#define X13 $s4 |
| +#define X14 $s3 |
| +#define X15 $s2 |
| +/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ |
| +#define T0 $s1 |
| +#define T1 $s0 |
| +#define T(n) T ## n |
| +#define X(n) X ## n |
| + |
| +/* Input arguments */ |
| +#define STATE $a0 |
| +#define OUT $a1 |
| +#define IN $a2 |
| +#define BYTES $a3 |
| + |
| +/* Output argument */ |
| +/* NONCE[0] is kept in a register and not in memory. |
| + * We don't want to touch original value in memory. |
| + * Must be incremented every loop iteration. |
| + */ |
| +#define NONCE_0 $v0 |
| + |
| +/* SAVED_X and SAVED_CA are set in the jump table. |
| + * Use regs which are overwritten on exit so we don't leak clear data. |
| + * They are used to handle the last bytes, which are not a multiple of 4. |
| + */ |
| +#define SAVED_X X15 |
| +#define SAVED_CA $s7 |
| + |
| +#define IS_UNALIGNED $s7 |
| + |
| +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
| +#define MSB 0 |
| +#define LSB 3 |
| +#define ROTx rotl |
| +#define ROTR(n) rotr n, 24 |
| +#define CPU_TO_LE32(n) \ |
| + wsbh n; \ |
| + rotr n, 16; |
| +#else |
| +#define MSB 3 |
| +#define LSB 0 |
| +#define ROTx rotr |
| +#define CPU_TO_LE32(n) |
| +#define ROTR(n) |
| +#endif |
| + |
| +#define FOR_EACH_WORD(x) \ |
| + x( 0); \ |
| + x( 1); \ |
| + x( 2); \ |
| + x( 3); \ |
| + x( 4); \ |
| + x( 5); \ |
| + x( 6); \ |
| + x( 7); \ |
| + x( 8); \ |
| + x( 9); \ |
| + x(10); \ |
| + x(11); \ |
| + x(12); \ |
| + x(13); \ |
| + x(14); \ |
| + x(15); |
| + |
| +#define FOR_EACH_WORD_REV(x) \ |
| + x(15); \ |
| + x(14); \ |
| + x(13); \ |
| + x(12); \ |
| + x(11); \ |
| + x(10); \ |
| + x( 9); \ |
| + x( 8); \ |
| + x( 7); \ |
| + x( 6); \ |
| + x( 5); \ |
| + x( 4); \ |
| + x( 3); \ |
| + x( 2); \ |
| + x( 1); \ |
| + x( 0); |
| + |
| +#define PLUS_ONE_0 1 |
| +#define PLUS_ONE_1 2 |
| +#define PLUS_ONE_2 3 |
| +#define PLUS_ONE_3 4 |
| +#define PLUS_ONE_4 5 |
| +#define PLUS_ONE_5 6 |
| +#define PLUS_ONE_6 7 |
| +#define PLUS_ONE_7 8 |
| +#define PLUS_ONE_8 9 |
| +#define PLUS_ONE_9 10 |
| +#define PLUS_ONE_10 11 |
| +#define PLUS_ONE_11 12 |
| +#define PLUS_ONE_12 13 |
| +#define PLUS_ONE_13 14 |
| +#define PLUS_ONE_14 15 |
| +#define PLUS_ONE_15 16 |
| +#define PLUS_ONE(x) PLUS_ONE_ ## x |
| +#define _CONCAT3(a,b,c) a ## b ## c |
| +#define CONCAT3(a,b,c) _CONCAT3(a,b,c) |
| + |
| +#define STORE_UNALIGNED(x) \ |
| +CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ |
| + .if (x != 12); \ |
| + lw T0, (x*4)(STATE); \ |
| + .endif; \ |
| + lwl T1, (x*4)+MSB ## (IN); \ |
| + lwr T1, (x*4)+LSB ## (IN); \ |
| + .if (x == 12); \ |
| + addu X ## x, NONCE_0; \ |
| + .else; \ |
| + addu X ## x, T0; \ |
| + .endif; \ |
| + CPU_TO_LE32(X ## x); \ |
| + xor X ## x, T1; \ |
| + swl X ## x, (x*4)+MSB ## (OUT); \ |
| + swr X ## x, (x*4)+LSB ## (OUT); |
| + |
| +#define STORE_ALIGNED(x) \ |
| +CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| + .if (x != 12); \ |
| + lw T0, (x*4)(STATE); \ |
| + .endif; \ |
| + lw T1, (x*4) ## (IN); \ |
| + .if (x == 12); \ |
| + addu X ## x, NONCE_0; \ |
| + .else; \ |
| + addu X ## x, T0; \ |
| + .endif; \ |
| + CPU_TO_LE32(X ## x); \ |
| + xor X ## x, T1; \ |
| + sw X ## x, (x*4) ## (OUT); |
| + |
| +/* Jump table macro. |
| + * Used for setup and handling the last bytes, which are not a multiple of 4. |
| + * X15 is free to store Xn |
| + * Every jumptable entry must be equal in size. |
| + */ |
| +#define JMPTBL_ALIGNED(x) \ |
| +.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ |
| + .set noreorder; \ |
| + b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ |
| + .if (x == 12); \ |
| + addu SAVED_X, X ## x, NONCE_0; \ |
| + .else; \ |
| + addu SAVED_X, X ## x, SAVED_CA; \ |
| + .endif; \ |
| + .set reorder |
| + |
| +#define JMPTBL_UNALIGNED(x) \ |
| +.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ |
| + .set noreorder; \ |
| + b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ |
| + .if (x == 12); \ |
| + addu SAVED_X, X ## x, NONCE_0; \ |
| + .else; \ |
| + addu SAVED_X, X ## x, SAVED_CA; \ |
| + .endif; \ |
| + .set reorder |
| + |
| +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ |
| + addu X(A), X(K); \ |
| + addu X(B), X(L); \ |
| + addu X(C), X(M); \ |
| + addu X(D), X(N); \ |
| + xor X(V), X(A); \ |
| + xor X(W), X(B); \ |
| + xor X(Y), X(C); \ |
| + xor X(Z), X(D); \ |
| + rotl X(V), S; \ |
| + rotl X(W), S; \ |
| + rotl X(Y), S; \ |
| + rotl X(Z), S; |
| + |
| +.text |
| +.set reorder |
| +.set noat |
| +.globl chacha20_mips |
| +.ent chacha20_mips |
| +chacha20_mips: |
| + .frame $sp, STACK_SIZE, $ra |
| + |
| + addiu $sp, -STACK_SIZE |
| + |
| + /* Return bytes = 0. */ |
| + beqz BYTES, .Lchacha20_mips_end |
| + |
| + lw NONCE_0, 48(STATE) |
| + |
| + /* Save s0-s7 */ |
| + sw $s0, 0($sp) |
| + sw $s1, 4($sp) |
| + sw $s2, 8($sp) |
| + sw $s3, 12($sp) |
| + sw $s4, 16($sp) |
| + sw $s5, 20($sp) |
| + sw $s6, 24($sp) |
| + sw $s7, 28($sp) |
| + |
| + /* Test IN or OUT is unaligned. |
| + * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 |
| + */ |
| + or IS_UNALIGNED, IN, OUT |
| + andi IS_UNALIGNED, 0x3 |
| + |
| + /* Set number of rounds */ |
| + li $at, 20 |
| + |
| + b .Lchacha20_rounds_start |
| + |
| +.align 4 |
| +.Loop_chacha20_rounds: |
| + addiu IN, CHACHA20_BLOCK_SIZE |
| + addiu OUT, CHACHA20_BLOCK_SIZE |
| + addiu NONCE_0, 1 |
| + |
| +.Lchacha20_rounds_start: |
| + lw X0, 0(STATE) |
| + lw X1, 4(STATE) |
| + lw X2, 8(STATE) |
| + lw X3, 12(STATE) |
| + |
| + lw X4, 16(STATE) |
| + lw X5, 20(STATE) |
| + lw X6, 24(STATE) |
| + lw X7, 28(STATE) |
| + lw X8, 32(STATE) |
| + lw X9, 36(STATE) |
| + lw X10, 40(STATE) |
| + lw X11, 44(STATE) |
| + |
| + move X12, NONCE_0 |
| + lw X13, 52(STATE) |
| + lw X14, 56(STATE) |
| + lw X15, 60(STATE) |
| + |
| +.Loop_chacha20_xor_rounds: |
| + addiu $at, -2 |
| + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); |
| + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); |
| + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); |
| + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); |
| + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); |
| + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); |
| + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); |
| + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); |
| + bnez $at, .Loop_chacha20_xor_rounds |
| + |
| + addiu BYTES, -(CHACHA20_BLOCK_SIZE) |
| + |
| + /* Is data src/dst unaligned? Jump */ |
| + bnez IS_UNALIGNED, .Loop_chacha20_unaligned |
| + |
| + /* Set number rounds here to fill delayslot. */ |
| + li $at, 20 |
| + |
| + /* BYTES < 0, it has no full block. */ |
| + bltz BYTES, .Lchacha20_mips_no_full_block_aligned |
| + |
| + FOR_EACH_WORD_REV(STORE_ALIGNED) |
| + |
| + /* BYTES > 0? Loop again. */ |
| + bgtz BYTES, .Loop_chacha20_rounds |
| + |
| + /* Place this here to fill delay slot */ |
| + addiu NONCE_0, 1 |
| + |
| + /* BYTES < 0? Handle last bytes */ |
| + bltz BYTES, .Lchacha20_mips_xor_bytes |
| + |
| +.Lchacha20_mips_xor_done: |
| + /* Restore used registers */ |
| + lw $s0, 0($sp) |
| + lw $s1, 4($sp) |
| + lw $s2, 8($sp) |
| + lw $s3, 12($sp) |
| + lw $s4, 16($sp) |
| + lw $s5, 20($sp) |
| + lw $s6, 24($sp) |
| + lw $s7, 28($sp) |
| + |
| + /* Write NONCE_0 back to right location in state */ |
| + sw NONCE_0, 48(STATE) |
| + |
| +.Lchacha20_mips_end: |
| + addiu $sp, STACK_SIZE |
| + jr $ra |
| + |
| +.Lchacha20_mips_no_full_block_aligned: |
| + /* Restore the offset on BYTES */ |
| + addiu BYTES, CHACHA20_BLOCK_SIZE |
| + |
| + /* Get number of full WORDS */ |
| + andi $at, BYTES, MASK_U32 |
| + |
| + /* Load upper half of jump table addr */ |
| + lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) |
| + |
| + /* Calculate lower half jump table offset */ |
| + ins T0, $at, 1, 6 |
| + |
| + /* Add offset to STATE */ |
| + addu T1, STATE, $at |
| + |
| + /* Add lower half jump table addr */ |
| + addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) |
| + |
| + /* Read value from STATE */ |
| + lw SAVED_CA, 0(T1) |
| + |
| + /* Store remaining bytecounter as negative value */ |
| + subu BYTES, $at, BYTES |
| + |
| + jr T0 |
| + |
| + /* Jump table */ |
| + FOR_EACH_WORD(JMPTBL_ALIGNED) |
| + |
| + |
| +.Loop_chacha20_unaligned: |
| + /* Set number rounds here to fill delayslot. */ |
| + li $at, 20 |
| + |
| + /* BYTES > 0, it has no full block. */ |
| + bltz BYTES, .Lchacha20_mips_no_full_block_unaligned |
| + |
| + FOR_EACH_WORD_REV(STORE_UNALIGNED) |
| + |
| + /* BYTES > 0? Loop again. */ |
| + bgtz BYTES, .Loop_chacha20_rounds |
| + |
| + /* Write NONCE_0 back to right location in state */ |
| + sw NONCE_0, 48(STATE) |
| + |
| + .set noreorder |
| + /* Fall through to byte handling */ |
| + bgez BYTES, .Lchacha20_mips_xor_done |
| +.Lchacha20_mips_xor_unaligned_0_b: |
| +.Lchacha20_mips_xor_aligned_0_b: |
| + /* Place this here to fill delay slot */ |
| + addiu NONCE_0, 1 |
| + .set reorder |
| + |
| +.Lchacha20_mips_xor_bytes: |
| + addu IN, $at |
| + addu OUT, $at |
| + /* First byte */ |
| + lbu T1, 0(IN) |
| + addiu $at, BYTES, 1 |
| + CPU_TO_LE32(SAVED_X) |
| + ROTR(SAVED_X) |
| + xor T1, SAVED_X |
| + sb T1, 0(OUT) |
| + beqz $at, .Lchacha20_mips_xor_done |
| + /* Second byte */ |
| + lbu T1, 1(IN) |
| + addiu $at, BYTES, 2 |
| + ROTx SAVED_X, 8 |
| + xor T1, SAVED_X |
| + sb T1, 1(OUT) |
| + beqz $at, .Lchacha20_mips_xor_done |
| + /* Third byte */ |
| + lbu T1, 2(IN) |
| + ROTx SAVED_X, 8 |
| + xor T1, SAVED_X |
| + sb T1, 2(OUT) |
| + b .Lchacha20_mips_xor_done |
| + |
| +.Lchacha20_mips_no_full_block_unaligned: |
| + /* Restore the offset on BYTES */ |
| + addiu BYTES, CHACHA20_BLOCK_SIZE |
| + |
| + /* Get number of full WORDS */ |
| + andi $at, BYTES, MASK_U32 |
| + |
| + /* Load upper half of jump table addr */ |
| + lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) |
| + |
| + /* Calculate lower half jump table offset */ |
| + ins T0, $at, 1, 6 |
| + |
| + /* Add offset to STATE */ |
| + addu T1, STATE, $at |
| + |
| + /* Add lower half jump table addr */ |
| + addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) |
| + |
| + /* Read value from STATE */ |
| + lw SAVED_CA, 0(T1) |
| + |
| + /* Store remaining bytecounter as negative value */ |
| + subu BYTES, $at, BYTES |
| + |
| + jr T0 |
| + |
| + /* Jump table */ |
| + FOR_EACH_WORD(JMPTBL_UNALIGNED) |
| +.end chacha20_mips |
| +.set at |
| -- |
| 2.18.2 |
| |
| |
| From a171197a29c587a90f42392a3e23afc7b790576b Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:17 +0100 |
| Subject: [PATCH 011/100] crypto: mips/chacha - wire up accelerated 32r2 code |
| from Zinc |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream. |
| |
| This integrates the accelerated MIPS 32r2 implementation of ChaCha |
| into both the API and library interfaces of the kernel crypto stack. |
| |
| The significance of this is that, in addition to becoming available |
| as an accelerated library implementation, it can also be used by |
| existing crypto API code such as Adiantum (for block encryption on |
| ultra low performance cores) or IPsec using chacha20poly1305. These |
| are use cases that have already opted into using the abstract crypto |
| API. In order to support Adiantum, the core assembler routine has |
| been adapted to take the round count as a function argument rather |
| than hardcoding it to 20. |
| |
| Co-developed-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/mips/Makefile | 2 +- |
| arch/mips/crypto/Makefile | 4 + |
| arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++--------- |
| arch/mips/crypto/chacha-glue.c | 150 +++++++++++++++++++++++++++++++ |
| crypto/Kconfig | 6 ++ |
| 5 files changed, 277 insertions(+), 44 deletions(-) |
| create mode 100644 arch/mips/crypto/chacha-glue.c |
| |
| diff --git a/arch/mips/Makefile b/arch/mips/Makefile |
| index 5403a91ce098..573409c85c81 100644 |
| |
| |
| @@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/ |
| # See arch/mips/Kbuild for content of core part of the kernel |
| core-y += arch/mips/ |
| |
| -drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ |
| +drivers-y += arch/mips/crypto/ |
| drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ |
| |
| # suspend and hibernation support |
| diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile |
| index e07aca572c2e..b528b9d300f1 100644 |
| |
| |
| @@ -4,3 +4,7 @@ |
| # |
| |
| obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o |
| + |
| +obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o |
| +chacha-mips-y := chacha-core.o chacha-glue.o |
| +AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots |
| diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S |
| index a81e02db95e7..5755f69cfe00 100644 |
| |
| |
| @@ -125,7 +125,7 @@ |
| #define CONCAT3(a,b,c) _CONCAT3(a,b,c) |
| |
| #define STORE_UNALIGNED(x) \ |
| -CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ |
| +CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ |
| .if (x != 12); \ |
| lw T0, (x*4)(STATE); \ |
| .endif; \ |
| @@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ |
| swr X ## x, (x*4)+LSB ## (OUT); |
| |
| #define STORE_ALIGNED(x) \ |
| -CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| +CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| .if (x != 12); \ |
| lw T0, (x*4)(STATE); \ |
| .endif; \ |
| @@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| * Every jumptable entry must be equal in size. |
| */ |
| #define JMPTBL_ALIGNED(x) \ |
| -.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ |
| +.Lchacha_mips_jmptbl_aligned_ ## x: ; \ |
| .set noreorder; \ |
| - b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ |
| + b .Lchacha_mips_xor_aligned_ ## x ## _b; \ |
| .if (x == 12); \ |
| addu SAVED_X, X ## x, NONCE_0; \ |
| .else; \ |
| @@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| .set reorder |
| |
| #define JMPTBL_UNALIGNED(x) \ |
| -.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ |
| +.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ |
| .set noreorder; \ |
| - b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ |
| + b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ |
| .if (x == 12); \ |
| addu SAVED_X, X ## x, NONCE_0; \ |
| .else; \ |
| @@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ |
| .text |
| .set reorder |
| .set noat |
| -.globl chacha20_mips |
| -.ent chacha20_mips |
| -chacha20_mips: |
| +.globl chacha_crypt_arch |
| +.ent chacha_crypt_arch |
| +chacha_crypt_arch: |
| .frame $sp, STACK_SIZE, $ra |
| |
| + /* Load number of rounds */ |
| + lw $at, 16($sp) |
| + |
| addiu $sp, -STACK_SIZE |
| |
| /* Return bytes = 0. */ |
| - beqz BYTES, .Lchacha20_mips_end |
| + beqz BYTES, .Lchacha_mips_end |
| |
| lw NONCE_0, 48(STATE) |
| |
| @@ -228,18 +231,15 @@ chacha20_mips: |
| or IS_UNALIGNED, IN, OUT |
| andi IS_UNALIGNED, 0x3 |
| |
| - /* Set number of rounds */ |
| - li $at, 20 |
| - |
| - b .Lchacha20_rounds_start |
| + b .Lchacha_rounds_start |
| |
| .align 4 |
| -.Loop_chacha20_rounds: |
| +.Loop_chacha_rounds: |
| addiu IN, CHACHA20_BLOCK_SIZE |
| addiu OUT, CHACHA20_BLOCK_SIZE |
| addiu NONCE_0, 1 |
| |
| -.Lchacha20_rounds_start: |
| +.Lchacha_rounds_start: |
| lw X0, 0(STATE) |
| lw X1, 4(STATE) |
| lw X2, 8(STATE) |
| @@ -259,7 +259,7 @@ chacha20_mips: |
| lw X14, 56(STATE) |
| lw X15, 60(STATE) |
| |
| -.Loop_chacha20_xor_rounds: |
| +.Loop_chacha_xor_rounds: |
| addiu $at, -2 |
| AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); |
| AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); |
| @@ -269,31 +269,31 @@ chacha20_mips: |
| AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); |
| AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); |
| AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); |
| - bnez $at, .Loop_chacha20_xor_rounds |
| + bnez $at, .Loop_chacha_xor_rounds |
| |
| addiu BYTES, -(CHACHA20_BLOCK_SIZE) |
| |
| /* Is data src/dst unaligned? Jump */ |
| - bnez IS_UNALIGNED, .Loop_chacha20_unaligned |
| + bnez IS_UNALIGNED, .Loop_chacha_unaligned |
| |
| /* Set number rounds here to fill delayslot. */ |
| - li $at, 20 |
| + lw $at, (STACK_SIZE+16)($sp) |
| |
| /* BYTES < 0, it has no full block. */ |
| - bltz BYTES, .Lchacha20_mips_no_full_block_aligned |
| + bltz BYTES, .Lchacha_mips_no_full_block_aligned |
| |
| FOR_EACH_WORD_REV(STORE_ALIGNED) |
| |
| /* BYTES > 0? Loop again. */ |
| - bgtz BYTES, .Loop_chacha20_rounds |
| + bgtz BYTES, .Loop_chacha_rounds |
| |
| /* Place this here to fill delay slot */ |
| addiu NONCE_0, 1 |
| |
| /* BYTES < 0? Handle last bytes */ |
| - bltz BYTES, .Lchacha20_mips_xor_bytes |
| + bltz BYTES, .Lchacha_mips_xor_bytes |
| |
| -.Lchacha20_mips_xor_done: |
| +.Lchacha_mips_xor_done: |
| /* Restore used registers */ |
| lw $s0, 0($sp) |
| lw $s1, 4($sp) |
| @@ -307,11 +307,11 @@ chacha20_mips: |
| /* Write NONCE_0 back to right location in state */ |
| sw NONCE_0, 48(STATE) |
| |
| -.Lchacha20_mips_end: |
| +.Lchacha_mips_end: |
| addiu $sp, STACK_SIZE |
| jr $ra |
| |
| -.Lchacha20_mips_no_full_block_aligned: |
| +.Lchacha_mips_no_full_block_aligned: |
| /* Restore the offset on BYTES */ |
| addiu BYTES, CHACHA20_BLOCK_SIZE |
| |
| @@ -319,7 +319,7 @@ chacha20_mips: |
| andi $at, BYTES, MASK_U32 |
| |
| /* Load upper half of jump table addr */ |
| - lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) |
| + lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) |
| |
| /* Calculate lower half jump table offset */ |
| ins T0, $at, 1, 6 |
| @@ -328,7 +328,7 @@ chacha20_mips: |
| addu T1, STATE, $at |
| |
| /* Add lower half jump table addr */ |
| - addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) |
| + addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) |
| |
| /* Read value from STATE */ |
| lw SAVED_CA, 0(T1) |
| @@ -342,31 +342,31 @@ chacha20_mips: |
| FOR_EACH_WORD(JMPTBL_ALIGNED) |
| |
| |
| -.Loop_chacha20_unaligned: |
| +.Loop_chacha_unaligned: |
| /* Set number rounds here to fill delayslot. */ |
| - li $at, 20 |
| + lw $at, (STACK_SIZE+16)($sp) |
| |
| /* BYTES > 0, it has no full block. */ |
| - bltz BYTES, .Lchacha20_mips_no_full_block_unaligned |
| + bltz BYTES, .Lchacha_mips_no_full_block_unaligned |
| |
| FOR_EACH_WORD_REV(STORE_UNALIGNED) |
| |
| /* BYTES > 0? Loop again. */ |
| - bgtz BYTES, .Loop_chacha20_rounds |
| + bgtz BYTES, .Loop_chacha_rounds |
| |
| /* Write NONCE_0 back to right location in state */ |
| sw NONCE_0, 48(STATE) |
| |
| .set noreorder |
| /* Fall through to byte handling */ |
| - bgez BYTES, .Lchacha20_mips_xor_done |
| -.Lchacha20_mips_xor_unaligned_0_b: |
| -.Lchacha20_mips_xor_aligned_0_b: |
| + bgez BYTES, .Lchacha_mips_xor_done |
| +.Lchacha_mips_xor_unaligned_0_b: |
| +.Lchacha_mips_xor_aligned_0_b: |
| /* Place this here to fill delay slot */ |
| addiu NONCE_0, 1 |
| .set reorder |
| |
| -.Lchacha20_mips_xor_bytes: |
| +.Lchacha_mips_xor_bytes: |
| addu IN, $at |
| addu OUT, $at |
| /* First byte */ |
| @@ -376,22 +376,22 @@ chacha20_mips: |
| ROTR(SAVED_X) |
| xor T1, SAVED_X |
| sb T1, 0(OUT) |
| - beqz $at, .Lchacha20_mips_xor_done |
| + beqz $at, .Lchacha_mips_xor_done |
| /* Second byte */ |
| lbu T1, 1(IN) |
| addiu $at, BYTES, 2 |
| ROTx SAVED_X, 8 |
| xor T1, SAVED_X |
| sb T1, 1(OUT) |
| - beqz $at, .Lchacha20_mips_xor_done |
| + beqz $at, .Lchacha_mips_xor_done |
| /* Third byte */ |
| lbu T1, 2(IN) |
| ROTx SAVED_X, 8 |
| xor T1, SAVED_X |
| sb T1, 2(OUT) |
| - b .Lchacha20_mips_xor_done |
| + b .Lchacha_mips_xor_done |
| |
| -.Lchacha20_mips_no_full_block_unaligned: |
| +.Lchacha_mips_no_full_block_unaligned: |
| /* Restore the offset on BYTES */ |
| addiu BYTES, CHACHA20_BLOCK_SIZE |
| |
| @@ -399,7 +399,7 @@ chacha20_mips: |
| andi $at, BYTES, MASK_U32 |
| |
| /* Load upper half of jump table addr */ |
| - lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) |
| + lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) |
| |
| /* Calculate lower half jump table offset */ |
| ins T0, $at, 1, 6 |
| @@ -408,7 +408,7 @@ chacha20_mips: |
| addu T1, STATE, $at |
| |
| /* Add lower half jump table addr */ |
| - addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) |
| + addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) |
| |
| /* Read value from STATE */ |
| lw SAVED_CA, 0(T1) |
| @@ -420,5 +420,78 @@ chacha20_mips: |
| |
| /* Jump table */ |
| FOR_EACH_WORD(JMPTBL_UNALIGNED) |
| -.end chacha20_mips |
| +.end chacha_crypt_arch |
| +.set at |
| + |
| +/* Input arguments |
| + * STATE $a0 |
| + * OUT $a1 |
| + * NROUND $a2 |
| + */ |
| + |
| +#undef X12 |
| +#undef X13 |
| +#undef X14 |
| +#undef X15 |
| + |
| +#define X12 $a3 |
| +#define X13 $at |
| +#define X14 $v0 |
| +#define X15 STATE |
| + |
| +.set noat |
| +.globl hchacha_block_arch |
| +.ent hchacha_block_arch |
| +hchacha_block_arch: |
| + .frame $sp, STACK_SIZE, $ra |
| + |
| + addiu $sp, -STACK_SIZE |
| + |
| + /* Save X11(s6) */ |
| + sw X11, 0($sp) |
| + |
| + lw X0, 0(STATE) |
| + lw X1, 4(STATE) |
| + lw X2, 8(STATE) |
| + lw X3, 12(STATE) |
| + lw X4, 16(STATE) |
| + lw X5, 20(STATE) |
| + lw X6, 24(STATE) |
| + lw X7, 28(STATE) |
| + lw X8, 32(STATE) |
| + lw X9, 36(STATE) |
| + lw X10, 40(STATE) |
| + lw X11, 44(STATE) |
| + lw X12, 48(STATE) |
| + lw X13, 52(STATE) |
| + lw X14, 56(STATE) |
| + lw X15, 60(STATE) |
| + |
| +.Loop_hchacha_xor_rounds: |
| + addiu $a2, -2 |
| + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); |
| + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); |
| + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); |
| + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); |
| + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); |
| + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); |
| + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); |
| + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); |
| + bnez $a2, .Loop_hchacha_xor_rounds |
| + |
| + /* Restore used register */ |
| + lw X11, 0($sp) |
| + |
| + sw X0, 0(OUT) |
| + sw X1, 4(OUT) |
| + sw X2, 8(OUT) |
| + sw X3, 12(OUT) |
| + sw X12, 16(OUT) |
| + sw X13, 20(OUT) |
| + sw X14, 24(OUT) |
| + sw X15, 28(OUT) |
| + |
| + addiu $sp, STACK_SIZE |
| + jr $ra |
| +.end hchacha_block_arch |
| .set at |
| diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c |
| new file mode 100644 |
| index 000000000000..779e399c9bef |
| |
| |
| @@ -0,0 +1,150 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * MIPS accelerated ChaCha and XChaCha stream ciphers, |
| + * including ChaCha20 (RFC7539) |
| + * |
| + * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| + */ |
| + |
| +#include <asm/byteorder.h> |
| +#include <crypto/algapi.h> |
| +#include <crypto/internal/chacha.h> |
| +#include <crypto/internal/skcipher.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| + |
| +asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, |
| + unsigned int bytes, int nrounds); |
| +EXPORT_SYMBOL(chacha_crypt_arch); |
| + |
| +asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); |
| +EXPORT_SYMBOL(hchacha_block_arch); |
| + |
| +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| +{ |
| + chacha_init_generic(state, key, iv); |
| +} |
| +EXPORT_SYMBOL(chacha_init_arch); |
| + |
| +static int chacha_mips_stream_xor(struct skcipher_request *req, |
| + const struct chacha_ctx *ctx, const u8 *iv) |
| +{ |
| + struct skcipher_walk walk; |
| + u32 state[16]; |
| + int err; |
| + |
| + err = skcipher_walk_virt(&walk, req, false); |
| + |
| + chacha_init_generic(state, ctx->key, iv); |
| + |
| + while (walk.nbytes > 0) { |
| + unsigned int nbytes = walk.nbytes; |
| + |
| + if (nbytes < walk.total) |
| + nbytes = round_down(nbytes, walk.stride); |
| + |
| + chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, |
| + nbytes, ctx->nrounds); |
| + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static int chacha_mips(struct skcipher_request *req) |
| +{ |
| + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| + |
| + return chacha_mips_stream_xor(req, ctx, req->iv); |
| +} |
| + |
| +static int xchacha_mips(struct skcipher_request *req) |
| +{ |
| + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| + struct chacha_ctx subctx; |
| + u32 state[16]; |
| + u8 real_iv[16]; |
| + |
| + chacha_init_generic(state, ctx->key, req->iv); |
| + |
| + hchacha_block(state, subctx.key, ctx->nrounds); |
| + subctx.nrounds = ctx->nrounds; |
| + |
| + memcpy(&real_iv[0], req->iv + 24, 8); |
| + memcpy(&real_iv[8], req->iv + 16, 8); |
| + return chacha_mips_stream_xor(req, &subctx, real_iv); |
| +} |
| + |
| +static struct skcipher_alg algs[] = { |
| + { |
| + .base.cra_name = "chacha20", |
| + .base.cra_driver_name = "chacha20-mips", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = CHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = chacha_mips, |
| + .decrypt = chacha_mips, |
| + }, { |
| + .base.cra_name = "xchacha20", |
| + .base.cra_driver_name = "xchacha20-mips", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha20_setkey, |
| + .encrypt = xchacha_mips, |
| + .decrypt = xchacha_mips, |
| + }, { |
| + .base.cra_name = "xchacha12", |
| + .base.cra_driver_name = "xchacha12-mips", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = 1, |
| + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .min_keysize = CHACHA_KEY_SIZE, |
| + .max_keysize = CHACHA_KEY_SIZE, |
| + .ivsize = XCHACHA_IV_SIZE, |
| + .chunksize = CHACHA_BLOCK_SIZE, |
| + .setkey = chacha12_setkey, |
| + .encrypt = xchacha_mips, |
| + .decrypt = xchacha_mips, |
| + } |
| +}; |
| + |
| +static int __init chacha_simd_mod_init(void) |
| +{ |
| + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| +} |
| + |
| +static void __exit chacha_simd_mod_fini(void) |
| +{ |
| + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| +} |
| + |
| +module_init(chacha_simd_mod_init); |
| +module_exit(chacha_simd_mod_fini); |
| + |
| +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); |
| +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_ALIAS_CRYPTO("chacha20"); |
| +MODULE_ALIAS_CRYPTO("chacha20-mips"); |
| +MODULE_ALIAS_CRYPTO("xchacha20"); |
| +MODULE_ALIAS_CRYPTO("xchacha20-mips"); |
| +MODULE_ALIAS_CRYPTO("xchacha12"); |
| +MODULE_ALIAS_CRYPTO("xchacha12-mips"); |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 649dc564f242..6b5e14cee475 100644 |
| |
| |
| @@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64 |
| SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, |
| XChaCha20, and XChaCha12 stream ciphers. |
| |
| +config CRYPTO_CHACHA_MIPS |
| + tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)" |
| + depends on CPU_MIPS32_R2 |
| + select CRYPTO_BLKCIPHER |
| + select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| + |
| config CRYPTO_SEED |
| tristate "SEED cipher algorithm" |
| select CRYPTO_ALGAPI |
| -- |
| 2.18.2 |
| |
| |
| From b455a56d1c9035f7ca22428941755ec376189d14 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:18 +0100 |
| Subject: [PATCH 012/100] crypto: chacha - unexport chacha_generic routines |
| |
| commit 22cf705360707ced15f9fe5423938f313c7df536 upstream. |
| |
| Now that all users of generic ChaCha code have moved to the core library, |
| there is no longer a need for the generic ChaCha skcipher driver to |
| export parts of its implementation for reuse by other drivers. So drop |
| the exports, and make the symbols static. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/chacha_generic.c | 26 ++++++++------------------ |
| include/crypto/internal/chacha.h | 10 ---------- |
| 2 files changed, 8 insertions(+), 28 deletions(-) |
| |
| diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c |
| index ebae6d9d9b32..c1b147318393 100644 |
| |
| |
| @@ -21,7 +21,7 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| - crypto_chacha_init(state, ctx, iv); |
| + chacha_init_generic(state, ctx->key, iv); |
| |
| while (walk.nbytes > 0) { |
| unsigned int nbytes = walk.nbytes; |
| @@ -37,36 +37,27 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| return err; |
| } |
| |
| -void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv) |
| -{ |
| - chacha_init_generic(state, ctx->key, iv); |
| -} |
| -EXPORT_SYMBOL_GPL(crypto_chacha_init); |
| - |
| -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize) |
| +static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize) |
| { |
| return chacha_setkey(tfm, key, keysize, 20); |
| } |
| -EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); |
| |
| -int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize) |
| +static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| + unsigned int keysize) |
| { |
| return chacha_setkey(tfm, key, keysize, 12); |
| } |
| -EXPORT_SYMBOL_GPL(crypto_chacha12_setkey); |
| |
| -int crypto_chacha_crypt(struct skcipher_request *req) |
| +static int crypto_chacha_crypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| |
| return chacha_stream_xor(req, ctx, req->iv); |
| } |
| -EXPORT_SYMBOL_GPL(crypto_chacha_crypt); |
| |
| -int crypto_xchacha_crypt(struct skcipher_request *req) |
| +static int crypto_xchacha_crypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| @@ -75,7 +66,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req) |
| u8 real_iv[16]; |
| |
| /* Compute the subkey given the original key and first 128 nonce bits */ |
| - crypto_chacha_init(state, ctx, req->iv); |
| + chacha_init_generic(state, ctx->key, req->iv); |
| hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| subctx.nrounds = ctx->nrounds; |
| |
| @@ -86,7 +77,6 @@ int crypto_xchacha_crypt(struct skcipher_request *req) |
| /* Generate the stream and XOR it with the data */ |
| return chacha_stream_xor(req, &subctx, real_iv); |
| } |
| -EXPORT_SYMBOL_GPL(crypto_xchacha_crypt); |
| |
| static struct skcipher_alg algs[] = { |
| { |
| diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h |
| index c0e40b245431..aa5d4a16aac5 100644 |
| |
| |
| @@ -12,8 +12,6 @@ struct chacha_ctx { |
| int nrounds; |
| }; |
| |
| -void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); |
| - |
| static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| unsigned int keysize, int nrounds) |
| { |
| @@ -42,12 +40,4 @@ static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| return chacha_setkey(tfm, key, keysize, 12); |
| } |
| |
| -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize); |
| -int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize); |
| - |
| -int crypto_chacha_crypt(struct skcipher_request *req); |
| -int crypto_xchacha_crypt(struct skcipher_request *req); |
| - |
| #endif /* _CRYPTO_CHACHA_H */ |
| -- |
| 2.18.2 |
| |
| |
| From 015fca1be79e9f688c936524100503689d100f8c Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:19 +0100 |
| Subject: [PATCH 013/100] crypto: poly1305 - move core routines into a separate |
| library |
| |
| commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream. |
| |
| Move the core Poly1305 routines shared between the generic Poly1305 |
| shash driver and the Adiantum and NHPoly1305 drivers into a separate |
| library so that using just these pieces does not pull in the crypto |
| API pieces of the generic Poly1305 routine. |
| |
| In a subsequent patch, we will augment this generic library with |
| init/update/final routines so that the Poly1305 algorithm can be used |
| directly without the need for using the crypto API's shash abstraction. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305_glue.c | 2 +- |
| crypto/Kconfig | 5 +- |
| crypto/adiantum.c | 5 +- |
| crypto/nhpoly1305.c | 3 +- |
| crypto/poly1305_generic.c | 195 ++--------------------------- |
| include/crypto/internal/poly1305.h | 67 ++++++++++ |
| include/crypto/poly1305.h | 23 ---- |
| lib/crypto/Kconfig | 3 + |
| lib/crypto/Makefile | 3 + |
| lib/crypto/poly1305.c | 158 +++++++++++++++++++++++ |
| 10 files changed, 248 insertions(+), 216 deletions(-) |
| create mode 100644 include/crypto/internal/poly1305.h |
| create mode 100644 lib/crypto/poly1305.c |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 4a1c05dce950..6ccf8eb26324 100644 |
| |
| |
| @@ -7,8 +7,8 @@ |
| |
| #include <crypto/algapi.h> |
| #include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| #include <crypto/internal/simd.h> |
| -#include <crypto/poly1305.h> |
| #include <linux/crypto.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 6b5e14cee475..b70b9d7c6e2f 100644 |
| |
| |
| @@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP |
| config CRYPTO_NHPOLY1305 |
| tristate |
| select CRYPTO_HASH |
| - select CRYPTO_POLY1305 |
| + select CRYPTO_LIB_POLY1305_GENERIC |
| |
| config CRYPTO_NHPOLY1305_SSE2 |
| tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)" |
| @@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2 |
| config CRYPTO_ADIANTUM |
| tristate "Adiantum support" |
| select CRYPTO_CHACHA20 |
| - select CRYPTO_POLY1305 |
| + select CRYPTO_LIB_POLY1305_GENERIC |
| select CRYPTO_NHPOLY1305 |
| select CRYPTO_MANAGER |
| help |
| @@ -686,6 +686,7 @@ config CRYPTO_GHASH |
| config CRYPTO_POLY1305 |
| tristate "Poly1305 authenticator algorithm" |
| select CRYPTO_HASH |
| + select CRYPTO_LIB_POLY1305_GENERIC |
| help |
| Poly1305 authenticator algorithm, RFC7539. |
| |
| diff --git a/crypto/adiantum.c b/crypto/adiantum.c |
| index 395a3ddd3707..aded26092268 100644 |
| |
| |
| @@ -33,6 +33,7 @@ |
| #include <crypto/b128ops.h> |
| #include <crypto/chacha.h> |
| #include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| #include <crypto/internal/skcipher.h> |
| #include <crypto/nhpoly1305.h> |
| #include <crypto/scatterwalk.h> |
| @@ -242,11 +243,11 @@ static void adiantum_hash_header(struct skcipher_request *req) |
| |
| BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); |
| poly1305_core_blocks(&state, &tctx->header_hash_key, |
| - &header, sizeof(header) / POLY1305_BLOCK_SIZE); |
| + &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1); |
| |
| BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); |
| poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, |
| - TWEAK_SIZE / POLY1305_BLOCK_SIZE); |
| + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); |
| |
| poly1305_core_emit(&state, &rctx->header_hash); |
| } |
| diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c |
| index 9ab4e07cde4d..f6b6a52092b4 100644 |
| |
| |
| @@ -33,6 +33,7 @@ |
| #include <asm/unaligned.h> |
| #include <crypto/algapi.h> |
| #include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| #include <crypto/nhpoly1305.h> |
| #include <linux/crypto.h> |
| #include <linux/kernel.h> |
| @@ -78,7 +79,7 @@ static void process_nh_hash_value(struct nhpoly1305_state *state, |
| BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); |
| |
| poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, |
| - NH_HASH_BYTES / POLY1305_BLOCK_SIZE); |
| + NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1); |
| } |
| |
| /* |
| diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c |
| index adc40298c749..067f493c2504 100644 |
| |
| |
| @@ -13,27 +13,12 @@ |
| |
| #include <crypto/algapi.h> |
| #include <crypto/internal/hash.h> |
| -#include <crypto/poly1305.h> |
| +#include <crypto/internal/poly1305.h> |
| #include <linux/crypto.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <asm/unaligned.h> |
| |
| -static inline u64 mlt(u64 a, u64 b) |
| -{ |
| - return a * b; |
| -} |
| - |
| -static inline u32 sr(u64 v, u_char n) |
| -{ |
| - return v >> n; |
| -} |
| - |
| -static inline u32 and(u32 v, u32 mask) |
| -{ |
| - return v & mask; |
| -} |
| - |
| int crypto_poly1305_init(struct shash_desc *desc) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| @@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_desc *desc) |
| } |
| EXPORT_SYMBOL_GPL(crypto_poly1305_init); |
| |
| -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) |
| -{ |
| - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ |
| - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; |
| - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; |
| - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; |
| - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; |
| - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_setkey); |
| - |
| -/* |
| - * Poly1305 requires a unique key for each tag, which implies that we can't set |
| - * it on the tfm that gets accessed by multiple users simultaneously. Instead we |
| - * expect the key as the first 32 bytes in the update() call. |
| - */ |
| -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen) |
| -{ |
| - if (!dctx->sset) { |
| - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_core_setkey(&dctx->r, src); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| - dctx->rset = true; |
| - } |
| - if (srclen >= POLY1305_BLOCK_SIZE) { |
| - dctx->s[0] = get_unaligned_le32(src + 0); |
| - dctx->s[1] = get_unaligned_le32(src + 4); |
| - dctx->s[2] = get_unaligned_le32(src + 8); |
| - dctx->s[3] = get_unaligned_le32(src + 12); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| - dctx->sset = true; |
| - } |
| - } |
| - return srclen; |
| -} |
| -EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); |
| - |
| -static void poly1305_blocks_internal(struct poly1305_state *state, |
| - const struct poly1305_key *key, |
| - const void *src, unsigned int nblocks, |
| - u32 hibit) |
| -{ |
| - u32 r0, r1, r2, r3, r4; |
| - u32 s1, s2, s3, s4; |
| - u32 h0, h1, h2, h3, h4; |
| - u64 d0, d1, d2, d3, d4; |
| - |
| - if (!nblocks) |
| - return; |
| - |
| - r0 = key->r[0]; |
| - r1 = key->r[1]; |
| - r2 = key->r[2]; |
| - r3 = key->r[3]; |
| - r4 = key->r[4]; |
| - |
| - s1 = r1 * 5; |
| - s2 = r2 * 5; |
| - s3 = r3 * 5; |
| - s4 = r4 * 5; |
| - |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - do { |
| - /* h += m[i] */ |
| - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; |
| - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; |
| - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; |
| - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; |
| - h4 += (get_unaligned_le32(src + 12) >> 8) | hibit; |
| - |
| - /* h *= r */ |
| - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + |
| - mlt(h3, s2) + mlt(h4, s1); |
| - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + |
| - mlt(h3, s3) + mlt(h4, s2); |
| - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + |
| - mlt(h3, s4) + mlt(h4, s3); |
| - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + |
| - mlt(h3, r0) + mlt(h4, s4); |
| - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + |
| - mlt(h3, r1) + mlt(h4, r0); |
| - |
| - /* (partial) h %= p */ |
| - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); |
| - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); |
| - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); |
| - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); |
| - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); |
| - h1 += h0 >> 26; h0 = h0 & 0x3ffffff; |
| - |
| - src += POLY1305_BLOCK_SIZE; |
| - } while (--nblocks); |
| - |
| - state->h[0] = h0; |
| - state->h[1] = h1; |
| - state->h[2] = h2; |
| - state->h[3] = h3; |
| - state->h[4] = h4; |
| -} |
| - |
| -void poly1305_core_blocks(struct poly1305_state *state, |
| - const struct poly1305_key *key, |
| - const void *src, unsigned int nblocks) |
| -{ |
| - poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_blocks); |
| - |
| -static void poly1305_blocks(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen, u32 hibit) |
| +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + unsigned int srclen) |
| { |
| unsigned int datalen; |
| |
| @@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, |
| srclen = datalen; |
| } |
| |
| - poly1305_blocks_internal(&dctx->h, &dctx->r, |
| - src, srclen / POLY1305_BLOCK_SIZE, hibit); |
| + poly1305_core_blocks(&dctx->h, &dctx->r, src, |
| + srclen / POLY1305_BLOCK_SIZE, 1); |
| } |
| |
| int crypto_poly1305_update(struct shash_desc *desc, |
| @@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_desc *desc, |
| |
| if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| poly1305_blocks(dctx, dctx->buf, |
| - POLY1305_BLOCK_SIZE, 1 << 24); |
| + POLY1305_BLOCK_SIZE); |
| dctx->buflen = 0; |
| } |
| } |
| |
| if (likely(srclen >= POLY1305_BLOCK_SIZE)) { |
| - poly1305_blocks(dctx, src, srclen, 1 << 24); |
| + poly1305_blocks(dctx, src, srclen); |
| src += srclen - (srclen % POLY1305_BLOCK_SIZE); |
| srclen %= POLY1305_BLOCK_SIZE; |
| } |
| @@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_desc *desc, |
| } |
| EXPORT_SYMBOL_GPL(crypto_poly1305_update); |
| |
| -void poly1305_core_emit(const struct poly1305_state *state, void *dst) |
| -{ |
| - u32 h0, h1, h2, h3, h4; |
| - u32 g0, g1, g2, g3, g4; |
| - u32 mask; |
| - |
| - /* fully carry h */ |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; |
| - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; |
| - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; |
| - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; |
| - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; |
| - |
| - /* compute h + -p */ |
| - g0 = h0 + 5; |
| - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; |
| - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; |
| - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; |
| - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; |
| - |
| - /* select h if h < p, or h + -p if h >= p */ |
| - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| - g0 &= mask; |
| - g1 &= mask; |
| - g2 &= mask; |
| - g3 &= mask; |
| - g4 &= mask; |
| - mask = ~mask; |
| - h0 = (h0 & mask) | g0; |
| - h1 = (h1 & mask) | g1; |
| - h2 = (h2 & mask) | g2; |
| - h3 = (h3 & mask) | g3; |
| - h4 = (h4 & mask) | g4; |
| - |
| - /* h = h % (2^128) */ |
| - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); |
| - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); |
| - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); |
| - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_emit); |
| - |
| int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| @@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| dctx->buf[dctx->buflen++] = 1; |
| memset(dctx->buf + dctx->buflen, 0, |
| POLY1305_BLOCK_SIZE - dctx->buflen); |
| - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| + poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); |
| } |
| |
| poly1305_core_emit(&dctx->h, digest); |
| diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h |
| new file mode 100644 |
| index 000000000000..cb58e61f73a7 |
| |
| |
| @@ -0,0 +1,67 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Common values for the Poly1305 algorithm |
| + */ |
| + |
| +#ifndef _CRYPTO_INTERNAL_POLY1305_H |
| +#define _CRYPTO_INTERNAL_POLY1305_H |
| + |
| +#include <asm/unaligned.h> |
| +#include <linux/types.h> |
| +#include <crypto/poly1305.h> |
| + |
| +struct shash_desc; |
| + |
| +/* |
| + * Poly1305 core functions. These implement the ε-almost-∆-universal hash |
| + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce |
| + * ("s key") at the end. They also only support block-aligned inputs. |
| + */ |
| +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); |
| +static inline void poly1305_core_init(struct poly1305_state *state) |
| +{ |
| + *state = (struct poly1305_state){}; |
| +} |
| + |
| +void poly1305_core_blocks(struct poly1305_state *state, |
| + const struct poly1305_key *key, const void *src, |
| + unsigned int nblocks, u32 hibit); |
| +void poly1305_core_emit(const struct poly1305_state *state, void *dst); |
| + |
| +/* Crypto API helper functions for the Poly1305 MAC */ |
| +int crypto_poly1305_init(struct shash_desc *desc); |
| + |
| +int crypto_poly1305_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen); |
| +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); |
| + |
| +/* |
| + * Poly1305 requires a unique key for each tag, which implies that we can't set |
| + * it on the tfm that gets accessed by multiple users simultaneously. Instead we |
| + * expect the key as the first 32 bytes in the update() call. |
| + */ |
| +static inline |
| +unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + if (!dctx->sset) { |
| + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| + poly1305_core_setkey(&dctx->r, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = true; |
| + } |
| + if (srclen >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + } |
| + return srclen; |
| +} |
| + |
| +#endif |
| diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h |
| index 34317ed2071e..f5a4319c2a1f 100644 |
| |
| |
| @@ -38,27 +38,4 @@ struct poly1305_desc_ctx { |
| bool sset; |
| }; |
| |
| -/* |
| - * Poly1305 core functions. These implement the ε-almost-∆-universal hash |
| - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce |
| - * ("s key") at the end. They also only support block-aligned inputs. |
| - */ |
| -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); |
| -static inline void poly1305_core_init(struct poly1305_state *state) |
| -{ |
| - memset(state->h, 0, sizeof(state->h)); |
| -} |
| -void poly1305_core_blocks(struct poly1305_state *state, |
| - const struct poly1305_key *key, |
| - const void *src, unsigned int nblocks); |
| -void poly1305_core_emit(const struct poly1305_state *state, void *dst); |
| - |
| -/* Crypto API helper functions for the Poly1305 MAC */ |
| -int crypto_poly1305_init(struct shash_desc *desc); |
| -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen); |
| -int crypto_poly1305_update(struct shash_desc *desc, |
| - const u8 *src, unsigned int srclen); |
| -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); |
| - |
| #endif |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 6a11931ae105..c4882d29879e 100644 |
| |
| |
| @@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA |
| config CRYPTO_LIB_DES |
| tristate |
| |
| +config CRYPTO_LIB_POLY1305_GENERIC |
| + tristate |
| + |
| config CRYPTO_LIB_SHA256 |
| tristate |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 0ce40604e104..b58ab6843a9d 100644 |
| |
| |
| @@ -13,5 +13,8 @@ libarc4-y := arc4.o |
| obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| libdes-y := des.o |
| |
| +obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o |
| +libpoly1305-y := poly1305.o |
| + |
| obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o |
| libsha256-y := sha256.o |
| diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c |
| new file mode 100644 |
| index 000000000000..f019a57dbc1b |
| |
| |
| @@ -0,0 +1,158 @@ |
| +// SPDX-License-Identifier: GPL-2.0-or-later |
| +/* |
| + * Poly1305 authenticator algorithm, RFC7539 |
| + * |
| + * Copyright (C) 2015 Martin Willi |
| + * |
| + * Based on public domain code by Andrew Moon and Daniel J. Bernstein. |
| + */ |
| + |
| +#include <crypto/internal/poly1305.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| +#include <asm/unaligned.h> |
| + |
| +static inline u64 mlt(u64 a, u64 b) |
| +{ |
| + return a * b; |
| +} |
| + |
| +static inline u32 sr(u64 v, u_char n) |
| +{ |
| + return v >> n; |
| +} |
| + |
| +static inline u32 and(u32 v, u32 mask) |
| +{ |
| + return v & mask; |
| +} |
| + |
| +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) |
| +{ |
| + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ |
| + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; |
| + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; |
| + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; |
| + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; |
| + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_core_setkey); |
| + |
| +void poly1305_core_blocks(struct poly1305_state *state, |
| + const struct poly1305_key *key, const void *src, |
| + unsigned int nblocks, u32 hibit) |
| +{ |
| + u32 r0, r1, r2, r3, r4; |
| + u32 s1, s2, s3, s4; |
| + u32 h0, h1, h2, h3, h4; |
| + u64 d0, d1, d2, d3, d4; |
| + |
| + if (!nblocks) |
| + return; |
| + |
| + r0 = key->r[0]; |
| + r1 = key->r[1]; |
| + r2 = key->r[2]; |
| + r3 = key->r[3]; |
| + r4 = key->r[4]; |
| + |
| + s1 = r1 * 5; |
| + s2 = r2 * 5; |
| + s3 = r3 * 5; |
| + s4 = r4 * 5; |
| + |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + do { |
| + /* h += m[i] */ |
| + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; |
| + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; |
| + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; |
| + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; |
| + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); |
| + |
| + /* h *= r */ |
| + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + |
| + mlt(h3, s2) + mlt(h4, s1); |
| + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + |
| + mlt(h3, s3) + mlt(h4, s2); |
| + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + |
| + mlt(h3, s4) + mlt(h4, s3); |
| + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + |
| + mlt(h3, r0) + mlt(h4, s4); |
| + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + |
| + mlt(h3, r1) + mlt(h4, r0); |
| + |
| + /* (partial) h %= p */ |
| + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); |
| + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); |
| + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); |
| + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); |
| + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); |
| + h1 += h0 >> 26; h0 = h0 & 0x3ffffff; |
| + |
| + src += POLY1305_BLOCK_SIZE; |
| + } while (--nblocks); |
| + |
| + state->h[0] = h0; |
| + state->h[1] = h1; |
| + state->h[2] = h2; |
| + state->h[3] = h3; |
| + state->h[4] = h4; |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_core_blocks); |
| + |
| +void poly1305_core_emit(const struct poly1305_state *state, void *dst) |
| +{ |
| + u32 h0, h1, h2, h3, h4; |
| + u32 g0, g1, g2, g3, g4; |
| + u32 mask; |
| + |
| + /* fully carry h */ |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; |
| + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; |
| + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; |
| + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; |
| + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; |
| + |
| + /* compute h + -p */ |
| + g0 = h0 + 5; |
| + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; |
| + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; |
| + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; |
| + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; |
| + |
| + /* select h if h < p, or h + -p if h >= p */ |
| + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| + g0 &= mask; |
| + g1 &= mask; |
| + g2 &= mask; |
| + g3 &= mask; |
| + g4 &= mask; |
| + mask = ~mask; |
| + h0 = (h0 & mask) | g0; |
| + h1 = (h1 & mask) | g1; |
| + h2 = (h2 & mask) | g2; |
| + h3 = (h3 & mask) | g3; |
| + h4 = (h4 & mask) | g4; |
| + |
| + /* h = h % (2^128) */ |
| + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); |
| + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); |
| + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); |
| + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_core_emit); |
| + |
| +MODULE_LICENSE("GPL"); |
| +MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); |
| -- |
| 2.18.2 |
| |
| |
| From 67f694baa2784551f3a5b0402fa53b17feed3009 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:20 +0100 |
| Subject: [PATCH 014/100] crypto: x86/poly1305 - unify Poly1305 state struct |
| with generic code |
| |
| commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream. |
| |
| In preparation of exposing a Poly1305 library interface directly from |
| the accelerated x86 driver, align the state descriptor of the x86 code |
| with the one used by the generic driver. This is needed to make the |
| library interface unified between all implementations. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305_glue.c | 88 ++++++++++-------------------- |
| crypto/poly1305_generic.c | 6 +- |
| include/crypto/internal/poly1305.h | 4 +- |
| include/crypto/poly1305.h | 18 +++--- |
| 4 files changed, 43 insertions(+), 73 deletions(-) |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 6ccf8eb26324..b43b93c95e79 100644 |
| |
| |
| @@ -14,40 +14,14 @@ |
| #include <linux/module.h> |
| #include <asm/simd.h> |
| |
| -struct poly1305_simd_desc_ctx { |
| - struct poly1305_desc_ctx base; |
| - /* derived key u set? */ |
| - bool uset; |
| -#ifdef CONFIG_AS_AVX2 |
| - /* derived keys r^3, r^4 set? */ |
| - bool wset; |
| -#endif |
| - /* derived Poly1305 key r^2 */ |
| - u32 u[5]; |
| - /* ... silently appended r^3 and r^4 when using AVX2 */ |
| -}; |
| - |
| asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, |
| const u32 *r, unsigned int blocks); |
| asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, |
| unsigned int blocks, const u32 *u); |
| -#ifdef CONFIG_AS_AVX2 |
| asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, |
| unsigned int blocks, const u32 *u); |
| -static bool poly1305_use_avx2; |
| -#endif |
| |
| -static int poly1305_simd_init(struct shash_desc *desc) |
| -{ |
| - struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc); |
| - |
| - sctx->uset = false; |
| -#ifdef CONFIG_AS_AVX2 |
| - sctx->wset = false; |
| -#endif |
| - |
| - return crypto_poly1305_init(desc); |
| -} |
| +static bool poly1305_use_avx2 __ro_after_init; |
| |
| static void poly1305_simd_mult(u32 *a, const u32 *b) |
| { |
| @@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) |
| static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| const u8 *src, unsigned int srclen) |
| { |
| - struct poly1305_simd_desc_ctx *sctx; |
| unsigned int blocks, datalen; |
| |
| - BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base)); |
| - sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base); |
| - |
| if (unlikely(!dctx->sset)) { |
| datalen = crypto_poly1305_setdesckey(dctx, src, srclen); |
| src += srclen - datalen; |
| srclen = datalen; |
| } |
| |
| -#ifdef CONFIG_AS_AVX2 |
| - if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { |
| - if (unlikely(!sctx->wset)) { |
| - if (!sctx->uset) { |
| - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); |
| - poly1305_simd_mult(sctx->u, dctx->r.r); |
| - sctx->uset = true; |
| + if (IS_ENABLED(CONFIG_AS_AVX2) && |
| + poly1305_use_avx2 && |
| + srclen >= POLY1305_BLOCK_SIZE * 4) { |
| + if (unlikely(dctx->rset < 4)) { |
| + if (dctx->rset < 2) { |
| + dctx->r[1] = dctx->r[0]; |
| + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| } |
| - memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); |
| - poly1305_simd_mult(sctx->u + 5, dctx->r.r); |
| - memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); |
| - poly1305_simd_mult(sctx->u + 10, dctx->r.r); |
| - sctx->wset = true; |
| + dctx->r[2] = dctx->r[1]; |
| + poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); |
| + dctx->r[3] = dctx->r[2]; |
| + poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); |
| + dctx->rset = 4; |
| } |
| blocks = srclen / (POLY1305_BLOCK_SIZE * 4); |
| - poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, |
| - sctx->u); |
| + poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, |
| + dctx->r[1].r); |
| src += POLY1305_BLOCK_SIZE * 4 * blocks; |
| srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; |
| } |
| -#endif |
| + |
| if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { |
| - if (unlikely(!sctx->uset)) { |
| - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); |
| - poly1305_simd_mult(sctx->u, dctx->r.r); |
| - sctx->uset = true; |
| + if (unlikely(dctx->rset < 2)) { |
| + dctx->r[1] = dctx->r[0]; |
| + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| + dctx->rset = 2; |
| } |
| blocks = srclen / (POLY1305_BLOCK_SIZE * 2); |
| - poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, |
| - sctx->u); |
| + poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, |
| + blocks, dctx->r[1].r); |
| src += POLY1305_BLOCK_SIZE * 2 * blocks; |
| srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; |
| } |
| if (srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); |
| + poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); |
| srclen -= POLY1305_BLOCK_SIZE; |
| } |
| return srclen; |
| @@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| |
| static struct shash_alg alg = { |
| .digestsize = POLY1305_DIGEST_SIZE, |
| - .init = poly1305_simd_init, |
| + .init = crypto_poly1305_init, |
| .update = poly1305_simd_update, |
| .final = crypto_poly1305_final, |
| - .descsize = sizeof(struct poly1305_simd_desc_ctx), |
| + .descsize = sizeof(struct poly1305_desc_ctx), |
| .base = { |
| .cra_name = "poly1305", |
| .cra_driver_name = "poly1305-simd", |
| @@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void) |
| if (!boot_cpu_has(X86_FEATURE_XMM2)) |
| return -ENODEV; |
| |
| -#ifdef CONFIG_AS_AVX2 |
| - poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && |
| + poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && |
| + boot_cpu_has(X86_FEATURE_AVX) && |
| boot_cpu_has(X86_FEATURE_AVX2) && |
| cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
| - alg.descsize = sizeof(struct poly1305_simd_desc_ctx); |
| + alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); |
| if (poly1305_use_avx2) |
| alg.descsize += 10 * sizeof(u32); |
| -#endif |
| + |
| return crypto_register_shash(&alg); |
| } |
| |
| diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c |
| index 067f493c2504..f3fcd9578a47 100644 |
| |
| |
| @@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc) |
| |
| poly1305_core_init(&dctx->h); |
| dctx->buflen = 0; |
| - dctx->rset = false; |
| + dctx->rset = 0; |
| dctx->sset = false; |
| |
| return 0; |
| @@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| srclen = datalen; |
| } |
| |
| - poly1305_core_blocks(&dctx->h, &dctx->r, src, |
| + poly1305_core_blocks(&dctx->h, dctx->r, src, |
| srclen / POLY1305_BLOCK_SIZE, 1); |
| } |
| |
| @@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| dctx->buf[dctx->buflen++] = 1; |
| memset(dctx->buf + dctx->buflen, 0, |
| POLY1305_BLOCK_SIZE - dctx->buflen); |
| - poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); |
| + poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); |
| } |
| |
| poly1305_core_emit(&dctx->h, digest); |
| diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h |
| index cb58e61f73a7..04fa269e5534 100644 |
| |
| |
| @@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| { |
| if (!dctx->sset) { |
| if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_core_setkey(&dctx->r, src); |
| + poly1305_core_setkey(dctx->r, src); |
| src += POLY1305_BLOCK_SIZE; |
| srclen -= POLY1305_BLOCK_SIZE; |
| - dctx->rset = true; |
| + dctx->rset = 1; |
| } |
| if (srclen >= POLY1305_BLOCK_SIZE) { |
| dctx->s[0] = get_unaligned_le32(src + 0); |
| diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h |
| index f5a4319c2a1f..36b5886cb50c 100644 |
| |
| |
| @@ -22,20 +22,20 @@ struct poly1305_state { |
| }; |
| |
| struct poly1305_desc_ctx { |
| - /* key */ |
| - struct poly1305_key r; |
| - /* finalize key */ |
| - u32 s[4]; |
| - /* accumulator */ |
| - struct poly1305_state h; |
| /* partial buffer */ |
| u8 buf[POLY1305_BLOCK_SIZE]; |
| /* bytes used in partial buffer */ |
| unsigned int buflen; |
| - /* r key has been set */ |
| - bool rset; |
| - /* s key has been set */ |
| + /* how many keys have been set in r[] */ |
| + unsigned short rset; |
| + /* whether s[] has been set */ |
| bool sset; |
| + /* finalize key */ |
| + u32 s[4]; |
| + /* accumulator */ |
| + struct poly1305_state h; |
| + /* key */ |
| + struct poly1305_key r[1]; |
| }; |
| |
| #endif |
| -- |
| 2.18.2 |
| |
| |
| From 355c32dc47fefdec66a4e6cb58ce5501d70d3a42 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:21 +0100 |
| Subject: [PATCH 015/100] crypto: poly1305 - expose init/update/final library |
| interface |
| |
| commit a1d93064094cc5e24d64e35cf093e7191d0c9344 upstream. |
| |
| Expose the existing generic Poly1305 code via an init/update/final |
| library interface so that callers are not required to go through |
| the crypto API's shash abstraction to access it. At the same time, |
| make some preparations so that the library implementation can be |
| superseded by an accelerated arch-specific version in the future. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/poly1305_generic.c | 22 +----------- |
| include/crypto/poly1305.h | 38 +++++++++++++++++++- |
| lib/crypto/Kconfig | 26 ++++++++++++++ |
| lib/crypto/poly1305.c | 74 +++++++++++++++++++++++++++++++++++++++ |
| 4 files changed, 138 insertions(+), 22 deletions(-) |
| |
| diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c |
| index f3fcd9578a47..afe9a9e576dd 100644 |
| |
| |
| @@ -85,31 +85,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update); |
| int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| - __le32 digest[4]; |
| - u64 f = 0; |
| |
| if (unlikely(!dctx->sset)) |
| return -ENOKEY; |
| |
| - if (unlikely(dctx->buflen)) { |
| - dctx->buf[dctx->buflen++] = 1; |
| - memset(dctx->buf + dctx->buflen, 0, |
| - POLY1305_BLOCK_SIZE - dctx->buflen); |
| - poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); |
| - } |
| - |
| - poly1305_core_emit(&dctx->h, digest); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0]; |
| - put_unaligned_le32(f, dst + 0); |
| - f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1]; |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2]; |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3]; |
| - put_unaligned_le32(f, dst + 12); |
| - |
| + poly1305_final_generic(dctx, dst); |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(crypto_poly1305_final); |
| diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h |
| index 36b5886cb50c..74c6e1cd73ee 100644 |
| |
| |
| @@ -35,7 +35,43 @@ struct poly1305_desc_ctx { |
| /* accumulator */ |
| struct poly1305_state h; |
| /* key */ |
| - struct poly1305_key r[1]; |
| + struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; |
| }; |
| |
| +void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); |
| +void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key); |
| + |
| +static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) |
| + poly1305_init_arch(desc, key); |
| + else |
| + poly1305_init_generic(desc, key); |
| +} |
| + |
| +void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, |
| + unsigned int nbytes); |
| +void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, |
| + unsigned int nbytes); |
| + |
| +static inline void poly1305_update(struct poly1305_desc_ctx *desc, |
| + const u8 *src, unsigned int nbytes) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) |
| + poly1305_update_arch(desc, src, nbytes); |
| + else |
| + poly1305_update_generic(desc, src, nbytes); |
| +} |
| + |
| +void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); |
| +void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); |
| + |
| +static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) |
| + poly1305_final_arch(desc, digest); |
| + else |
| + poly1305_final_generic(desc, digest); |
| +} |
| + |
| #endif |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index c4882d29879e..a731ea36bd5c 100644 |
| |
| |
| @@ -37,8 +37,34 @@ config CRYPTO_LIB_CHACHA |
| config CRYPTO_LIB_DES |
| tristate |
| |
| +config CRYPTO_LIB_POLY1305_RSIZE |
| + int |
| + default 1 |
| + |
| +config CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + tristate |
| + help |
| + Declares whether the architecture provides an arch-specific |
| + accelerated implementation of the Poly1305 library interface, |
| + either builtin or as a module. |
| + |
| config CRYPTO_LIB_POLY1305_GENERIC |
| tristate |
| + help |
| + This symbol can be depended upon by arch implementations of the |
| + Poly1305 library interface that require the generic code as a |
| + fallback, e.g., for SIMD implementations. If no arch specific |
| + implementation is enabled, this implementation serves the users |
| + of CRYPTO_LIB_POLY1305. |
| + |
| +config CRYPTO_LIB_POLY1305 |
| + tristate "Poly1305 library interface" |
| + depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n |
| + help |
| + Enable the Poly1305 library interface. This interface may be fulfilled |
| + by either the generic implementation or an arch-specific one, if one |
| + is available and enabled. |
| |
| config CRYPTO_LIB_SHA256 |
| tristate |
| diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c |
| index f019a57dbc1b..32ec293c65ae 100644 |
| |
| |
| @@ -154,5 +154,79 @@ void poly1305_core_emit(const struct poly1305_state *state, void *dst) |
| } |
| EXPORT_SYMBOL_GPL(poly1305_core_emit); |
| |
| +void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) |
| +{ |
| + poly1305_core_setkey(desc->r, key); |
| + desc->s[0] = get_unaligned_le32(key + 16); |
| + desc->s[1] = get_unaligned_le32(key + 20); |
| + desc->s[2] = get_unaligned_le32(key + 24); |
| + desc->s[3] = get_unaligned_le32(key + 28); |
| + poly1305_core_init(&desc->h); |
| + desc->buflen = 0; |
| + desc->sset = true; |
| + desc->rset = 1; |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_init_generic); |
| + |
| +void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, |
| + unsigned int nbytes) |
| +{ |
| + unsigned int bytes; |
| + |
| + if (unlikely(desc->buflen)) { |
| + bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen); |
| + memcpy(desc->buf + desc->buflen, src, bytes); |
| + src += bytes; |
| + nbytes -= bytes; |
| + desc->buflen += bytes; |
| + |
| + if (desc->buflen == POLY1305_BLOCK_SIZE) { |
| + poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1); |
| + desc->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
| + poly1305_core_blocks(&desc->h, desc->r, src, |
| + nbytes / POLY1305_BLOCK_SIZE, 1); |
| + src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); |
| + nbytes %= POLY1305_BLOCK_SIZE; |
| + } |
| + |
| + if (unlikely(nbytes)) { |
| + desc->buflen = nbytes; |
| + memcpy(desc->buf, src, nbytes); |
| + } |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_update_generic); |
| + |
| +void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) |
| +{ |
| + __le32 digest[4]; |
| + u64 f = 0; |
| + |
| + if (unlikely(desc->buflen)) { |
| + desc->buf[desc->buflen++] = 1; |
| + memset(desc->buf + desc->buflen, 0, |
| + POLY1305_BLOCK_SIZE - desc->buflen); |
| + poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0); |
| + } |
| + |
| + poly1305_core_emit(&desc->h, digest); |
| + |
| + /* mac = (h + s) % (2^128) */ |
| + f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; |
| + put_unaligned_le32(f, dst + 0); |
| + f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; |
| + put_unaligned_le32(f, dst + 4); |
| + f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; |
| + put_unaligned_le32(f, dst + 8); |
| + f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; |
| + put_unaligned_le32(f, dst + 12); |
| + |
| + *desc = (struct poly1305_desc_ctx){}; |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_final_generic); |
| + |
| MODULE_LICENSE("GPL"); |
| MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); |
| -- |
| 2.18.2 |
| |
| |
| From 7998253646414533f2b58947258628dd83cd380b Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:22 +0100 |
| Subject: [PATCH 016/100] crypto: x86/poly1305 - depend on generic library not |
| generic shash |
| |
| commit 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 upstream. |
| |
| Remove the dependency on the generic Poly1305 driver. Instead, depend |
| on the generic library so that we only reuse code without pulling in |
| the generic shash implementation as well. |
| |
| While at it, remove the logic that prefers the non-SIMD path for short |
| inputs - this is no longer necessary after recent FPU handling changes |
| on x86. |
| |
| Since this removes the last remaining user of the routines exported |
| by the generic shash driver, unexport them and make them static. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++----- |
| crypto/Kconfig | 2 +- |
| crypto/poly1305_generic.c | 11 ++--- |
| include/crypto/internal/poly1305.h | 9 ---- |
| 4 files changed, 60 insertions(+), 28 deletions(-) |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index b43b93c95e79..a5b3a054604c 100644 |
| |
| |
| @@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) |
| poly1305_block_sse2(a, m, b, 1); |
| } |
| |
| +static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + unsigned int datalen; |
| + |
| + if (unlikely(!dctx->sset)) { |
| + datalen = crypto_poly1305_setdesckey(dctx, src, srclen); |
| + src += srclen - datalen; |
| + srclen = datalen; |
| + } |
| + if (srclen >= POLY1305_BLOCK_SIZE) { |
| + poly1305_core_blocks(&dctx->h, dctx->r, src, |
| + srclen / POLY1305_BLOCK_SIZE, 1); |
| + srclen %= POLY1305_BLOCK_SIZE; |
| + } |
| + return srclen; |
| +} |
| + |
| static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| const u8 *src, unsigned int srclen) |
| { |
| @@ -91,12 +109,6 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| unsigned int bytes; |
| |
| - /* kernel_fpu_begin/end is costly, use fallback for small updates */ |
| - if (srclen <= 288 || !crypto_simd_usable()) |
| - return crypto_poly1305_update(desc, src, srclen); |
| - |
| - kernel_fpu_begin(); |
| - |
| if (unlikely(dctx->buflen)) { |
| bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); |
| memcpy(dctx->buf + dctx->buflen, src, bytes); |
| @@ -105,25 +117,57 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| dctx->buflen += bytes; |
| |
| if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| - poly1305_simd_blocks(dctx, dctx->buf, |
| - POLY1305_BLOCK_SIZE); |
| + if (likely(crypto_simd_usable())) { |
| + kernel_fpu_begin(); |
| + poly1305_simd_blocks(dctx, dctx->buf, |
| + POLY1305_BLOCK_SIZE); |
| + kernel_fpu_end(); |
| + } else { |
| + poly1305_scalar_blocks(dctx, dctx->buf, |
| + POLY1305_BLOCK_SIZE); |
| + } |
| dctx->buflen = 0; |
| } |
| } |
| |
| if (likely(srclen >= POLY1305_BLOCK_SIZE)) { |
| - bytes = poly1305_simd_blocks(dctx, src, srclen); |
| + if (likely(crypto_simd_usable())) { |
| + kernel_fpu_begin(); |
| + bytes = poly1305_simd_blocks(dctx, src, srclen); |
| + kernel_fpu_end(); |
| + } else { |
| + bytes = poly1305_scalar_blocks(dctx, src, srclen); |
| + } |
| src += srclen - bytes; |
| srclen = bytes; |
| } |
| |
| - kernel_fpu_end(); |
| - |
| if (unlikely(srclen)) { |
| dctx->buflen = srclen; |
| memcpy(dctx->buf, src, srclen); |
| } |
| +} |
| + |
| +static int crypto_poly1305_init(struct shash_desc *desc) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + poly1305_core_init(&dctx->h); |
| + dctx->buflen = 0; |
| + dctx->rset = 0; |
| + dctx->sset = false; |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (unlikely(!dctx->sset)) |
| + return -ENOKEY; |
| |
| + poly1305_final_generic(dctx, dst); |
| return 0; |
| } |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index b70b9d7c6e2f..6178aa627141 100644 |
| |
| |
| @@ -697,7 +697,7 @@ config CRYPTO_POLY1305 |
| config CRYPTO_POLY1305_X86_64 |
| tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" |
| depends on X86 && 64BIT |
| - select CRYPTO_POLY1305 |
| + select CRYPTO_LIB_POLY1305_GENERIC |
| help |
| Poly1305 authenticator algorithm, RFC7539. |
| |
| diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c |
| index afe9a9e576dd..21edbd8c99fb 100644 |
| |
| |
| @@ -19,7 +19,7 @@ |
| #include <linux/module.h> |
| #include <asm/unaligned.h> |
| |
| -int crypto_poly1305_init(struct shash_desc *desc) |
| +static int crypto_poly1305_init(struct shash_desc *desc) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| |
| @@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_desc *desc) |
| |
| return 0; |
| } |
| -EXPORT_SYMBOL_GPL(crypto_poly1305_init); |
| |
| static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int srclen) |
| @@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| srclen / POLY1305_BLOCK_SIZE, 1); |
| } |
| |
| -int crypto_poly1305_update(struct shash_desc *desc, |
| - const u8 *src, unsigned int srclen) |
| +static int crypto_poly1305_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| unsigned int bytes; |
| @@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_desc *desc, |
| |
| return 0; |
| } |
| -EXPORT_SYMBOL_GPL(crypto_poly1305_update); |
| |
| -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| |
| @@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| poly1305_final_generic(dctx, dst); |
| return 0; |
| } |
| -EXPORT_SYMBOL_GPL(crypto_poly1305_final); |
| |
| static struct shash_alg poly1305_alg = { |
| .digestsize = POLY1305_DIGEST_SIZE, |
| diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h |
| index 04fa269e5534..479b0cab2a1a 100644 |
| |
| |
| @@ -10,8 +10,6 @@ |
| #include <linux/types.h> |
| #include <crypto/poly1305.h> |
| |
| -struct shash_desc; |
| - |
| /* |
| * Poly1305 core functions. These implement the ε-almost-∆-universal hash |
| * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce |
| @@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly1305_state *state, |
| unsigned int nblocks, u32 hibit); |
| void poly1305_core_emit(const struct poly1305_state *state, void *dst); |
| |
| -/* Crypto API helper functions for the Poly1305 MAC */ |
| -int crypto_poly1305_init(struct shash_desc *desc); |
| - |
| -int crypto_poly1305_update(struct shash_desc *desc, |
| - const u8 *src, unsigned int srclen); |
| -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); |
| - |
| /* |
| * Poly1305 requires a unique key for each tag, which implies that we can't set |
| * it on the tfm that gets accessed by multiple users simultaneously. Instead we |
| -- |
| 2.18.2 |
| |
| |
| From 42b7d909e3dc65ee679c5820954fddc02a89b250 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:23 +0100 |
| Subject: [PATCH 017/100] crypto: x86/poly1305 - expose existing driver as |
| poly1305 library |
| |
| commit f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac upstream. |
| |
| Implement the arch init/update/final Poly1305 library routines in the |
| accelerated SIMD driver for x86 so they are accessible to users of |
| the Poly1305 library interface as well. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305_glue.c | 57 ++++++++++++++++++++++++--------- |
| crypto/Kconfig | 1 + |
| lib/crypto/Kconfig | 1 + |
| 3 files changed, 43 insertions(+), 16 deletions(-) |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index a5b3a054604c..370cd88068ec 100644 |
| |
| |
| @@ -10,6 +10,7 @@ |
| #include <crypto/internal/poly1305.h> |
| #include <crypto/internal/simd.h> |
| #include <linux/crypto.h> |
| +#include <linux/jump_label.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <asm/simd.h> |
| @@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, |
| asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, |
| unsigned int blocks, const u32 *u); |
| |
| -static bool poly1305_use_avx2 __ro_after_init; |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); |
| |
| static void poly1305_simd_mult(u32 *a, const u32 *b) |
| { |
| @@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| } |
| |
| if (IS_ENABLED(CONFIG_AS_AVX2) && |
| - poly1305_use_avx2 && |
| + static_branch_likely(&poly1305_use_avx2) && |
| srclen >= POLY1305_BLOCK_SIZE * 4) { |
| if (unlikely(dctx->rset < 4)) { |
| if (dctx->rset < 2) { |
| @@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| return srclen; |
| } |
| |
| -static int poly1305_simd_update(struct shash_desc *desc, |
| - const u8 *src, unsigned int srclen) |
| +void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) |
| +{ |
| + poly1305_init_generic(desc, key); |
| +} |
| +EXPORT_SYMBOL(poly1305_init_arch); |
| + |
| +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + unsigned int srclen) |
| { |
| - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| unsigned int bytes; |
| |
| if (unlikely(dctx->buflen)) { |
| @@ -117,7 +124,8 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| dctx->buflen += bytes; |
| |
| if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| - if (likely(crypto_simd_usable())) { |
| + if (static_branch_likely(&poly1305_use_simd) && |
| + likely(crypto_simd_usable())) { |
| kernel_fpu_begin(); |
| poly1305_simd_blocks(dctx, dctx->buf, |
| POLY1305_BLOCK_SIZE); |
| @@ -131,7 +139,8 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| } |
| |
| if (likely(srclen >= POLY1305_BLOCK_SIZE)) { |
| - if (likely(crypto_simd_usable())) { |
| + if (static_branch_likely(&poly1305_use_simd) && |
| + likely(crypto_simd_usable())) { |
| kernel_fpu_begin(); |
| bytes = poly1305_simd_blocks(dctx, src, srclen); |
| kernel_fpu_end(); |
| @@ -147,6 +156,13 @@ static int poly1305_simd_update(struct shash_desc *desc, |
| memcpy(dctx->buf, src, srclen); |
| } |
| } |
| +EXPORT_SYMBOL(poly1305_update_arch); |
| + |
| +void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) |
| +{ |
| + poly1305_final_generic(desc, digest); |
| +} |
| +EXPORT_SYMBOL(poly1305_final_arch); |
| |
| static int crypto_poly1305_init(struct shash_desc *desc) |
| { |
| @@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| return 0; |
| } |
| |
| +static int poly1305_simd_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + poly1305_update_arch(dctx, src, srclen); |
| + return 0; |
| +} |
| + |
| static struct shash_alg alg = { |
| .digestsize = POLY1305_DIGEST_SIZE, |
| .init = crypto_poly1305_init, |
| @@ -189,15 +214,15 @@ static struct shash_alg alg = { |
| static int __init poly1305_simd_mod_init(void) |
| { |
| if (!boot_cpu_has(X86_FEATURE_XMM2)) |
| - return -ENODEV; |
| - |
| - poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && |
| - boot_cpu_has(X86_FEATURE_AVX) && |
| - boot_cpu_has(X86_FEATURE_AVX2) && |
| - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
| - alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); |
| - if (poly1305_use_avx2) |
| - alg.descsize += 10 * sizeof(u32); |
| + return 0; |
| + |
| + static_branch_enable(&poly1305_use_simd); |
| + |
| + if (IS_ENABLED(CONFIG_AS_AVX2) && |
| + boot_cpu_has(X86_FEATURE_AVX) && |
| + boot_cpu_has(X86_FEATURE_AVX2) && |
| + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) |
| + static_branch_enable(&poly1305_use_avx2); |
| |
| return crypto_register_shash(&alg); |
| } |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 6178aa627141..15cfb02c3e49 100644 |
| |
| |
| @@ -698,6 +698,7 @@ config CRYPTO_POLY1305_X86_64 |
| tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" |
| depends on X86 && 64BIT |
| select CRYPTO_LIB_POLY1305_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| help |
| Poly1305 authenticator algorithm, RFC7539. |
| |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index a731ea36bd5c..181754615f73 100644 |
| |
| |
| @@ -39,6 +39,7 @@ config CRYPTO_LIB_DES |
| |
| config CRYPTO_LIB_POLY1305_RSIZE |
| int |
| + default 4 if X86_64 |
| default 1 |
| |
| config CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| -- |
| 2.18.2 |
| |
| |
| From 3cd3c9e240bf2bf8b2ea277f8fa06e08017bfbce Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:24 +0100 |
| Subject: [PATCH 018/100] crypto: arm64/poly1305 - incorporate |
| OpenSSL/CRYPTOGAMS NEON implementation |
| |
| commit f569ca16475155013525686d0f73bc379c67e635 upstream. |
| |
| This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation |
| for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL |
| project. The file 'poly1305-armv8.pl' is taken straight from this upstream |
| GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, |
| and already contains all the changes required to build it as part of a |
| Linux kernel module. |
| |
| [0] https://github.com/dot-asm/cryptogams |
| |
| Co-developed-by: Andy Polyakov <appro@cryptogams.org> |
| Signed-off-by: Andy Polyakov <appro@cryptogams.org> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm64/crypto/Kconfig | 6 + |
| arch/arm64/crypto/Makefile | 10 +- |
| arch/arm64/crypto/poly1305-armv8.pl | 913 ++++++++++++++++++++++ |
| arch/arm64/crypto/poly1305-core.S_shipped | 835 ++++++++++++++++++++ |
| arch/arm64/crypto/poly1305-glue.c | 237 ++++++ |
| lib/crypto/Kconfig | 1 + |
| 6 files changed, 2001 insertions(+), 1 deletion(-) |
| create mode 100644 arch/arm64/crypto/poly1305-armv8.pl |
| create mode 100644 arch/arm64/crypto/poly1305-core.S_shipped |
| create mode 100644 arch/arm64/crypto/poly1305-glue.c |
| |
| diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig |
| index 17bada4b9dd2..30d9b24ee86e 100644 |
| |
| |
| @@ -106,6 +106,12 @@ config CRYPTO_CHACHA20_NEON |
| select CRYPTO_LIB_CHACHA_GENERIC |
| select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| |
| +config CRYPTO_POLY1305_NEON |
| + tristate "Poly1305 hash function using scalar or NEON instructions" |
| + depends on KERNEL_MODE_NEON |
| + select CRYPTO_HASH |
| + select CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" |
| depends on KERNEL_MODE_NEON |
| diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile |
| index 0435f2a0610e..d0901e610df3 100644 |
| |
| |
| @@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o |
| obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o |
| chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o |
| |
| +obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o |
| +poly1305-neon-y := poly1305-core.o poly1305-glue.o |
| +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 |
| + |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o |
| nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o |
| |
| @@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO |
| quiet_cmd_perlasm = PERLASM $@ |
| cmd_perlasm = $(PERL) $(<) void $(@) |
| |
| +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl |
| + $(call cmd,perlasm) |
| + |
| $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl |
| $(call cmd,perlasm) |
| |
| $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl |
| $(call cmd,perlasm) |
| + |
| endif |
| |
| -clean-files += sha256-core.S sha512-core.S |
| +clean-files += poly1305-core.S sha256-core.S sha512-core.S |
| diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl |
| new file mode 100644 |
| index 000000000000..6e5576d19af8 |
| |
| |
| @@ -0,0 +1,913 @@ |
| +#!/usr/bin/env perl |
| +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause |
| +# |
| +# ==================================================================== |
| +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL |
| +# project. |
| +# ==================================================================== |
| +# |
| +# This module implements Poly1305 hash for ARMv8. |
| +# |
| +# June 2015 |
| +# |
| +# Numbers are cycles per processed byte with poly1305_blocks alone. |
| +# |
| +# IALU/gcc-4.9 NEON |
| +# |
| +# Apple A7 1.86/+5% 0.72 |
| +# Cortex-A53 2.69/+58% 1.47 |
| +# Cortex-A57 2.70/+7% 1.14 |
| +# Denver 1.64/+50% 1.18(*) |
| +# X-Gene 2.13/+68% 2.27 |
| +# Mongoose 1.77/+75% 1.12 |
| +# Kryo 2.70/+55% 1.13 |
| +# ThunderX2 1.17/+95% 1.36 |
| +# |
| +# (*) estimate based on resources availability is less than 1.0, |
| +# i.e. measured result is worse than expected, presumably binary |
| +# translator is not almighty; |
| + |
| +$flavour=shift; |
| +$output=shift; |
| + |
| +if ($flavour && $flavour ne "void") { |
| + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or |
| + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or |
| + die "can't locate arm-xlate.pl"; |
| + |
| + open STDOUT,"| \"$^X\" $xlate $flavour $output"; |
| +} else { |
| + open STDOUT,">$output"; |
| +} |
| + |
| +my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3)); |
| +my ($mac,$nonce)=($inp,$len); |
| + |
| +my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); |
| + |
| +$code.=<<___; |
| +#ifndef __KERNEL__ |
| +# include "arm_arch.h" |
| +.extern OPENSSL_armcap_P |
| +#endif |
| + |
| +.text |
| + |
| +// forward "declarations" are required for Apple |
| +.globl poly1305_blocks |
| +.globl poly1305_emit |
| + |
| +.globl poly1305_init |
| +.type poly1305_init,%function |
| +.align 5 |
| +poly1305_init: |
| + cmp $inp,xzr |
| + stp xzr,xzr,[$ctx] // zero hash value |
| + stp xzr,xzr,[$ctx,#16] // [along with is_base2_26] |
| + |
| + csel x0,xzr,x0,eq |
| + b.eq .Lno_key |
| + |
| +#ifndef __KERNEL__ |
| + adrp x17,OPENSSL_armcap_P |
| + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] |
| +#endif |
| + |
| + ldp $r0,$r1,[$inp] // load key |
| + mov $s1,#0xfffffffc0fffffff |
| + movk $s1,#0x0fff,lsl#48 |
| +#ifdef __AARCH64EB__ |
| + rev $r0,$r0 // flip bytes |
| + rev $r1,$r1 |
| +#endif |
| + and $r0,$r0,$s1 // &=0ffffffc0fffffff |
| + and $s1,$s1,#-4 |
| + and $r1,$r1,$s1 // &=0ffffffc0ffffffc |
| + mov w#$s1,#-1 |
| + stp $r0,$r1,[$ctx,#32] // save key value |
| + str w#$s1,[$ctx,#48] // impossible key power value |
| + |
| +#ifndef __KERNEL__ |
| + tst w17,#ARMV7_NEON |
| + |
| + adr $d0,.Lpoly1305_blocks |
| + adr $r0,.Lpoly1305_blocks_neon |
| + adr $d1,.Lpoly1305_emit |
| + |
| + csel $d0,$d0,$r0,eq |
| + |
| +# ifdef __ILP32__ |
| + stp w#$d0,w#$d1,[$len] |
| +# else |
| + stp $d0,$d1,[$len] |
| +# endif |
| +#endif |
| + mov x0,#1 |
| +.Lno_key: |
| + ret |
| +.size poly1305_init,.-poly1305_init |
| + |
| +.type poly1305_blocks,%function |
| +.align 5 |
| +poly1305_blocks: |
| +.Lpoly1305_blocks: |
| + ands $len,$len,#-16 |
| + b.eq .Lno_data |
| + |
| + ldp $h0,$h1,[$ctx] // load hash value |
| + ldp $h2,x17,[$ctx,#16] // [along with is_base2_26] |
| + ldp $r0,$r1,[$ctx,#32] // load key value |
| + |
| +#ifdef __AARCH64EB__ |
| + lsr $d0,$h0,#32 |
| + mov w#$d1,w#$h0 |
| + lsr $d2,$h1,#32 |
| + mov w15,w#$h1 |
| + lsr x16,$h2,#32 |
| +#else |
| + mov w#$d0,w#$h0 |
| + lsr $d1,$h0,#32 |
| + mov w#$d2,w#$h1 |
| + lsr x15,$h1,#32 |
| + mov w16,w#$h2 |
| +#endif |
| + |
| + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 |
| + lsr $d1,$d2,#12 |
| + adds $d0,$d0,$d2,lsl#52 |
| + add $d1,$d1,x15,lsl#14 |
| + adc $d1,$d1,xzr |
| + lsr $d2,x16,#24 |
| + adds $d1,$d1,x16,lsl#40 |
| + adc $d2,$d2,xzr |
| + |
| + cmp x17,#0 // is_base2_26? |
| + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) |
| + csel $h0,$h0,$d0,eq // choose between radixes |
| + csel $h1,$h1,$d1,eq |
| + csel $h2,$h2,$d2,eq |
| + |
| +.Loop: |
| + ldp $t0,$t1,[$inp],#16 // load input |
| + sub $len,$len,#16 |
| +#ifdef __AARCH64EB__ |
| + rev $t0,$t0 |
| + rev $t1,$t1 |
| +#endif |
| + adds $h0,$h0,$t0 // accumulate input |
| + adcs $h1,$h1,$t1 |
| + |
| + mul $d0,$h0,$r0 // h0*r0 |
| + adc $h2,$h2,$padbit |
| + umulh $d1,$h0,$r0 |
| + |
| + mul $t0,$h1,$s1 // h1*5*r1 |
| + umulh $t1,$h1,$s1 |
| + |
| + adds $d0,$d0,$t0 |
| + mul $t0,$h0,$r1 // h0*r1 |
| + adc $d1,$d1,$t1 |
| + umulh $d2,$h0,$r1 |
| + |
| + adds $d1,$d1,$t0 |
| + mul $t0,$h1,$r0 // h1*r0 |
| + adc $d2,$d2,xzr |
| + umulh $t1,$h1,$r0 |
| + |
| + adds $d1,$d1,$t0 |
| + mul $t0,$h2,$s1 // h2*5*r1 |
| + adc $d2,$d2,$t1 |
| + mul $t1,$h2,$r0 // h2*r0 |
| + |
| + adds $d1,$d1,$t0 |
| + adc $d2,$d2,$t1 |
| + |
| + and $t0,$d2,#-4 // final reduction |
| + and $h2,$d2,#3 |
| + add $t0,$t0,$d2,lsr#2 |
| + adds $h0,$d0,$t0 |
| + adcs $h1,$d1,xzr |
| + adc $h2,$h2,xzr |
| + |
| + cbnz $len,.Loop |
| + |
| + stp $h0,$h1,[$ctx] // store hash value |
| + stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26] |
| + |
| +.Lno_data: |
| + ret |
| +.size poly1305_blocks,.-poly1305_blocks |
| + |
| +.type poly1305_emit,%function |
| +.align 5 |
| +poly1305_emit: |
| +.Lpoly1305_emit: |
| + ldp $h0,$h1,[$ctx] // load hash base 2^64 |
| + ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26] |
| + ldp $t0,$t1,[$nonce] // load nonce |
| + |
| +#ifdef __AARCH64EB__ |
| + lsr $d0,$h0,#32 |
| + mov w#$d1,w#$h0 |
| + lsr $d2,$h1,#32 |
| + mov w15,w#$h1 |
| + lsr x16,$h2,#32 |
| +#else |
| + mov w#$d0,w#$h0 |
| + lsr $d1,$h0,#32 |
| + mov w#$d2,w#$h1 |
| + lsr x15,$h1,#32 |
| + mov w16,w#$h2 |
| +#endif |
| + |
| + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 |
| + lsr $d1,$d2,#12 |
| + adds $d0,$d0,$d2,lsl#52 |
| + add $d1,$d1,x15,lsl#14 |
| + adc $d1,$d1,xzr |
| + lsr $d2,x16,#24 |
| + adds $d1,$d1,x16,lsl#40 |
| + adc $d2,$d2,xzr |
| + |
| + cmp $r0,#0 // is_base2_26? |
| + csel $h0,$h0,$d0,eq // choose between radixes |
| + csel $h1,$h1,$d1,eq |
| + csel $h2,$h2,$d2,eq |
| + |
| + adds $d0,$h0,#5 // compare to modulus |
| + adcs $d1,$h1,xzr |
| + adc $d2,$h2,xzr |
| + |
| + tst $d2,#-4 // see if it's carried/borrowed |
| + |
| + csel $h0,$h0,$d0,eq |
| + csel $h1,$h1,$d1,eq |
| + |
| +#ifdef __AARCH64EB__ |
| + ror $t0,$t0,#32 // flip nonce words |
| + ror $t1,$t1,#32 |
| +#endif |
| + adds $h0,$h0,$t0 // accumulate nonce |
| + adc $h1,$h1,$t1 |
| +#ifdef __AARCH64EB__ |
| + rev $h0,$h0 // flip output bytes |
| + rev $h1,$h1 |
| +#endif |
| + stp $h0,$h1,[$mac] // write result |
| + |
| + ret |
| +.size poly1305_emit,.-poly1305_emit |
| +___ |
| +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); |
| +my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); |
| +my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18)); |
| +my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23)); |
| +my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28)); |
| +my ($T0,$T1,$MASK) = map("v$_",(29..31)); |
| + |
| +my ($in2,$zeros)=("x16","x17"); |
| +my $is_base2_26 = $zeros; # borrow |
| + |
| +$code.=<<___; |
| +.type poly1305_mult,%function |
| +.align 5 |
| +poly1305_mult: |
| + mul $d0,$h0,$r0 // h0*r0 |
| + umulh $d1,$h0,$r0 |
| + |
| + mul $t0,$h1,$s1 // h1*5*r1 |
| + umulh $t1,$h1,$s1 |
| + |
| + adds $d0,$d0,$t0 |
| + mul $t0,$h0,$r1 // h0*r1 |
| + adc $d1,$d1,$t1 |
| + umulh $d2,$h0,$r1 |
| + |
| + adds $d1,$d1,$t0 |
| + mul $t0,$h1,$r0 // h1*r0 |
| + adc $d2,$d2,xzr |
| + umulh $t1,$h1,$r0 |
| + |
| + adds $d1,$d1,$t0 |
| + mul $t0,$h2,$s1 // h2*5*r1 |
| + adc $d2,$d2,$t1 |
| + mul $t1,$h2,$r0 // h2*r0 |
| + |
| + adds $d1,$d1,$t0 |
| + adc $d2,$d2,$t1 |
| + |
| + and $t0,$d2,#-4 // final reduction |
| + and $h2,$d2,#3 |
| + add $t0,$t0,$d2,lsr#2 |
| + adds $h0,$d0,$t0 |
| + adcs $h1,$d1,xzr |
| + adc $h2,$h2,xzr |
| + |
| + ret |
| +.size poly1305_mult,.-poly1305_mult |
| + |
| +.type poly1305_splat,%function |
| +.align 4 |
| +poly1305_splat: |
| + and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x13,$h0,#26,#26 |
| + extr x14,$h1,$h0,#52 |
| + and x14,x14,#0x03ffffff |
| + ubfx x15,$h1,#14,#26 |
| + extr x16,$h2,$h1,#40 |
| + |
| + str w12,[$ctx,#16*0] // r0 |
| + add w12,w13,w13,lsl#2 // r1*5 |
| + str w13,[$ctx,#16*1] // r1 |
| + add w13,w14,w14,lsl#2 // r2*5 |
| + str w12,[$ctx,#16*2] // s1 |
| + str w14,[$ctx,#16*3] // r2 |
| + add w14,w15,w15,lsl#2 // r3*5 |
| + str w13,[$ctx,#16*4] // s2 |
| + str w15,[$ctx,#16*5] // r3 |
| + add w15,w16,w16,lsl#2 // r4*5 |
| + str w14,[$ctx,#16*6] // s3 |
| + str w16,[$ctx,#16*7] // r4 |
| + str w15,[$ctx,#16*8] // s4 |
| + |
| + ret |
| +.size poly1305_splat,.-poly1305_splat |
| + |
| +#ifdef __KERNEL__ |
| +.globl poly1305_blocks_neon |
| +#endif |
| +.type poly1305_blocks_neon,%function |
| +.align 5 |
| +poly1305_blocks_neon: |
| +.Lpoly1305_blocks_neon: |
| + ldr $is_base2_26,[$ctx,#24] |
| + cmp $len,#128 |
| + b.lo .Lpoly1305_blocks |
| + |
| + .inst 0xd503233f // paciasp |
| + stp x29,x30,[sp,#-80]! |
| + add x29,sp,#0 |
| + |
| + stp d8,d9,[sp,#16] // meet ABI requirements |
| + stp d10,d11,[sp,#32] |
| + stp d12,d13,[sp,#48] |
| + stp d14,d15,[sp,#64] |
| + |
| + cbz $is_base2_26,.Lbase2_64_neon |
| + |
| + ldp w10,w11,[$ctx] // load hash value base 2^26 |
| + ldp w12,w13,[$ctx,#8] |
| + ldr w14,[$ctx,#16] |
| + |
| + tst $len,#31 |
| + b.eq .Leven_neon |
| + |
| + ldp $r0,$r1,[$ctx,#32] // load key value |
| + |
| + add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 |
| + lsr $h1,x12,#12 |
| + adds $h0,$h0,x12,lsl#52 |
| + add $h1,$h1,x13,lsl#14 |
| + adc $h1,$h1,xzr |
| + lsr $h2,x14,#24 |
| + adds $h1,$h1,x14,lsl#40 |
| + adc $d2,$h2,xzr // can be partially reduced... |
| + |
| + ldp $d0,$d1,[$inp],#16 // load input |
| + sub $len,$len,#16 |
| + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) |
| + |
| +#ifdef __AARCH64EB__ |
| + rev $d0,$d0 |
| + rev $d1,$d1 |
| +#endif |
| + adds $h0,$h0,$d0 // accumulate input |
| + adcs $h1,$h1,$d1 |
| + adc $h2,$h2,$padbit |
| + |
| + bl poly1305_mult |
| + |
| + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x11,$h0,#26,#26 |
| + extr x12,$h1,$h0,#52 |
| + and x12,x12,#0x03ffffff |
| + ubfx x13,$h1,#14,#26 |
| + extr x14,$h2,$h1,#40 |
| + |
| + b .Leven_neon |
| + |
| +.align 4 |
| +.Lbase2_64_neon: |
| + ldp $r0,$r1,[$ctx,#32] // load key value |
| + |
| + ldp $h0,$h1,[$ctx] // load hash value base 2^64 |
| + ldr $h2,[$ctx,#16] |
| + |
| + tst $len,#31 |
| + b.eq .Linit_neon |
| + |
| + ldp $d0,$d1,[$inp],#16 // load input |
| + sub $len,$len,#16 |
| + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) |
| +#ifdef __AARCH64EB__ |
| + rev $d0,$d0 |
| + rev $d1,$d1 |
| +#endif |
| + adds $h0,$h0,$d0 // accumulate input |
| + adcs $h1,$h1,$d1 |
| + adc $h2,$h2,$padbit |
| + |
| + bl poly1305_mult |
| + |
| +.Linit_neon: |
| + ldr w17,[$ctx,#48] // first table element |
| + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x11,$h0,#26,#26 |
| + extr x12,$h1,$h0,#52 |
| + and x12,x12,#0x03ffffff |
| + ubfx x13,$h1,#14,#26 |
| + extr x14,$h2,$h1,#40 |
| + |
| + cmp w17,#-1 // is value impossible? |
| + b.ne .Leven_neon |
| + |
| + fmov ${H0},x10 |
| + fmov ${H1},x11 |
| + fmov ${H2},x12 |
| + fmov ${H3},x13 |
| + fmov ${H4},x14 |
| + |
| + ////////////////////////////////// initialize r^n table |
| + mov $h0,$r0 // r^1 |
| + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) |
| + mov $h1,$r1 |
| + mov $h2,xzr |
| + add $ctx,$ctx,#48+12 |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^2 |
| + sub $ctx,$ctx,#4 |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^3 |
| + sub $ctx,$ctx,#4 |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^4 |
| + sub $ctx,$ctx,#4 |
| + bl poly1305_splat |
| + sub $ctx,$ctx,#48 // restore original $ctx |
| + b .Ldo_neon |
| + |
| +.align 4 |
| +.Leven_neon: |
| + fmov ${H0},x10 |
| + fmov ${H1},x11 |
| + fmov ${H2},x12 |
| + fmov ${H3},x13 |
| + fmov ${H4},x14 |
| + |
| +.Ldo_neon: |
| + ldp x8,x12,[$inp,#32] // inp[2:3] |
| + subs $len,$len,#64 |
| + ldp x9,x13,[$inp,#48] |
| + add $in2,$inp,#96 |
| + adr $zeros,.Lzeros |
| + |
| + lsl $padbit,$padbit,#24 |
| + add x15,$ctx,#48 |
| + |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + and x5,x9,#0x03ffffff |
| + ubfx x6,x8,#26,#26 |
| + ubfx x7,x9,#26,#26 |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + extr x8,x12,x8,#52 |
| + extr x9,x13,x9,#52 |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + fmov $IN23_0,x4 |
| + and x8,x8,#0x03ffffff |
| + and x9,x9,#0x03ffffff |
| + ubfx x10,x12,#14,#26 |
| + ubfx x11,x13,#14,#26 |
| + add x12,$padbit,x12,lsr#40 |
| + add x13,$padbit,x13,lsr#40 |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + fmov $IN23_1,x6 |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + fmov $IN23_2,x8 |
| + fmov $IN23_3,x10 |
| + fmov $IN23_4,x12 |
| + |
| + ldp x8,x12,[$inp],#16 // inp[0:1] |
| + ldp x9,x13,[$inp],#48 |
| + |
| + ld1 {$R0,$R1,$S1,$R2},[x15],#64 |
| + ld1 {$S2,$R3,$S3,$R4},[x15],#64 |
| + ld1 {$S4},[x15] |
| + |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + and x5,x9,#0x03ffffff |
| + ubfx x6,x8,#26,#26 |
| + ubfx x7,x9,#26,#26 |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + extr x8,x12,x8,#52 |
| + extr x9,x13,x9,#52 |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + fmov $IN01_0,x4 |
| + and x8,x8,#0x03ffffff |
| + and x9,x9,#0x03ffffff |
| + ubfx x10,x12,#14,#26 |
| + ubfx x11,x13,#14,#26 |
| + add x12,$padbit,x12,lsr#40 |
| + add x13,$padbit,x13,lsr#40 |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + fmov $IN01_1,x6 |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + movi $MASK.2d,#-1 |
| + fmov $IN01_2,x8 |
| + fmov $IN01_3,x10 |
| + fmov $IN01_4,x12 |
| + ushr $MASK.2d,$MASK.2d,#38 |
| + |
| + b.ls .Lskip_loop |
| + |
| +.align 4 |
| +.Loop_neon: |
| + //////////////////////////////////////////////////////////////// |
| + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 |
| + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r |
| + // \___________________/ |
| + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 |
| + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r |
| + // \___________________/ \____________________/ |
| + // |
| + // Note that we start with inp[2:3]*r^2. This is because it |
| + // doesn't depend on reduction in previous iteration. |
| + //////////////////////////////////////////////////////////////// |
| + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 |
| + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 |
| + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 |
| + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 |
| + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 |
| + |
| + subs $len,$len,#64 |
| + umull $ACC4,$IN23_0,${R4}[2] |
| + csel $in2,$zeros,$in2,lo |
| + umull $ACC3,$IN23_0,${R3}[2] |
| + umull $ACC2,$IN23_0,${R2}[2] |
| + ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) |
| + umull $ACC1,$IN23_0,${R1}[2] |
| + ldp x9,x13,[$in2],#48 |
| + umull $ACC0,$IN23_0,${R0}[2] |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + |
| + umlal $ACC4,$IN23_1,${R3}[2] |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + umlal $ACC3,$IN23_1,${R2}[2] |
| + and x5,x9,#0x03ffffff |
| + umlal $ACC2,$IN23_1,${R1}[2] |
| + ubfx x6,x8,#26,#26 |
| + umlal $ACC1,$IN23_1,${R0}[2] |
| + ubfx x7,x9,#26,#26 |
| + umlal $ACC0,$IN23_1,${S4}[2] |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + |
| + umlal $ACC4,$IN23_2,${R2}[2] |
| + extr x8,x12,x8,#52 |
| + umlal $ACC3,$IN23_2,${R1}[2] |
| + extr x9,x13,x9,#52 |
| + umlal $ACC2,$IN23_2,${R0}[2] |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + umlal $ACC1,$IN23_2,${S4}[2] |
| + fmov $IN23_0,x4 |
| + umlal $ACC0,$IN23_2,${S3}[2] |
| + and x8,x8,#0x03ffffff |
| + |
| + umlal $ACC4,$IN23_3,${R1}[2] |
| + and x9,x9,#0x03ffffff |
| + umlal $ACC3,$IN23_3,${R0}[2] |
| + ubfx x10,x12,#14,#26 |
| + umlal $ACC2,$IN23_3,${S4}[2] |
| + ubfx x11,x13,#14,#26 |
| + umlal $ACC1,$IN23_3,${S3}[2] |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + umlal $ACC0,$IN23_3,${S2}[2] |
| + fmov $IN23_1,x6 |
| + |
| + add $IN01_2,$IN01_2,$H2 |
| + add x12,$padbit,x12,lsr#40 |
| + umlal $ACC4,$IN23_4,${R0}[2] |
| + add x13,$padbit,x13,lsr#40 |
| + umlal $ACC3,$IN23_4,${S4}[2] |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + umlal $ACC2,$IN23_4,${S3}[2] |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + umlal $ACC1,$IN23_4,${S2}[2] |
| + fmov $IN23_2,x8 |
| + umlal $ACC0,$IN23_4,${S1}[2] |
| + fmov $IN23_3,x10 |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // (hash+inp[0:1])*r^4 and accumulate |
| + |
| + add $IN01_0,$IN01_0,$H0 |
| + fmov $IN23_4,x12 |
| + umlal $ACC3,$IN01_2,${R1}[0] |
| + ldp x8,x12,[$inp],#16 // inp[0:1] |
| + umlal $ACC0,$IN01_2,${S3}[0] |
| + ldp x9,x13,[$inp],#48 |
| + umlal $ACC4,$IN01_2,${R2}[0] |
| + umlal $ACC1,$IN01_2,${S4}[0] |
| + umlal $ACC2,$IN01_2,${R0}[0] |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + |
| + add $IN01_1,$IN01_1,$H1 |
| + umlal $ACC3,$IN01_0,${R3}[0] |
| + umlal $ACC4,$IN01_0,${R4}[0] |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + umlal $ACC2,$IN01_0,${R2}[0] |
| + and x5,x9,#0x03ffffff |
| + umlal $ACC0,$IN01_0,${R0}[0] |
| + ubfx x6,x8,#26,#26 |
| + umlal $ACC1,$IN01_0,${R1}[0] |
| + ubfx x7,x9,#26,#26 |
| + |
| + add $IN01_3,$IN01_3,$H3 |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + umlal $ACC3,$IN01_1,${R2}[0] |
| + extr x8,x12,x8,#52 |
| + umlal $ACC4,$IN01_1,${R3}[0] |
| + extr x9,x13,x9,#52 |
| + umlal $ACC0,$IN01_1,${S4}[0] |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + umlal $ACC2,$IN01_1,${R1}[0] |
| + fmov $IN01_0,x4 |
| + umlal $ACC1,$IN01_1,${R0}[0] |
| + and x8,x8,#0x03ffffff |
| + |
| + add $IN01_4,$IN01_4,$H4 |
| + and x9,x9,#0x03ffffff |
| + umlal $ACC3,$IN01_3,${R0}[0] |
| + ubfx x10,x12,#14,#26 |
| + umlal $ACC0,$IN01_3,${S2}[0] |
| + ubfx x11,x13,#14,#26 |
| + umlal $ACC4,$IN01_3,${R1}[0] |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + umlal $ACC1,$IN01_3,${S3}[0] |
| + fmov $IN01_1,x6 |
| + umlal $ACC2,$IN01_3,${S4}[0] |
| + add x12,$padbit,x12,lsr#40 |
| + |
| + umlal $ACC3,$IN01_4,${S4}[0] |
| + add x13,$padbit,x13,lsr#40 |
| + umlal $ACC0,$IN01_4,${S1}[0] |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + umlal $ACC4,$IN01_4,${R0}[0] |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + umlal $ACC1,$IN01_4,${S2}[0] |
| + fmov $IN01_2,x8 |
| + umlal $ACC2,$IN01_4,${S3}[0] |
| + fmov $IN01_3,x10 |
| + fmov $IN01_4,x12 |
| + |
| + ///////////////////////////////////////////////////////////////// |
| + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
| + // and P. Schwabe |
| + // |
| + // [see discussion in poly1305-armv4 module] |
| + |
| + ushr $T0.2d,$ACC3,#26 |
| + xtn $H3,$ACC3 |
| + ushr $T1.2d,$ACC0,#26 |
| + and $ACC0,$ACC0,$MASK.2d |
| + add $ACC4,$ACC4,$T0.2d // h3 -> h4 |
| + bic $H3,#0xfc,lsl#24 // &=0x03ffffff |
| + add $ACC1,$ACC1,$T1.2d // h0 -> h1 |
| + |
| + ushr $T0.2d,$ACC4,#26 |
| + xtn $H4,$ACC4 |
| + ushr $T1.2d,$ACC1,#26 |
| + xtn $H1,$ACC1 |
| + bic $H4,#0xfc,lsl#24 |
| + add $ACC2,$ACC2,$T1.2d // h1 -> h2 |
| + |
| + add $ACC0,$ACC0,$T0.2d |
| + shl $T0.2d,$T0.2d,#2 |
| + shrn $T1.2s,$ACC2,#26 |
| + xtn $H2,$ACC2 |
| + add $ACC0,$ACC0,$T0.2d // h4 -> h0 |
| + bic $H1,#0xfc,lsl#24 |
| + add $H3,$H3,$T1.2s // h2 -> h3 |
| + bic $H2,#0xfc,lsl#24 |
| + |
| + shrn $T0.2s,$ACC0,#26 |
| + xtn $H0,$ACC0 |
| + ushr $T1.2s,$H3,#26 |
| + bic $H3,#0xfc,lsl#24 |
| + bic $H0,#0xfc,lsl#24 |
| + add $H1,$H1,$T0.2s // h0 -> h1 |
| + add $H4,$H4,$T1.2s // h3 -> h4 |
| + |
| + b.hi .Loop_neon |
| + |
| +.Lskip_loop: |
| + dup $IN23_2,${IN23_2}[0] |
| + add $IN01_2,$IN01_2,$H2 |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 |
| + |
| + adds $len,$len,#32 |
| + b.ne .Long_tail |
| + |
| + dup $IN23_2,${IN01_2}[0] |
| + add $IN23_0,$IN01_0,$H0 |
| + add $IN23_3,$IN01_3,$H3 |
| + add $IN23_1,$IN01_1,$H1 |
| + add $IN23_4,$IN01_4,$H4 |
| + |
| +.Long_tail: |
| + dup $IN23_0,${IN23_0}[0] |
| + umull2 $ACC0,$IN23_2,${S3} |
| + umull2 $ACC3,$IN23_2,${R1} |
| + umull2 $ACC4,$IN23_2,${R2} |
| + umull2 $ACC2,$IN23_2,${R0} |
| + umull2 $ACC1,$IN23_2,${S4} |
| + |
| + dup $IN23_1,${IN23_1}[0] |
| + umlal2 $ACC0,$IN23_0,${R0} |
| + umlal2 $ACC2,$IN23_0,${R2} |
| + umlal2 $ACC3,$IN23_0,${R3} |
| + umlal2 $ACC4,$IN23_0,${R4} |
| + umlal2 $ACC1,$IN23_0,${R1} |
| + |
| + dup $IN23_3,${IN23_3}[0] |
| + umlal2 $ACC0,$IN23_1,${S4} |
| + umlal2 $ACC3,$IN23_1,${R2} |
| + umlal2 $ACC2,$IN23_1,${R1} |
| + umlal2 $ACC4,$IN23_1,${R3} |
| + umlal2 $ACC1,$IN23_1,${R0} |
| + |
| + dup $IN23_4,${IN23_4}[0] |
| + umlal2 $ACC3,$IN23_3,${R0} |
| + umlal2 $ACC4,$IN23_3,${R1} |
| + umlal2 $ACC0,$IN23_3,${S2} |
| + umlal2 $ACC1,$IN23_3,${S3} |
| + umlal2 $ACC2,$IN23_3,${S4} |
| + |
| + umlal2 $ACC3,$IN23_4,${S4} |
| + umlal2 $ACC0,$IN23_4,${S1} |
| + umlal2 $ACC4,$IN23_4,${R0} |
| + umlal2 $ACC1,$IN23_4,${S2} |
| + umlal2 $ACC2,$IN23_4,${S3} |
| + |
| + b.eq .Lshort_tail |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // (hash+inp[0:1])*r^4:r^3 and accumulate |
| + |
| + add $IN01_0,$IN01_0,$H0 |
| + umlal $ACC3,$IN01_2,${R1} |
| + umlal $ACC0,$IN01_2,${S3} |
| + umlal $ACC4,$IN01_2,${R2} |
| + umlal $ACC1,$IN01_2,${S4} |
| + umlal $ACC2,$IN01_2,${R0} |
| + |
| + add $IN01_1,$IN01_1,$H1 |
| + umlal $ACC3,$IN01_0,${R3} |
| + umlal $ACC0,$IN01_0,${R0} |
| + umlal $ACC4,$IN01_0,${R4} |
| + umlal $ACC1,$IN01_0,${R1} |
| + umlal $ACC2,$IN01_0,${R2} |
| + |
| + add $IN01_3,$IN01_3,$H3 |
| + umlal $ACC3,$IN01_1,${R2} |
| + umlal $ACC0,$IN01_1,${S4} |
| + umlal $ACC4,$IN01_1,${R3} |
| + umlal $ACC1,$IN01_1,${R0} |
| + umlal $ACC2,$IN01_1,${R1} |
| + |
| + add $IN01_4,$IN01_4,$H4 |
| + umlal $ACC3,$IN01_3,${R0} |
| + umlal $ACC0,$IN01_3,${S2} |
| + umlal $ACC4,$IN01_3,${R1} |
| + umlal $ACC1,$IN01_3,${S3} |
| + umlal $ACC2,$IN01_3,${S4} |
| + |
| + umlal $ACC3,$IN01_4,${S4} |
| + umlal $ACC0,$IN01_4,${S1} |
| + umlal $ACC4,$IN01_4,${R0} |
| + umlal $ACC1,$IN01_4,${S2} |
| + umlal $ACC2,$IN01_4,${S3} |
| + |
| +.Lshort_tail: |
| + //////////////////////////////////////////////////////////////// |
| + // horizontal add |
| + |
| + addp $ACC3,$ACC3,$ACC3 |
| + ldp d8,d9,[sp,#16] // meet ABI requirements |
| + addp $ACC0,$ACC0,$ACC0 |
| + ldp d10,d11,[sp,#32] |
| + addp $ACC4,$ACC4,$ACC4 |
| + ldp d12,d13,[sp,#48] |
| + addp $ACC1,$ACC1,$ACC1 |
| + ldp d14,d15,[sp,#64] |
| + addp $ACC2,$ACC2,$ACC2 |
| + ldr x30,[sp,#8] |
| + .inst 0xd50323bf // autiasp |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // lazy reduction, but without narrowing |
| + |
| + ushr $T0.2d,$ACC3,#26 |
| + and $ACC3,$ACC3,$MASK.2d |
| + ushr $T1.2d,$ACC0,#26 |
| + and $ACC0,$ACC0,$MASK.2d |
| + |
| + add $ACC4,$ACC4,$T0.2d // h3 -> h4 |
| + add $ACC1,$ACC1,$T1.2d // h0 -> h1 |
| + |
| + ushr $T0.2d,$ACC4,#26 |
| + and $ACC4,$ACC4,$MASK.2d |
| + ushr $T1.2d,$ACC1,#26 |
| + and $ACC1,$ACC1,$MASK.2d |
| + add $ACC2,$ACC2,$T1.2d // h1 -> h2 |
| + |
| + add $ACC0,$ACC0,$T0.2d |
| + shl $T0.2d,$T0.2d,#2 |
| + ushr $T1.2d,$ACC2,#26 |
| + and $ACC2,$ACC2,$MASK.2d |
| + add $ACC0,$ACC0,$T0.2d // h4 -> h0 |
| + add $ACC3,$ACC3,$T1.2d // h2 -> h3 |
| + |
| + ushr $T0.2d,$ACC0,#26 |
| + and $ACC0,$ACC0,$MASK.2d |
| + ushr $T1.2d,$ACC3,#26 |
| + and $ACC3,$ACC3,$MASK.2d |
| + add $ACC1,$ACC1,$T0.2d // h0 -> h1 |
| + add $ACC4,$ACC4,$T1.2d // h3 -> h4 |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // write the result, can be partially reduced |
| + |
| + st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16 |
| + mov x4,#1 |
| + st1 {$ACC4}[0],[$ctx] |
| + str x4,[$ctx,#8] // set is_base2_26 |
| + |
| + ldr x29,[sp],#80 |
| + ret |
| +.size poly1305_blocks_neon,.-poly1305_blocks_neon |
| + |
| +.align 5 |
| +.Lzeros: |
| +.long 0,0,0,0,0,0,0,0 |
| +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm" |
| +.align 2 |
| +#if !defined(__KERNEL__) && !defined(_WIN64) |
| +.comm OPENSSL_armcap_P,4,4 |
| +.hidden OPENSSL_armcap_P |
| +#endif |
| +___ |
| + |
| +foreach (split("\n",$code)) { # rewrite operand suffixes line by line; the "or" chain stops at the first rule whose pattern matches |
| + s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or # shrn: destination arrangement .2d/.4d -> .2s |
| + s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or # fmov vN<suffix>,xM -> fmov dN,xM |
| + (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or # dup: .2s/.4s -> .2d ("or 1" ends the chain even if nothing changed) |
| + (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or # eor/and: any vector arrangement -> .16b |
| + (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or # umull/umlal: .4s sources -> .2s |
| + (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or # umull2/umlal2: .2s sources -> .4s |
| + (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); # lane-indexed st1..st4 register lists: .2d/.4d -> .s |
| +# The two substitutions below are applied to every line, whichever rule matched above. |
| + s/\.[124]([sd])\[/.$1\[/; # indexed element: drop the lane count, e.g. ".2s[2]" -> ".s[2]" (first occurrence only) |
| + s/w#x([0-9]+)/w$1/g; # "w#xN" -> "wN" |
| + |
| + print $_,"\n"; # emit the rewritten line on STDOUT |
| +} |
| +close STDOUT; |
| diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped |
| new file mode 100644 |
| index 000000000000..8d1c4e420ccd |
| |
| |
| @@ -0,0 +1,835 @@ |
| +#ifndef __KERNEL__ |
| +# include "arm_arch.h" |
| +.extern OPENSSL_armcap_P |
| +#endif |
| + |
| +.text |
| + |
| +// forward "declarations" are required for Apple |
| +.globl poly1305_blocks |
| +.globl poly1305_emit |
| + |
| +.globl poly1305_init |
| +.type poly1305_init,%function |
| +.align 5 |
| +poly1305_init: // x0 = state, x1 = key (may be NULL), x2 = function table (written in non-kernel builds only); returns 0 without a key, 1 otherwise |
| + cmp x1,xzr // key == NULL? |
| + stp xzr,xzr,[x0] // zero hash value |
| + stp xzr,xzr,[x0,#16] // [along with is_base2_26] |
| + |
| + csel x0,xzr,x0,eq // no key: return value becomes 0 |
| + b.eq .Lno_key |
| + |
| +#ifndef __KERNEL__ |
| + adrp x17,OPENSSL_armcap_P |
| + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] |
| +#endif |
| + |
| + ldp x7,x8,[x1] // load key |
| + mov x9,#0xfffffffc0fffffff |
| + movk x9,#0x0fff,lsl#48 // x9 = 0x0ffffffc0fffffff |
| +#ifdef __AARCH64EB__ |
| + rev x7,x7 // flip bytes |
| + rev x8,x8 |
| +#endif |
| + and x7,x7,x9 // &=0ffffffc0fffffff |
| + and x9,x9,#-4 // second word also loses its two lowest bits |
| + and x8,x8,x9 // &=0ffffffc0ffffffc |
| + mov w9,#-1 |
| + stp x7,x8,[x0,#32] // save key value |
| + str w9,[x0,#48] // impossible key power value |
| + |
| +#ifndef __KERNEL__ |
| + tst w17,#ARMV7_NEON |
| + |
| + adr x12,.Lpoly1305_blocks |
| + adr x7,.Lpoly1305_blocks_neon |
| + adr x13,.Lpoly1305_emit |
| + |
| + csel x12,x12,x7,eq // NEON bit clear: keep the scalar blocks routine |
| + |
| +# ifdef __ILP32__ |
| + stp w12,w13,[x2] |
| +# else |
| + stp x12,x13,[x2] |
| +# endif |
| +#endif |
| + mov x0,#1 // key accepted: return 1 |
| +.Lno_key: |
| + ret |
| +.size poly1305_init,.-poly1305_init |
| + |
| +.type poly1305_blocks,%function |
| +.align 5 |
| +poly1305_blocks: // x0 = state, x1 = input, x2 = byte count, x3 = value added to the top hash limb for every block |
| +.Lpoly1305_blocks: |
| + ands x2,x2,#-16 // round the length down to whole 16-byte blocks |
| + b.eq .Lno_data |
| + |
| + ldp x4,x5,[x0] // load hash value |
| + ldp x6,x17,[x0,#16] // [along with is_base2_26] |
| + ldp x7,x8,[x0,#32] // load key value |
| + |
| +#ifdef __AARCH64EB__ |
| + lsr x12,x4,#32 // split the 64-bit loads into 32-bit limbs x12..x16 (half order depends on endianness) |
| + mov w13,w4 |
| + lsr x14,x5,#32 |
| + mov w15,w5 |
| + lsr x16,x6,#32 |
| +#else |
| + mov w12,w4 // split the 64-bit loads into 32-bit limbs x12..x16 |
| + lsr x13,x4,#32 |
| + mov w14,w5 |
| + lsr x15,x5,#32 |
| + mov w16,w6 |
| +#endif |
| + |
| + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 |
| + lsr x13,x14,#12 |
| + adds x12,x12,x14,lsl#52 |
| + add x13,x13,x15,lsl#14 |
| + adc x13,x13,xzr |
| + lsr x14,x16,#24 |
| + adds x13,x13,x16,lsl#40 |
| + adc x14,x14,xzr // carry kept in the top limb |
| + |
| + cmp x17,#0 // is_base2_26? |
| + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) |
| + csel x4,x4,x12,eq // choose between radixes |
| + csel x5,x5,x13,eq |
| + csel x6,x6,x14,eq |
| + |
| +.Loop: |
| + ldp x10,x11,[x1],#16 // load input |
| + sub x2,x2,#16 |
| +#ifdef __AARCH64EB__ |
| + rev x10,x10 |
| + rev x11,x11 |
| +#endif |
| + adds x4,x4,x10 // accumulate input |
| + adcs x5,x5,x11 |
| + |
| + mul x12,x4,x7 // h0*r0 |
| + adc x6,x6,x3 // top limb += x3 + carry (flags from the adcs above; mul leaves them alone) |
| + umulh x13,x4,x7 |
| + |
| + mul x10,x5,x9 // h1*5*r1 |
| + umulh x11,x5,x9 |
| + |
| + adds x12,x12,x10 |
| + mul x10,x4,x8 // h0*r1 |
| + adc x13,x13,x11 |
| + umulh x14,x4,x8 |
| + |
| + adds x13,x13,x10 |
| + mul x10,x5,x7 // h1*r0 |
| + adc x14,x14,xzr |
| + umulh x11,x5,x7 |
| + |
| + adds x13,x13,x10 |
| + mul x10,x6,x9 // h2*5*r1 |
| + adc x14,x14,x11 |
| + mul x11,x6,x7 // h2*r0 |
| + |
| + adds x13,x13,x10 |
| + adc x14,x14,x11 |
| + |
| + and x10,x14,#-4 // final reduction |
| + and x6,x14,#3 // keep two bits in the top limb |
| + add x10,x10,x14,lsr#2 // x10 = 5*(x14>>2), folded back into the low limb |
| + adds x4,x12,x10 |
| + adcs x5,x13,xzr |
| + adc x6,x6,xzr |
| + |
| + cbnz x2,.Loop // more blocks left? |
| + |
| + stp x4,x5,[x0] // store hash value |
| + stp x6,xzr,[x0,#16] // [and clear is_base2_26] |
| + |
| +.Lno_data: |
| + ret |
| +.size poly1305_blocks,.-poly1305_blocks |
| + |
| +.type poly1305_emit,%function |
| +.align 5 |
| +poly1305_emit: // x0 = state, x1 = 16-byte output, x2 = nonce |
| +.Lpoly1305_emit: |
| + ldp x4,x5,[x0] // load hash base 2^64 |
| + ldp x6,x7,[x0,#16] // [along with is_base2_26] |
| + ldp x10,x11,[x2] // load nonce |
| + |
| +#ifdef __AARCH64EB__ |
| + lsr x12,x4,#32 // split the 64-bit loads into 32-bit limbs x12..x16 (half order depends on endianness) |
| + mov w13,w4 |
| + lsr x14,x5,#32 |
| + mov w15,w5 |
| + lsr x16,x6,#32 |
| +#else |
| + mov w12,w4 // split the 64-bit loads into 32-bit limbs x12..x16 |
| + lsr x13,x4,#32 |
| + mov w14,w5 |
| + lsr x15,x5,#32 |
| + mov w16,w6 |
| +#endif |
| + |
| + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 |
| + lsr x13,x14,#12 |
| + adds x12,x12,x14,lsl#52 |
| + add x13,x13,x15,lsl#14 |
| + adc x13,x13,xzr |
| + lsr x14,x16,#24 |
| + adds x13,x13,x16,lsl#40 |
| + adc x14,x14,xzr |
| + |
| + cmp x7,#0 // is_base2_26? |
| + csel x4,x4,x12,eq // choose between radixes |
| + csel x5,x5,x13,eq |
| + csel x6,x6,x14,eq |
| + |
| + adds x12,x4,#5 // compare to modulus |
| + adcs x13,x5,xzr |
| + adc x14,x6,xzr // x14:x13:x12 = hash + 5 |
| + |
| + tst x14,#-4 // see if it's carried/borrowed |
| + |
| + csel x4,x4,x12,eq // top limb of hash+5 below 4: keep the hash, else take the low 128 bits of hash+5 |
| + csel x5,x5,x13,eq |
| + |
| +#ifdef __AARCH64EB__ |
| + ror x10,x10,#32 // flip nonce words |
| + ror x11,x11,#32 |
| +#endif |
| + adds x4,x4,x10 // accumulate nonce |
| + adc x5,x5,x11 // carry out of the 128-bit sum is dropped |
| +#ifdef __AARCH64EB__ |
| + rev x4,x4 // flip output bytes |
| + rev x5,x5 |
| +#endif |
| + stp x4,x5,[x1] // write result |
| + |
| + ret |
| +.size poly1305_emit,.-poly1305_emit |
| +.type poly1305_mult,%function |
| +.align 5 |
| +poly1305_mult: // in: x4,x5,x6 = h0,h1,h2; x7,x8 = r0,r1; x9 = r1 + (r1 >> 2); out: x4,x5,x6; overwrites x10-x14 |
| + mul x12,x4,x7 // h0*r0 |
| + umulh x13,x4,x7 |
| + |
| + mul x10,x5,x9 // h1*5*r1 |
| + umulh x11,x5,x9 |
| + |
| + adds x12,x12,x10 |
| + mul x10,x4,x8 // h0*r1 |
| + adc x13,x13,x11 |
| + umulh x14,x4,x8 |
| + |
| + adds x13,x13,x10 |
| + mul x10,x5,x7 // h1*r0 |
| + adc x14,x14,xzr |
| + umulh x11,x5,x7 |
| + |
| + adds x13,x13,x10 |
| + mul x10,x6,x9 // h2*5*r1 |
| + adc x14,x14,x11 |
| + mul x11,x6,x7 // h2*r0 |
| + |
| + adds x13,x13,x10 |
| + adc x14,x14,x11 // x14:x13:x12 = unreduced product |
| + |
| + and x10,x14,#-4 // final reduction |
| + and x6,x14,#3 // keep two bits in the top limb |
| + add x10,x10,x14,lsr#2 // x10 = 5*(x14>>2), folded back into the low limb |
| + adds x4,x12,x10 |
| + adcs x5,x13,xzr |
| + adc x6,x6,xzr |
| + |
| + ret |
| +.size poly1305_mult,.-poly1305_mult |
| + |
| +.type poly1305_splat,%function |
| +.align 4 |
| +poly1305_splat: // in: x4,x5,x6 = value in base 2^64, x0 = destination; stores nine 32-bit words 16 bytes apart; overwrites x12-x16 |
| + and x12,x4,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x13,x4,#26,#26 // bits 26..51 |
| + extr x14,x5,x4,#52 |
| + and x14,x14,#0x03ffffff // bits 52..77 |
| + ubfx x15,x5,#14,#26 // bits 78..103 |
| + extr x16,x6,x5,#40 // bits 104 and up (not masked) |
| + |
| + str w12,[x0,#16*0] // r0 |
| + add w12,w13,w13,lsl#2 // r1*5 |
| + str w13,[x0,#16*1] // r1 |
| + add w13,w14,w14,lsl#2 // r2*5 |
| + str w12,[x0,#16*2] // s1 |
| + str w14,[x0,#16*3] // r2 |
| + add w14,w15,w15,lsl#2 // r3*5 |
| + str w13,[x0,#16*4] // s2 |
| + str w15,[x0,#16*5] // r3 |
| + add w15,w16,w16,lsl#2 // r4*5 |
| + str w14,[x0,#16*6] // s3 |
| + str w16,[x0,#16*7] // r4 |
| + str w15,[x0,#16*8] // s4 |
| + |
| + ret |
| +.size poly1305_splat,.-poly1305_splat |
| + |
| +#ifdef __KERNEL__ |
| +.globl poly1305_blocks_neon |
| +#endif |
| +.type poly1305_blocks_neon,%function |
| +.align 5 |
| +poly1305_blocks_neon: // x0 = state, x1 = input, x2 = byte count, x3 = pad bit; x30 is clobbered by the bl calls and reloaded in the epilogue |
| +.Lpoly1305_blocks_neon: |
| + ldr x17,[x0,#24] // is_base2_26 |
| + cmp x2,#128 |
| + b.lo .Lpoly1305_blocks // fewer than 128 bytes: use the scalar code |
| + |
| + .inst 0xd503233f // paciasp (signs x30 with the entry sp as modifier) |
| + stp x29,x30,[sp,#-80]! // 80-byte frame: x29/x30 followed by d8-d15 |
| + add x29,sp,#0 |
| + |
| + stp d8,d9,[sp,#16] // meet ABI requirements |
| + stp d10,d11,[sp,#32] |
| + stp d12,d13,[sp,#48] |
| + stp d14,d15,[sp,#64] |
| + |
| + cbz x17,.Lbase2_64_neon // hash currently stored in base 2^64 |
| + |
| + ldp w10,w11,[x0] // load hash value base 2^26 |
| + ldp w12,w13,[x0,#8] |
| + ldr w14,[x0,#16] |
| + |
| + tst x2,#31 |
| + b.eq .Leven_neon // length is a multiple of 32: no scalar pre-step |
| + |
| + ldp x7,x8,[x0,#32] // load key value |
| + |
| + add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 |
| + lsr x5,x12,#12 |
| + adds x4,x4,x12,lsl#52 |
| + add x5,x5,x13,lsl#14 |
| + adc x5,x5,xzr |
| + lsr x6,x14,#24 |
| + adds x5,x5,x14,lsl#40 |
| + adc x14,x6,xzr // can be partially reduced... NOTE(review): this sum lands in x14, which is not read before poly1305_mult overwrites it, while the adc into x6 below does not see this carry - confirm that is intended |
| + |
| + ldp x12,x13,[x1],#16 // load input |
| + sub x2,x2,#16 |
| + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) |
| + |
| +#ifdef __AARCH64EB__ |
| + rev x12,x12 |
| + rev x13,x13 |
| +#endif |
| + adds x4,x4,x12 // accumulate input |
| + adcs x5,x5,x13 |
| + adc x6,x6,x3 // x3 = pad bit |
| + |
| + bl poly1305_mult |
| + |
| + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x11,x4,#26,#26 |
| + extr x12,x5,x4,#52 |
| + and x12,x12,#0x03ffffff |
| + ubfx x13,x5,#14,#26 |
| + extr x14,x6,x5,#40 |
| + |
| + b .Leven_neon |
| + |
| +.align 4 |
| +.Lbase2_64_neon: |
| + ldp x7,x8,[x0,#32] // load key value |
| + |
| + ldp x4,x5,[x0] // load hash value base 2^64 |
| + ldr x6,[x0,#16] |
| + |
| + tst x2,#31 |
| + b.eq .Linit_neon // length is a multiple of 32: no scalar pre-step |
| + |
| + ldp x12,x13,[x1],#16 // load input |
| + sub x2,x2,#16 |
| + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) |
| +#ifdef __AARCH64EB__ |
| + rev x12,x12 |
| + rev x13,x13 |
| +#endif |
| + adds x4,x4,x12 // accumulate input |
| + adcs x5,x5,x13 |
| + adc x6,x6,x3 // x3 = pad bit |
| + |
| + bl poly1305_mult |
| + |
| +.Linit_neon: |
| + ldr w17,[x0,#48] // first table element |
| + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 |
| + ubfx x11,x4,#26,#26 |
| + extr x12,x5,x4,#52 |
| + and x12,x12,#0x03ffffff |
| + ubfx x13,x5,#14,#26 |
| + extr x14,x6,x5,#40 |
| + |
| + cmp w17,#-1 // is value impossible? |
| + b.ne .Leven_neon // table already filled in |
| + |
| + fmov d24,x10 // hash limbs go to v24-v28 before x4-x6 are reused for the powers of r |
| + fmov d25,x11 |
| + fmov d26,x12 |
| + fmov d27,x13 |
| + fmov d28,x14 |
| + |
| + ////////////////////////////////// initialize r^n table |
| + mov x4,x7 // r^1 |
| + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) |
| + mov x5,x8 |
| + mov x6,xzr |
| + add x0,x0,#48+12 // r^1 goes to byte offset 12 of each 16-byte table row |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^2 |
| + sub x0,x0,#4 // byte offset 8 |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^3 |
| + sub x0,x0,#4 // byte offset 4 |
| + bl poly1305_splat |
| + |
| + bl poly1305_mult // r^4 |
| + sub x0,x0,#4 // byte offset 0 |
| + bl poly1305_splat |
| + sub x0,x0,#48 // restore original x0 |
| + b .Ldo_neon |
| + |
| +.align 4 |
| +.Leven_neon: |
| + fmov d24,x10 // hash limbs go to v24-v28 |
| + fmov d25,x11 |
| + fmov d26,x12 |
| + fmov d27,x13 |
| + fmov d28,x14 |
| + |
| +.Ldo_neon: // from here: v9-v13 = inp[0:1] limbs, v14-v18 = inp[2:3] limbs, v19-v23 = d0-d4, v24-v28 = hash limbs, v31 = 26-bit mask |
| + ldp x8,x12,[x1,#32] // inp[2:3] |
| + subs x2,x2,#64 // these flags are consumed by "b.ls .Lskip_loop" below |
| + ldp x9,x13,[x1,#48] |
| + add x16,x1,#96 // x16 = next inp[2:3] pair |
| + adr x17,.Lzeros // x17 = zero block used once the input runs out |
| + |
| + lsl x3,x3,#24 // pad bit moves to bit 24 of the top 26-bit limb |
| + add x15,x0,#48 // x15 = r^n table |
| + |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + and x5,x9,#0x03ffffff |
| + ubfx x6,x8,#26,#26 |
| + ubfx x7,x9,#26,#26 |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + extr x8,x12,x8,#52 |
| + extr x9,x13,x9,#52 |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + fmov d14,x4 |
| + and x8,x8,#0x03ffffff |
| + and x9,x9,#0x03ffffff |
| + ubfx x10,x12,#14,#26 |
| + ubfx x11,x13,#14,#26 |
| + add x12,x3,x12,lsr#40 |
| + add x13,x3,x13,lsr#40 |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + fmov d15,x6 |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + fmov d16,x8 |
| + fmov d17,x10 |
| + fmov d18,x12 |
| + |
| + ldp x8,x12,[x1],#16 // inp[0:1] |
| + ldp x9,x13,[x1],#48 |
| + |
| + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 // table rows r0,r1,s1,r2 (row order as written by poly1305_splat) |
| + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 // s2,r3,s3,r4 |
| + ld1 {v8.4s},[x15] // s4 |
| + |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + and x5,x9,#0x03ffffff |
| + ubfx x6,x8,#26,#26 |
| + ubfx x7,x9,#26,#26 |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + extr x8,x12,x8,#52 |
| + extr x9,x13,x9,#52 |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + fmov d9,x4 |
| + and x8,x8,#0x03ffffff |
| + and x9,x9,#0x03ffffff |
| + ubfx x10,x12,#14,#26 |
| + ubfx x11,x13,#14,#26 |
| + add x12,x3,x12,lsr#40 |
| + add x13,x3,x13,lsr#40 |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + fmov d10,x6 |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + movi v31.2d,#-1 |
| + fmov d11,x8 |
| + fmov d12,x10 |
| + fmov d13,x12 |
| + ushr v31.2d,v31.2d,#38 // v31 = 0x03ffffff in each 64-bit lane |
| + |
| + b.ls .Lskip_loop // flags from "subs x2,x2,#64" above: no full 64-byte group left after the first |
| + |
| +.align 4 |
| +.Loop_neon: |
| + //////////////////////////////////////////////////////////////// |
| + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 |
| + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r |
| + // ___________________/ |
| + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 |
| + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r |
| + // ___________________/ ____________________/ |
| + // |
| + // Note that we start with inp[2:3]*r^2. This is because it |
| + // doesn't depend on reduction in previous iteration. |
| + //////////////////////////////////////////////////////////////// |
| + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 |
| + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 |
| + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 |
| + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 |
| + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 |
| + |
| + subs x2,x2,#64 // flags are consumed by the csel below and by "b.hi .Loop_neon" at the bottom |
| + umull v23.2d,v14.2s,v7.s[2] |
| + csel x16,x17,x16,lo // input exhausted: load from the zero block instead |
| + umull v22.2d,v14.2s,v5.s[2] |
| + umull v21.2d,v14.2s,v3.s[2] |
| + ldp x8,x12,[x16],#16 // inp[2:3] (or zero) |
| + umull v20.2d,v14.2s,v1.s[2] |
| + ldp x9,x13,[x16],#48 |
| + umull v19.2d,v14.2s,v0.s[2] |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + |
| + umlal v23.2d,v15.2s,v5.s[2] |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + umlal v22.2d,v15.2s,v3.s[2] |
| + and x5,x9,#0x03ffffff |
| + umlal v21.2d,v15.2s,v1.s[2] |
| + ubfx x6,x8,#26,#26 |
| + umlal v20.2d,v15.2s,v0.s[2] |
| + ubfx x7,x9,#26,#26 |
| + umlal v19.2d,v15.2s,v8.s[2] |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + |
| + umlal v23.2d,v16.2s,v3.s[2] |
| + extr x8,x12,x8,#52 |
| + umlal v22.2d,v16.2s,v1.s[2] |
| + extr x9,x13,x9,#52 |
| + umlal v21.2d,v16.2s,v0.s[2] |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + umlal v20.2d,v16.2s,v8.s[2] |
| + fmov d14,x4 |
| + umlal v19.2d,v16.2s,v6.s[2] |
| + and x8,x8,#0x03ffffff |
| + |
| + umlal v23.2d,v17.2s,v1.s[2] |
| + and x9,x9,#0x03ffffff |
| + umlal v22.2d,v17.2s,v0.s[2] |
| + ubfx x10,x12,#14,#26 |
| + umlal v21.2d,v17.2s,v8.s[2] |
| + ubfx x11,x13,#14,#26 |
| + umlal v20.2d,v17.2s,v6.s[2] |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + umlal v19.2d,v17.2s,v4.s[2] |
| + fmov d15,x6 |
| + |
| + add v11.2s,v11.2s,v26.2s |
| + add x12,x3,x12,lsr#40 |
| + umlal v23.2d,v18.2s,v0.s[2] |
| + add x13,x3,x13,lsr#40 |
| + umlal v22.2d,v18.2s,v8.s[2] |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + umlal v21.2d,v18.2s,v6.s[2] |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + umlal v20.2d,v18.2s,v4.s[2] |
| + fmov d16,x8 |
| + umlal v19.2d,v18.2s,v2.s[2] |
| + fmov d17,x10 |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // (hash+inp[0:1])*r^4 and accumulate |
| + |
| + add v9.2s,v9.2s,v24.2s |
| + fmov d18,x12 |
| + umlal v22.2d,v11.2s,v1.s[0] |
| + ldp x8,x12,[x1],#16 // inp[0:1] |
| + umlal v19.2d,v11.2s,v6.s[0] |
| + ldp x9,x13,[x1],#48 |
| + umlal v23.2d,v11.2s,v3.s[0] |
| + umlal v20.2d,v11.2s,v8.s[0] |
| + umlal v21.2d,v11.2s,v0.s[0] |
| +#ifdef __AARCH64EB__ |
| + rev x8,x8 |
| + rev x12,x12 |
| + rev x9,x9 |
| + rev x13,x13 |
| +#endif |
| + |
| + add v10.2s,v10.2s,v25.2s |
| + umlal v22.2d,v9.2s,v5.s[0] |
| + umlal v23.2d,v9.2s,v7.s[0] |
| + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 |
| + umlal v21.2d,v9.2s,v3.s[0] |
| + and x5,x9,#0x03ffffff |
| + umlal v19.2d,v9.2s,v0.s[0] |
| + ubfx x6,x8,#26,#26 |
| + umlal v20.2d,v9.2s,v1.s[0] |
| + ubfx x7,x9,#26,#26 |
| + |
| + add v12.2s,v12.2s,v27.2s |
| + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 |
| + umlal v22.2d,v10.2s,v3.s[0] |
| + extr x8,x12,x8,#52 |
| + umlal v23.2d,v10.2s,v5.s[0] |
| + extr x9,x13,x9,#52 |
| + umlal v19.2d,v10.2s,v8.s[0] |
| + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 |
| + umlal v21.2d,v10.2s,v1.s[0] |
| + fmov d9,x4 |
| + umlal v20.2d,v10.2s,v0.s[0] |
| + and x8,x8,#0x03ffffff |
| + |
| + add v13.2s,v13.2s,v28.2s |
| + and x9,x9,#0x03ffffff |
| + umlal v22.2d,v12.2s,v0.s[0] |
| + ubfx x10,x12,#14,#26 |
| + umlal v19.2d,v12.2s,v4.s[0] |
| + ubfx x11,x13,#14,#26 |
| + umlal v23.2d,v12.2s,v1.s[0] |
| + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 |
| + umlal v20.2d,v12.2s,v6.s[0] |
| + fmov d10,x6 |
| + umlal v21.2d,v12.2s,v8.s[0] |
| + add x12,x3,x12,lsr#40 |
| + |
| + umlal v22.2d,v13.2s,v8.s[0] |
| + add x13,x3,x13,lsr#40 |
| + umlal v19.2d,v13.2s,v2.s[0] |
| + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 |
| + umlal v23.2d,v13.2s,v0.s[0] |
| + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 |
| + umlal v20.2d,v13.2s,v4.s[0] |
| + fmov d11,x8 |
| + umlal v21.2d,v13.2s,v6.s[0] |
| + fmov d12,x10 |
| + fmov d13,x12 |
| + |
| + ///////////////////////////////////////////////////////////////// |
| + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
| + // and P. Schwabe |
| + // |
| + // [see discussion in poly1305-armv4 module] |
| + |
| + ushr v29.2d,v22.2d,#26 |
| + xtn v27.2s,v22.2d |
| + ushr v30.2d,v19.2d,#26 |
| + and v19.16b,v19.16b,v31.16b |
| + add v23.2d,v23.2d,v29.2d // h3 -> h4 |
| + bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff |
| + add v20.2d,v20.2d,v30.2d // h0 -> h1 |
| + |
| + ushr v29.2d,v23.2d,#26 |
| + xtn v28.2s,v23.2d |
| + ushr v30.2d,v20.2d,#26 |
| + xtn v25.2s,v20.2d |
| + bic v28.2s,#0xfc,lsl#24 |
| + add v21.2d,v21.2d,v30.2d // h1 -> h2 |
| + |
| + add v19.2d,v19.2d,v29.2d |
| + shl v29.2d,v29.2d,#2 |
| + shrn v30.2s,v21.2d,#26 |
| + xtn v26.2s,v21.2d |
| + add v19.2d,v19.2d,v29.2d // h4 -> h0 |
| + bic v25.2s,#0xfc,lsl#24 |
| + add v27.2s,v27.2s,v30.2s // h2 -> h3 |
| + bic v26.2s,#0xfc,lsl#24 |
| + |
| + shrn v29.2s,v19.2d,#26 |
| + xtn v24.2s,v19.2d |
| + ushr v30.2s,v27.2s,#26 |
| + bic v27.2s,#0xfc,lsl#24 |
| + bic v24.2s,#0xfc,lsl#24 |
| + add v25.2s,v25.2s,v29.2s // h0 -> h1 |
| + add v28.2s,v28.2s,v30.2s // h3 -> h4 |
| + |
| + b.hi .Loop_neon // flags from the subs at the top of the loop |
| + |
| +.Lskip_loop: |
| + dup v16.2d,v16.d[0] |
| + add v11.2s,v11.2s,v26.2s |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 |
| + |
| + adds x2,x2,#32 // these flags also decide "b.eq .Lshort_tail" further down |
| + b.ne .Long_tail |
| + |
| + dup v16.2d,v11.d[0] |
| + add v14.2s,v9.2s,v24.2s |
| + add v17.2s,v12.2s,v27.2s |
| + add v15.2s,v10.2s,v25.2s |
| + add v18.2s,v13.2s,v28.2s |
| + |
| +.Long_tail: |
| + dup v14.2d,v14.d[0] |
| + umull2 v19.2d,v16.4s,v6.4s |
| + umull2 v22.2d,v16.4s,v1.4s |
| + umull2 v23.2d,v16.4s,v3.4s |
| + umull2 v21.2d,v16.4s,v0.4s |
| + umull2 v20.2d,v16.4s,v8.4s |
| + |
| + dup v15.2d,v15.d[0] |
| + umlal2 v19.2d,v14.4s,v0.4s |
| + umlal2 v21.2d,v14.4s,v3.4s |
| + umlal2 v22.2d,v14.4s,v5.4s |
| + umlal2 v23.2d,v14.4s,v7.4s |
| + umlal2 v20.2d,v14.4s,v1.4s |
| + |
| + dup v17.2d,v17.d[0] |
| + umlal2 v19.2d,v15.4s,v8.4s |
| + umlal2 v22.2d,v15.4s,v3.4s |
| + umlal2 v21.2d,v15.4s,v1.4s |
| + umlal2 v23.2d,v15.4s,v5.4s |
| + umlal2 v20.2d,v15.4s,v0.4s |
| + |
| + dup v18.2d,v18.d[0] |
| + umlal2 v22.2d,v17.4s,v0.4s |
| + umlal2 v23.2d,v17.4s,v1.4s |
| + umlal2 v19.2d,v17.4s,v4.4s |
| + umlal2 v20.2d,v17.4s,v6.4s |
| + umlal2 v21.2d,v17.4s,v8.4s |
| + |
| + umlal2 v22.2d,v18.4s,v8.4s |
| + umlal2 v19.2d,v18.4s,v2.4s |
| + umlal2 v23.2d,v18.4s,v0.4s |
| + umlal2 v20.2d,v18.4s,v4.4s |
| + umlal2 v21.2d,v18.4s,v6.4s |
| + |
| + b.eq .Lshort_tail // flags still from "adds x2,x2,#32" |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // (hash+inp[0:1])*r^4:r^3 and accumulate |
| + |
| + add v9.2s,v9.2s,v24.2s |
| + umlal v22.2d,v11.2s,v1.2s |
| + umlal v19.2d,v11.2s,v6.2s |
| + umlal v23.2d,v11.2s,v3.2s |
| + umlal v20.2d,v11.2s,v8.2s |
| + umlal v21.2d,v11.2s,v0.2s |
| + |
| + add v10.2s,v10.2s,v25.2s |
| + umlal v22.2d,v9.2s,v5.2s |
| + umlal v19.2d,v9.2s,v0.2s |
| + umlal v23.2d,v9.2s,v7.2s |
| + umlal v20.2d,v9.2s,v1.2s |
| + umlal v21.2d,v9.2s,v3.2s |
| + |
| + add v12.2s,v12.2s,v27.2s |
| + umlal v22.2d,v10.2s,v3.2s |
| + umlal v19.2d,v10.2s,v8.2s |
| + umlal v23.2d,v10.2s,v5.2s |
| + umlal v20.2d,v10.2s,v0.2s |
| + umlal v21.2d,v10.2s,v1.2s |
| + |
| + add v13.2s,v13.2s,v28.2s |
| + umlal v22.2d,v12.2s,v0.2s |
| + umlal v19.2d,v12.2s,v4.2s |
| + umlal v23.2d,v12.2s,v1.2s |
| + umlal v20.2d,v12.2s,v6.2s |
| + umlal v21.2d,v12.2s,v8.2s |
| + |
| + umlal v22.2d,v13.2s,v8.2s |
| + umlal v19.2d,v13.2s,v2.2s |
| + umlal v23.2d,v13.2s,v0.2s |
| + umlal v20.2d,v13.2s,v4.2s |
| + umlal v21.2d,v13.2s,v6.2s |
| + |
| +.Lshort_tail: |
| + //////////////////////////////////////////////////////////////// |
| + // horizontal add |
| + |
| + addp v22.2d,v22.2d,v22.2d |
| + ldp d8,d9,[sp,#16] // meet ABI requirements |
| + addp v19.2d,v19.2d,v19.2d |
| + ldp d10,d11,[sp,#32] |
| + addp v23.2d,v23.2d,v23.2d |
| + ldp d12,d13,[sp,#48] |
| + addp v20.2d,v20.2d,v20.2d |
| + ldp d14,d15,[sp,#64] |
| + addp v21.2d,v21.2d,v21.2d |
| + // x30 is reloaded and authenticated in the epilogue, once sp is back at its entry value |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // lazy reduction, but without narrowing |
| + |
| + ushr v29.2d,v22.2d,#26 |
| + and v22.16b,v22.16b,v31.16b |
| + ushr v30.2d,v19.2d,#26 |
| + and v19.16b,v19.16b,v31.16b |
| + |
| + add v23.2d,v23.2d,v29.2d // h3 -> h4 |
| + add v20.2d,v20.2d,v30.2d // h0 -> h1 |
| + |
| + ushr v29.2d,v23.2d,#26 |
| + and v23.16b,v23.16b,v31.16b |
| + ushr v30.2d,v20.2d,#26 |
| + and v20.16b,v20.16b,v31.16b |
| + add v21.2d,v21.2d,v30.2d // h1 -> h2 |
| + |
| + add v19.2d,v19.2d,v29.2d |
| + shl v29.2d,v29.2d,#2 |
| + ushr v30.2d,v21.2d,#26 |
| + and v21.16b,v21.16b,v31.16b |
| + add v19.2d,v19.2d,v29.2d // h4 -> h0 |
| + add v22.2d,v22.2d,v30.2d // h2 -> h3 |
| + |
| + ushr v29.2d,v19.2d,#26 |
| + and v19.16b,v19.16b,v31.16b |
| + ushr v30.2d,v22.2d,#26 |
| + and v22.16b,v22.16b,v31.16b |
| + add v20.2d,v20.2d,v29.2d // h0 -> h1 |
| + add v23.2d,v23.2d,v30.2d // h3 -> h4 |
| + |
| + //////////////////////////////////////////////////////////////// |
| + // write the result, can be partially reduced |
| + |
| + st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 |
| + mov x4,#1 |
| + st1 {v23.s}[0],[x0] |
| + str x4,[x0,#8] // set is_base2_26 |
| + |
| + ldp x29,x30,[sp],#80 // pop the frame first: autiasp must see the same sp that paciasp signed with |
| + .inst 0xd50323bf // autiasp |
| + ret |
| +.size poly1305_blocks_neon,.-poly1305_blocks_neon |
| + |
| +.align 5 |
| +.Lzeros: // poly1305_blocks_neon points x17 here and loads from it in place of inp[2:3] once the input is exhausted |
| +.long 0,0,0,0,0,0,0,0 // 32 zero bytes |
| +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm" |
| +.align 2 |
| +#if !defined(__KERNEL__) && !defined(_WIN64) |
| +.comm OPENSSL_armcap_P,4,4 |
| +.hidden OPENSSL_armcap_P |
| +#endif |
| diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c |
| new file mode 100644 |
| index 000000000000..dd843d0ee83a |
| |
| |
| @@ -0,0 +1,237 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 |
| + * |
| + * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> |
| + */ |
| + |
| +#include <asm/hwcap.h> |
| +#include <asm/neon.h> |
| +#include <asm/simd.h> |
| +#include <asm/unaligned.h> |
| +#include <crypto/algapi.h> |
| +#include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| +#include <crypto/internal/simd.h> |
| +#include <linux/cpufeature.h> |
| +#include <linux/crypto.h> |
| +#include <linux/jump_label.h> |
| +#include <linux/module.h> |
| + |
| +asmlinkage void poly1305_init_arm64(void *state, const u8 *key); |
| +asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); |
| +asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); |
| +asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
| + |
| +/* |
| + * Library entry point: prepare a Poly1305 context from a key. |
| + * |
| + * key is passed to the assembly initialiser, which sets up the accumulator |
| + * state in dctx->h. Bytes 16..31 of key are stored as four little-endian |
| + * 32-bit words in dctx->s, so at least 32 bytes must be readable at key. |
| + * Any previously buffered partial block is dropped. |
| + */ |
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) |
| +{ |
| + const u8 *s_bytes = key + 16; |
| + int i; |
| + |
| + poly1305_init_arm64(&dctx->h, key); |
| + |
| + for (i = 0; i < 4; i++) |
| + dctx->s[i] = get_unaligned_le32(s_bytes + 4 * i); |
| + |
| + dctx->buflen = 0; |
| +} |
| +EXPORT_SYMBOL(poly1305_init_arch); |
| + |
| +/* |
| + * shash .init hook: mark the context as holding no key material and no |
| + * buffered input. Always returns 0. |
| + */ |
| +static int neon_poly1305_init(struct shash_desc *desc) |
| +{ |
| + struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc); |
| + |
| + /* neither key half has been seen yet */ |
| + ctx->sset = false; |
| + ctx->rset = 0; |
| + /* nothing pending in ctx->buf */ |
| + ctx->buflen = 0; |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Run the Poly1305 core over the whole blocks in [src, src + len) on behalf |
| + * of the shash interface. |
| + * |
| + * The shash has no setkey hook, so the key is taken from the head of the |
| + * input: while dctx->sset is false, the first block initialises the |
| + * accumulator state in dctx->h (tracked by dctx->rset) and the next block |
| + * is stored as four little-endian words in dctx->s. Remaining whole blocks |
| + * are hashed with poly1305_blocks_neon() when have_neon is enabled and |
| + * do_neon is true, otherwise with the scalar poly1305_blocks(). Any |
| + * trailing partial block in len is masked off and left to the caller. |
| + * |
| + * Both call sites in this file pass len >= POLY1305_BLOCK_SIZE; hibit is |
| + * forwarded unchanged to the assembly routine. |
| + */ |
| +static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + u32 len, u32 hibit, bool do_neon) |
| +{ |
| + if (unlikely(!dctx->sset)) { |
| + if (!dctx->rset) { |
| + /* |
| + * Only one block is guaranteed to be readable at src |
| + * here, so initialise just the accumulator state. |
| + * poly1305_init_arch() must not be used: it also reads |
| + * src + 16 .. src + 31 to fill dctx->s, which runs past |
| + * the end of the input when len == POLY1305_BLOCK_SIZE. |
| + */ |
| + poly1305_init_arm64(&dctx->h, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = 1; |
| + } |
| + if (len >= POLY1305_BLOCK_SIZE) { |
| + /* second key block: the value later passed to poly1305_emit() */ |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + /* key blocks consumed everything that can be hashed */ |
| + if (len < POLY1305_BLOCK_SIZE) |
| + return; |
| + } |
| + |
| + /* hash whole blocks only */ |
| + len &= ~(POLY1305_BLOCK_SIZE - 1); |
| + |
| + if (static_branch_likely(&have_neon) && likely(do_neon)) |
| + poly1305_blocks_neon(&dctx->h, src, len, hibit); |
| + else |
| + poly1305_blocks(&dctx->h, src, len, hibit); |
| +} |
| + |
| +/* |
| + * Absorb len bytes from src into the shash context. |
| + * |
| + * A partial block left in dctx->buf by an earlier call is topped up first |
| + * and, once full, processed with the scalar routine (do_neon == false). |
| + * Whole blocks are then handed to neon_poly1305_blocks() with the caller's |
| + * do_neon choice, and whatever is left over is stashed in dctx->buf. |
| + */ |
| +static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, u32 len, bool do_neon) |
| +{ |
| + if (unlikely(dctx->buflen)) { |
| + u32 room = POLY1305_BLOCK_SIZE - dctx->buflen; |
| + u32 take = min(len, room); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, take); |
| + dctx->buflen += take; |
| + src += take; |
| + len -= take; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + neon_poly1305_blocks(dctx, dctx->buf, |
| + POLY1305_BLOCK_SIZE, 1, false); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(len >= POLY1305_BLOCK_SIZE)) { |
| + u32 whole = round_down(len, POLY1305_BLOCK_SIZE); |
| + |
| + neon_poly1305_blocks(dctx, src, len, 1, do_neon); |
| + src += whole; |
| + len -= whole; |
| + } |
| + |
| + /* keep the tail for the next update or final call */ |
| + if (unlikely(len)) { |
| + memcpy(dctx->buf, src, len); |
| + dctx->buflen = len; |
| + } |
| +} |
| + |
| +/* |
| + * shash .update hook. NEON is requested only when SIMD is usable in the |
| + * current context and the request is longer than 128 bytes; in that case |
| + * the update runs between kernel_neon_begin() and kernel_neon_end(), |
| + * provided have_neon is enabled. Always returns 0. |
| + */ |
| +static int neon_poly1305_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + bool do_neon = crypto_simd_usable() && srclen > 128; |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (static_branch_likely(&have_neon) && do_neon) { |
| + kernel_neon_begin(); |
| + neon_poly1305_do_update(dctx, src, srclen, do_neon); |
| + kernel_neon_end(); |
| + } else { |
| + neon_poly1305_do_update(dctx, src, srclen, do_neon); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Library entry point: absorb nbytes bytes from src into dctx. |
| + * |
| + * A pending partial block in dctx->buf is completed and processed with the |
| + * scalar routine. Whole blocks are processed with the NEON routine, wrapped |
| + * in kernel_neon_begin()/kernel_neon_end(), when have_neon is enabled and |
| + * SIMD is usable; otherwise with the scalar routine. A trailing partial |
| + * block is saved in dctx->buf. |
| + */ |
| +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + unsigned int nbytes) |
| +{ |
| + if (unlikely(dctx->buflen)) { |
| + u32 take = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, take); |
| + dctx->buflen += take; |
| + src += take; |
| + nbytes -= take; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
| + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
| + |
| + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { |
| + poly1305_blocks(&dctx->h, src, len, 1); |
| + } else { |
| + kernel_neon_begin(); |
| + poly1305_blocks_neon(&dctx->h, src, len, 1); |
| + kernel_neon_end(); |
| + } |
| + src += len; |
| + nbytes -= len; |
| + } |
| + |
| + /* keep the tail for the next update or final call */ |
| + if (unlikely(nbytes)) { |
| + memcpy(dctx->buf, src, nbytes); |
| + dctx->buflen = nbytes; |
| + } |
| +} |
| +EXPORT_SYMBOL(poly1305_update_arch); |
| + |
| +/* |
| + * Library entry point: finish the MAC and write 16 bytes to dst. |
| + * |
| + * A pending partial block is padded with a single 1 byte followed by zero |
| + * bytes and processed with hibit == 0. The digest is then produced by |
| + * poly1305_emit() and copied to dst as four little-endian words, and the |
| + * whole context is cleared. |
| + */ |
| +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| +{ |
| + __le32 digest[4]; |
| + u64 f = 0; |
| + |
| + if (unlikely(dctx->buflen)) { |
| + /* pad the final partial block: 0x01 then zeros */ |
| + dctx->buf[dctx->buflen++] = 1; |
| + memset(dctx->buf + dctx->buflen, 0, |
| + POLY1305_BLOCK_SIZE - dctx->buflen); |
| + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| + } |
| + |
| + /* NOTE(review): presumably emit already adds dctx->s - confirm in the asm */ |
| + poly1305_emit(&dctx->h, digest, dctx->s); |
| + |
| + /* mac = (h + s) % (2^128) */ |
| + /* |
| + * f starts at 0 and every addend below is a 32-bit value, so f >> 32 |
| + * is always 0 here: each step stores digest[i] to dst unchanged. |
| + */ |
| + f = (f >> 32) + le32_to_cpu(digest[0]); |
| + put_unaligned_le32(f, dst); |
| + f = (f >> 32) + le32_to_cpu(digest[1]); |
| + put_unaligned_le32(f, dst + 4); |
| + f = (f >> 32) + le32_to_cpu(digest[2]); |
| + put_unaligned_le32(f, dst + 8); |
| + f = (f >> 32) + le32_to_cpu(digest[3]); |
| + put_unaligned_le32(f, dst + 12); |
| + |
| + /* wipe key material and state from the context */ |
| + *dctx = (struct poly1305_desc_ctx){}; |
| +} |
| +EXPORT_SYMBOL(poly1305_final_arch); |
| + |
| +/* |
| + * shash .final hook: emit the MAC into dst. Returns -ENOKEY when the input |
| + * never supplied both key blocks (dctx->sset still false), 0 otherwise. |
| + */ |
| +static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (likely(dctx->sset)) { |
| + poly1305_final_arch(dctx, dst); |
| + return 0; |
| + } |
| + |
| + return -ENOKEY; |
| +} |
| + |
| +/* |
| + * shash descriptor registered by neon_poly1305_mod_init(). It exposes the |
| + * hooks above under the algorithm name "poly1305" with driver name |
| + * "poly1305-neon". No .setkey is provided: the key is taken from the start |
| + * of the input by neon_poly1305_blocks(). |
| + */ |
| +static struct shash_alg neon_poly1305_alg = { |
| + .init = neon_poly1305_init, |
| + .update = neon_poly1305_update, |
| + .final = neon_poly1305_final, |
| + .digestsize = POLY1305_DIGEST_SIZE, |
| + /* per-request state is a bare struct poly1305_desc_ctx */ |
| + .descsize = sizeof(struct poly1305_desc_ctx), |
| + |
| + .base.cra_name = "poly1305", |
| + .base.cra_driver_name = "poly1305-neon", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| +}; |
| + |
| +/* |
| + * Module init. When the CPU reports ASIMD, enable the have_neon static key |
| + * and register the shash, returning the registration result. Without ASIMD |
| + * nothing is registered and 0 is returned, so the module still loads. |
| + */ |
| +static int __init neon_poly1305_mod_init(void) |
| +{ |
| + if (cpu_have_named_feature(ASIMD)) { |
| + static_branch_enable(&have_neon); |
| + return crypto_register_shash(&neon_poly1305_alg); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Module exit: unregister the shash, which was only registered when the |
| + * CPU reports ASIMD (same check as in neon_poly1305_mod_init()). |
| + */ |
| +static void __exit neon_poly1305_mod_exit(void) |
| +{ |
| + if (!cpu_have_named_feature(ASIMD)) |
| + return; |
| + |
| + crypto_unregister_shash(&neon_poly1305_alg); |
| +} |
| + |
| +module_init(neon_poly1305_mod_init); |
| +module_exit(neon_poly1305_mod_exit); |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_ALIAS_CRYPTO("poly1305"); |
| +MODULE_ALIAS_CRYPTO("poly1305-neon"); |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 181754615f73..9923445e8225 100644 |
| |
| |
| @@ -40,6 +40,7 @@ config CRYPTO_LIB_DES |
| config CRYPTO_LIB_POLY1305_RSIZE |
| int |
| default 4 if X86_64 |
| + default 9 if ARM64 |
| default 1 |
| |
| config CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| -- |
| 2.18.2 |
| |
| |
| From 37bfaa5edd51c66438932ea842ac026313fa7104 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:25 +0100 |
| Subject: [PATCH 019/100] crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS |
| NEON implementation |
| |
| commit a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc upstream. |
| |
| This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation |
| for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL |
| project. The file 'poly1305-armv4.pl' is taken straight from this upstream |
| GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, |
| and already contains all the changes required to build it as part of a |
| Linux kernel module. |
| |
| [0] https://github.com/dot-asm/cryptogams |
| |
| Co-developed-by: Andy Polyakov <appro@cryptogams.org> |
| Signed-off-by: Andy Polyakov <appro@cryptogams.org> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/Kconfig | 5 + |
| arch/arm/crypto/Makefile | 12 +- |
| arch/arm/crypto/poly1305-armv4.pl | 1236 +++++++++++++++++++++++ |
| arch/arm/crypto/poly1305-core.S_shipped | 1158 +++++++++++++++++++++ |
| arch/arm/crypto/poly1305-glue.c | 276 +++++ |
| lib/crypto/Kconfig | 2 +- |
| 6 files changed, 2687 insertions(+), 2 deletions(-) |
| create mode 100644 arch/arm/crypto/poly1305-armv4.pl |
| create mode 100644 arch/arm/crypto/poly1305-core.S_shipped |
| create mode 100644 arch/arm/crypto/poly1305-glue.c |
| |
| diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig |
| index b25ffec04417..2e8a9289bded 100644 |
| |
| |
| @@ -131,6 +131,11 @@ config CRYPTO_CHACHA20_NEON |
| select CRYPTO_BLKCIPHER |
| select CRYPTO_ARCH_HAVE_LIB_CHACHA |
| |
| +config CRYPTO_POLY1305_ARM |
| + tristate "Accelerated scalar and SIMD Poly1305 hash implementations" |
| + select CRYPTO_HASH |
| + select CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + |
| config CRYPTO_NHPOLY1305_NEON |
| tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" |
| depends on KERNEL_MODE_NEON |
| diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile |
| index 6b97dffcf90f..4f6a8a81dabc 100644 |
| |
| |
| @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o |
| obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o |
| obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o |
| obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o |
| +obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o |
| |
| ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o |
| @@ -55,12 +56,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o |
| crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o |
| chacha-neon-y := chacha-scalar-core.o chacha-glue.o |
| chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o |
| +poly1305-arm-y := poly1305-core.o poly1305-glue.o |
| nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o |
| |
| ifdef REGENERATE_ARM_CRYPTO |
| quiet_cmd_perl = PERL $@ |
| cmd_perl = $(PERL) $(<) > $(@) |
| |
| +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl |
| + $(call cmd,perl) |
| + |
| $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl |
| $(call cmd,perl) |
| |
| @@ -68,4 +73,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl |
| $(call cmd,perl) |
| endif |
| |
| -clean-files += sha256-core.S sha512-core.S |
| +clean-files += poly1305-core.S sha256-core.S sha512-core.S |
| + |
| +# massage the perlasm code a bit so we only get the NEON routine if we need it |
| +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 |
| +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 |
| +AFLAGS_poly1305-core.o += $(poly1305-aflags-y) |
| diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl |
| new file mode 100644 |
| index 000000000000..6d79498d3115 |
| |
| |
| @@ -0,0 +1,1236 @@ |
| +#!/usr/bin/env perl |
| +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause |
| +# |
| +# ==================================================================== |
| +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL |
| +# project. |
| +# ==================================================================== |
| +# |
| +# IALU(*)/gcc-4.4 NEON |
| +# |
| +# ARM11xx(ARMv6) 7.78/+100% - |
| +# Cortex-A5 6.35/+130% 3.00 |
| +# Cortex-A8 6.25/+115% 2.36 |
| +# Cortex-A9 5.10/+95% 2.55 |
| +# Cortex-A15 3.85/+85% 1.25(**) |
| +# Snapdragon S4 5.70/+100% 1.48(**) |
| +# |
| +# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; |
| +# (**) these are trade-off results, they can be improved by ~8% but at |
| +# the cost of 15/12% regression on Cortex-A5/A7, it's even possible |
| +# to improve Cortex-A9 result, but then A5/A7 lose more than 20%; |
| + |
| +# Command line: [flavour] output-file |
| +# If the first argument already looks like a file name (name.ext) it is |
| +# taken as the output file and no flavour is set; otherwise arguments are |
| +# shifted until one looks like a file name. |
| +$flavour = shift; |
| +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } |
| +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } |
| + |
| +# With a flavour other than "void", pipe the generated code through |
| +# arm-xlate.pl (searched next to this script, then in ../../perlasm); |
| +# otherwise write it straight to the output file. |
| +if ($flavour && $flavour ne "void") { |
| + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or |
| + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or |
| + die "can't locate arm-xlate.pl"; |
| + |
| + open STDOUT,"| \"$^X\" $xlate $flavour $output"; |
| +} else { |
| + open STDOUT,">$output"; |
| +} |
| + |
| +($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); |
| + |
| +$code.=<<___; |
| +#ifndef __KERNEL__ |
| +# include "arm_arch.h" |
| +#else |
| +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ |
| +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ |
| +# define poly1305_init poly1305_init_arm |
| +# define poly1305_blocks poly1305_blocks_arm |
| +# define poly1305_emit poly1305_emit_arm |
| +.globl poly1305_blocks_neon |
| +#endif |
| + |
| +#if defined(__thumb2__) |
| +.syntax unified |
| +.thumb |
| +#else |
| +.code 32 |
| +#endif |
| + |
| +.text |
| + |
| +.globl poly1305_emit |
| +.globl poly1305_blocks |
| +.globl poly1305_init |
| +.type poly1305_init,%function |
| +.align 5 |
| +poly1305_init: |
| +.Lpoly1305_init: |
| + stmdb sp!,{r4-r11} |
| + |
| + eor r3,r3,r3 |
| + cmp $inp,#0 |
| + str r3,[$ctx,#0] @ zero hash value |
| + str r3,[$ctx,#4] |
| + str r3,[$ctx,#8] |
| + str r3,[$ctx,#12] |
| + str r3,[$ctx,#16] |
| + str r3,[$ctx,#36] @ clear is_base2_26 |
| + add $ctx,$ctx,#20 |
| + |
| +#ifdef __thumb2__ |
| + it eq |
| +#endif |
| + moveq r0,#0 |
| + beq .Lno_key |
| + |
| +#if __ARM_MAX_ARCH__>=7 |
| + mov r3,#-1 |
| + str r3,[$ctx,#28] @ impossible key power value |
| +# ifndef __KERNEL__ |
| + adr r11,.Lpoly1305_init |
| + ldr r12,.LOPENSSL_armcap |
| +# endif |
| +#endif |
| + ldrb r4,[$inp,#0] |
| + mov r10,#0x0fffffff |
| + ldrb r5,[$inp,#1] |
| + and r3,r10,#-4 @ 0x0ffffffc |
| + ldrb r6,[$inp,#2] |
| + ldrb r7,[$inp,#3] |
| + orr r4,r4,r5,lsl#8 |
| + ldrb r5,[$inp,#4] |
| + orr r4,r4,r6,lsl#16 |
| + ldrb r6,[$inp,#5] |
| + orr r4,r4,r7,lsl#24 |
| + ldrb r7,[$inp,#6] |
| + and r4,r4,r10 |
| + |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| +# if !defined(_WIN32) |
| + ldr r12,[r11,r12] @ OPENSSL_armcap_P |
| +# endif |
| +# if defined(__APPLE__) || defined(_WIN32) |
| + ldr r12,[r12] |
| +# endif |
| +#endif |
| + ldrb r8,[$inp,#7] |
| + orr r5,r5,r6,lsl#8 |
| + ldrb r6,[$inp,#8] |
| + orr r5,r5,r7,lsl#16 |
| + ldrb r7,[$inp,#9] |
| + orr r5,r5,r8,lsl#24 |
| + ldrb r8,[$inp,#10] |
| + and r5,r5,r3 |
| + |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| + tst r12,#ARMV7_NEON @ check for NEON |
| +# ifdef __thumb2__ |
| + adr r9,.Lpoly1305_blocks_neon |
| + adr r11,.Lpoly1305_blocks |
| + it ne |
| + movne r11,r9 |
| + adr r12,.Lpoly1305_emit |
| + orr r11,r11,#1 @ thumb-ify addresses |
| + orr r12,r12,#1 |
| +# else |
| + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) |
| + ite eq |
| + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) |
| + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) |
| +# endif |
| +#endif |
| + ldrb r9,[$inp,#11] |
| + orr r6,r6,r7,lsl#8 |
| + ldrb r7,[$inp,#12] |
| + orr r6,r6,r8,lsl#16 |
| + ldrb r8,[$inp,#13] |
| + orr r6,r6,r9,lsl#24 |
| + ldrb r9,[$inp,#14] |
| + and r6,r6,r3 |
| + |
| + ldrb r10,[$inp,#15] |
| + orr r7,r7,r8,lsl#8 |
| + str r4,[$ctx,#0] |
| + orr r7,r7,r9,lsl#16 |
| + str r5,[$ctx,#4] |
| + orr r7,r7,r10,lsl#24 |
| + str r6,[$ctx,#8] |
| + and r7,r7,r3 |
| + str r7,[$ctx,#12] |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| + stmia r2,{r11,r12} @ fill functions table |
| + mov r0,#1 |
| +#else |
| + mov r0,#0 |
| +#endif |
| +.Lno_key: |
| + ldmia sp!,{r4-r11} |
| +#if __ARM_ARCH__>=5 |
| + ret @ bx lr |
| +#else |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + bx lr @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_init,.-poly1305_init |
| +___ |
| +{ |
| +my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); |
| +my ($s1,$s2,$s3)=($r1,$r2,$r3); |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks,%function |
| +.align 5 |
| +poly1305_blocks: |
| +.Lpoly1305_blocks: |
| + stmdb sp!,{r3-r11,lr} |
| + |
| + ands $len,$len,#-16 |
| + beq .Lno_data |
| + |
| + add $len,$len,$inp @ end pointer |
| + sub sp,sp,#32 |
| + |
| +#if __ARM_ARCH__<7 |
| + ldmia $ctx,{$h0-$r3} @ load context |
| + add $ctx,$ctx,#20 |
| + str $len,[sp,#16] @ offload stuff |
| + str $ctx,[sp,#12] |
| +#else |
| + ldr lr,[$ctx,#36] @ is_base2_26 |
| + ldmia $ctx!,{$h0-$h4} @ load hash value |
| + str $len,[sp,#16] @ offload stuff |
| + str $ctx,[sp,#12] |
| + |
| + adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 |
| + mov $r1,$h1,lsr#6 |
| + adcs $r1,$r1,$h2,lsl#20 |
| + mov $r2,$h2,lsr#12 |
| + adcs $r2,$r2,$h3,lsl#14 |
| + mov $r3,$h3,lsr#18 |
| + adcs $r3,$r3,$h4,lsl#8 |
| + mov $len,#0 |
| + teq lr,#0 |
| + str $len,[$ctx,#16] @ clear is_base2_26 |
| + adc $len,$len,$h4,lsr#24 |
| + |
| + itttt ne |
| + movne $h0,$r0 @ choose between radixes |
| + movne $h1,$r1 |
| + movne $h2,$r2 |
| + movne $h3,$r3 |
| + ldmia $ctx,{$r0-$r3} @ load key |
| + it ne |
| + movne $h4,$len |
| +#endif |
| + |
| + mov lr,$inp |
| + cmp $padbit,#0 |
| + str $r1,[sp,#20] |
| + str $r2,[sp,#24] |
| + str $r3,[sp,#28] |
| + b .Loop |
| + |
| +.align 4 |
| +.Loop: |
| +#if __ARM_ARCH__<7 |
| + ldrb r0,[lr],#16 @ load input |
| +# ifdef __thumb2__ |
| + it hi |
| +# endif |
| + addhi $h4,$h4,#1 @ 1<<128 |
| + ldrb r1,[lr,#-15] |
| + ldrb r2,[lr,#-14] |
| + ldrb r3,[lr,#-13] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-12] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-11] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-10] |
| + adds $h0,$h0,r3 @ accumulate input |
| + |
| + ldrb r3,[lr,#-9] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-8] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-7] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-6] |
| + adcs $h1,$h1,r3 |
| + |
| + ldrb r3,[lr,#-5] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-4] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-3] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-2] |
| + adcs $h2,$h2,r3 |
| + |
| + ldrb r3,[lr,#-1] |
| + orr r1,r0,r1,lsl#8 |
| + str lr,[sp,#8] @ offload input pointer |
| + orr r2,r1,r2,lsl#16 |
| + add $s1,$r1,$r1,lsr#2 |
| + orr r3,r2,r3,lsl#24 |
| +#else |
| + ldr r0,[lr],#16 @ load input |
| + it hi |
| + addhi $h4,$h4,#1 @ padbit |
| + ldr r1,[lr,#-12] |
| + ldr r2,[lr,#-8] |
| + ldr r3,[lr,#-4] |
| +# ifdef __ARMEB__ |
| + rev r0,r0 |
| + rev r1,r1 |
| + rev r2,r2 |
| + rev r3,r3 |
| +# endif |
| + adds $h0,$h0,r0 @ accumulate input |
| + str lr,[sp,#8] @ offload input pointer |
| + adcs $h1,$h1,r1 |
| + add $s1,$r1,$r1,lsr#2 |
| + adcs $h2,$h2,r2 |
| +#endif |
| + add $s2,$r2,$r2,lsr#2 |
| + adcs $h3,$h3,r3 |
| + add $s3,$r3,$r3,lsr#2 |
| + |
| + umull r2,r3,$h1,$r0 |
| + adc $h4,$h4,#0 |
| + umull r0,r1,$h0,$r0 |
| + umlal r2,r3,$h4,$s1 |
| + umlal r0,r1,$h3,$s1 |
| + ldr $r1,[sp,#20] @ reload $r1 |
| + umlal r2,r3,$h2,$s3 |
| + umlal r0,r1,$h1,$s3 |
| + umlal r2,r3,$h3,$s2 |
| + umlal r0,r1,$h2,$s2 |
| + umlal r2,r3,$h0,$r1 |
| + str r0,[sp,#0] @ future $h0 |
| + mul r0,$s2,$h4 |
| + ldr $r2,[sp,#24] @ reload $r2 |
| + adds r2,r2,r1 @ d1+=d0>>32 |
| + eor r1,r1,r1 |
| + adc lr,r3,#0 @ future $h2 |
| + str r2,[sp,#4] @ future $h1 |
| + |
| + mul r2,$s3,$h4 |
| + eor r3,r3,r3 |
| + umlal r0,r1,$h3,$s3 |
| + ldr $r3,[sp,#28] @ reload $r3 |
| + umlal r2,r3,$h3,$r0 |
| + umlal r0,r1,$h2,$r0 |
| + umlal r2,r3,$h2,$r1 |
| + umlal r0,r1,$h1,$r1 |
| + umlal r2,r3,$h1,$r2 |
| + umlal r0,r1,$h0,$r2 |
| + umlal r2,r3,$h0,$r3 |
| + ldr $h0,[sp,#0] |
| + mul $h4,$r0,$h4 |
| + ldr $h1,[sp,#4] |
| + |
| + adds $h2,lr,r0 @ d2+=d1>>32 |
| + ldr lr,[sp,#8] @ reload input pointer |
| + adc r1,r1,#0 |
| + adds $h3,r2,r1 @ d3+=d2>>32 |
| + ldr r0,[sp,#16] @ reload end pointer |
| + adc r3,r3,#0 |
| + add $h4,$h4,r3 @ h4+=d3>>32 |
| + |
| + and r1,$h4,#-4 |
| + and $h4,$h4,#3 |
| + add r1,r1,r1,lsr#2 @ *=5 |
| + adds $h0,$h0,r1 |
| + adcs $h1,$h1,#0 |
| + adcs $h2,$h2,#0 |
| + adcs $h3,$h3,#0 |
| + adc $h4,$h4,#0 |
| + |
| + cmp r0,lr @ done yet? |
| + bhi .Loop |
| + |
| + ldr $ctx,[sp,#12] |
| + add sp,sp,#32 |
| + stmdb $ctx,{$h0-$h4} @ store the result |
| + |
| +.Lno_data: |
| +#if __ARM_ARCH__>=5 |
| + ldmia sp!,{r3-r11,pc} |
| +#else |
| + ldmia sp!,{r3-r11,lr} |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + bx lr @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_blocks,.-poly1305_blocks |
| +___ |
| +} |
| +{ |
| +my ($ctx,$mac,$nonce)=map("r$_",(0..2)); |
| +my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); |
| +my $g4=$ctx; |
| + |
| +$code.=<<___; |
| +.type poly1305_emit,%function |
| +.align 5 |
| +poly1305_emit: |
| +.Lpoly1305_emit: |
| + stmdb sp!,{r4-r11} |
| + |
| + ldmia $ctx,{$h0-$h4} |
| + |
| +#if __ARM_ARCH__>=7 |
| + ldr ip,[$ctx,#36] @ is_base2_26 |
| + |
| + adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 |
| + mov $g1,$h1,lsr#6 |
| + adcs $g1,$g1,$h2,lsl#20 |
| + mov $g2,$h2,lsr#12 |
| + adcs $g2,$g2,$h3,lsl#14 |
| + mov $g3,$h3,lsr#18 |
| + adcs $g3,$g3,$h4,lsl#8 |
| + mov $g4,#0 |
| + adc $g4,$g4,$h4,lsr#24 |
| + |
| + tst ip,ip |
| + itttt ne |
| + movne $h0,$g0 |
| + movne $h1,$g1 |
| + movne $h2,$g2 |
| + movne $h3,$g3 |
| + it ne |
| + movne $h4,$g4 |
| +#endif |
| + |
| + adds $g0,$h0,#5 @ compare to modulus |
| + adcs $g1,$h1,#0 |
| + adcs $g2,$h2,#0 |
| + adcs $g3,$h3,#0 |
| + adc $g4,$h4,#0 |
| + tst $g4,#4 @ did it carry/borrow? |
| + |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne $h0,$g0 |
| + ldr $g0,[$nonce,#0] |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne $h1,$g1 |
| + ldr $g1,[$nonce,#4] |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne $h2,$g2 |
| + ldr $g2,[$nonce,#8] |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne $h3,$g3 |
| + ldr $g3,[$nonce,#12] |
| + |
| + adds $h0,$h0,$g0 |
| + adcs $h1,$h1,$g1 |
| + adcs $h2,$h2,$g2 |
| + adc $h3,$h3,$g3 |
| + |
| +#if __ARM_ARCH__>=7 |
| +# ifdef __ARMEB__ |
| + rev $h0,$h0 |
| + rev $h1,$h1 |
| + rev $h2,$h2 |
| + rev $h3,$h3 |
| +# endif |
| + str $h0,[$mac,#0] |
| + str $h1,[$mac,#4] |
| + str $h2,[$mac,#8] |
| + str $h3,[$mac,#12] |
| +#else |
| + strb $h0,[$mac,#0] |
| + mov $h0,$h0,lsr#8 |
| + strb $h1,[$mac,#4] |
| + mov $h1,$h1,lsr#8 |
| + strb $h2,[$mac,#8] |
| + mov $h2,$h2,lsr#8 |
| + strb $h3,[$mac,#12] |
| + mov $h3,$h3,lsr#8 |
| + |
| + strb $h0,[$mac,#1] |
| + mov $h0,$h0,lsr#8 |
| + strb $h1,[$mac,#5] |
| + mov $h1,$h1,lsr#8 |
| + strb $h2,[$mac,#9] |
| + mov $h2,$h2,lsr#8 |
| + strb $h3,[$mac,#13] |
| + mov $h3,$h3,lsr#8 |
| + |
| + strb $h0,[$mac,#2] |
| + mov $h0,$h0,lsr#8 |
| + strb $h1,[$mac,#6] |
| + mov $h1,$h1,lsr#8 |
| + strb $h2,[$mac,#10] |
| + mov $h2,$h2,lsr#8 |
| + strb $h3,[$mac,#14] |
| + mov $h3,$h3,lsr#8 |
| + |
| + strb $h0,[$mac,#3] |
| + strb $h1,[$mac,#7] |
| + strb $h2,[$mac,#11] |
| + strb $h3,[$mac,#15] |
| +#endif |
| + ldmia sp!,{r4-r11} |
| +#if __ARM_ARCH__>=5 |
| + ret @ bx lr |
| +#else |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + bx lr @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_emit,.-poly1305_emit |
| +___ |
| +{ |
| +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); |
| +my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); |
| +my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); |
| + |
| +my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); |
| + |
| +$code.=<<___; |
| +#if __ARM_MAX_ARCH__>=7 |
| +.fpu neon |
| + |
| +.type poly1305_init_neon,%function |
| +.align 5 |
| +poly1305_init_neon: |
| +.Lpoly1305_init_neon: |
| + ldr r3,[$ctx,#48] @ first table element |
| + cmp r3,#-1 @ is value impossible? |
| + bne .Lno_init_neon |
| + |
| + ldr r4,[$ctx,#20] @ load key base 2^32 |
| + ldr r5,[$ctx,#24] |
| + ldr r6,[$ctx,#28] |
| + ldr r7,[$ctx,#32] |
| + |
| + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 |
| + mov r3,r4,lsr#26 |
| + mov r4,r5,lsr#20 |
| + orr r3,r3,r5,lsl#6 |
| + mov r5,r6,lsr#14 |
| + orr r4,r4,r6,lsl#12 |
| + mov r6,r7,lsr#8 |
| + orr r5,r5,r7,lsl#18 |
| + and r3,r3,#0x03ffffff |
| + and r4,r4,#0x03ffffff |
| + and r5,r5,#0x03ffffff |
| + |
| + vdup.32 $R0,r2 @ r^1 in both lanes |
| + add r2,r3,r3,lsl#2 @ *5 |
| + vdup.32 $R1,r3 |
| + add r3,r4,r4,lsl#2 |
| + vdup.32 $S1,r2 |
| + vdup.32 $R2,r4 |
| + add r4,r5,r5,lsl#2 |
| + vdup.32 $S2,r3 |
| + vdup.32 $R3,r5 |
| + add r5,r6,r6,lsl#2 |
| + vdup.32 $S3,r4 |
| + vdup.32 $R4,r6 |
| + vdup.32 $S4,r5 |
| + |
| + mov $zeros,#2 @ counter |
| + |
| +.Lsquare_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + |
| + vmull.u32 $D0,$R0,${R0}[1] |
| + vmull.u32 $D1,$R1,${R0}[1] |
| + vmull.u32 $D2,$R2,${R0}[1] |
| + vmull.u32 $D3,$R3,${R0}[1] |
| + vmull.u32 $D4,$R4,${R0}[1] |
| + |
| + vmlal.u32 $D0,$R4,${S1}[1] |
| + vmlal.u32 $D1,$R0,${R1}[1] |
| + vmlal.u32 $D2,$R1,${R1}[1] |
| + vmlal.u32 $D3,$R2,${R1}[1] |
| + vmlal.u32 $D4,$R3,${R1}[1] |
| + |
| + vmlal.u32 $D0,$R3,${S2}[1] |
| + vmlal.u32 $D1,$R4,${S2}[1] |
| + vmlal.u32 $D3,$R1,${R2}[1] |
| + vmlal.u32 $D2,$R0,${R2}[1] |
| + vmlal.u32 $D4,$R2,${R2}[1] |
| + |
| + vmlal.u32 $D0,$R2,${S3}[1] |
| + vmlal.u32 $D3,$R0,${R3}[1] |
| + vmlal.u32 $D1,$R3,${S3}[1] |
| + vmlal.u32 $D2,$R4,${S3}[1] |
| + vmlal.u32 $D4,$R1,${R3}[1] |
| + |
| + vmlal.u32 $D3,$R4,${S4}[1] |
| + vmlal.u32 $D0,$R1,${S4}[1] |
| + vmlal.u32 $D1,$R2,${S4}[1] |
| + vmlal.u32 $D2,$R3,${S4}[1] |
| + vmlal.u32 $D4,$R0,${R4}[1] |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
| + @ and P. Schwabe |
| + @ |
| + @ H0>>+H1>>+H2>>+H3>>+H4 |
| + @ H3>>+H4>>*5+H0>>+H1 |
| + @ |
| + @ Trivia. |
| + @ |
| + @ Result of multiplication of n-bit number by m-bit number is |
| + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, |
| + @ m-bit number multiplied by 2^n is still n+m bits wide. |
| + @ |
| + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, |
| + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit |
| + @ one is n+1 bits wide. |
| + @ |
| + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that |
| + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 |
| + @ can be 27. However! In cases when their width exceeds 26 bits |
| + @ they are limited by 2^26+2^6. This in turn means that *sum* |
| + @ of the products with these values can still be viewed as sum |
| + @ of 52-bit numbers as long as the amount of addends is not a |
| + @ power of 2. For example, |
| + @ |
| + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, |
| + @ |
| + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or |
| + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than |
| + @ 8 * (2^52) or 2^55. However, the value is then multiplied by |
| + @ 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), |
| + @ which is less than 32 * (2^52) or 2^57. And when processing |
| + @ data we are looking at triple as many addends... |
| + @ |
| + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and |
| + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the |
| + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while |
| + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 |
| + @ instruction accepts 2x32-bit input and writes 2x64-bit result. |
| + @ This means that result of reduction have to be compressed upon |
| + @ loop wrap-around. This can be done in the process of reduction |
| + @ to minimize amount of instructions [as well as amount of |
| + @ 128-bit instructions, which benefits low-end processors], but |
| + @ one has to watch for H2 (which is narrower than H0) and 5*H4 |
| + @ not being wider than 58 bits, so that result of right shift |
| + @ by 26 bits fits in 32 bits. This is also useful on x86, |
| + @ because it allows using paddd in place of paddq, which |
| + @ benefits Atom, where paddq is ridiculously slow. |
| + |
| + vshr.u64 $T0,$D3,#26 |
| + vmovn.i64 $D3#lo,$D3 |
| + vshr.u64 $T1,$D0,#26 |
| + vmovn.i64 $D0#lo,$D0 |
| + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 |
| + vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff |
| + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 |
| + vbic.i32 $D0#lo,#0xfc000000 |
| + |
| + vshrn.u64 $T0#lo,$D4,#26 |
| + vmovn.i64 $D4#lo,$D4 |
| + vshr.u64 $T1,$D1,#26 |
| + vmovn.i64 $D1#lo,$D1 |
| + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 |
| + vbic.i32 $D4#lo,#0xfc000000 |
| + vbic.i32 $D1#lo,#0xfc000000 |
| + |
| + vadd.i32 $D0#lo,$D0#lo,$T0#lo |
| + vshl.u32 $T0#lo,$T0#lo,#2 |
| + vshrn.u64 $T1#lo,$D2,#26 |
| + vmovn.i64 $D2#lo,$D2 |
| + vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 |
| + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 |
| + vbic.i32 $D2#lo,#0xfc000000 |
| + |
| + vshr.u32 $T0#lo,$D0#lo,#26 |
| + vbic.i32 $D0#lo,#0xfc000000 |
| + vshr.u32 $T1#lo,$D3#lo,#26 |
| + vbic.i32 $D3#lo,#0xfc000000 |
| + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 |
| + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 |
| + |
| + subs $zeros,$zeros,#1 |
| + beq .Lsquare_break_neon |
| + |
| + add $tbl0,$ctx,#(48+0*9*4) |
| + add $tbl1,$ctx,#(48+1*9*4) |
| + |
| + vtrn.32 $R0,$D0#lo @ r^2:r^1 |
| + vtrn.32 $R2,$D2#lo |
| + vtrn.32 $R3,$D3#lo |
| + vtrn.32 $R1,$D1#lo |
| + vtrn.32 $R4,$D4#lo |
| + |
| + vshl.u32 $S2,$R2,#2 @ *5 |
| + vshl.u32 $S3,$R3,#2 |
| + vshl.u32 $S1,$R1,#2 |
| + vshl.u32 $S4,$R4,#2 |
| + vadd.i32 $S2,$S2,$R2 |
| + vadd.i32 $S1,$S1,$R1 |
| + vadd.i32 $S3,$S3,$R3 |
| + vadd.i32 $S4,$S4,$R4 |
| + |
| + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! |
| + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! |
| + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! |
| + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! |
| + vst1.32 {${S4}[0]},[$tbl0,:32] |
| + vst1.32 {${S4}[1]},[$tbl1,:32] |
| + |
| + b .Lsquare_neon |
| + |
| +.align 4 |
| +.Lsquare_break_neon: |
| + add $tbl0,$ctx,#(48+2*4*9) |
| + add $tbl1,$ctx,#(48+3*4*9) |
| + |
| + vmov $R0,$D0#lo @ r^4:r^3 |
| + vshl.u32 $S1,$D1#lo,#2 @ *5 |
| + vmov $R1,$D1#lo |
| + vshl.u32 $S2,$D2#lo,#2 |
| + vmov $R2,$D2#lo |
| + vshl.u32 $S3,$D3#lo,#2 |
| + vmov $R3,$D3#lo |
| + vshl.u32 $S4,$D4#lo,#2 |
| + vmov $R4,$D4#lo |
| + vadd.i32 $S1,$S1,$D1#lo |
| + vadd.i32 $S2,$S2,$D2#lo |
| + vadd.i32 $S3,$S3,$D3#lo |
| + vadd.i32 $S4,$S4,$D4#lo |
| + |
| + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! |
| + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! |
| + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! |
| + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! |
| + vst1.32 {${S4}[0]},[$tbl0] |
| + vst1.32 {${S4}[1]},[$tbl1] |
| + |
| +.Lno_init_neon: |
| + ret @ bx lr |
| +.size poly1305_init_neon,.-poly1305_init_neon |
| + |
| +.type poly1305_blocks_neon,%function |
| +.align 5 |
| +poly1305_blocks_neon: |
| +.Lpoly1305_blocks_neon: |
| + ldr ip,[$ctx,#36] @ is_base2_26 |
| + |
| + cmp $len,#64 |
| + blo .Lpoly1305_blocks @ under 64 bytes: hand off to .Lpoly1305_blocks |
| + |
| + stmdb sp!,{r4-r7} @ restored in the epilogue |
| + vstmdb sp!,{d8-d15} @ ABI specification says so |
| + |
| + tst ip,ip @ is_base2_26? |
| + bne .Lbase2_26_neon |
| + |
| + stmdb sp!,{r1-r3,lr} @ r1-r3 are reused below and bl overwrites lr |
| + bl .Lpoly1305_init_neon |
| + |
| + ldr r4,[$ctx,#0] @ load hash value base 2^32 |
| + ldr r5,[$ctx,#4] |
| + ldr r6,[$ctx,#8] |
| + ldr r7,[$ctx,#12] |
| + ldr ip,[$ctx,#16] |
| + |
| + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 |
| + mov r3,r4,lsr#26 |
| + veor $D0#lo,$D0#lo,$D0#lo |
| + mov r4,r5,lsr#20 |
| + orr r3,r3,r5,lsl#6 |
| + veor $D1#lo,$D1#lo,$D1#lo |
| + mov r5,r6,lsr#14 |
| + orr r4,r4,r6,lsl#12 |
| + veor $D2#lo,$D2#lo,$D2#lo |
| + mov r6,r7,lsr#8 |
| + orr r5,r5,r7,lsl#18 |
| + veor $D3#lo,$D3#lo,$D3#lo |
| + and r3,r3,#0x03ffffff |
| + orr r6,r6,ip,lsl#24 |
| + veor $D4#lo,$D4#lo,$D4#lo |
| + and r4,r4,#0x03ffffff |
| + mov r1,#1 |
| + and r5,r5,#0x03ffffff |
| + str r1,[$ctx,#36] @ set is_base2_26 |
| + |
| + vmov.32 $D0#lo[0],r2 |
| + vmov.32 $D1#lo[0],r3 |
| + vmov.32 $D2#lo[0],r4 |
| + vmov.32 $D3#lo[0],r5 |
| + vmov.32 $D4#lo[0],r6 |
| + adr $zeros,.Lzeros |
| + |
| + ldmia sp!,{r1-r3,lr} |
| + b .Lhash_loaded |
| + |
| +.align 4 |
| +.Lbase2_26_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ load hash value |
| + |
| + veor $D0#lo,$D0#lo,$D0#lo |
| + veor $D1#lo,$D1#lo,$D1#lo |
| + veor $D2#lo,$D2#lo,$D2#lo |
| + veor $D3#lo,$D3#lo,$D3#lo |
| + veor $D4#lo,$D4#lo,$D4#lo |
| + vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! |
| + adr $zeros,.Lzeros |
| + vld1.32 {$D4#lo[0]},[$ctx] |
| + sub $ctx,$ctx,#16 @ rewind |
| + |
| +.Lhash_loaded: |
| + add $in2,$inp,#32 |
| + mov $padbit,$padbit,lsl#24 @ same bit position as the 1<<24 constant in .Leven |
| + tst $len,#31 |
| + beq .Leven @ length is a multiple of 32: no odd block |
| + |
| + vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! |
| + vmov.32 $H4#lo[0],$padbit |
| + sub $len,$len,#16 |
| + add $in2,$inp,#32 |
| + |
| +# ifdef __ARMEB__ |
| + vrev32.8 $H0,$H0 |
| + vrev32.8 $H3,$H3 |
| + vrev32.8 $H1,$H1 |
| + vrev32.8 $H2,$H2 |
| +# endif |
| + vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 |
| + vshl.u32 $H3#lo,$H3#lo,#18 |
| + |
| + vsri.u32 $H3#lo,$H2#lo,#14 |
| + vshl.u32 $H2#lo,$H2#lo,#12 |
| + vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi |
| + |
| + vbic.i32 $H3#lo,#0xfc000000 |
| + vsri.u32 $H2#lo,$H1#lo,#20 |
| + vshl.u32 $H1#lo,$H1#lo,#6 |
| + |
| + vbic.i32 $H2#lo,#0xfc000000 |
| + vsri.u32 $H1#lo,$H0#lo,#26 |
| + vadd.i32 $H3#hi,$H3#lo,$D3#lo |
| + |
| + vbic.i32 $H0#lo,#0xfc000000 |
| + vbic.i32 $H1#lo,#0xfc000000 |
| + vadd.i32 $H2#hi,$H2#lo,$D2#lo |
| + |
| + vadd.i32 $H0#hi,$H0#lo,$D0#lo |
| + vadd.i32 $H1#hi,$H1#lo,$D1#lo |
| + |
| + mov $tbl1,$zeros @ lane 1 multiplier is read from .Lzeros |
| + add $tbl0,$ctx,#48 @ lane 0 multiplier is the first table entry |
| + |
| + cmp $len,$len @ force eq: .Long_tail then skips the r^4:r^3 step |
| + b .Long_tail |
| + |
| +.align 4 |
| +.Leven: |
| + subs $len,$len,#64 |
| + it lo |
| + movlo $in2,$zeros @ fewer than 64 bytes left: inp[2:3] is read from .Lzeros |
| + |
| + vmov.i32 $H4,#1<<24 @ padbit, yes, always |
| + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] |
| + add $inp,$inp,#64 |
| + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) |
| + add $in2,$in2,#64 |
| + itt hi @ flags still come from the subs above |
| + addhi $tbl1,$ctx,#(48+1*9*4) |
| + addhi $tbl0,$ctx,#(48+3*9*4) |
| + |
| +# ifdef __ARMEB__ |
| + vrev32.8 $H0,$H0 |
| + vrev32.8 $H3,$H3 |
| + vrev32.8 $H1,$H1 |
| + vrev32.8 $H2,$H2 |
| +# endif |
| + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 |
| + vshl.u32 $H3,$H3,#18 |
| + |
| + vsri.u32 $H3,$H2,#14 |
| + vshl.u32 $H2,$H2,#12 |
| + |
| + vbic.i32 $H3,#0xfc000000 |
| + vsri.u32 $H2,$H1,#20 |
| + vshl.u32 $H1,$H1,#6 |
| + |
| + vbic.i32 $H2,#0xfc000000 |
| + vsri.u32 $H1,$H0,#26 |
| + |
| + vbic.i32 $H0,#0xfc000000 |
| + vbic.i32 $H1,#0xfc000000 |
| + |
| + bls .Lskip_loop @ same flags: 64 bytes or fewer were left, no main loop |
| + |
| + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 |
| + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 |
| + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! |
| + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! |
| + b .Loop_neon |
| + |
| +.align 5 |
| +.Loop_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 |
| + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r |
| + @ \___________________/ |
| + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 |
| + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r |
| + @ \___________________/ \____________________/ |
| + @ |
| + @ Note that we start with inp[2:3]*r^2. This is because it |
| + @ doesn't depend on reduction in previous iteration. |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ inp[2:3]*r^2 |
| + |
| + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] |
| + vmull.u32 $D2,$H2#hi,${R0}[1] |
| + vadd.i32 $H0#lo,$H0#lo,$D0#lo |
| + vmull.u32 $D0,$H0#hi,${R0}[1] |
| + vadd.i32 $H3#lo,$H3#lo,$D3#lo |
| + vmull.u32 $D3,$H3#hi,${R0}[1] |
| + vmlal.u32 $D2,$H1#hi,${R1}[1] |
| + vadd.i32 $H1#lo,$H1#lo,$D1#lo |
| + vmull.u32 $D1,$H1#hi,${R0}[1] |
| + |
| + vadd.i32 $H4#lo,$H4#lo,$D4#lo |
| + vmull.u32 $D4,$H4#hi,${R0}[1] |
| + subs $len,$len,#64 |
| + vmlal.u32 $D0,$H4#hi,${S1}[1] |
| + it lo |
| + movlo $in2,$zeros |
| + vmlal.u32 $D3,$H2#hi,${R1}[1] |
| + vld1.32 ${S4}[1],[$tbl1,:32] |
| + vmlal.u32 $D1,$H0#hi,${R1}[1] |
| + vmlal.u32 $D4,$H3#hi,${R1}[1] |
| + |
| + vmlal.u32 $D0,$H3#hi,${S2}[1] |
| + vmlal.u32 $D3,$H1#hi,${R2}[1] |
| + vmlal.u32 $D4,$H2#hi,${R2}[1] |
| + vmlal.u32 $D1,$H4#hi,${S2}[1] |
| + vmlal.u32 $D2,$H0#hi,${R2}[1] |
| + |
| + vmlal.u32 $D3,$H0#hi,${R3}[1] |
| + vmlal.u32 $D0,$H2#hi,${S3}[1] |
| + vmlal.u32 $D4,$H1#hi,${R3}[1] |
| + vmlal.u32 $D1,$H3#hi,${S3}[1] |
| + vmlal.u32 $D2,$H4#hi,${S3}[1] |
| + |
| + vmlal.u32 $D3,$H4#hi,${S4}[1] |
| + vmlal.u32 $D0,$H1#hi,${S4}[1] |
| + vmlal.u32 $D4,$H0#hi,${R4}[1] |
| + vmlal.u32 $D1,$H2#hi,${S4}[1] |
| + vmlal.u32 $D2,$H3#hi,${S4}[1] |
| + |
| + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) |
| + add $in2,$in2,#64 |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ (hash+inp[0:1])*r^4 and accumulate |
| + |
| + vmlal.u32 $D3,$H3#lo,${R0}[0] |
| + vmlal.u32 $D0,$H0#lo,${R0}[0] |
| + vmlal.u32 $D4,$H4#lo,${R0}[0] |
| + vmlal.u32 $D1,$H1#lo,${R0}[0] |
| + vmlal.u32 $D2,$H2#lo,${R0}[0] |
| + vld1.32 ${S4}[0],[$tbl0,:32] |
| + |
| + vmlal.u32 $D3,$H2#lo,${R1}[0] |
| + vmlal.u32 $D0,$H4#lo,${S1}[0] |
| + vmlal.u32 $D4,$H3#lo,${R1}[0] |
| + vmlal.u32 $D1,$H0#lo,${R1}[0] |
| + vmlal.u32 $D2,$H1#lo,${R1}[0] |
| + |
| + vmlal.u32 $D3,$H1#lo,${R2}[0] |
| + vmlal.u32 $D0,$H3#lo,${S2}[0] |
| + vmlal.u32 $D4,$H2#lo,${R2}[0] |
| + vmlal.u32 $D1,$H4#lo,${S2}[0] |
| + vmlal.u32 $D2,$H0#lo,${R2}[0] |
| + |
| + vmlal.u32 $D3,$H0#lo,${R3}[0] |
| + vmlal.u32 $D0,$H2#lo,${S3}[0] |
| + vmlal.u32 $D4,$H1#lo,${R3}[0] |
| + vmlal.u32 $D1,$H3#lo,${S3}[0] |
| + vmlal.u32 $D3,$H4#lo,${S4}[0] |
| + |
| + vmlal.u32 $D2,$H4#lo,${S3}[0] |
| + vmlal.u32 $D0,$H1#lo,${S4}[0] |
| + vmlal.u32 $D4,$H0#lo,${R4}[0] |
| + vmov.i32 $H4,#1<<24 @ padbit, yes, always |
| + vmlal.u32 $D1,$H2#lo,${S4}[0] |
| + vmlal.u32 $D2,$H3#lo,${S4}[0] |
| + |
| + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] |
| + add $inp,$inp,#64 |
| +# ifdef __ARMEB__ |
| + vrev32.8 $H0,$H0 |
| + vrev32.8 $H1,$H1 |
| + vrev32.8 $H2,$H2 |
| + vrev32.8 $H3,$H3 |
| +# endif |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction interleaved with base 2^32 -> base 2^26 of |
| + @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. |
| + |
| + vshr.u64 $T0,$D3,#26 |
| + vmovn.i64 $D3#lo,$D3 |
| + vshr.u64 $T1,$D0,#26 |
| + vmovn.i64 $D0#lo,$D0 |
| + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 |
| + vbic.i32 $D3#lo,#0xfc000000 |
| + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 |
| + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 |
| + vshl.u32 $H3,$H3,#18 |
| + vbic.i32 $D0#lo,#0xfc000000 |
| + |
| + vshrn.u64 $T0#lo,$D4,#26 |
| + vmovn.i64 $D4#lo,$D4 |
| + vshr.u64 $T1,$D1,#26 |
| + vmovn.i64 $D1#lo,$D1 |
| + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 |
| + vsri.u32 $H3,$H2,#14 |
| + vbic.i32 $D4#lo,#0xfc000000 |
| + vshl.u32 $H2,$H2,#12 |
| + vbic.i32 $D1#lo,#0xfc000000 |
| + |
| + vadd.i32 $D0#lo,$D0#lo,$T0#lo |
| + vshl.u32 $T0#lo,$T0#lo,#2 |
| + vbic.i32 $H3,#0xfc000000 |
| + vshrn.u64 $T1#lo,$D2,#26 |
| + vmovn.i64 $D2#lo,$D2 |
| + vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] |
| + vsri.u32 $H2,$H1,#20 |
| + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 |
| + vshl.u32 $H1,$H1,#6 |
| + vbic.i32 $D2#lo,#0xfc000000 |
| + vbic.i32 $H2,#0xfc000000 |
| + |
| + vshrn.u64 $T0#lo,$D0,#26 @ re-narrow |
| + vmovn.i64 $D0#lo,$D0 |
| + vsri.u32 $H1,$H0,#26 |
| + vbic.i32 $H0,#0xfc000000 |
| + vshr.u32 $T1#lo,$D3#lo,#26 |
| + vbic.i32 $D3#lo,#0xfc000000 |
| + vbic.i32 $D0#lo,#0xfc000000 |
| + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 |
| + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 |
| + vbic.i32 $H1,#0xfc000000 |
| + |
| + bhi .Loop_neon @ flags come from the subs earlier in this iteration |
| + |
| +.Lskip_loop: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 |
| + |
| + add $tbl1,$ctx,#(48+0*9*4) |
| + add $tbl0,$ctx,#(48+1*9*4) |
| + adds $len,$len,#32 |
| + it ne |
| + movne $len,#0 |
| + bne .Long_tail @ movne sets no flags, so ne/eq also steers .Long_tail |
| + |
| + vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi |
| + vadd.i32 $H0#hi,$H0#lo,$D0#lo |
| + vadd.i32 $H3#hi,$H3#lo,$D3#lo |
| + vadd.i32 $H1#hi,$H1#lo,$D1#lo |
| + vadd.i32 $H4#hi,$H4#lo,$D4#lo |
| + |
| +.Long_tail: |
| + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 |
| + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2 |
| + |
| + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant |
| + vmull.u32 $D2,$H2#hi,$R0 |
| + vadd.i32 $H0#lo,$H0#lo,$D0#lo |
| + vmull.u32 $D0,$H0#hi,$R0 |
| + vadd.i32 $H3#lo,$H3#lo,$D3#lo |
| + vmull.u32 $D3,$H3#hi,$R0 |
| + vadd.i32 $H1#lo,$H1#lo,$D1#lo |
| + vmull.u32 $D1,$H1#hi,$R0 |
| + vadd.i32 $H4#lo,$H4#lo,$D4#lo |
| + vmull.u32 $D4,$H4#hi,$R0 |
| + |
| + vmlal.u32 $D0,$H4#hi,$S1 |
| + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! |
| + vmlal.u32 $D3,$H2#hi,$R1 |
| + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! |
| + vmlal.u32 $D1,$H0#hi,$R1 |
| + vmlal.u32 $D4,$H3#hi,$R1 |
| + vmlal.u32 $D2,$H1#hi,$R1 |
| + |
| + vmlal.u32 $D3,$H1#hi,$R2 |
| + vld1.32 ${S4}[1],[$tbl1,:32] |
| + vmlal.u32 $D0,$H3#hi,$S2 |
| + vld1.32 ${S4}[0],[$tbl0,:32] |
| + vmlal.u32 $D4,$H2#hi,$R2 |
| + vmlal.u32 $D1,$H4#hi,$S2 |
| + vmlal.u32 $D2,$H0#hi,$R2 |
| + |
| + vmlal.u32 $D3,$H0#hi,$R3 |
| + it ne |
| + addne $tbl1,$ctx,#(48+2*9*4) |
| + vmlal.u32 $D0,$H2#hi,$S3 |
| + it ne |
| + addne $tbl0,$ctx,#(48+3*9*4) |
| + vmlal.u32 $D4,$H1#hi,$R3 |
| + vmlal.u32 $D1,$H3#hi,$S3 |
| + vmlal.u32 $D2,$H4#hi,$S3 |
| + |
| + vmlal.u32 $D3,$H4#hi,$S4 |
| + vorn $MASK,$MASK,$MASK @ all-ones, can be redundant |
| + vmlal.u32 $D0,$H1#hi,$S4 |
| + vshr.u64 $MASK,$MASK,#38 |
| + vmlal.u32 $D4,$H0#hi,$R4 |
| + vmlal.u32 $D1,$H2#hi,$S4 |
| + vmlal.u32 $D2,$H3#hi,$S4 |
| + |
| + beq .Lshort_tail @ eq: skip the r^4:r^3 step |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ (hash+inp[0:1])*r^4:r^3 and accumulate |
| + |
| + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 |
| + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 |
| + |
| + vmlal.u32 $D2,$H2#lo,$R0 |
| + vmlal.u32 $D0,$H0#lo,$R0 |
| + vmlal.u32 $D3,$H3#lo,$R0 |
| + vmlal.u32 $D1,$H1#lo,$R0 |
| + vmlal.u32 $D4,$H4#lo,$R0 |
| + |
| + vmlal.u32 $D0,$H4#lo,$S1 |
| + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! |
| + vmlal.u32 $D3,$H2#lo,$R1 |
| + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! |
| + vmlal.u32 $D1,$H0#lo,$R1 |
| + vmlal.u32 $D4,$H3#lo,$R1 |
| + vmlal.u32 $D2,$H1#lo,$R1 |
| + |
| + vmlal.u32 $D3,$H1#lo,$R2 |
| + vld1.32 ${S4}[1],[$tbl1,:32] |
| + vmlal.u32 $D0,$H3#lo,$S2 |
| + vld1.32 ${S4}[0],[$tbl0,:32] |
| + vmlal.u32 $D4,$H2#lo,$R2 |
| + vmlal.u32 $D1,$H4#lo,$S2 |
| + vmlal.u32 $D2,$H0#lo,$R2 |
| + |
| + vmlal.u32 $D3,$H0#lo,$R3 |
| + vmlal.u32 $D0,$H2#lo,$S3 |
| + vmlal.u32 $D4,$H1#lo,$R3 |
| + vmlal.u32 $D1,$H3#lo,$S3 |
| + vmlal.u32 $D2,$H4#lo,$S3 |
| + |
| + vmlal.u32 $D3,$H4#lo,$S4 |
| + vorn $MASK,$MASK,$MASK @ all-ones |
| + vmlal.u32 $D0,$H1#lo,$S4 |
| + vshr.u64 $MASK,$MASK,#38 |
| + vmlal.u32 $D4,$H0#lo,$R4 |
| + vmlal.u32 $D1,$H2#lo,$S4 |
| + vmlal.u32 $D2,$H3#lo,$S4 |
| + |
| +.Lshort_tail: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ horizontal addition |
| + |
| + vadd.i64 $D3#lo,$D3#lo,$D3#hi |
| + vadd.i64 $D0#lo,$D0#lo,$D0#hi |
| + vadd.i64 $D4#lo,$D4#lo,$D4#hi |
| + vadd.i64 $D1#lo,$D1#lo,$D1#hi |
| + vadd.i64 $D2#lo,$D2#lo,$D2#hi |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction, but without narrowing |
| + |
| + vshr.u64 $T0,$D3,#26 |
| + vand.i64 $D3,$D3,$MASK |
| + vshr.u64 $T1,$D0,#26 |
| + vand.i64 $D0,$D0,$MASK |
| + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 |
| + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 |
| + |
| + vshr.u64 $T0,$D4,#26 |
| + vand.i64 $D4,$D4,$MASK |
| + vshr.u64 $T1,$D1,#26 |
| + vand.i64 $D1,$D1,$MASK |
| + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 |
| + |
| + vadd.i64 $D0,$D0,$T0 |
| + vshl.u64 $T0,$T0,#2 |
| + vshr.u64 $T1,$D2,#26 |
| + vand.i64 $D2,$D2,$MASK |
| + vadd.i64 $D0,$D0,$T0 @ h4 -> h0 |
| + vadd.i64 $D3,$D3,$T1 @ h2 -> h3 |
| + |
| + vshr.u64 $T0,$D0,#26 |
| + vand.i64 $D0,$D0,$MASK |
| + vshr.u64 $T1,$D3,#26 |
| + vand.i64 $D3,$D3,$MASK |
| + vadd.i64 $D1,$D1,$T0 @ h0 -> h1 |
| + vadd.i64 $D4,$D4,$T1 @ h3 -> h4 |
| + |
| + cmp $len,#0 |
| + bne .Leven @ nonzero only after the odd-block path above |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ store hash value |
| + |
| + vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! |
| + vst1.32 {$D4#lo[0]},[$ctx] |
| + |
| + vldmia sp!,{d8-d15} @ epilogue |
| + ldmia sp!,{r4-r7} |
| + ret @ bx lr |
| +.size poly1305_blocks_neon,.-poly1305_blocks_neon |
| + |
| +.align 5 |
| +.Lzeros: |
| +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 16 words = 64 zero bytes |
| +#ifndef __KERNEL__ |
| +.LOPENSSL_armcap: |
| +# ifdef _WIN32 |
| +.word OPENSSL_armcap_P @ absolute address |
| +# else |
| +.word OPENSSL_armcap_P-.Lpoly1305_init @ offset relative to .Lpoly1305_init |
| +# endif |
| +.comm OPENSSL_armcap_P,4,4 |
| +.hidden OPENSSL_armcap_P |
| +#endif |
| +#endif |
| +___ |
| +} } |
| +$code.=<<___; |
| +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm" |
| +.align 2 |
| +___ |
| + |
| +foreach (split("\n",$code)) { # post-process the generated text one line at a time |
| + s/\`([^\`]*)\`/eval $1/geo; # replace backticked text with its evaluated value |
| + |
| + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or # qN#lo -> d(2N), qN#hi -> d(2N+1) |
| + s/\bret\b/bx lr/go or # tried only when the line above matched nothing |
| + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 |
| + |
| + print $_,"\n"; |
| +} |
| +close STDOUT; # enforce flush |
| diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped |
| new file mode 100644 |
| index 000000000000..37b71d990293 |
| |
| |
| @@ -0,0 +1,1158 @@ |
| +#ifndef __KERNEL__ |
| +# include "arm_arch.h" |
| +#else |
| +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ |
| +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ |
| +# define poly1305_init poly1305_init_arm |
| +# define poly1305_blocks poly1305_blocks_arm |
| +# define poly1305_emit poly1305_emit_arm |
| +.globl poly1305_blocks_neon |
| +#endif |
| + |
| +#if defined(__thumb2__) |
| +.syntax unified |
| +.thumb |
| +#else |
| +.code 32 |
| +#endif |
| + |
| +.text |
| + |
| +.globl poly1305_emit |
| +.globl poly1305_blocks |
| +.globl poly1305_init |
| +.type poly1305_init,%function |
| +.align 5 |
| +poly1305_init: |
| +.Lpoly1305_init: |
| + stmdb sp!,{r4-r11} @ restored at .Lno_key |
| + |
| + eor r3,r3,r3 |
| + cmp r1,#0 @ flags are consumed by moveq/beq below |
| + str r3,[r0,#0] @ zero hash value |
| + str r3,[r0,#4] |
| + str r3,[r0,#8] |
| + str r3,[r0,#12] |
| + str r3,[r0,#16] |
| + str r3,[r0,#36] @ clear is_base2_26 |
| + add r0,r0,#20 @ r0 now addresses the key words (entry r0 + 20) |
| + |
| +#ifdef __thumb2__ |
| + it eq |
| +#endif |
| + moveq r0,#0 @ r1 was zero: result is 0 and no key is stored |
| + beq .Lno_key |
| + |
| +#if __ARM_MAX_ARCH__>=7 |
| + mov r3,#-1 |
| + str r3,[r0,#28] @ impossible key power value, tested by poly1305_init_neon |
| +# ifndef __KERNEL__ |
| + adr r11,.Lpoly1305_init |
| + ldr r12,.LOPENSSL_armcap |
| +# endif |
| +#endif |
| + ldrb r4,[r1,#0] |
| + mov r10,#0x0fffffff @ mask for key word 0 |
| + ldrb r5,[r1,#1] |
| + and r3,r10,#-4 @ 0x0ffffffc, mask for key words 1-3 |
| + ldrb r6,[r1,#2] |
| + ldrb r7,[r1,#3] |
| + orr r4,r4,r5,lsl#8 |
| + ldrb r5,[r1,#4] |
| + orr r4,r4,r6,lsl#16 |
| + ldrb r6,[r1,#5] |
| + orr r4,r4,r7,lsl#24 |
| + ldrb r7,[r1,#6] |
| + and r4,r4,r10 |
| + |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| +# if !defined(_WIN32) |
| + ldr r12,[r11,r12] @ OPENSSL_armcap_P |
| +# endif |
| +# if defined(__APPLE__) || defined(_WIN32) |
| + ldr r12,[r12] |
| +# endif |
| +#endif |
| + ldrb r8,[r1,#7] |
| + orr r5,r5,r6,lsl#8 |
| + ldrb r6,[r1,#8] |
| + orr r5,r5,r7,lsl#16 |
| + ldrb r7,[r1,#9] |
| + orr r5,r5,r8,lsl#24 |
| + ldrb r8,[r1,#10] |
| + and r5,r5,r3 |
| + |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| + tst r12,#ARMV7_NEON @ check for NEON |
| +# ifdef __thumb2__ |
| + adr r9,.Lpoly1305_blocks_neon |
| + adr r11,.Lpoly1305_blocks |
| + it ne |
| + movne r11,r9 |
| + adr r12,.Lpoly1305_emit |
| + orr r11,r11,#1 @ thumb-ify addresses |
| + orr r12,r12,#1 |
| +# else |
| + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) |
| + ite eq |
| + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) |
| + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) |
| +# endif |
| +#endif |
| + ldrb r9,[r1,#11] |
| + orr r6,r6,r7,lsl#8 |
| + ldrb r7,[r1,#12] |
| + orr r6,r6,r8,lsl#16 |
| + ldrb r8,[r1,#13] |
| + orr r6,r6,r9,lsl#24 |
| + ldrb r9,[r1,#14] |
| + and r6,r6,r3 |
| + |
| + ldrb r10,[r1,#15] |
| + orr r7,r7,r8,lsl#8 |
| + str r4,[r0,#0] @ masked key words go to r0+0..12 |
| + orr r7,r7,r9,lsl#16 |
| + str r5,[r0,#4] |
| + orr r7,r7,r10,lsl#24 |
| + str r6,[r0,#8] |
| + and r7,r7,r3 |
| + str r7,[r0,#12] |
| +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) |
| + stmia r2,{r11,r12} @ fill functions table |
| + mov r0,#1 |
| +#else |
| + mov r0,#0 |
| +#endif |
| +.Lno_key: |
| + ldmia sp!,{r4-r11} |
| +#if __ARM_ARCH__>=5 |
| + bx lr @ bx lr |
| +#else |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + .word 0xe12fff1e @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_init,.-poly1305_init |
| +.type poly1305_blocks,%function |
| +.align 5 |
| +poly1305_blocks: |
| +.Lpoly1305_blocks: |
| + stmdb sp!,{r3-r11,lr} |
| + |
| + ands r2,r2,#-16 @ round length down to whole 16-byte blocks |
| + beq .Lno_data |
| + |
| + add r2,r2,r1 @ end pointer |
| + sub sp,sp,#32 @ scratch frame for the spills below |
| + |
| +#if __ARM_ARCH__<7 |
| + ldmia r0,{r4-r12} @ load context |
| + add r0,r0,#20 |
| + str r2,[sp,#16] @ offload stuff |
| + str r0,[sp,#12] |
| +#else |
| + ldr lr,[r0,#36] @ is_base2_26 |
| + ldmia r0!,{r4-r8} @ load hash value |
| + str r2,[sp,#16] @ offload stuff |
| + str r0,[sp,#12] |
| + |
| + adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 |
| + mov r10,r5,lsr#6 |
| + adcs r10,r10,r6,lsl#20 |
| + mov r11,r6,lsr#12 |
| + adcs r11,r11,r7,lsl#14 |
| + mov r12,r7,lsr#18 |
| + adcs r12,r12,r8,lsl#8 |
| + mov r2,#0 |
| + teq lr,#0 |
| + str r2,[r0,#16] @ clear is_base2_26 |
| + adc r2,r2,r8,lsr#24 |
| + |
| + itttt ne |
| + movne r4,r9 @ choose between radixes |
| + movne r5,r10 |
| + movne r6,r11 |
| + movne r7,r12 |
| + ldmia r0,{r9-r12} @ load key |
| + it ne |
| + movne r8,r2 |
| +#endif |
| + |
| + mov lr,r1 |
| + cmp r3,#0 @ hi here lets the first addhi in .Loop add the pad bit |
| + str r10,[sp,#20] @ r10-r12 are modified in .Loop and reloaded from here |
| + str r11,[sp,#24] |
| + str r12,[sp,#28] |
| + b .Loop |
| + |
| +.align 4 |
| +.Loop: |
| +#if __ARM_ARCH__<7 |
| + ldrb r0,[lr],#16 @ load input |
| +# ifdef __thumb2__ |
| + it hi |
| +# endif |
| + addhi r8,r8,#1 @ 1<<128 |
| + ldrb r1,[lr,#-15] |
| + ldrb r2,[lr,#-14] |
| + ldrb r3,[lr,#-13] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-12] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-11] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-10] |
| + adds r4,r4,r3 @ accumulate input |
| + |
| + ldrb r3,[lr,#-9] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-8] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-7] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-6] |
| + adcs r5,r5,r3 |
| + |
| + ldrb r3,[lr,#-5] |
| + orr r1,r0,r1,lsl#8 |
| + ldrb r0,[lr,#-4] |
| + orr r2,r1,r2,lsl#16 |
| + ldrb r1,[lr,#-3] |
| + orr r3,r2,r3,lsl#24 |
| + ldrb r2,[lr,#-2] |
| + adcs r6,r6,r3 |
| + |
| + ldrb r3,[lr,#-1] |
| + orr r1,r0,r1,lsl#8 |
| + str lr,[sp,#8] @ offload input pointer |
| + orr r2,r1,r2,lsl#16 |
| + add r10,r10,r10,lsr#2 |
| + orr r3,r2,r3,lsl#24 |
| +#else |
| + ldr r0,[lr],#16 @ load input |
| + it hi |
| + addhi r8,r8,#1 @ padbit |
| + ldr r1,[lr,#-12] |
| + ldr r2,[lr,#-8] |
| + ldr r3,[lr,#-4] |
| +# ifdef __ARMEB__ |
| + rev r0,r0 |
| + rev r1,r1 |
| + rev r2,r2 |
| + rev r3,r3 |
| +# endif |
| + adds r4,r4,r0 @ accumulate input |
| + str lr,[sp,#8] @ offload input pointer |
| + adcs r5,r5,r1 |
| + add r10,r10,r10,lsr#2 |
| + adcs r6,r6,r2 |
| +#endif |
| + add r11,r11,r11,lsr#2 @ r11*5/4; exact because init masked the low two bits |
| + adcs r7,r7,r3 |
| + add r12,r12,r12,lsr#2 |
| + |
| + umull r2,r3,r5,r9 |
| + adc r8,r8,#0 |
| + umull r0,r1,r4,r9 |
| + umlal r2,r3,r8,r10 |
| + umlal r0,r1,r7,r10 |
| + ldr r10,[sp,#20] @ reload r10 |
| + umlal r2,r3,r6,r12 |
| + umlal r0,r1,r5,r12 |
| + umlal r2,r3,r7,r11 |
| + umlal r0,r1,r6,r11 |
| + umlal r2,r3,r4,r10 |
| + str r0,[sp,#0] @ future r4 |
| + mul r0,r11,r8 |
| + ldr r11,[sp,#24] @ reload r11 |
| + adds r2,r2,r1 @ d1+=d0>>32 |
| + eor r1,r1,r1 |
| + adc lr,r3,#0 @ future r6 |
| + str r2,[sp,#4] @ future r5 |
| + |
| + mul r2,r12,r8 |
| + eor r3,r3,r3 |
| + umlal r0,r1,r7,r12 |
| + ldr r12,[sp,#28] @ reload r12 |
| + umlal r2,r3,r7,r9 |
| + umlal r0,r1,r6,r9 |
| + umlal r2,r3,r6,r10 |
| + umlal r0,r1,r5,r10 |
| + umlal r2,r3,r5,r11 |
| + umlal r0,r1,r4,r11 |
| + umlal r2,r3,r4,r12 |
| + ldr r4,[sp,#0] |
| + mul r8,r9,r8 |
| + ldr r5,[sp,#4] |
| + |
| + adds r6,lr,r0 @ d2+=d1>>32 |
| + ldr lr,[sp,#8] @ reload input pointer |
| + adc r1,r1,#0 |
| + adds r7,r2,r1 @ d3+=d2>>32 |
| + ldr r0,[sp,#16] @ reload end pointer |
| + adc r3,r3,#0 |
| + add r8,r8,r3 @ h4+=d3>>32 |
| + |
| + and r1,r8,#-4 @ split top word: bits 2 and up are folded back below |
| + and r8,r8,#3 |
| + add r1,r1,r1,lsr#2 @ *=5 |
| + adds r4,r4,r1 |
| + adcs r5,r5,#0 |
| + adcs r6,r6,#0 |
| + adcs r7,r7,#0 |
| + adc r8,r8,#0 |
| + |
| + cmp r0,lr @ done yet? |
| + bhi .Loop @ taken with hi set, so addhi at .Loop fires for every later block |
| + |
| + ldr r0,[sp,#12] |
| + add sp,sp,#32 |
| + stmdb r0,{r4-r8} @ store the result |
| + |
| +.Lno_data: |
| +#if __ARM_ARCH__>=5 |
| + ldmia sp!,{r3-r11,pc} |
| +#else |
| + ldmia sp!,{r3-r11,lr} |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + .word 0xe12fff1e @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_blocks,.-poly1305_blocks |
| +.type poly1305_emit,%function |
| +.align 5 |
| +poly1305_emit: |
| +.Lpoly1305_emit: |
| + stmdb sp!,{r4-r11} |
| + |
| + ldmia r0,{r3-r7} @ load the five hash words |
| + |
| +#if __ARM_ARCH__>=7 |
| + ldr ip,[r0,#36] @ is_base2_26 |
| + |
| + adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 |
| + mov r9,r4,lsr#6 |
| + adcs r9,r9,r5,lsl#20 |
| + mov r10,r5,lsr#12 |
| + adcs r10,r10,r6,lsl#14 |
| + mov r11,r6,lsr#18 |
| + adcs r11,r11,r7,lsl#8 |
| + mov r0,#0 |
| + adc r0,r0,r7,lsr#24 |
| + |
| + tst ip,ip |
| + itttt ne |
| + movne r3,r8 |
| + movne r4,r9 |
| + movne r5,r10 |
| + movne r6,r11 |
| + it ne |
| + movne r7,r0 |
| +#endif |
| + |
| + adds r8,r3,#5 @ compare to modulus |
| + adcs r9,r4,#0 |
| + adcs r10,r5,#0 |
| + adcs r11,r6,#0 |
| + adc r0,r7,#0 |
| + tst r0,#4 @ did it carry/borrow? |
| + |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne r3,r8 |
| + ldr r8,[r2,#0] @ four words from r2 are added to the result below |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne r4,r9 |
| + ldr r9,[r2,#4] |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne r5,r10 |
| + ldr r10,[r2,#8] |
| +#ifdef __thumb2__ |
| + it ne |
| +#endif |
| + movne r6,r11 |
| + ldr r11,[r2,#12] |
| + |
| + adds r3,r3,r8 |
| + adcs r4,r4,r9 |
| + adcs r5,r5,r10 |
| + adc r6,r6,r11 @ carry out of the top word is dropped |
| + |
| +#if __ARM_ARCH__>=7 |
| +# ifdef __ARMEB__ |
| + rev r3,r3 |
| + rev r4,r4 |
| + rev r5,r5 |
| + rev r6,r6 |
| +# endif |
| + str r3,[r1,#0] |
| + str r4,[r1,#4] |
| + str r5,[r1,#8] |
| + str r6,[r1,#12] |
| +#else |
| + strb r3,[r1,#0] @ byte-wise stores, least significant byte first |
| + mov r3,r3,lsr#8 |
| + strb r4,[r1,#4] |
| + mov r4,r4,lsr#8 |
| + strb r5,[r1,#8] |
| + mov r5,r5,lsr#8 |
| + strb r6,[r1,#12] |
| + mov r6,r6,lsr#8 |
| + |
| + strb r3,[r1,#1] |
| + mov r3,r3,lsr#8 |
| + strb r4,[r1,#5] |
| + mov r4,r4,lsr#8 |
| + strb r5,[r1,#9] |
| + mov r5,r5,lsr#8 |
| + strb r6,[r1,#13] |
| + mov r6,r6,lsr#8 |
| + |
| + strb r3,[r1,#2] |
| + mov r3,r3,lsr#8 |
| + strb r4,[r1,#6] |
| + mov r4,r4,lsr#8 |
| + strb r5,[r1,#10] |
| + mov r5,r5,lsr#8 |
| + strb r6,[r1,#14] |
| + mov r6,r6,lsr#8 |
| + |
| + strb r3,[r1,#3] |
| + strb r4,[r1,#7] |
| + strb r5,[r1,#11] |
| + strb r6,[r1,#15] |
| +#endif |
| + ldmia sp!,{r4-r11} |
| +#if __ARM_ARCH__>=5 |
| + bx lr @ bx lr |
| +#else |
| + tst lr,#1 |
| + moveq pc,lr @ be binary compatible with V4, yet |
| + .word 0xe12fff1e @ interoperable with Thumb ISA:-) |
| +#endif |
| +.size poly1305_emit,.-poly1305_emit |
| +#if __ARM_MAX_ARCH__>=7 |
| +.fpu neon |
| + |
| +.type poly1305_init_neon,%function |
| +.align 5 |
| +poly1305_init_neon: |
| +.Lpoly1305_init_neon: |
| + ldr r3,[r0,#48] @ first table element |
| + cmp r3,#-1 @ is value impossible? |
| + bne .Lno_init_neon @ marker overwritten: table was filled by an earlier call |
| + |
| + ldr r4,[r0,#20] @ load key base 2^32 |
| + ldr r5,[r0,#24] |
| + ldr r6,[r0,#28] |
| + ldr r7,[r0,#32] |
| + |
| + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 |
| + mov r3,r4,lsr#26 |
| + mov r4,r5,lsr#20 |
| + orr r3,r3,r5,lsl#6 |
| + mov r5,r6,lsr#14 |
| + orr r4,r4,r6,lsl#12 |
| + mov r6,r7,lsr#8 |
| + orr r5,r5,r7,lsl#18 |
| + and r3,r3,#0x03ffffff |
| + and r4,r4,#0x03ffffff |
| + and r5,r5,#0x03ffffff |
| + |
| + vdup.32 d0,r2 @ r^1 in both lanes |
| + add r2,r3,r3,lsl#2 @ *5 |
| + vdup.32 d1,r3 |
| + add r3,r4,r4,lsl#2 |
| + vdup.32 d2,r2 |
| + vdup.32 d3,r4 |
| + add r4,r5,r5,lsl#2 |
| + vdup.32 d4,r3 |
| + vdup.32 d5,r5 |
| + add r5,r6,r6,lsl#2 |
| + vdup.32 d6,r4 |
| + vdup.32 d7,r6 |
| + vdup.32 d8,r5 |
| + |
| + mov r5,#2 @ counter: two passes through .Lsquare_neon |
| + |
| +.Lsquare_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + |
| + vmull.u32 q5,d0,d0[1] |
| + vmull.u32 q6,d1,d0[1] |
| + vmull.u32 q7,d3,d0[1] |
| + vmull.u32 q8,d5,d0[1] |
| + vmull.u32 q9,d7,d0[1] |
| + |
| + vmlal.u32 q5,d7,d2[1] |
| + vmlal.u32 q6,d0,d1[1] |
| + vmlal.u32 q7,d1,d1[1] |
| + vmlal.u32 q8,d3,d1[1] |
| + vmlal.u32 q9,d5,d1[1] |
| + |
| + vmlal.u32 q5,d5,d4[1] |
| + vmlal.u32 q6,d7,d4[1] |
| + vmlal.u32 q8,d1,d3[1] |
| + vmlal.u32 q7,d0,d3[1] |
| + vmlal.u32 q9,d3,d3[1] |
| + |
| + vmlal.u32 q5,d3,d6[1] |
| + vmlal.u32 q8,d0,d5[1] |
| + vmlal.u32 q6,d5,d6[1] |
| + vmlal.u32 q7,d7,d6[1] |
| + vmlal.u32 q9,d1,d5[1] |
| + |
| + vmlal.u32 q8,d7,d8[1] |
| + vmlal.u32 q5,d1,d8[1] |
| + vmlal.u32 q6,d3,d8[1] |
| + vmlal.u32 q7,d5,d8[1] |
| + vmlal.u32 q9,d0,d7[1] |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
| + @ and P. Schwabe |
| + @ |
| + @ H0>>+H1>>+H2>>+H3>>+H4 |
| + @ H3>>+H4>>*5+H0>>+H1 |
| + @ |
| + @ Trivia. |
| + @ |
| + @ Result of multiplication of n-bit number by m-bit number is |
| + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, |
| + @ m-bit number multiplied by 2^n is still n+m bits wide. |
| + @ |
| + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, |
| + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit |
| + @ one is n+1 bits wide. |
| + @ |
| + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that |
| + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 |
| + @ can be 27. However! In cases when their width exceeds 26 bits |
| + @ they are limited by 2^26+2^6. This in turn means that *sum* |
| + @ of the products with these values can still be viewed as sum |
| + @ of 52-bit numbers as long as the amount of addends is not a |
| + @ power of 2. For example, |
| + @ |
| + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, |
| + @ |
| + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or |
| + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than |
| + @ 8 * (2^52) or 2^55. However, the value is then multiplied |
| + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), |
| + @ which is less than 32 * (2^52) or 2^57. And when processing |
| + @ data we are looking at triple as many addends... |
| + @ |
| + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and |
| + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the |
| + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while |
| + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 |
| + @ instruction accepts 2x32-bit input and writes 2x64-bit result. |
| + @ This means that result of reduction has to be compressed upon |
| + @ loop wrap-around. This can be done in the process of reduction |
| + @ to minimize amount of instructions [as well as amount of |
| + @ 128-bit instructions, which benefits low-end processors], but |
| + @ one has to watch for H2 (which is narrower than H0) and 5*H4 |
| + @ not being wider than 58 bits, so that result of right shift |
| + @ by 26 bits fits in 32 bits. This is also useful on x86, |
| + @ because it allows using paddd in place of paddq, which |
| + @ benefits Atom, where paddq is ridiculously slow. |
| + |
| + vshr.u64 q15,q8,#26 |
| + vmovn.i64 d16,q8 |
| + vshr.u64 q4,q5,#26 |
| + vmovn.i64 d10,q5 |
| + vadd.i64 q9,q9,q15 @ h3 -> h4 |
| + vbic.i32 d16,#0xfc000000 @ &=0x03ffffff |
| + vadd.i64 q6,q6,q4 @ h0 -> h1 |
| + vbic.i32 d10,#0xfc000000 |
| + |
| + vshrn.u64 d30,q9,#26 |
| + vmovn.i64 d18,q9 |
| + vshr.u64 q4,q6,#26 |
| + vmovn.i64 d12,q6 |
| + vadd.i64 q7,q7,q4 @ h1 -> h2 |
| + vbic.i32 d18,#0xfc000000 |
| + vbic.i32 d12,#0xfc000000 |
| + |
| + vadd.i32 d10,d10,d30 |
| + vshl.u32 d30,d30,#2 |
| + vshrn.u64 d8,q7,#26 |
| + vmovn.i64 d14,q7 |
| + vadd.i32 d10,d10,d30 @ h4 -> h0 |
| + vadd.i32 d16,d16,d8 @ h2 -> h3 |
| + vbic.i32 d14,#0xfc000000 |
| + |
| + vshr.u32 d30,d10,#26 |
| + vbic.i32 d10,#0xfc000000 |
| + vshr.u32 d8,d16,#26 |
| + vbic.i32 d16,#0xfc000000 |
| + vadd.i32 d12,d12,d30 @ h0 -> h1 |
| + vadd.i32 d18,d18,d8 @ h3 -> h4 |
| + |
| + subs r5,r5,#1 |
| + beq .Lsquare_break_neon |
| + |
| + add r6,r0,#(48+0*9*4) @ table entries are 9 words apart |
| + add r7,r0,#(48+1*9*4) |
| + |
| + vtrn.32 d0,d10 @ r^2:r^1 |
| + vtrn.32 d3,d14 |
| + vtrn.32 d5,d16 |
| + vtrn.32 d1,d12 |
| + vtrn.32 d7,d18 |
| + |
| + vshl.u32 d4,d3,#2 @ *5 |
| + vshl.u32 d6,d5,#2 |
| + vshl.u32 d2,d1,#2 |
| + vshl.u32 d8,d7,#2 |
| + vadd.i32 d4,d4,d3 |
| + vadd.i32 d2,d2,d1 |
| + vadd.i32 d6,d6,d5 |
| + vadd.i32 d8,d8,d7 |
| + |
| + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! |
| + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! |
| + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! |
| + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! |
| + vst1.32 {d8[0]},[r6,:32] |
| + vst1.32 {d8[1]},[r7,:32] |
| + |
| + b .Lsquare_neon |
| + |
| +.align 4 |
| +.Lsquare_break_neon: |
| + add r6,r0,#(48+2*4*9) |
| + add r7,r0,#(48+3*4*9) |
| + |
| + vmov d0,d10 @ r^4:r^3 |
| + vshl.u32 d2,d12,#2 @ *5 |
| + vmov d1,d12 |
| + vshl.u32 d4,d14,#2 |
| + vmov d3,d14 |
| + vshl.u32 d6,d16,#2 |
| + vmov d5,d16 |
| + vshl.u32 d8,d18,#2 |
| + vmov d7,d18 |
| + vadd.i32 d2,d2,d12 |
| + vadd.i32 d4,d4,d14 |
| + vadd.i32 d6,d6,d16 |
| + vadd.i32 d8,d8,d18 |
| + |
| + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! |
| + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! |
| + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! |
| + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! |
| + vst1.32 {d8[0]},[r6] |
| + vst1.32 {d8[1]},[r7] |
| + |
| +.Lno_init_neon: |
| + bx lr @ bx lr |
| +.size poly1305_init_neon,.-poly1305_init_neon |
| + |
| +@ poly1305_blocks_neon: NEON bulk entry point. |
| +@ Going by the C prototype in poly1305-glue.c the arguments are presumably |
| +@ r0 = state, r1 = src, r2 = len, r3 = hibit -- TODO confirm against the ABI. |
| +@ This first part handles entry and the case where the state is not yet |
| +@ flagged as base 2^26 (word at [r0,#36] is zero). |
| +.type poly1305_blocks_neon,%function |
| +.align 5 |
| +poly1305_blocks_neon: |
| +.Lpoly1305_blocks_neon: |
| + ldr ip,[r0,#36] @ is_base2_26 |
| + |
| + cmp r2,#64 |
| + blo .Lpoly1305_blocks @ r2 < 64: handled at .Lpoly1305_blocks (defined outside this section) |
| + |
| + stmdb sp!,{r4-r7} |
| + vstmdb sp!,{d8-d15} @ ABI specification says so |
| + |
| + tst ip,ip @ is_base2_26? |
| + bne .Lbase2_26_neon |
| + |
| + stmdb sp!,{r1-r3,lr} @ r1-r3 are reused as scratch below; restored by the ldmia at the end |
| + bl .Lpoly1305_init_neon |
| + |
| + ldr r4,[r0,#0] @ load hash value base 2^32 |
| + ldr r5,[r0,#4] |
| + ldr r6,[r0,#8] |
| + ldr r7,[r0,#12] |
| + ldr ip,[r0,#16] |
| + |
| + @ Repack the five 32-bit words into five 26-bit limbs in r2..r6; the |
| + @ interleaved veor instructions clear d10/d12/d14/d16/d18 in the meantime. |
| + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 |
| + mov r3,r4,lsr#26 |
| + veor d10,d10,d10 |
| + mov r4,r5,lsr#20 |
| + orr r3,r3,r5,lsl#6 |
| + veor d12,d12,d12 |
| + mov r5,r6,lsr#14 |
| + orr r4,r4,r6,lsl#12 |
| + veor d14,d14,d14 |
| + mov r6,r7,lsr#8 |
| + orr r5,r5,r7,lsl#18 |
| + veor d16,d16,d16 |
| + and r3,r3,#0x03ffffff |
| + orr r6,r6,ip,lsl#24 |
| + veor d18,d18,d18 |
| + and r4,r4,#0x03ffffff |
| + mov r1,#1 |
| + and r5,r5,#0x03ffffff |
| + str r1,[r0,#36] @ set is_base2_26 |
| + |
| + @ Limbs go to lane 0 of d10/d12/d14/d16/d18, the same registers the |
| + @ .Lbase2_26_neon path fills from memory. |
| + vmov.32 d10[0],r2 |
| + vmov.32 d12[0],r3 |
| + vmov.32 d14[0],r4 |
| + vmov.32 d16[0],r5 |
| + vmov.32 d18[0],r6 |
| + adr r5,.Lzeros @ r5 = address of the zero block |
| + |
| + ldmia sp!,{r1-r3,lr} |
| + b .Lhash_loaded |
| + |
| +.align 4 |
| +.Lbase2_26_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ load hash value |
| + @ |
| + @ Reached when the word at [r0,#36] is non-zero. The five words at [r0] |
| + @ are loaded into lane 0 of d10/d12/d14/d16/d18; the registers are |
| + @ cleared first so lane 1 is zero. |
| + |
| + veor d10,d10,d10 |
| + veor d12,d12,d12 |
| + veor d14,d14,d14 |
| + veor d16,d16,d16 |
| + veor d18,d18,d18 |
| + vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! |
| + adr r5,.Lzeros @ r5 = address of the zero block |
| + vld1.32 {d18[0]},[r0] |
| + sub r0,r0,#16 @ rewind |
| + |
| +@ Common continuation once the hash limbs sit in lane 0 of d10..d18. |
| +@ If r2 is a multiple of 32 control goes straight to .Leven; otherwise one |
| +@ 16-byte block is consumed here first and multiplied via .Long_tail. |
| +.Lhash_loaded: |
| + add r4,r1,#32 |
| + mov r3,r3,lsl#24 @ r3 << 24; moved into d28[0] below (cf. the "padbit" constant 1<<24 at .Leven) |
| + tst r2,#31 |
| + beq .Leven |
| + |
| + vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! @ one 16-byte block into lane 0 |
| + vmov.32 d28[0],r3 |
| + sub r2,r2,#16 |
| + add r4,r1,#32 |
| + |
| +# ifdef __ARMEB__ |
| + vrev32.8 q10,q10 |
| + vrev32.8 q13,q13 |
| + vrev32.8 q11,q11 |
| + vrev32.8 q12,q12 |
| +# endif |
| + vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 |
| + vshl.u32 d26,d26,#18 |
| + |
| + vsri.u32 d26,d24,#14 |
| + vshl.u32 d24,d24,#12 |
| + vadd.i32 d29,d28,d18 @ add hash value and move to #hi |
| + |
| + vbic.i32 d26,#0xfc000000 |
| + vsri.u32 d24,d22,#20 |
| + vshl.u32 d22,d22,#6 |
| + |
| + vbic.i32 d24,#0xfc000000 |
| + vsri.u32 d22,d20,#26 |
| + vadd.i32 d27,d26,d16 |
| + |
| + vbic.i32 d20,#0xfc000000 |
| + vbic.i32 d22,#0xfc000000 |
| + vadd.i32 d25,d24,d14 |
| + |
| + vadd.i32 d21,d20,d10 |
| + vadd.i32 d23,d22,d12 |
| + |
| + mov r7,r5 @ r7 = .Lzeros: .Long_tail loads its lane [1] multipliers from r7 |
| + add r6,r0,#48 @ r6 = table slot 0 (48+0*9*4): .Long_tail loads lane [0] from r6 |
| + |
| + cmp r2,r2 @ always equal: forces Z=1 so .Long_tail takes its "beq .Lshort_tail" |
| + b .Long_tail |
| + |
| +.align 4 |
| +@ Load up to four input blocks (64 bytes) and convert them to base 2^26. |
| +@ The flags set by the subs below stay live until the "bls .Lskip_loop": |
| +@ none of the instructions in between updates them. |
| +.Leven: |
| + subs r2,r2,#64 |
| + it lo |
| + movlo r4,r5 @ fewer than 64 bytes left: read inp[2:3] from .Lzeros instead |
| + |
| + vmov.i32 q14,#1<<24 @ padbit, yes, always |
| + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] |
| + add r1,r1,#64 |
| + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) |
| + add r4,r4,#64 |
| + itt hi |
| + addhi r7,r0,#(48+1*9*4) @ more than 64 bytes were left: r7 = table slot 1 |
| + addhi r6,r0,#(48+3*9*4) @ and r6 = table slot 3, as loaded before .Loop_neon |
| + |
| +# ifdef __ARMEB__ |
| + vrev32.8 q10,q10 |
| + vrev32.8 q13,q13 |
| + vrev32.8 q11,q11 |
| + vrev32.8 q12,q12 |
| +# endif |
| + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 |
| + vshl.u32 q13,q13,#18 |
| + |
| + vsri.u32 q13,q12,#14 |
| + vshl.u32 q12,q12,#12 |
| + |
| + vbic.i32 q13,#0xfc000000 |
| + vsri.u32 q12,q11,#20 |
| + vshl.u32 q11,q11,#6 |
| + |
| + vbic.i32 q12,#0xfc000000 |
| + vsri.u32 q11,q10,#26 |
| + |
| + vbic.i32 q10,#0xfc000000 |
| + vbic.i32 q11,#0xfc000000 |
| + |
| + bls .Lskip_loop @ 64 bytes or fewer were left: no main-loop iteration |
| + |
| + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 |
| + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 |
| + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! |
| + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! |
| + b .Loop_neon |
| + |
| +.align 5 |
| +.Loop_neon: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 |
| + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r |
| + @   \___________________/ |
| + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 |
| + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r |
| + @   \___________________/ \____________________/ |
| + @ |
| + @ Note that we start with inp[2:3]*r^2. This is because it |
| + @ doesn't depend on reduction in previous iteration. |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + @ |
| + @ The five 64-bit accumulators are q5..q9. Lane [1] of d0-d8 holds |
| + @ the values loaded through r7 (labelled r^2), lane [0] those loaded |
| + @ through r6 (labelled r^4). |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ inp[2:3]*r^2 |
| + |
| + vadd.i32 d24,d24,d14 @ accumulate inp[0:1] |
| + vmull.u32 q7,d25,d0[1] |
| + vadd.i32 d20,d20,d10 |
| + vmull.u32 q5,d21,d0[1] |
| + vadd.i32 d26,d26,d16 |
| + vmull.u32 q8,d27,d0[1] |
| + vmlal.u32 q7,d23,d1[1] |
| + vadd.i32 d22,d22,d12 |
| + vmull.u32 q6,d23,d0[1] |
| + |
| + vadd.i32 d28,d28,d18 |
| + vmull.u32 q9,d29,d0[1] |
| + subs r2,r2,#64 @ flags feed the movlo below and the "bhi .Loop_neon" at the bottom |
| + vmlal.u32 q5,d29,d2[1] |
| + it lo |
| + movlo r4,r5 @ fewer than 64 bytes left: next inp[2:3] comes from .Lzeros |
| + vmlal.u32 q8,d25,d1[1] |
| + vld1.32 d8[1],[r7,:32] |
| + vmlal.u32 q6,d21,d1[1] |
| + vmlal.u32 q9,d27,d1[1] |
| + |
| + vmlal.u32 q5,d27,d4[1] |
| + vmlal.u32 q8,d23,d3[1] |
| + vmlal.u32 q9,d25,d3[1] |
| + vmlal.u32 q6,d29,d4[1] |
| + vmlal.u32 q7,d21,d3[1] |
| + |
| + vmlal.u32 q8,d21,d5[1] |
| + vmlal.u32 q5,d25,d6[1] |
| + vmlal.u32 q9,d23,d5[1] |
| + vmlal.u32 q6,d27,d6[1] |
| + vmlal.u32 q7,d29,d6[1] |
| + |
| + vmlal.u32 q8,d29,d8[1] |
| + vmlal.u32 q5,d23,d8[1] |
| + vmlal.u32 q9,d21,d7[1] |
| + vmlal.u32 q6,d25,d8[1] |
| + vmlal.u32 q7,d27,d8[1] |
| + |
| + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) |
| + add r4,r4,#64 |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ (hash+inp[0:1])*r^4 and accumulate |
| + |
| + vmlal.u32 q8,d26,d0[0] |
| + vmlal.u32 q5,d20,d0[0] |
| + vmlal.u32 q9,d28,d0[0] |
| + vmlal.u32 q6,d22,d0[0] |
| + vmlal.u32 q7,d24,d0[0] |
| + vld1.32 d8[0],[r6,:32] |
| + |
| + vmlal.u32 q8,d24,d1[0] |
| + vmlal.u32 q5,d28,d2[0] |
| + vmlal.u32 q9,d26,d1[0] |
| + vmlal.u32 q6,d20,d1[0] |
| + vmlal.u32 q7,d22,d1[0] |
| + |
| + vmlal.u32 q8,d22,d3[0] |
| + vmlal.u32 q5,d26,d4[0] |
| + vmlal.u32 q9,d24,d3[0] |
| + vmlal.u32 q6,d28,d4[0] |
| + vmlal.u32 q7,d20,d3[0] |
| + |
| + vmlal.u32 q8,d20,d5[0] |
| + vmlal.u32 q5,d24,d6[0] |
| + vmlal.u32 q9,d22,d5[0] |
| + vmlal.u32 q6,d26,d6[0] |
| + vmlal.u32 q8,d28,d8[0] |
| + |
| + vmlal.u32 q7,d28,d6[0] |
| + vmlal.u32 q5,d22,d8[0] |
| + vmlal.u32 q9,d20,d7[0] |
| + vmov.i32 q14,#1<<24 @ padbit, yes, always |
| + vmlal.u32 q6,d24,d8[0] |
| + vmlal.u32 q7,d26,d8[0] |
| + |
| + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] |
| + add r1,r1,#64 |
| +# ifdef __ARMEB__ |
| + vrev32.8 q10,q10 |
| + vrev32.8 q11,q11 |
| + vrev32.8 q12,q12 |
| + vrev32.8 q13,q13 |
| +# endif |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction interleaved with base 2^32 -> base 2^26 of |
| + @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. |
| + |
| + vshr.u64 q15,q8,#26 |
| + vmovn.i64 d16,q8 |
| + vshr.u64 q4,q5,#26 |
| + vmovn.i64 d10,q5 |
| + vadd.i64 q9,q9,q15 @ h3 -> h4 |
| + vbic.i32 d16,#0xfc000000 |
| + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 |
| + vadd.i64 q6,q6,q4 @ h0 -> h1 |
| + vshl.u32 q13,q13,#18 |
| + vbic.i32 d10,#0xfc000000 |
| + |
| + vshrn.u64 d30,q9,#26 |
| + vmovn.i64 d18,q9 |
| + vshr.u64 q4,q6,#26 |
| + vmovn.i64 d12,q6 |
| + vadd.i64 q7,q7,q4 @ h1 -> h2 |
| + vsri.u32 q13,q12,#14 |
| + vbic.i32 d18,#0xfc000000 |
| + vshl.u32 q12,q12,#12 |
| + vbic.i32 d12,#0xfc000000 |
| + |
| + vadd.i32 d10,d10,d30 |
| + vshl.u32 d30,d30,#2 |
| + vbic.i32 q13,#0xfc000000 |
| + vshrn.u64 d8,q7,#26 |
| + vmovn.i64 d14,q7 |
| + vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] |
| + vsri.u32 q12,q11,#20 |
| + vadd.i32 d16,d16,d8 @ h2 -> h3 |
| + vshl.u32 q11,q11,#6 |
| + vbic.i32 d14,#0xfc000000 |
| + vbic.i32 q12,#0xfc000000 |
| + |
| + vshrn.u64 d30,q5,#26 @ re-narrow |
| + vmovn.i64 d10,q5 |
| + vsri.u32 q11,q10,#26 |
| + vbic.i32 q10,#0xfc000000 |
| + vshr.u32 d8,d16,#26 |
| + vbic.i32 d16,#0xfc000000 |
| + vbic.i32 d10,#0xfc000000 |
| + vadd.i32 d12,d12,d30 @ h0 -> h1 |
| + vadd.i32 d18,d18,d8 @ h3 -> h4 |
| + vbic.i32 q11,#0xfc000000 |
| + |
| + bhi .Loop_neon @ uses the flags from the subs near the top of the loop |
| + |
| +.Lskip_loop: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 |
| + @ |
| + @ r7/r6 are pointed at table slots 0 and 1. The adds below leaves |
| + @ the flags that .Long_tail tests: NE branches there directly (r2 is |
| + @ zeroed first, movne does not touch the flags); EQ adds the hash to |
| + @ the #hi halves and falls through with Z still set. |
| + |
| + add r7,r0,#(48+0*9*4) |
| + add r6,r0,#(48+1*9*4) |
| + adds r2,r2,#32 |
| + it ne |
| + movne r2,#0 |
| + bne .Long_tail |
| + |
| + vadd.i32 d25,d24,d14 @ add hash value and move to #hi |
| + vadd.i32 d21,d20,d10 |
| + vadd.i32 d27,d26,d16 |
| + vadd.i32 d23,d22,d12 |
| + vadd.i32 d29,d28,d18 |
| + |
| +@ Tail multiplication. The condition flags on entry are still live here: |
| +@ no instruction below updates them before the "beq .Lshort_tail". |
| +@ Z set (EQ) -> only the first multiplication is performed. |
| +@ Z clear (NE) -> r7/r6 are re-pointed at table slots 2 and 3 and the |
| +@ second multiplication (labelled r^4:r^3) is added on top. |
| +.Long_tail: |
| + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 |
| + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 |
| + |
| + vadd.i32 d24,d24,d14 @ can be redundant |
| + vmull.u32 q7,d25,d0 |
| + vadd.i32 d20,d20,d10 |
| + vmull.u32 q5,d21,d0 |
| + vadd.i32 d26,d26,d16 |
| + vmull.u32 q8,d27,d0 |
| + vadd.i32 d22,d22,d12 |
| + vmull.u32 q6,d23,d0 |
| + vadd.i32 d28,d28,d18 |
| + vmull.u32 q9,d29,d0 |
| + |
| + vmlal.u32 q5,d29,d2 |
| + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! |
| + vmlal.u32 q8,d25,d1 |
| + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! |
| + vmlal.u32 q6,d21,d1 |
| + vmlal.u32 q9,d27,d1 |
| + vmlal.u32 q7,d23,d1 |
| + |
| + vmlal.u32 q8,d23,d3 |
| + vld1.32 d8[1],[r7,:32] |
| + vmlal.u32 q5,d27,d4 |
| + vld1.32 d8[0],[r6,:32] |
| + vmlal.u32 q9,d25,d3 |
| + vmlal.u32 q6,d29,d4 |
| + vmlal.u32 q7,d21,d3 |
| + |
| + vmlal.u32 q8,d21,d5 |
| + it ne |
| + addne r7,r0,#(48+2*9*4) @ NE only: table slot 2, loaded below as r^3 |
| + vmlal.u32 q5,d25,d6 |
| + it ne |
| + addne r6,r0,#(48+3*9*4) @ NE only: table slot 3, loaded below as r^4 |
| + vmlal.u32 q9,d23,d5 |
| + vmlal.u32 q6,d27,d6 |
| + vmlal.u32 q7,d29,d6 |
| + |
| + vmlal.u32 q8,d29,d8 |
| + vorn q0,q0,q0 @ all-ones, can be redundant |
| + vmlal.u32 q5,d23,d8 |
| + vshr.u64 q0,q0,#38 @ 64-bit all-ones >> 38 = 26 low bits set; used as the vand mask at .Lshort_tail |
| + vmlal.u32 q9,d21,d7 |
| + vmlal.u32 q6,d25,d8 |
| + vmlal.u32 q7,d27,d8 |
| + |
| + beq .Lshort_tail |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ (hash+inp[0:1])*r^4:r^3 and accumulate |
| + |
| + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 |
| + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 |
| + |
| + vmlal.u32 q7,d24,d0 |
| + vmlal.u32 q5,d20,d0 |
| + vmlal.u32 q8,d26,d0 |
| + vmlal.u32 q6,d22,d0 |
| + vmlal.u32 q9,d28,d0 |
| + |
| + vmlal.u32 q5,d28,d2 |
| + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! |
| + vmlal.u32 q8,d24,d1 |
| + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! |
| + vmlal.u32 q6,d20,d1 |
| + vmlal.u32 q9,d26,d1 |
| + vmlal.u32 q7,d22,d1 |
| + |
| + vmlal.u32 q8,d22,d3 |
| + vld1.32 d8[1],[r7,:32] |
| + vmlal.u32 q5,d26,d4 |
| + vld1.32 d8[0],[r6,:32] |
| + vmlal.u32 q9,d24,d3 |
| + vmlal.u32 q6,d28,d4 |
| + vmlal.u32 q7,d20,d3 |
| + |
| + vmlal.u32 q8,d20,d5 |
| + vmlal.u32 q5,d24,d6 |
| + vmlal.u32 q9,d22,d5 |
| + vmlal.u32 q6,d26,d6 |
| + vmlal.u32 q7,d28,d6 |
| + |
| + vmlal.u32 q8,d28,d8 |
| + vorn q0,q0,q0 @ all-ones |
| + vmlal.u32 q5,d22,d8 |
| + vshr.u64 q0,q0,#38 @ rebuild the 26-bit mask (d0 was overwritten by the loads above) |
| + vmlal.u32 q9,d20,d7 |
| + vmlal.u32 q6,d24,d8 |
| + vmlal.u32 q7,d26,d8 |
| + |
| +@ Fold the two 64-bit lanes of each accumulator together, reduce, then |
| +@ either loop back to .Leven (r2 != 0) or store the hash and return. |
| +.Lshort_tail: |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ horizontal addition |
| + |
| + vadd.i64 d16,d16,d17 |
| + vadd.i64 d10,d10,d11 |
| + vadd.i64 d18,d18,d19 |
| + vadd.i64 d12,d12,d13 |
| + vadd.i64 d14,d14,d15 |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ lazy reduction, but without narrowing |
| + @ (q0 is the 26-bit mask built with vorn/vshr in .Long_tail) |
| + |
| + vshr.u64 q15,q8,#26 |
| + vand.i64 q8,q8,q0 |
| + vshr.u64 q4,q5,#26 |
| + vand.i64 q5,q5,q0 |
| + vadd.i64 q9,q9,q15 @ h3 -> h4 |
| + vadd.i64 q6,q6,q4 @ h0 -> h1 |
| + |
| + vshr.u64 q15,q9,#26 |
| + vand.i64 q9,q9,q0 |
| + vshr.u64 q4,q6,#26 |
| + vand.i64 q6,q6,q0 |
| + vadd.i64 q7,q7,q4 @ h1 -> h2 |
| + |
| + vadd.i64 q5,q5,q15 |
| + vshl.u64 q15,q15,#2 |
| + vshr.u64 q4,q7,#26 |
| + vand.i64 q7,q7,q0 |
| + vadd.i64 q5,q5,q15 @ h4 -> h0 |
| + vadd.i64 q8,q8,q4 @ h2 -> h3 |
| + |
| + vshr.u64 q15,q5,#26 |
| + vand.i64 q5,q5,q0 |
| + vshr.u64 q4,q8,#26 |
| + vand.i64 q8,q8,q0 |
| + vadd.i64 q6,q6,q15 @ h0 -> h1 |
| + vadd.i64 q9,q9,q4 @ h3 -> h4 |
| + |
| + cmp r2,#0 |
| + bne .Leven @ r2 is non-zero only after the single-block path in .Lhash_loaded (.Lskip_loop leaves it 0) |
| + |
| + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| + @ store hash value |
| + |
| + vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! |
| + vst1.32 {d18[0]},[r0] |
| + |
| + vldmia sp!,{d8-d15} @ epilogue |
| + ldmia sp!,{r4-r7} @ pops in the reverse order of the stmdb/vstmdb pair at entry |
| + bx lr @ bx lr |
| +.size poly1305_blocks_neon,.-poly1305_blocks_neon |
| + |
| +.align 5 |
| +@ Sixteen zero words. poly1305_blocks_neon keeps this address in r5 and |
| +@ substitutes it for the input pointer (r4) or the table pointer (r7) |
| +@ when there is no real data for the second pair of lanes. |
| +.Lzeros: |
| +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
| +@ Capability word, only emitted for builds that do not define __KERNEL__. |
| +#ifndef __KERNEL__ |
| +.LOPENSSL_armcap: |
| +# ifdef _WIN32 |
| +.word OPENSSL_armcap_P |
| +# else |
| +.word OPENSSL_armcap_P-.Lpoly1305_init |
| +# endif |
| +.comm OPENSSL_armcap_P,4,4 |
| +.hidden OPENSSL_armcap_P |
| +#endif |
| +@ NOTE(review): the #endif below closes a conditional opened earlier in the file. |
| +#endif |
| +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" |
| +.align 2 |
| diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c |
| new file mode 100644 |
| index 000000000000..74a725ac89c9 |
| |
| |
| @@ -0,0 +1,276 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM |
| + * |
| + * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> |
| + */ |
| + |
| +#include <asm/hwcap.h> |
| +#include <asm/neon.h> |
| +#include <asm/simd.h> |
| +#include <asm/unaligned.h> |
| +#include <crypto/algapi.h> |
| +#include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| +#include <crypto/internal/simd.h> |
| +#include <linux/cpufeature.h> |
| +#include <linux/crypto.h> |
| +#include <linux/jump_label.h> |
| +#include <linux/module.h> |
| + |
| +void poly1305_init_arm(void *state, const u8 *key); |
| +void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); |
| +void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); |
| + |
| +/* |
| + * Empty weak definition of the NEON block routine. |
| + * |
| + * NOTE(review): the assembly source defines poly1305_blocks_neon too, |
| + * apparently inside a preprocessor conditional; this stub presumably only |
| + * satisfies the linker when that section is compiled out -- confirm. |
| + */ |
| +void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) |
| +{ |
| +} |
| + |
| +/* |
| + * Static key, false by default. arm_poly1305_mod_init() enables it when the |
| + * kernel is built with CONFIG_KERNEL_MODE_NEON and elf_hwcap has HWCAP_NEON. |
| + */ |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
| + |
| +/* |
| + * Library entry point: start a new MAC computation from a 32-byte key. |
| + * |
| + * @dctx: context to initialise |
| + * @key:  key bytes; bytes 0..15 are handed to poly1305_init_arm(), bytes |
| + *        16..31 are stored in dctx->s as four little-endian 32-bit words |
| + * |
| + * Also discards any buffered partial block (buflen = 0). |
| + */ |
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) |
| +{ |
| + poly1305_init_arm(&dctx->h, key); |
| + dctx->s[0] = get_unaligned_le32(key + 16); |
| + dctx->s[1] = get_unaligned_le32(key + 20); |
| + dctx->s[2] = get_unaligned_le32(key + 24); |
| + dctx->s[3] = get_unaligned_le32(key + 28); |
| + dctx->buflen = 0; |
| +} |
| +EXPORT_SYMBOL(poly1305_init_arch); |
| + |
| +/* |
| + * shash .init callback: reset the descriptor context. |
| + * |
| + * No key is available here; rset and sset are cleared so that |
| + * arm_poly1305_blocks() takes the key material from the first two blocks |
| + * of input. Always returns 0. |
| + */ |
| +static int arm_poly1305_init(struct shash_desc *desc) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + dctx->buflen = 0; |
| + dctx->rset = 0; |
| + dctx->sset = false; |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Consume whole blocks from @src on behalf of the shash interface. |
| + * |
| + * Until dctx->sset is true the leading blocks are key material rather than |
| + * message data: the first block goes to poly1305_init_arm() (rset is then |
| + * set), the next one is stored in dctx->s as four little-endian words |
| + * (sset is then set). Whatever is left is rounded down to a multiple of |
| + * POLY1305_BLOCK_SIZE and hashed. |
| + * |
| + * @hibit is passed through unchanged to the block routine. |
| + * @do_neon selects poly1305_blocks_neon(), but only if have_neon is enabled; |
| + * otherwise poly1305_blocks_arm() is used. |
| + * |
| + * NOTE(review): the rset branch subtracts a block from @len without checking |
| + * it first; both callers in this file only pass len >= POLY1305_BLOCK_SIZE. |
| + */ |
| +static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + u32 len, u32 hibit, bool do_neon) |
| +{ |
| + if (unlikely(!dctx->sset)) { |
| + if (!dctx->rset) { |
| + poly1305_init_arm(&dctx->h, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = 1; |
| + } |
| + if (len >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + /* nothing but key material (or a partial block) was supplied */ |
| + if (len < POLY1305_BLOCK_SIZE) |
| + return; |
| + } |
| + |
| + /* drop any trailing partial block; the caller buffers it */ |
| + len &= ~(POLY1305_BLOCK_SIZE - 1); |
| + |
| + if (static_branch_likely(&have_neon) && likely(do_neon)) |
| + poly1305_blocks_neon(&dctx->h, src, len, hibit); |
| + else |
| + poly1305_blocks_arm(&dctx->h, src, len, hibit); |
| +} |
| + |
| +/* |
| + * Feed @len bytes of @src into the MAC, buffering partial blocks. |
| + * |
| + * 1. If dctx->buf already holds data, top it up; once it reaches a full |
| + *    block it is processed with do_neon forced to false. |
| + * 2. The whole blocks remaining in @src are processed in one call, using |
| + *    NEON if @do_neon allows it. |
| + * 3. Any leftover (< POLY1305_BLOCK_SIZE) is copied into dctx->buf. |
| + */ |
| +static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, u32 len, bool do_neon) |
| +{ |
| + if (unlikely(dctx->buflen)) { |
| + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, bytes); |
| + src += bytes; |
| + len -= bytes; |
| + dctx->buflen += bytes; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + arm_poly1305_blocks(dctx, dctx->buf, |
| + POLY1305_BLOCK_SIZE, 1, false); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(len >= POLY1305_BLOCK_SIZE)) { |
| + arm_poly1305_blocks(dctx, src, len, 1, do_neon); |
| + src += round_down(len, POLY1305_BLOCK_SIZE); |
| + len %= POLY1305_BLOCK_SIZE; |
| + } |
| + |
| + if (unlikely(len)) { |
| + dctx->buflen = len; |
| + memcpy(dctx->buf, src, len); |
| + } |
| +} |
| + |
| +/* |
| + * shash .update callback of the "poly1305-arm" driver: same as |
| + * arm_poly1305_do_update() with NEON disabled. Always returns 0. |
| + */ |
| +static int arm_poly1305_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + arm_poly1305_do_update(dctx, src, srclen, false); |
| + return 0; |
| +} |
| + |
| +/* |
| + * shash .update callback of the "poly1305-neon" driver. |
| + * |
| + * NEON is requested only when crypto_simd_usable() returns true and the |
| + * request is longer than 128 bytes. When it is requested and have_neon is |
| + * enabled, the whole update is bracketed by kernel_neon_begin() and |
| + * kernel_neon_end(). Always returns 0. |
| + * |
| + * Marked __maybe_unused because the only reference to it is inside the |
| + * CONFIG_KERNEL_MODE_NEON part of arm_poly1305_algs[]. |
| + */ |
| +static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, |
| + const u8 *src, |
| + unsigned int srclen) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + bool do_neon = crypto_simd_usable() && srclen > 128; |
| + |
| + if (static_branch_likely(&have_neon) && do_neon) |
| + kernel_neon_begin(); |
| + arm_poly1305_do_update(dctx, src, srclen, do_neon); |
| + if (static_branch_likely(&have_neon) && do_neon) |
| + kernel_neon_end(); |
| + return 0; |
| +} |
| + |
| +/* |
| + * Library entry point: feed @nbytes bytes of @src into the MAC. |
| + * |
| + * Same buffering scheme as arm_poly1305_do_update(), but the block routines |
| + * are called directly: rset/sset are never consulted, so the key must |
| + * already have been set with poly1305_init_arch(). |
| + * |
| + * A completed buffered block always goes through poly1305_blocks_arm(). |
| + * The bulk of the data uses poly1305_blocks_neon(), wrapped in |
| + * kernel_neon_begin()/kernel_neon_end(), when have_neon is enabled, the |
| + * kernel has CONFIG_KERNEL_MODE_NEON and crypto_simd_usable() is true. |
| + */ |
| +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + unsigned int nbytes) |
| +{ |
| + bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && |
| + crypto_simd_usable(); |
| + |
| + if (unlikely(dctx->buflen)) { |
| + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, bytes); |
| + src += bytes; |
| + nbytes -= bytes; |
| + dctx->buflen += bytes; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + poly1305_blocks_arm(&dctx->h, dctx->buf, |
| + POLY1305_BLOCK_SIZE, 1); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
| + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
| + |
| + if (static_branch_likely(&have_neon) && do_neon) { |
| + kernel_neon_begin(); |
| + poly1305_blocks_neon(&dctx->h, src, len, 1); |
| + kernel_neon_end(); |
| + } else { |
| + poly1305_blocks_arm(&dctx->h, src, len, 1); |
| + } |
| + src += len; |
| + nbytes %= POLY1305_BLOCK_SIZE; |
| + } |
| + |
| + /* stash the trailing partial block for the next call */ |
| + if (unlikely(nbytes)) { |
| + dctx->buflen = nbytes; |
| + memcpy(dctx->buf, src, nbytes); |
| + } |
| +} |
| +EXPORT_SYMBOL(poly1305_update_arch); |
| + |
| +/* |
| + * Library entry point: finish the MAC and write the 16-byte tag to @dst. |
| + * |
| + * A buffered partial block is terminated with a 1 byte, zero-filled up to |
| + * POLY1305_BLOCK_SIZE and hashed with hibit 0. poly1305_emit_arm() then |
| + * produces four little-endian words, taking dctx->s as its nonce argument. |
| + * The whole context is zeroed before returning. |
| + */ |
| +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| +{ |
| + __le32 digest[4]; |
| + u64 f = 0; |
| + |
| + if (unlikely(dctx->buflen)) { |
| + dctx->buf[dctx->buflen++] = 1; |
| + memset(dctx->buf + dctx->buflen, 0, |
| + POLY1305_BLOCK_SIZE - dctx->buflen); |
| + poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| + } |
| + |
| + poly1305_emit_arm(&dctx->h, digest, dctx->s); |
| + |
| + /* |
| + * NOTE(review): f only ever holds one 32-bit word plus the previous |
| + * (f >> 32), which is always 0 here, so the lines below copy digest[] |
| + * to dst unchanged. The addition of s presumably already happened |
| + * inside poly1305_emit_arm() -- confirm. |
| + */ |
| + /* mac = (h + s) % (2^128) */ |
| + f = (f >> 32) + le32_to_cpu(digest[0]); |
| + put_unaligned_le32(f, dst); |
| + f = (f >> 32) + le32_to_cpu(digest[1]); |
| + put_unaligned_le32(f, dst + 4); |
| + f = (f >> 32) + le32_to_cpu(digest[2]); |
| + put_unaligned_le32(f, dst + 8); |
| + f = (f >> 32) + le32_to_cpu(digest[3]); |
| + put_unaligned_le32(f, dst + 12); |
| + |
| + /* wipe key material and state */ |
| + *dctx = (struct poly1305_desc_ctx){}; |
| +} |
| +EXPORT_SYMBOL(poly1305_final_arch); |
| + |
| +/* |
| + * shash .final callback. |
| + * |
| + * Returns -ENOKEY if the s half of the key was never supplied |
| + * (dctx->sset is false); otherwise writes the tag via |
| + * poly1305_final_arch() and returns 0. |
| + */ |
| +static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (unlikely(!dctx->sset)) |
| + return -ENOKEY; |
| + |
| + poly1305_final_arch(dctx, dst); |
| + return 0; |
| +} |
| + |
| +/* |
| + * shash drivers provided by this module, both named "poly1305". |
| + * Entry 0 ("poly1305-arm", priority 150) never uses NEON. Entry 1 |
| + * ("poly1305-neon", priority 200) exists only with CONFIG_KERNEL_MODE_NEON |
| + * and differs solely in its .update callback. |
| + */ |
| +static struct shash_alg arm_poly1305_algs[] = {{ |
| + .init = arm_poly1305_init, |
| + .update = arm_poly1305_update, |
| + .final = arm_poly1305_final, |
| + .digestsize = POLY1305_DIGEST_SIZE, |
| + .descsize = sizeof(struct poly1305_desc_ctx), |
| + |
| + .base.cra_name = "poly1305", |
| + .base.cra_driver_name = "poly1305-arm", |
| + .base.cra_priority = 150, |
| + .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| +#ifdef CONFIG_KERNEL_MODE_NEON |
| +}, { |
| + .init = arm_poly1305_init, |
| + .update = arm_poly1305_update_neon, |
| + .final = arm_poly1305_final, |
| + .digestsize = POLY1305_DIGEST_SIZE, |
| + .descsize = sizeof(struct poly1305_desc_ctx), |
| + |
| + .base.cra_name = "poly1305", |
| + .base.cra_driver_name = "poly1305-neon", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| +#endif |
| +}}; |
| + |
| +/* |
| + * Module init. |
| + * |
| + * With CONFIG_KERNEL_MODE_NEON and HWCAP_NEON present: enable the have_neon |
| + * static key and register every entry of arm_poly1305_algs[]. Otherwise |
| + * register only entry 0. Returns the registration result. |
| + */ |
| +static int __init arm_poly1305_mod_init(void) |
| +{ |
| + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && |
| + (elf_hwcap & HWCAP_NEON)) |
| + static_branch_enable(&have_neon); |
| + else |
| + /* register only the first entry */ |
| + return crypto_register_shash(&arm_poly1305_algs[0]); |
| + |
| + return crypto_register_shashes(arm_poly1305_algs, |
| + ARRAY_SIZE(arm_poly1305_algs)); |
| +} |
| + |
| +/* |
| + * Module exit: undo arm_poly1305_mod_init(). have_neon tells which of the |
| + * two registration paths was taken, so only entry 0 is unregistered when |
| + * the key is off and the whole array otherwise. |
| + */ |
| +static void __exit arm_poly1305_mod_exit(void) |
| +{ |
| + if (!static_branch_likely(&have_neon)) { |
| + crypto_unregister_shash(&arm_poly1305_algs[0]); |
| + return; |
| + } |
| + crypto_unregister_shashes(arm_poly1305_algs, |
| + ARRAY_SIZE(arm_poly1305_algs)); |
| +} |
| + |
| +module_init(arm_poly1305_mod_init); |
| +module_exit(arm_poly1305_mod_exit); |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_ALIAS_CRYPTO("poly1305"); |
| +MODULE_ALIAS_CRYPTO("poly1305-arm"); |
| +MODULE_ALIAS_CRYPTO("poly1305-neon"); |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 9923445e8225..9bd15b227e78 100644 |
| |
| |
| @@ -40,7 +40,7 @@ config CRYPTO_LIB_DES |
| config CRYPTO_LIB_POLY1305_RSIZE |
| int |
| default 4 if X86_64 |
| - default 9 if ARM64 |
| + default 9 if ARM || ARM64 |
| default 1 |
| |
| config CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| -- |
| 2.18.2 |
| |
| |
| From d6a1701488249634f8dd62a3757dfe6119d2acf0 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:26 +0100 |
| Subject: [PATCH 020/100] crypto: mips/poly1305 - incorporate |
| OpenSSL/CRYPTOGAMS optimized implementation |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit a11d055e7a64ac34a5e99b6fe731299449cbcd58 upstream. |
| |
| This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for |
| MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been |
| contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken |
| straight from this upstream GitHub repository [0] at commit |
| d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes |
| required to build it as part of a Linux kernel module. |
| |
| [0] https://github.com/dot-asm/cryptogams |
| |
| Co-developed-by: Andy Polyakov <appro@cryptogams.org> |
| Signed-off-by: Andy Polyakov <appro@cryptogams.org> |
| Co-developed-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: René van Dorst <opensource@vdorst.com> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/mips/crypto/Makefile | 14 + |
| arch/mips/crypto/poly1305-glue.c | 203 +++++ |
| arch/mips/crypto/poly1305-mips.pl | 1273 +++++++++++++++++++++++++++++ |
| crypto/Kconfig | 5 + |
| lib/crypto/Kconfig | 1 + |
| 5 files changed, 1496 insertions(+) |
| create mode 100644 arch/mips/crypto/poly1305-glue.c |
| create mode 100644 arch/mips/crypto/poly1305-mips.pl |
| |
| diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile |
| index b528b9d300f1..8e1deaf00e0c 100644 |
| |
| |
| @@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o |
| obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o |
| chacha-mips-y := chacha-core.o chacha-glue.o |
| AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots |
| + |
| +# Poly1305: glue code plus a core object whose assembly source |
| +# (poly1305-core.S) is generated from poly1305-mips.pl by the rule below. |
| +obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o |
| +poly1305-mips-y := poly1305-core.o poly1305-glue.o |
| + |
| +# Flavour argument passed to the perl script: o32 with CONFIG_CPU_MIPS32, |
| +# 64 with CONFIG_CPU_MIPS64. |
| +perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 |
| +perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 |
| + |
| +# Runs: perl <script> <flavour> <output file> |
| +quiet_cmd_perlasm = PERLASM $@ |
| + cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) |
| + |
| +$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE |
| + $(call if_changed,perlasm) |
| + |
| +targets += poly1305-core.S |
| diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c |
| new file mode 100644 |
| index 000000000000..b759b6ccc361 |
| |
| |
| @@ -0,0 +1,203 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS |
| + * |
| + * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> |
| + */ |
| + |
| +#include <asm/unaligned.h> |
| +#include <crypto/algapi.h> |
| +#include <crypto/internal/hash.h> |
| +#include <crypto/internal/poly1305.h> |
| +#include <linux/cpufeature.h> |
| +#include <linux/crypto.h> |
| +#include <linux/module.h> |
| + |
| +asmlinkage void poly1305_init_mips(void *state, const u8 *key); |
| +asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); |
| +asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); |
| + |
| +/* |
| + * Library entry point: start a new MAC computation from a 32-byte key. |
| + * |
| + * @dctx: context to initialise |
| + * @key:  key bytes; bytes 0..15 are handed to poly1305_init_mips(), bytes |
| + *        16..31 are stored in dctx->s as four little-endian 32-bit words |
| + * |
| + * Also discards any buffered partial block (buflen = 0). |
| + */ |
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) |
| +{ |
| + poly1305_init_mips(&dctx->h, key); |
| + dctx->s[0] = get_unaligned_le32(key + 16); |
| + dctx->s[1] = get_unaligned_le32(key + 20); |
| + dctx->s[2] = get_unaligned_le32(key + 24); |
| + dctx->s[3] = get_unaligned_le32(key + 28); |
| + dctx->buflen = 0; |
| +} |
| +EXPORT_SYMBOL(poly1305_init_arch); |
| + |
| +/* |
| + * shash .init callback: reset the descriptor context. |
| + * |
| + * No key is available here; rset and sset are cleared so that |
| + * mips_poly1305_blocks() takes the key material from the first two blocks |
| + * of input. Always returns 0. |
| + */ |
| +static int mips_poly1305_init(struct shash_desc *desc) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + dctx->buflen = 0; |
| + dctx->rset = 0; |
| + dctx->sset = false; |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Consume whole blocks from @src on behalf of the shash interface. |
| + * |
| + * Until dctx->sset is true the leading blocks are key material rather than |
| + * message data: the first block goes to poly1305_init_mips() (rset is then |
| + * set), the next one is stored in dctx->s as four little-endian words |
| + * (sset is then set). Whatever is left is rounded down to a multiple of |
| + * POLY1305_BLOCK_SIZE and passed, with @hibit, to poly1305_blocks_mips(). |
| + * |
| + * NOTE(review): the rset branch subtracts a block from @len without checking |
| + * it first; the caller in this file only passes len >= POLY1305_BLOCK_SIZE. |
| + */ |
| +static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + u32 len, u32 hibit) |
| +{ |
| + if (unlikely(!dctx->sset)) { |
| + if (!dctx->rset) { |
| + poly1305_init_mips(&dctx->h, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = 1; |
| + } |
| + if (len >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + /* nothing but key material (or a partial block) was supplied */ |
| + if (len < POLY1305_BLOCK_SIZE) |
| + return; |
| + } |
| + |
| + /* drop any trailing partial block; the caller buffers it */ |
| + len &= ~(POLY1305_BLOCK_SIZE - 1); |
| + |
| + poly1305_blocks_mips(&dctx->h, src, len, hibit); |
| +} |
| + |
| +/* |
| + * shash .update callback: feed @len bytes of @src into the MAC. |
| + * |
| + * 1. If dctx->buf already holds data, top it up and process it once it |
| + *    reaches a full block. |
| + * 2. Process the whole blocks remaining in @src in one call. |
| + * 3. Copy any leftover (< POLY1305_BLOCK_SIZE) into dctx->buf. |
| + * |
| + * Always returns 0. |
| + */ |
| +static int mips_poly1305_update(struct shash_desc *desc, const u8 *src, |
| + unsigned int len) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (unlikely(dctx->buflen)) { |
| + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, bytes); |
| + src += bytes; |
| + len -= bytes; |
| + dctx->buflen += bytes; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(len >= POLY1305_BLOCK_SIZE)) { |
| + mips_poly1305_blocks(dctx, src, len, 1); |
| + src += round_down(len, POLY1305_BLOCK_SIZE); |
| + len %= POLY1305_BLOCK_SIZE; |
| + } |
| + |
| + if (unlikely(len)) { |
| + dctx->buflen = len; |
| + memcpy(dctx->buf, src, len); |
| + } |
| + return 0; |
| +} |
| + |
| +/* |
| + * Library entry point: feed @nbytes bytes of @src into the MAC. |
| + * |
| + * Same buffering scheme as mips_poly1305_update(), but |
| + * poly1305_blocks_mips() is called directly: rset/sset are never consulted, |
| + * so the key must already have been set with poly1305_init_arch(). |
| + */ |
| +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| + unsigned int nbytes) |
| +{ |
| + if (unlikely(dctx->buflen)) { |
| + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); |
| + |
| + memcpy(dctx->buf + dctx->buflen, src, bytes); |
| + src += bytes; |
| + nbytes -= bytes; |
| + dctx->buflen += bytes; |
| + |
| + if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| + poly1305_blocks_mips(&dctx->h, dctx->buf, |
| + POLY1305_BLOCK_SIZE, 1); |
| + dctx->buflen = 0; |
| + } |
| + } |
| + |
| + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
| + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
| + |
| + poly1305_blocks_mips(&dctx->h, src, len, 1); |
| + src += len; |
| + nbytes %= POLY1305_BLOCK_SIZE; |
| + } |
| + |
| + /* stash the trailing partial block for the next call */ |
| + if (unlikely(nbytes)) { |
| + dctx->buflen = nbytes; |
| + memcpy(dctx->buf, src, nbytes); |
| + } |
| +} |
| +EXPORT_SYMBOL(poly1305_update_arch); |
| + |
| +/* |
| + * Library entry point: finish the MAC and write the 16-byte tag to @dst. |
| + * |
| + * A buffered partial block is terminated with a 1 byte, zero-filled up to |
| + * POLY1305_BLOCK_SIZE and hashed with hibit 0. poly1305_emit_mips() then |
| + * produces four little-endian words, taking dctx->s as its nonce argument. |
| + * The whole context is zeroed before returning. |
| + */ |
| +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| +{ |
| + __le32 digest[4]; |
| + u64 f = 0; |
| + |
| + if (unlikely(dctx->buflen)) { |
| + dctx->buf[dctx->buflen++] = 1; |
| + memset(dctx->buf + dctx->buflen, 0, |
| + POLY1305_BLOCK_SIZE - dctx->buflen); |
| + poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| + } |
| + |
| + poly1305_emit_mips(&dctx->h, digest, dctx->s); |
| + |
| + /* |
| + * NOTE(review): f only ever holds one 32-bit word plus the previous |
| + * (f >> 32), which is always 0 here, so the lines below copy digest[] |
| + * to dst unchanged. The addition of s presumably already happened |
| + * inside poly1305_emit_mips() -- confirm. |
| + */ |
| + /* mac = (h + s) % (2^128) */ |
| + f = (f >> 32) + le32_to_cpu(digest[0]); |
| + put_unaligned_le32(f, dst); |
| + f = (f >> 32) + le32_to_cpu(digest[1]); |
| + put_unaligned_le32(f, dst + 4); |
| + f = (f >> 32) + le32_to_cpu(digest[2]); |
| + put_unaligned_le32(f, dst + 8); |
| + f = (f >> 32) + le32_to_cpu(digest[3]); |
| + put_unaligned_le32(f, dst + 12); |
| + |
| + /* wipe key material and state */ |
| + *dctx = (struct poly1305_desc_ctx){}; |
| +} |
| +EXPORT_SYMBOL(poly1305_final_arch); |
| + |
| +/* |
| + * shash .final callback. |
| + * |
| + * Returns -ENOKEY if the s half of the key was never supplied |
| + * (dctx->sset is false); otherwise writes the tag via |
| + * poly1305_final_arch() and returns 0. |
| + */ |
| +static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +{ |
| + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| + |
| + if (unlikely(!dctx->sset)) |
| + return -ENOKEY; |
| + |
| + poly1305_final_arch(dctx, dst); |
| + return 0; |
| +} |
| + |
| +/* |
| + * The single shash driver provided by this module: algorithm "poly1305", |
| + * driver name "poly1305-mips", priority 200. |
| + */ |
| +static struct shash_alg mips_poly1305_alg = { |
| + .init = mips_poly1305_init, |
| + .update = mips_poly1305_update, |
| + .final = mips_poly1305_final, |
| + .digestsize = POLY1305_DIGEST_SIZE, |
| + .descsize = sizeof(struct poly1305_desc_ctx), |
| + |
| + .base.cra_name = "poly1305", |
| + .base.cra_driver_name = "poly1305-mips", |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| +}; |
| + |
| +/* Module init: register mips_poly1305_alg and return the result. */ |
| +static int __init mips_poly1305_mod_init(void) |
| +{ |
| + return crypto_register_shash(&mips_poly1305_alg); |
| +} |
| + |
| +/* Module exit: unregister mips_poly1305_alg. */ |
| +static void __exit mips_poly1305_mod_exit(void) |
| +{ |
| + crypto_unregister_shash(&mips_poly1305_alg); |
| +} |
| + |
| +module_init(mips_poly1305_mod_init); |
| +module_exit(mips_poly1305_mod_exit); |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_ALIAS_CRYPTO("poly1305"); |
| +MODULE_ALIAS_CRYPTO("poly1305-mips"); |
| diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl |
| new file mode 100644 |
| index 000000000000..b05bab884ed2 |
| |
| |
| @@ -0,0 +1,1273 @@ |
| +#!/usr/bin/env perl |
| +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause |
| +# |
| +# ==================================================================== |
| +# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL |
| +# project. |
| +# ==================================================================== |
| + |
| +# Poly1305 hash for MIPS. |
| +# |
| +# May 2016 |
| +# |
| +# Numbers are cycles per processed byte with poly1305_blocks alone. |
| +# |
| +# IALU/gcc |
| +# R1x000 ~5.5/+130% (big-endian) |
| +# Octeon II 2.50/+70% (little-endian) |
| +# |
| +# March 2019 |
| +# |
| +# Add 32-bit code path. |
| +# |
| +# October 2019 |
| +# |
| +# Modulo-scheduling the reduction removes the dependency chain at the |
| +# end of the inner loop and improves performance. Also optimize MIPS32R2 |
| +# code path for MIPS 1004K core. Per René von Dorst's suggestions. |
| +# |
| +# IALU/gcc |
| +# R1x000 ~9.8/? (big-endian) |
| +# Octeon II 3.65/+140% (little-endian) |
| +# MT7621/1004K 4.75/? (little-endian) |
| +# |
| +###################################################################### |
| +# There are a number of MIPS ABIs in use; O32 and N32/64 are the most |
| +# widely used. Then there is a new contender: NUBI. It appears that if |
| +# one picks the latter, it's possible to arrange code in an ABI-neutral |
| +# manner. Therefore let's stick to the NUBI register layout: |
| +# |
| +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); |
| +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); |
| +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); |
| +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); |
| +# |
| +# The return value is placed in $a0. Following coding rules facilitate |
| +# interoperability: |
| +# |
| +# - never ever touch $tp, "thread pointer", former $gp [o32 can be |
| +# excluded from the rule, because it's specified volatile]; |
| +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting |
| +# old code]; |
| +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; |
| +# |
| +# For reference here is register layout for N32/64 MIPS ABIs: |
| +# |
| +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); |
| +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); |
| +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); |
| +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); |
| +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); |
| +# |
| +# <appro@openssl.org> |
| +# |
| +###################################################################### |
| + |
| +$flavour = shift || "64"; # first argument, default "64"; supported flavours are o32,n32,64,nubi32,nubi64 |
| + |
| +$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; # return-value register: $a0 for NUBI flavours, otherwise $t0 (mapped to $2 above) |
| + |
| +if ($flavour =~ /64|n32/i) {{{ |
| +###################################################################### |
| +# 64-bit code path |
| +# |
| + |
| +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); |
| +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); |
| + |
| +$code.=<<___; |
| +#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ |
| + defined(_MIPS_ARCH_MIPS64R6)) \\ |
| + && !defined(_MIPS_ARCH_MIPS64R2) |
| +# define _MIPS_ARCH_MIPS64R2 |
| +#endif |
| + |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| +# define dmultu(rs,rt) |
| +# define mflo(rd,rs,rt) dmulu rd,rs,rt |
| +# define mfhi(rd,rs,rt) dmuhu rd,rs,rt |
| +#else |
| +# define dmultu(rs,rt) dmultu rs,rt |
| +# define mflo(rd,rs,rt) mflo rd |
| +# define mfhi(rd,rs,rt) mfhi rd |
| +#endif |
| + |
| +#ifdef __KERNEL__ |
| +# define poly1305_init poly1305_init_mips |
| +# define poly1305_blocks poly1305_blocks_mips |
| +# define poly1305_emit poly1305_emit_mips |
| +#endif |
| + |
| +#if defined(__MIPSEB__) && !defined(MIPSEB) |
| +# define MIPSEB |
| +#endif |
| + |
| +#ifdef MIPSEB |
| +# define MSB 0 |
| +# define LSB 7 |
| +#else |
| +# define MSB 7 |
| +# define LSB 0 |
| +#endif |
| + |
| +.text |
| +.set noat |
| +.set noreorder |
| + |
| +.align 5 |
| +.globl poly1305_init |
| +.ent poly1305_init |
| +poly1305_init: |
| + .frame $sp,0,$ra |
| + .set reorder |
| + |
| + sd $zero,0($ctx) |
| + sd $zero,8($ctx) |
| + sd $zero,16($ctx) |
| + |
| + beqz $inp,.Lno_key |
| + |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + andi $tmp0,$inp,7 # $inp % 8 |
| + dsubu $inp,$inp,$tmp0 # align $inp |
| + sll $tmp0,$tmp0,3 # byte to bit offset |
| + ld $in0,0($inp) |
| + ld $in1,8($inp) |
| + beqz $tmp0,.Laligned_key |
| + ld $tmp2,16($inp) |
| + |
| + subu $tmp1,$zero,$tmp0 |
| +# ifdef MIPSEB |
| + dsllv $in0,$in0,$tmp0 |
| + dsrlv $tmp3,$in1,$tmp1 |
| + dsllv $in1,$in1,$tmp0 |
| + dsrlv $tmp2,$tmp2,$tmp1 |
| +# else |
| + dsrlv $in0,$in0,$tmp0 |
| + dsllv $tmp3,$in1,$tmp1 |
| + dsrlv $in1,$in1,$tmp0 |
| + dsllv $tmp2,$tmp2,$tmp1 |
| +# endif |
| + or $in0,$in0,$tmp3 |
| + or $in1,$in1,$tmp2 |
| +.Laligned_key: |
| +#else |
| + ldl $in0,0+MSB($inp) |
| + ldl $in1,8+MSB($inp) |
| + ldr $in0,0+LSB($inp) |
| + ldr $in1,8+LSB($inp) |
| +#endif |
| +#ifdef MIPSEB |
| +# if defined(_MIPS_ARCH_MIPS64R2) |
| + dsbh $in0,$in0 # byte swap |
| + dsbh $in1,$in1 |
| + dshd $in0,$in0 |
| + dshd $in1,$in1 |
| +# else |
| + ori $tmp0,$zero,0xFF |
| + dsll $tmp2,$tmp0,32 |
| + or $tmp0,$tmp2 # 0x000000FF000000FF |
| + |
| + and $tmp1,$in0,$tmp0 # byte swap |
| + and $tmp3,$in1,$tmp0 |
| + dsrl $tmp2,$in0,24 |
| + dsrl $tmp4,$in1,24 |
| + dsll $tmp1,24 |
| + dsll $tmp3,24 |
| + and $tmp2,$tmp0 |
| + and $tmp4,$tmp0 |
| + dsll $tmp0,8 # 0x0000FF000000FF00 |
| + or $tmp1,$tmp2 |
| + or $tmp3,$tmp4 |
| + and $tmp2,$in0,$tmp0 |
| + and $tmp4,$in1,$tmp0 |
| + dsrl $in0,8 |
| + dsrl $in1,8 |
| + dsll $tmp2,8 |
| + dsll $tmp4,8 |
| + and $in0,$tmp0 |
| + and $in1,$tmp0 |
| + or $tmp1,$tmp2 |
| + or $tmp3,$tmp4 |
| + or $in0,$tmp1 |
| + or $in1,$tmp3 |
| + dsrl $tmp1,$in0,32 |
| + dsrl $tmp3,$in1,32 |
| + dsll $in0,32 |
| + dsll $in1,32 |
| + or $in0,$tmp1 |
| + or $in1,$tmp3 |
| +# endif |
| +#endif |
| + li $tmp0,1 |
| + dsll $tmp0,32 # 0x0000000100000000 |
| + daddiu $tmp0,-63 # 0x00000000ffffffc1 |
| + dsll $tmp0,28 # 0x0ffffffc10000000 |
| + daddiu $tmp0,-1 # 0x0ffffffc0fffffff |
| + |
| + and $in0,$tmp0 |
| + daddiu $tmp0,-3 # 0x0ffffffc0ffffffc |
| + and $in1,$tmp0 |
| + |
| + sd $in0,24($ctx) |
| + dsrl $tmp0,$in1,2 |
| + sd $in1,32($ctx) |
| + daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) |
| + sd $tmp0,40($ctx) |
| + |
| +.Lno_key: |
| + li $v0,0 # return 0 |
| + jr $ra |
| +.end poly1305_init |
| +___ |
| +{ |
| +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; |
| + |
| +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = |
| + ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); |
| +my ($shr,$shl) = ($s6,$s7); # used on R6 |
| + |
| +$code.=<<___; |
| +.align 5 |
| +.globl poly1305_blocks |
| +.ent poly1305_blocks |
| +poly1305_blocks: |
| + .set noreorder |
| + dsrl $len,4 # number of complete blocks |
| + bnez $len,poly1305_blocks_internal |
| + nop |
| + jr $ra |
| + nop |
| +.end poly1305_blocks |
| + |
| +.align 5 |
| +.ent poly1305_blocks_internal |
| +poly1305_blocks_internal: |
| + .set noreorder |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + .frame $sp,8*8,$ra |
| + .mask $SAVED_REGS_MASK|0x000c0000,-8 |
| + dsubu $sp,8*8 |
| + sd $s7,56($sp) |
| + sd $s6,48($sp) |
| +#else |
| + .frame $sp,6*8,$ra |
| + .mask $SAVED_REGS_MASK,-8 |
| + dsubu $sp,6*8 |
| +#endif |
| + sd $s5,40($sp) |
| + sd $s4,32($sp) |
| +___ |
| +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue |
| + sd $s3,24($sp) |
| + sd $s2,16($sp) |
| + sd $s1,8($sp) |
| + sd $s0,0($sp) |
| +___ |
| +$code.=<<___; |
| + .set reorder |
| + |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + andi $shr,$inp,7 |
| + dsubu $inp,$inp,$shr # align $inp |
| + sll $shr,$shr,3 # byte to bit offset |
| + subu $shl,$zero,$shr |
| +#endif |
| + |
| + ld $h0,0($ctx) # load hash value |
| + ld $h1,8($ctx) |
| + ld $h2,16($ctx) |
| + |
| + ld $r0,24($ctx) # load key |
| + ld $r1,32($ctx) |
| + ld $rs1,40($ctx) |
| + |
| + dsll $len,4 |
| + daddu $len,$inp # end of buffer |
| + b .Loop |
| + |
| +.align 4 |
| +.Loop: |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + ld $in0,0($inp) # load input |
| + ld $in1,8($inp) |
| + beqz $shr,.Laligned_inp |
| + |
| + ld $tmp2,16($inp) |
| +# ifdef MIPSEB |
| + dsllv $in0,$in0,$shr |
| + dsrlv $tmp3,$in1,$shl |
| + dsllv $in1,$in1,$shr |
| + dsrlv $tmp2,$tmp2,$shl |
| +# else |
| + dsrlv $in0,$in0,$shr |
| + dsllv $tmp3,$in1,$shl |
| + dsrlv $in1,$in1,$shr |
| + dsllv $tmp2,$tmp2,$shl |
| +# endif |
| + or $in0,$in0,$tmp3 |
| + or $in1,$in1,$tmp2 |
| +.Laligned_inp: |
| +#else |
| + ldl $in0,0+MSB($inp) # load input |
| + ldl $in1,8+MSB($inp) |
| + ldr $in0,0+LSB($inp) |
| + ldr $in1,8+LSB($inp) |
| +#endif |
| + daddiu $inp,16 |
| +#ifdef MIPSEB |
| +# if defined(_MIPS_ARCH_MIPS64R2) |
| + dsbh $in0,$in0 # byte swap |
| + dsbh $in1,$in1 |
| + dshd $in0,$in0 |
| + dshd $in1,$in1 |
| +# else |
| + ori $tmp0,$zero,0xFF |
| + dsll $tmp2,$tmp0,32 |
| + or $tmp0,$tmp2 # 0x000000FF000000FF |
| + |
| + and $tmp1,$in0,$tmp0 # byte swap |
| + and $tmp3,$in1,$tmp0 |
| + dsrl $tmp2,$in0,24 |
| + dsrl $tmp4,$in1,24 |
| + dsll $tmp1,24 |
| + dsll $tmp3,24 |
| + and $tmp2,$tmp0 |
| + and $tmp4,$tmp0 |
| + dsll $tmp0,8 # 0x0000FF000000FF00 |
| + or $tmp1,$tmp2 |
| + or $tmp3,$tmp4 |
| + and $tmp2,$in0,$tmp0 |
| + and $tmp4,$in1,$tmp0 |
| + dsrl $in0,8 |
| + dsrl $in1,8 |
| + dsll $tmp2,8 |
| + dsll $tmp4,8 |
| + and $in0,$tmp0 |
| + and $in1,$tmp0 |
| + or $tmp1,$tmp2 |
| + or $tmp3,$tmp4 |
| + or $in0,$tmp1 |
| + or $in1,$tmp3 |
| + dsrl $tmp1,$in0,32 |
| + dsrl $tmp3,$in1,32 |
| + dsll $in0,32 |
| + dsll $in1,32 |
| + or $in0,$tmp1 |
| + or $in1,$tmp3 |
| +# endif |
| +#endif |
| + dsrl $tmp1,$h2,2 # modulo-scheduled reduction |
| + andi $h2,$h2,3 |
| + dsll $tmp0,$tmp1,2 |
| + |
| + daddu $d0,$h0,$in0 # accumulate input |
| + daddu $tmp1,$tmp0 |
| + sltu $tmp0,$d0,$h0 |
| + daddu $d0,$d0,$tmp1 # ... and residue |
| + sltu $tmp1,$d0,$tmp1 |
| + daddu $d1,$h1,$in1 |
| + daddu $tmp0,$tmp1 |
| + sltu $tmp1,$d1,$h1 |
| + daddu $d1,$tmp0 |
| + |
| + dmultu ($r0,$d0) # h0*r0 |
| + daddu $d2,$h2,$padbit |
| + sltu $tmp0,$d1,$tmp0 |
| + mflo ($h0,$r0,$d0) |
| + mfhi ($h1,$r0,$d0) |
| + |
| + dmultu ($rs1,$d1) # h1*5*r1 |
| + daddu $d2,$tmp1 |
| + daddu $d2,$tmp0 |
| + mflo ($tmp0,$rs1,$d1) |
| + mfhi ($tmp1,$rs1,$d1) |
| + |
| + dmultu ($r1,$d0) # h0*r1 |
| + mflo ($tmp2,$r1,$d0) |
| + mfhi ($h2,$r1,$d0) |
| + daddu $h0,$tmp0 |
| + daddu $h1,$tmp1 |
| + sltu $tmp0,$h0,$tmp0 |
| + |
| + dmultu ($r0,$d1) # h1*r0 |
| + daddu $h1,$tmp0 |
| + daddu $h1,$tmp2 |
| + mflo ($tmp0,$r0,$d1) |
| + mfhi ($tmp1,$r0,$d1) |
| + |
| + dmultu ($rs1,$d2) # h2*5*r1 |
| + sltu $tmp2,$h1,$tmp2 |
| + daddu $h2,$tmp2 |
| + mflo ($tmp2,$rs1,$d2) |
| + |
| + dmultu ($r0,$d2) # h2*r0 |
| + daddu $h1,$tmp0 |
| + daddu $h2,$tmp1 |
| + mflo ($tmp3,$r0,$d2) |
| + sltu $tmp0,$h1,$tmp0 |
| + daddu $h2,$tmp0 |
| + |
| + daddu $h1,$tmp2 |
| + sltu $tmp2,$h1,$tmp2 |
| + daddu $h2,$tmp2 |
| + daddu $h2,$tmp3 |
| + |
| + bne $inp,$len,.Loop |
| + |
| + sd $h0,0($ctx) # store hash value |
| + sd $h1,8($ctx) |
| + sd $h2,16($ctx) |
| + |
| + .set noreorder |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + ld $s7,56($sp) |
| + ld $s6,48($sp) |
| +#endif |
| + ld $s5,40($sp) # epilogue |
| + ld $s4,32($sp) |
| +___ |
| +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue |
| + ld $s3,24($sp) |
| + ld $s2,16($sp) |
| + ld $s1,8($sp) |
| + ld $s0,0($sp) |
| +___ |
| +$code.=<<___; |
| + jr $ra |
| +#if defined(_MIPS_ARCH_MIPS64R6) |
| + daddu $sp,8*8 |
| +#else |
| + daddu $sp,6*8 |
| +#endif |
| +.end poly1305_blocks_internal |
| +___ |
| +} |
| +{ |
| +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); |
| + |
| +$code.=<<___; |
| +.align 5 |
| +.globl poly1305_emit |
| +.ent poly1305_emit |
| +poly1305_emit: |
| + .frame $sp,0,$ra |
| + .set reorder |
| + |
| + ld $tmp2,16($ctx) |
| + ld $tmp0,0($ctx) |
| + ld $tmp1,8($ctx) |
| + |
| + li $in0,-4 # final reduction |
| + dsrl $in1,$tmp2,2 |
| + and $in0,$tmp2 |
| + andi $tmp2,$tmp2,3 |
| + daddu $in0,$in1 |
| + |
| + daddu $tmp0,$tmp0,$in0 |
| + sltu $in1,$tmp0,$in0 |
| + daddiu $in0,$tmp0,5 # compare to modulus |
| + daddu $tmp1,$tmp1,$in1 |
| + sltiu $tmp3,$in0,5 |
| + sltu $tmp4,$tmp1,$in1 |
| + daddu $in1,$tmp1,$tmp3 |
| + daddu $tmp2,$tmp2,$tmp4 |
| + sltu $tmp3,$in1,$tmp3 |
| + daddu $tmp2,$tmp2,$tmp3 |
| + |
| + dsrl $tmp2,2 # see if it carried/borrowed |
| + dsubu $tmp2,$zero,$tmp2 |
| + |
| + xor $in0,$tmp0 |
| + xor $in1,$tmp1 |
| + and $in0,$tmp2 |
| + and $in1,$tmp2 |
| + xor $in0,$tmp0 |
| + xor $in1,$tmp1 |
| + |
| + lwu $tmp0,0($nonce) # load nonce |
| + lwu $tmp1,4($nonce) |
| + lwu $tmp2,8($nonce) |
| + lwu $tmp3,12($nonce) |
| + dsll $tmp1,32 |
| + dsll $tmp3,32 |
| + or $tmp0,$tmp1 |
| + or $tmp2,$tmp3 |
| + |
| + daddu $in0,$tmp0 # accumulate nonce |
| + daddu $in1,$tmp2 |
| + sltu $tmp0,$in0,$tmp0 |
| + daddu $in1,$tmp0 |
| + |
| + dsrl $tmp0,$in0,8 # write mac value |
| + dsrl $tmp1,$in0,16 |
| + dsrl $tmp2,$in0,24 |
| + sb $in0,0($mac) |
| + dsrl $tmp3,$in0,32 |
| + sb $tmp0,1($mac) |
| + dsrl $tmp0,$in0,40 |
| + sb $tmp1,2($mac) |
| + dsrl $tmp1,$in0,48 |
| + sb $tmp2,3($mac) |
| + dsrl $tmp2,$in0,56 |
| + sb $tmp3,4($mac) |
| + dsrl $tmp3,$in1,8 |
| + sb $tmp0,5($mac) |
| + dsrl $tmp0,$in1,16 |
| + sb $tmp1,6($mac) |
| + dsrl $tmp1,$in1,24 |
| + sb $tmp2,7($mac) |
| + |
| + sb $in1,8($mac) |
| + dsrl $tmp2,$in1,32 |
| + sb $tmp3,9($mac) |
| + dsrl $tmp3,$in1,40 |
| + sb $tmp0,10($mac) |
| + dsrl $tmp0,$in1,48 |
| + sb $tmp1,11($mac) |
| + dsrl $tmp1,$in1,56 |
| + sb $tmp2,12($mac) |
| + sb $tmp3,13($mac) |
| + sb $tmp0,14($mac) |
| + sb $tmp1,15($mac) |
| + |
| + jr $ra |
| +.end poly1305_emit |
| +.rdata |
| +.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" |
| +.align 2 |
| +___ |
| +} |
| +}}} else {{{ |
| +###################################################################### |
| +# 32-bit code path |
| +# |
| + |
| +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); |
| +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = |
| + ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); |
| + |
| +$code.=<<___; |
| +#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ |
| + defined(_MIPS_ARCH_MIPS32R6)) \\ |
| + && !defined(_MIPS_ARCH_MIPS32R2) |
| +# define _MIPS_ARCH_MIPS32R2 |
| +#endif |
| + |
| +#if defined(_MIPS_ARCH_MIPS32R6) |
| +# define multu(rs,rt) |
| +# define mflo(rd,rs,rt) mulu rd,rs,rt |
| +# define mfhi(rd,rs,rt) muhu rd,rs,rt |
| +#else |
| +# define multu(rs,rt) multu rs,rt |
| +# define mflo(rd,rs,rt) mflo rd |
| +# define mfhi(rd,rs,rt) mfhi rd |
| +#endif |
| + |
| +#ifdef __KERNEL__ |
| +# define poly1305_init poly1305_init_mips |
| +# define poly1305_blocks poly1305_blocks_mips |
| +# define poly1305_emit poly1305_emit_mips |
| +#endif |
| + |
| +#if defined(__MIPSEB__) && !defined(MIPSEB) |
| +# define MIPSEB |
| +#endif |
| + |
| +#ifdef MIPSEB |
| +# define MSB 0 |
| +# define LSB 3 |
| +#else |
| +# define MSB 3 |
| +# define LSB 0 |
| +#endif |
| + |
| +.text |
| +.set noat |
| +.set noreorder |
| + |
| +.align 5 |
| +.globl poly1305_init |
| +.ent poly1305_init |
| +poly1305_init: |
| + .frame $sp,0,$ra |
| + .set reorder |
| + |
| + sw $zero,0($ctx) |
| + sw $zero,4($ctx) |
| + sw $zero,8($ctx) |
| + sw $zero,12($ctx) |
| + sw $zero,16($ctx) |
| + |
| + beqz $inp,.Lno_key |
| + |
| +#if defined(_MIPS_ARCH_MIPS32R6) |
| + andi $tmp0,$inp,3 # $inp % 4 |
| + subu $inp,$inp,$tmp0 # align $inp |
| + sll $tmp0,$tmp0,3 # byte to bit offset |
| + lw $in0,0($inp) |
| + lw $in1,4($inp) |
| + lw $in2,8($inp) |
| + lw $in3,12($inp) |
| + beqz $tmp0,.Laligned_key |
| + |
| + lw $tmp2,16($inp) |
| + subu $tmp1,$zero,$tmp0 |
| +# ifdef MIPSEB |
| + sllv $in0,$in0,$tmp0 |
| + srlv $tmp3,$in1,$tmp1 |
| + sllv $in1,$in1,$tmp0 |
| + or $in0,$in0,$tmp3 |
| + srlv $tmp3,$in2,$tmp1 |
| + sllv $in2,$in2,$tmp0 |
| + or $in1,$in1,$tmp3 |
| + srlv $tmp3,$in3,$tmp1 |
| + sllv $in3,$in3,$tmp0 |
| + or $in2,$in2,$tmp3 |
| + srlv $tmp2,$tmp2,$tmp1 |
| + or $in3,$in3,$tmp2 |
| +# else |
| + srlv $in0,$in0,$tmp0 |
| + sllv $tmp3,$in1,$tmp1 |
| + srlv $in1,$in1,$tmp0 |
| + or $in0,$in0,$tmp3 |
| + sllv $tmp3,$in2,$tmp1 |
| + srlv $in2,$in2,$tmp0 |
| + or $in1,$in1,$tmp3 |
| + sllv $tmp3,$in3,$tmp1 |
| + srlv $in3,$in3,$tmp0 |
| + or $in2,$in2,$tmp3 |
| + sllv $tmp2,$tmp2,$tmp1 |
| + or $in3,$in3,$tmp2 |
| +# endif |
| +.Laligned_key: |
| +#else |
| + lwl $in0,0+MSB($inp) |
| + lwl $in1,4+MSB($inp) |
| + lwl $in2,8+MSB($inp) |
| + lwl $in3,12+MSB($inp) |
| + lwr $in0,0+LSB($inp) |
| + lwr $in1,4+LSB($inp) |
| + lwr $in2,8+LSB($inp) |
| + lwr $in3,12+LSB($inp) |
| +#endif |
| +#ifdef MIPSEB |
| +# if defined(_MIPS_ARCH_MIPS32R2) |
| + wsbh $in0,$in0 # byte swap |
| + wsbh $in1,$in1 |
| + wsbh $in2,$in2 |
| + wsbh $in3,$in3 |
| + rotr $in0,$in0,16 |
| + rotr $in1,$in1,16 |
| + rotr $in2,$in2,16 |
| + rotr $in3,$in3,16 |
| +# else |
| + srl $tmp0,$in0,24 # byte swap |
| + srl $tmp1,$in0,8 |
| + andi $tmp2,$in0,0xFF00 |
| + sll $in0,$in0,24 |
| + andi $tmp1,0xFF00 |
| + sll $tmp2,$tmp2,8 |
| + or $in0,$tmp0 |
| + srl $tmp0,$in1,24 |
| + or $tmp1,$tmp2 |
| + srl $tmp2,$in1,8 |
| + or $in0,$tmp1 |
| + andi $tmp1,$in1,0xFF00 |
| + sll $in1,$in1,24 |
| + andi $tmp2,0xFF00 |
| + sll $tmp1,$tmp1,8 |
| + or $in1,$tmp0 |
| + srl $tmp0,$in2,24 |
| + or $tmp2,$tmp1 |
| + srl $tmp1,$in2,8 |
| + or $in1,$tmp2 |
| + andi $tmp2,$in2,0xFF00 |
| + sll $in2,$in2,24 |
| + andi $tmp1,0xFF00 |
| + sll $tmp2,$tmp2,8 |
| + or $in2,$tmp0 |
| + srl $tmp0,$in3,24 |
| + or $tmp1,$tmp2 |
| + srl $tmp2,$in3,8 |
| + or $in2,$tmp1 |
| + andi $tmp1,$in3,0xFF00 |
| + sll $in3,$in3,24 |
| + andi $tmp2,0xFF00 |
| + sll $tmp1,$tmp1,8 |
| + or $in3,$tmp0 |
| + or $tmp2,$tmp1 |
| + or $in3,$tmp2 |
| +# endif |
| +#endif |
| + lui $tmp0,0x0fff |
| + ori $tmp0,0xffff # 0x0fffffff |
| + and $in0,$in0,$tmp0 |
| + subu $tmp0,3 # 0x0ffffffc |
| + and $in1,$in1,$tmp0 |
| + and $in2,$in2,$tmp0 |
| + and $in3,$in3,$tmp0 |
| + |
| + sw $in0,20($ctx) |
| + sw $in1,24($ctx) |
| + sw $in2,28($ctx) |
| + sw $in3,32($ctx) |
| + |
| + srl $tmp1,$in1,2 |
| + srl $tmp2,$in2,2 |
| + srl $tmp3,$in3,2 |
| + addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) |
| + addu $in2,$in2,$tmp2 |
| + addu $in3,$in3,$tmp3 |
| + sw $in1,36($ctx) |
| + sw $in2,40($ctx) |
| + sw $in3,44($ctx) |
| +.Lno_key: |
| + li $v0,0 |
| + jr $ra |
| +.end poly1305_init |
| +___ |
| +{ |
| +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000"; |
| + |
| +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = |
| + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); |
| +my ($d0,$d1,$d2,$d3) = |
| + ($a4,$a5,$a6,$a7); |
| +my $shr = $t2; # used on R6 |
| +my $one = $t2; # used on R2 |
| + |
| +$code.=<<___; |
| +.globl poly1305_blocks |
| +.align 5 |
| +.ent poly1305_blocks |
| +poly1305_blocks: |
| + .frame $sp,16*4,$ra |
| + .mask $SAVED_REGS_MASK,-4 |
| + .set noreorder |
| + subu $sp, $sp,4*12 |
| + sw $s11,4*11($sp) |
| + sw $s10,4*10($sp) |
| + sw $s9, 4*9($sp) |
| + sw $s8, 4*8($sp) |
| + sw $s7, 4*7($sp) |
| + sw $s6, 4*6($sp) |
| + sw $s5, 4*5($sp) |
| + sw $s4, 4*4($sp) |
| +___ |
| +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue |
| + sw $s3, 4*3($sp) |
| + sw $s2, 4*2($sp) |
| + sw $s1, 4*1($sp) |
| + sw $s0, 4*0($sp) |
| +___ |
| +$code.=<<___; |
| + .set reorder |
| + |
| + srl $len,4 # number of complete blocks |
| + li $one,1 |
| + beqz $len,.Labort |
| + |
| +#if defined(_MIPS_ARCH_MIPS32R6) |
| + andi $shr,$inp,3 |
| + subu $inp,$inp,$shr # align $inp |
| + sll $shr,$shr,3 # byte to bit offset |
| +#endif |
| + |
| + lw $h0,0($ctx) # load hash value |
| + lw $h1,4($ctx) |
| + lw $h2,8($ctx) |
| + lw $h3,12($ctx) |
| + lw $h4,16($ctx) |
| + |
| + lw $r0,20($ctx) # load key |
| + lw $r1,24($ctx) |
| + lw $r2,28($ctx) |
| + lw $r3,32($ctx) |
| + lw $rs1,36($ctx) |
| + lw $rs2,40($ctx) |
| + lw $rs3,44($ctx) |
| + |
| + sll $len,4 |
| + addu $len,$len,$inp # end of buffer |
| + b .Loop |
| + |
| +.align 4 |
| +.Loop: |
| +#if defined(_MIPS_ARCH_MIPS32R6) |
| + lw $d0,0($inp) # load input |
| + lw $d1,4($inp) |
| + lw $d2,8($inp) |
| + lw $d3,12($inp) |
| + beqz $shr,.Laligned_inp |
| + |
| + lw $t0,16($inp) |
| + subu $t1,$zero,$shr |
| +# ifdef MIPSEB |
| + sllv $d0,$d0,$shr |
| + srlv $at,$d1,$t1 |
| + sllv $d1,$d1,$shr |
| + or $d0,$d0,$at |
| + srlv $at,$d2,$t1 |
| + sllv $d2,$d2,$shr |
| + or $d1,$d1,$at |
| + srlv $at,$d3,$t1 |
| + sllv $d3,$d3,$shr |
| + or $d2,$d2,$at |
| + srlv $t0,$t0,$t1 |
| + or $d3,$d3,$t0 |
| +# else |
| + srlv $d0,$d0,$shr |
| + sllv $at,$d1,$t1 |
| + srlv $d1,$d1,$shr |
| + or $d0,$d0,$at |
| + sllv $at,$d2,$t1 |
| + srlv $d2,$d2,$shr |
| + or $d1,$d1,$at |
| + sllv $at,$d3,$t1 |
| + srlv $d3,$d3,$shr |
| + or $d2,$d2,$at |
| + sllv $t0,$t0,$t1 |
| + or $d3,$d3,$t0 |
| +# endif |
| +.Laligned_inp: |
| +#else |
| + lwl $d0,0+MSB($inp) # load input |
| + lwl $d1,4+MSB($inp) |
| + lwl $d2,8+MSB($inp) |
| + lwl $d3,12+MSB($inp) |
| + lwr $d0,0+LSB($inp) |
| + lwr $d1,4+LSB($inp) |
| + lwr $d2,8+LSB($inp) |
| + lwr $d3,12+LSB($inp) |
| +#endif |
| +#ifdef MIPSEB |
| +# if defined(_MIPS_ARCH_MIPS32R2) |
| + wsbh $d0,$d0 # byte swap |
| + wsbh $d1,$d1 |
| + wsbh $d2,$d2 |
| + wsbh $d3,$d3 |
| + rotr $d0,$d0,16 |
| + rotr $d1,$d1,16 |
| + rotr $d2,$d2,16 |
| + rotr $d3,$d3,16 |
| +# else |
| + srl $at,$d0,24 # byte swap |
| + srl $t0,$d0,8 |
| + andi $t1,$d0,0xFF00 |
| + sll $d0,$d0,24 |
| + andi $t0,0xFF00 |
| + sll $t1,$t1,8 |
| + or $d0,$at |
| + srl $at,$d1,24 |
| + or $t0,$t1 |
| + srl $t1,$d1,8 |
| + or $d0,$t0 |
| + andi $t0,$d1,0xFF00 |
| + sll $d1,$d1,24 |
| + andi $t1,0xFF00 |
| + sll $t0,$t0,8 |
| + or $d1,$at |
| + srl $at,$d2,24 |
| + or $t1,$t0 |
| + srl $t0,$d2,8 |
| + or $d1,$t1 |
| + andi $t1,$d2,0xFF00 |
| + sll $d2,$d2,24 |
| + andi $t0,0xFF00 |
| + sll $t1,$t1,8 |
| + or $d2,$at |
| + srl $at,$d3,24 |
| + or $t0,$t1 |
| + srl $t1,$d3,8 |
| + or $d2,$t0 |
| + andi $t0,$d3,0xFF00 |
| + sll $d3,$d3,24 |
| + andi $t1,0xFF00 |
| + sll $t0,$t0,8 |
| + or $d3,$at |
| + or $t1,$t0 |
| + or $d3,$t1 |
| +# endif |
| +#endif |
| + srl $t0,$h4,2 # modulo-scheduled reduction |
| + andi $h4,$h4,3 |
| + sll $at,$t0,2 |
| + |
| + addu $d0,$d0,$h0 # accumulate input |
| + addu $t0,$t0,$at |
| + sltu $h0,$d0,$h0 |
| + addu $d0,$d0,$t0 # ... and residue |
| + sltu $at,$d0,$t0 |
| + |
| + addu $d1,$d1,$h1 |
| + addu $h0,$h0,$at # carry |
| + sltu $h1,$d1,$h1 |
| + addu $d1,$d1,$h0 |
| + sltu $h0,$d1,$h0 |
| + |
| + addu $d2,$d2,$h2 |
| + addu $h1,$h1,$h0 # carry |
| + sltu $h2,$d2,$h2 |
| + addu $d2,$d2,$h1 |
| + sltu $h1,$d2,$h1 |
| + |
| + addu $d3,$d3,$h3 |
| + addu $h2,$h2,$h1 # carry |
| + sltu $h3,$d3,$h3 |
| + addu $d3,$d3,$h2 |
| + |
| +#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) |
| + multu $r0,$d0 # d0*r0 |
| + sltu $h2,$d3,$h2 |
| + maddu $rs3,$d1 # d1*s3 |
| + addu $h3,$h3,$h2 # carry |
| + maddu $rs2,$d2 # d2*s2 |
| + addu $h4,$h4,$padbit |
| + maddu $rs1,$d3 # d3*s1 |
| + addu $h4,$h4,$h3 |
| + mfhi $at |
| + mflo $h0 |
| + |
| + multu $r1,$d0 # d0*r1 |
| + maddu $r0,$d1 # d1*r0 |
| + maddu $rs3,$d2 # d2*s3 |
| + maddu $rs2,$d3 # d3*s2 |
| + maddu $rs1,$h4 # h4*s1 |
| + maddu $at,$one # hi*1 |
| + mfhi $at |
| + mflo $h1 |
| + |
| + multu $r2,$d0 # d0*r2 |
| + maddu $r1,$d1 # d1*r1 |
| + maddu $r0,$d2 # d2*r0 |
| + maddu $rs3,$d3 # d3*s3 |
| + maddu $rs2,$h4 # h4*s2 |
| + maddu $at,$one # hi*1 |
| + mfhi $at |
| + mflo $h2 |
| + |
| + mul $t0,$r0,$h4 # h4*r0 |
| + |
| + multu $r3,$d0 # d0*r3 |
| + maddu $r2,$d1 # d1*r2 |
| + maddu $r1,$d2 # d2*r1 |
| + maddu $r0,$d3 # d3*r0 |
| + maddu $rs3,$h4 # h4*s3 |
| + maddu $at,$one # hi*1 |
| + mfhi $at |
| + mflo $h3 |
| + |
| + addiu $inp,$inp,16 |
| + |
| + addu $h4,$t0,$at |
| +#else |
| + multu ($r0,$d0) # d0*r0 |
| + mflo ($h0,$r0,$d0) |
| + mfhi ($h1,$r0,$d0) |
| + |
| + sltu $h2,$d3,$h2 |
| + addu $h3,$h3,$h2 # carry |
| + |
| + multu ($rs3,$d1) # d1*s3 |
| + mflo ($at,$rs3,$d1) |
| + mfhi ($t0,$rs3,$d1) |
| + |
| + addu $h4,$h4,$padbit |
| + addiu $inp,$inp,16 |
| + addu $h4,$h4,$h3 |
| + |
| + multu ($rs2,$d2) # d2*s2 |
| + mflo ($a3,$rs2,$d2) |
| + mfhi ($t1,$rs2,$d2) |
| + addu $h0,$h0,$at |
| + addu $h1,$h1,$t0 |
| + multu ($rs1,$d3) # d3*s1 |
| + sltu $at,$h0,$at |
| + addu $h1,$h1,$at |
| + |
| + mflo ($at,$rs1,$d3) |
| + mfhi ($t0,$rs1,$d3) |
| + addu $h0,$h0,$a3 |
| + addu $h1,$h1,$t1 |
| + multu ($r1,$d0) # d0*r1 |
| + sltu $a3,$h0,$a3 |
| + addu $h1,$h1,$a3 |
| + |
| + |
| + mflo ($a3,$r1,$d0) |
| + mfhi ($h2,$r1,$d0) |
| + addu $h0,$h0,$at |
| + addu $h1,$h1,$t0 |
| + multu ($r0,$d1) # d1*r0 |
| + sltu $at,$h0,$at |
| + addu $h1,$h1,$at |
| + |
| + mflo ($at,$r0,$d1) |
| + mfhi ($t0,$r0,$d1) |
| + addu $h1,$h1,$a3 |
| + sltu $a3,$h1,$a3 |
| + multu ($rs3,$d2) # d2*s3 |
| + addu $h2,$h2,$a3 |
| + |
| + mflo ($a3,$rs3,$d2) |
| + mfhi ($t1,$rs3,$d2) |
| + addu $h1,$h1,$at |
| + addu $h2,$h2,$t0 |
| + multu ($rs2,$d3) # d3*s2 |
| + sltu $at,$h1,$at |
| + addu $h2,$h2,$at |
| + |
| + mflo ($at,$rs2,$d3) |
| + mfhi ($t0,$rs2,$d3) |
| + addu $h1,$h1,$a3 |
| + addu $h2,$h2,$t1 |
| + multu ($rs1,$h4) # h4*s1 |
| + sltu $a3,$h1,$a3 |
| + addu $h2,$h2,$a3 |
| + |
| + mflo ($a3,$rs1,$h4) |
| + addu $h1,$h1,$at |
| + addu $h2,$h2,$t0 |
| + multu ($r2,$d0) # d0*r2 |
| + sltu $at,$h1,$at |
| + addu $h2,$h2,$at |
| + |
| + |
| + mflo ($at,$r2,$d0) |
| + mfhi ($h3,$r2,$d0) |
| + addu $h1,$h1,$a3 |
| + sltu $a3,$h1,$a3 |
| + multu ($r1,$d1) # d1*r1 |
| + addu $h2,$h2,$a3 |
| + |
| + mflo ($a3,$r1,$d1) |
| + mfhi ($t1,$r1,$d1) |
| + addu $h2,$h2,$at |
| + sltu $at,$h2,$at |
| + multu ($r0,$d2) # d2*r0 |
| + addu $h3,$h3,$at |
| + |
| + mflo ($at,$r0,$d2) |
| + mfhi ($t0,$r0,$d2) |
| + addu $h2,$h2,$a3 |
| + addu $h3,$h3,$t1 |
| + multu ($rs3,$d3) # d3*s3 |
| + sltu $a3,$h2,$a3 |
| + addu $h3,$h3,$a3 |
| + |
| + mflo ($a3,$rs3,$d3) |
| + mfhi ($t1,$rs3,$d3) |
| + addu $h2,$h2,$at |
| + addu $h3,$h3,$t0 |
| + multu ($rs2,$h4) # h4*s2 |
| + sltu $at,$h2,$at |
| + addu $h3,$h3,$at |
| + |
| + mflo ($at,$rs2,$h4) |
| + addu $h2,$h2,$a3 |
| + addu $h3,$h3,$t1 |
| + multu ($r3,$d0) # d0*r3 |
| + sltu $a3,$h2,$a3 |
| + addu $h3,$h3,$a3 |
| + |
| + |
| + mflo ($a3,$r3,$d0) |
| + mfhi ($t1,$r3,$d0) |
| + addu $h2,$h2,$at |
| + sltu $at,$h2,$at |
| + multu ($r2,$d1) # d1*r2 |
| + addu $h3,$h3,$at |
| + |
| + mflo ($at,$r2,$d1) |
| + mfhi ($t0,$r2,$d1) |
| + addu $h3,$h3,$a3 |
| + sltu $a3,$h3,$a3 |
| + multu ($r0,$d3) # d3*r0 |
| + addu $t1,$t1,$a3 |
| + |
| + mflo ($a3,$r0,$d3) |
| + mfhi ($d3,$r0,$d3) |
| + addu $h3,$h3,$at |
| + addu $t1,$t1,$t0 |
| + multu ($r1,$d2) # d2*r1 |
| + sltu $at,$h3,$at |
| + addu $t1,$t1,$at |
| + |
| + mflo ($at,$r1,$d2) |
| + mfhi ($t0,$r1,$d2) |
| + addu $h3,$h3,$a3 |
| + addu $t1,$t1,$d3 |
| + multu ($rs3,$h4) # h4*s3 |
| + sltu $a3,$h3,$a3 |
| + addu $t1,$t1,$a3 |
| + |
| + mflo ($a3,$rs3,$h4) |
| + addu $h3,$h3,$at |
| + addu $t1,$t1,$t0 |
| + multu ($r0,$h4) # h4*r0 |
| + sltu $at,$h3,$at |
| + addu $t1,$t1,$at |
| + |
| + |
| + mflo ($h4,$r0,$h4) |
| + addu $h3,$h3,$a3 |
| + sltu $a3,$h3,$a3 |
| + addu $t1,$t1,$a3 |
| + addu $h4,$h4,$t1 |
| + |
| + li $padbit,1 # if we loop, padbit is 1 |
| +#endif |
| + bne $inp,$len,.Loop |
| + |
| + sw $h0,0($ctx) # store hash value |
| + sw $h1,4($ctx) |
| + sw $h2,8($ctx) |
| + sw $h3,12($ctx) |
| + sw $h4,16($ctx) |
| + |
| + .set noreorder |
| +.Labort: |
| + lw $s11,4*11($sp) |
| + lw $s10,4*10($sp) |
| + lw $s9, 4*9($sp) |
| + lw $s8, 4*8($sp) |
| + lw $s7, 4*7($sp) |
| + lw $s6, 4*6($sp) |
| + lw $s5, 4*5($sp) |
| + lw $s4, 4*4($sp) |
| +___ |
| +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue |
| + lw $s3, 4*3($sp) |
| + lw $s2, 4*2($sp) |
| + lw $s1, 4*1($sp) |
| + lw $s0, 4*0($sp) |
| +___ |
| +$code.=<<___; |
| + jr $ra |
| + addu $sp,$sp,4*12 |
| +.end poly1305_blocks |
| +___ |
| +} |
| +{ |
| +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); |
| + |
| +$code.=<<___; |
| +.align 5 |
| +.globl poly1305_emit |
| +.ent poly1305_emit |
| +poly1305_emit: |
| + .frame $sp,0,$ra |
| + .set reorder |
| + |
| + lw $tmp4,16($ctx) |
| + lw $tmp0,0($ctx) |
| + lw $tmp1,4($ctx) |
| + lw $tmp2,8($ctx) |
| + lw $tmp3,12($ctx) |
| + |
| + li $in0,-4 # final reduction |
| + srl $ctx,$tmp4,2 |
| + and $in0,$in0,$tmp4 |
| + andi $tmp4,$tmp4,3 |
| + addu $ctx,$ctx,$in0 |
| + |
| + addu $tmp0,$tmp0,$ctx |
| + sltu $ctx,$tmp0,$ctx |
| + addiu $in0,$tmp0,5 # compare to modulus |
| + addu $tmp1,$tmp1,$ctx |
| + sltiu $in1,$in0,5 |
| + sltu $ctx,$tmp1,$ctx |
| + addu $in1,$in1,$tmp1 |
| + addu $tmp2,$tmp2,$ctx |
| + sltu $in2,$in1,$tmp1 |
| + sltu $ctx,$tmp2,$ctx |
| + addu $in2,$in2,$tmp2 |
| + addu $tmp3,$tmp3,$ctx |
| + sltu $in3,$in2,$tmp2 |
| + sltu $ctx,$tmp3,$ctx |
| + addu $in3,$in3,$tmp3 |
| + addu $tmp4,$tmp4,$ctx |
| + sltu $ctx,$in3,$tmp3 |
| + addu $ctx,$tmp4 |
| + |
| + srl $ctx,2 # see if it carried/borrowed |
| + subu $ctx,$zero,$ctx |
| + |
| + xor $in0,$tmp0 |
| + xor $in1,$tmp1 |
| + xor $in2,$tmp2 |
| + xor $in3,$tmp3 |
| + and $in0,$ctx |
| + and $in1,$ctx |
| + and $in2,$ctx |
| + and $in3,$ctx |
| + xor $in0,$tmp0 |
| + xor $in1,$tmp1 |
| + xor $in2,$tmp2 |
| + xor $in3,$tmp3 |
| + |
| + lw $tmp0,0($nonce) # load nonce |
| + lw $tmp1,4($nonce) |
| + lw $tmp2,8($nonce) |
| + lw $tmp3,12($nonce) |
| + |
| + addu $in0,$tmp0 # accumulate nonce |
| + sltu $ctx,$in0,$tmp0 |
| + |
| + addu $in1,$tmp1 |
| + sltu $tmp1,$in1,$tmp1 |
| + addu $in1,$ctx |
| + sltu $ctx,$in1,$ctx |
| + addu $ctx,$tmp1 |
| + |
| + addu $in2,$tmp2 |
| + sltu $tmp2,$in2,$tmp2 |
| + addu $in2,$ctx |
| + sltu $ctx,$in2,$ctx |
| + addu $ctx,$tmp2 |
| + |
| + addu $in3,$tmp3 |
| + addu $in3,$ctx |
| + |
| + srl $tmp0,$in0,8 # write mac value |
| + srl $tmp1,$in0,16 |
| + srl $tmp2,$in0,24 |
| + sb $in0, 0($mac) |
| + sb $tmp0,1($mac) |
| + srl $tmp0,$in1,8 |
| + sb $tmp1,2($mac) |
| + srl $tmp1,$in1,16 |
| + sb $tmp2,3($mac) |
| + srl $tmp2,$in1,24 |
| + sb $in1, 4($mac) |
| + sb $tmp0,5($mac) |
| + srl $tmp0,$in2,8 |
| + sb $tmp1,6($mac) |
| + srl $tmp1,$in2,16 |
| + sb $tmp2,7($mac) |
| + srl $tmp2,$in2,24 |
| + sb $in2, 8($mac) |
| + sb $tmp0,9($mac) |
| + srl $tmp0,$in3,8 |
| + sb $tmp1,10($mac) |
| + srl $tmp1,$in3,16 |
| + sb $tmp2,11($mac) |
| + srl $tmp2,$in3,24 |
| + sb $in3, 12($mac) |
| + sb $tmp0,13($mac) |
| + sb $tmp1,14($mac) |
| + sb $tmp2,15($mac) |
| + |
| + jr $ra |
| +.end poly1305_emit |
| +.rdata |
| +.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" |
| +.align 2 |
| +___ |
| +} |
| +}}} |
| + |
| +$output=pop and open STDOUT,">$output"; # if an argument remains, the last one names the output file and STDOUT is redirected to it |
| +print $code; # emit the assembly text accumulated in $code |
| +close STDOUT; |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 15cfb02c3e49..0be5b4092f18 100644 |
| |
| |
| @@ -707,6 +707,11 @@ config CRYPTO_POLY1305_X86_64 |
| in IETF protocols. This is the x86_64 assembler implementation using SIMD |
| instructions. |
| |
| +config CRYPTO_POLY1305_MIPS |
| + tristate "Poly1305 authenticator algorithm (MIPS optimized)" |
| + depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) |
| + select CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + |
| config CRYPTO_MD4 |
| tristate "MD4 digest algorithm" |
| select CRYPTO_HASH |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 9bd15b227e78..d15ec5382986 100644 |
| |
| |
| @@ -39,6 +39,7 @@ config CRYPTO_LIB_DES |
| |
| config CRYPTO_LIB_POLY1305_RSIZE |
| int |
| + default 2 if MIPS |
| default 4 if X86_64 |
| default 9 if ARM || ARM64 |
| default 1 |
| -- |
| 2.18.2 |
| |
| |
| From 8f7e17a4015605a94c6c26c1bec0f22b8490a8e3 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:28 +0100 |
| Subject: [PATCH 021/100] crypto: blake2s - generic C library implementation |
| and selftest |
| |
| commit 66d7fb94e4ffe5acc589e0b2b4710aecc1f07a28 upstream. |
| |
| The C implementation was originally based on Samuel Neves' public |
| domain reference implementation but has since been heavily modified |
| for the kernel. We're able to do compile-time optimizations by moving |
| some scaffolding around the final function into the header file. |
| |
| Information: https://blake2.net/ |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Samuel Neves <sneves@dei.uc.pt> |
| Co-developed-by: Samuel Neves <sneves@dei.uc.pt> |
| [ardb: - move from lib/zinc to lib/crypto |
| - remove simd handling |
| - rewrote selftest for better coverage |
| - use fixed digest length for blake2s_hmac() and rename to |
| blake2s256_hmac() ] |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/crypto/blake2s.h | 106 +++++ |
| include/crypto/internal/blake2s.h | 19 + |
| lib/crypto/Kconfig | 25 ++ |
| lib/crypto/Makefile | 10 + |
| lib/crypto/blake2s-generic.c | 111 ++++++ |
| lib/crypto/blake2s-selftest.c | 622 ++++++++++++++++++++++++++++++ |
| lib/crypto/blake2s.c | 126 ++++++ |
| 7 files changed, 1019 insertions(+) |
| create mode 100644 include/crypto/blake2s.h |
| create mode 100644 include/crypto/internal/blake2s.h |
| create mode 100644 lib/crypto/blake2s-generic.c |
| create mode 100644 lib/crypto/blake2s-selftest.c |
| create mode 100644 lib/crypto/blake2s.c |
| |
| diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h |
| new file mode 100644 |
| index 000000000000..b471deac28ff |
| |
| |
| @@ -0,0 +1,106 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef BLAKE2S_H |
| +#define BLAKE2S_H |
| + |
| +#include <linux/types.h> |
| +#include <linux/kernel.h> |
| +#include <linux/string.h> |
| + |
| +#include <asm/bug.h> |
| + |
| +enum blake2s_lengths { /* all sizes are in bytes */ |
| + BLAKE2S_BLOCK_SIZE = 64, /* input consumed per compression step */ |
| + BLAKE2S_HASH_SIZE = 32, /* largest outlen the WARN_ON checks accept */ |
| + BLAKE2S_KEY_SIZE = 32, /* largest keylen the WARN_ON checks accept */ |
| + |
| + BLAKE2S_128_HASH_SIZE = 16, /* 128-bit digest */ |
| + BLAKE2S_160_HASH_SIZE = 20, /* 160-bit digest */ |
| + BLAKE2S_224_HASH_SIZE = 28, /* 224-bit digest */ |
| + BLAKE2S_256_HASH_SIZE = 32, /* 256-bit digest */ |
| +}; |
| + |
| +struct blake2s_state { |
| + u32 h[8]; /* chaining value; seeded from BLAKE2S_IV* by blake2s_init_param() */ |
| + u32 t[2]; /* input counter: t[0] is the low word, t[1] receives the carry */ |
| + u32 f[2]; /* flag words; blake2s_set_lastblock() sets f[0] to all-ones */ |
| + u8 buf[BLAKE2S_BLOCK_SIZE]; /* block-sized buffer; blake2s_init_key() places the key here */ |
| + unsigned int buflen; /* bytes held in buf; blake2s_init_key() sets a full block */ |
| + unsigned int outlen; /* digest length requested at init time */ |
| +}; |
| + |
| +enum blake2s_iv { /* seeds h[0..7]; also loaded into v[8..15] by blake2s_compress_generic() */ |
| + BLAKE2S_IV0 = 0x6A09E667UL, |
| + BLAKE2S_IV1 = 0xBB67AE85UL, |
| + BLAKE2S_IV2 = 0x3C6EF372UL, |
| + BLAKE2S_IV3 = 0xA54FF53AUL, |
| + BLAKE2S_IV4 = 0x510E527FUL, |
| + BLAKE2S_IV5 = 0x9B05688CUL, |
| + BLAKE2S_IV6 = 0x1F83D9ABUL, |
| + BLAKE2S_IV7 = 0x5BE0CD19UL, |
| +}; |
| + |
| +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); |
| +void blake2s_final(struct blake2s_state *state, u8 *out); |
| + |
| +static inline void blake2s_init_param(struct blake2s_state *state, |
| + const u32 param) |
| +{ |
| + *state = (struct blake2s_state){{ /* inner braces initialise h[]; every other member becomes zero */ |
| + BLAKE2S_IV0 ^ param, /* only the first word carries the parameter bits */ |
| + BLAKE2S_IV1, |
| + BLAKE2S_IV2, |
| + BLAKE2S_IV3, |
| + BLAKE2S_IV4, |
| + BLAKE2S_IV5, |
| + BLAKE2S_IV6, |
| + BLAKE2S_IV7, |
| + }}; |
| +} |
| + |
| +static inline void blake2s_init(struct blake2s_state *state, |
| + const size_t outlen) |
| +{ |
| + blake2s_init_param(state, 0x01010000 | outlen); /* low byte = digest length; key-length byte left at zero */ |
| + state->outlen = outlen; /* must follow blake2s_init_param(), which zeroes this field */ |
| +} |
| + |
| +static inline void blake2s_init_key(struct blake2s_state *state, |
| + const size_t outlen, const void *key, |
| + const size_t keylen) |
| +{ |
| + WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || /* DEBUG-only argument check; execution continues either way */ |
| + !key || !keylen || keylen > BLAKE2S_KEY_SIZE)); |
| + |
| + blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); /* byte 1 = key length, byte 0 = digest length */ |
| + memcpy(state->buf, key, keylen); /* rest of buf stays zero from blake2s_init_param() */ |
| + state->buflen = BLAKE2S_BLOCK_SIZE; /* the zero-padded key counts as one full block */ |
| + state->outlen = outlen; /* must follow blake2s_init_param(), which zeroes this field */ |
| +} |
| + |
| +static inline void blake2s(u8 *out, const u8 *in, const u8 *key, |
| + const size_t outlen, const size_t inlen, |
| + const size_t keylen) |
| +{ |
| + struct blake2s_state state; /* on-stack state for this one-shot hash */ |
| + |
| + WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen || /* DEBUG-only argument check; execution continues either way */ |
| + outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE || |
| + (!key && keylen))); |
| + |
| + if (keylen) /* keyed and unkeyed use differ only in how the state is initialised */ |
| + blake2s_init_key(&state, outlen, key, keylen); |
| + else |
| + blake2s_init(&state, outlen); |
| + |
| + blake2s_update(&state, in, inlen); /* the whole input is passed in a single call */ |
| + blake2s_final(&state, out); |
| +} |
| + |
| +void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, |
| + const size_t keylen); |
| + |
| +#endif /* BLAKE2S_H */ |
| diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h |
| new file mode 100644 |
| index 000000000000..941693effc7d |
| |
| |
| @@ -0,0 +1,19 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| + |
| +#ifndef BLAKE2S_INTERNAL_H |
| +#define BLAKE2S_INTERNAL_H |
| + |
| +#include <crypto/blake2s.h> |
| + |
| +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, |
| + size_t nblocks, const u32 inc); |
| + |
| +void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, |
| + size_t nblocks, const u32 inc); |
| + |
| +static inline void blake2s_set_lastblock(struct blake2s_state *state) |
| +{ |
| + state->f[0] = -1; /* u32 all-ones; XORed into v[14] by blake2s_compress_generic() */ |
| +} |
| + |
| +#endif /* BLAKE2S_INTERNAL_H */ |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index d15ec5382986..7ad98b624e55 100644 |
| |
| |
| @@ -8,6 +8,31 @@ config CRYPTO_LIB_AES |
| config CRYPTO_LIB_ARC4 |
| tristate |
| |
| +config CRYPTO_ARCH_HAVE_LIB_BLAKE2S |
| + tristate |
| + help |
| + Declares whether the architecture provides an arch-specific |
| + accelerated implementation of the Blake2s library interface, |
| + either builtin or as a module. |
| + |
| +config CRYPTO_LIB_BLAKE2S_GENERIC |
| + tristate |
| + help |
| + This symbol can be depended upon by arch implementations of the |
| + Blake2s library interface that require the generic code as a |
| + fallback, e.g., for SIMD implementations. If no arch specific |
| + implementation is enabled, this implementation serves the users |
| + of CRYPTO_LIB_BLAKE2S. |
| + |
| +config CRYPTO_LIB_BLAKE2S |
| + tristate "BLAKE2s hash function library" |
| + depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S |
| + select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n |
| + help |
| + Enable the Blake2s library interface. This interface may be fulfilled |
| + by either the generic implementation or an arch-specific one, if one |
| + is available and enabled. |
| + |
| config CRYPTO_ARCH_HAVE_LIB_CHACHA |
| tristate |
| help |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index b58ab6843a9d..8ca66b5f9807 100644 |
| |
| |
| @@ -10,6 +10,12 @@ libaes-y := aes.o |
| obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o |
| libarc4-y := arc4.o |
| |
| +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o |
| +libblake2s-generic-y += blake2s-generic.o |
| + |
| +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o |
| +libblake2s-y += blake2s.o |
| + |
| obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| libdes-y := des.o |
| |
| @@ -18,3 +24,7 @@ libpoly1305-y := poly1305.o |
| |
| obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o |
| libsha256-y := sha256.o |
| + |
| +ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) |
| +libblake2s-y += blake2s-selftest.o |
| +endif |
| diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c |
| new file mode 100644 |
| index 000000000000..04ff8df24513 |
| |
| |
| @@ -0,0 +1,111 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is an implementation of the BLAKE2s hash and PRF functions. |
| + * |
| + * Information: https://blake2.net/ |
| + * |
| + */ |
| + |
| +#include <crypto/internal/blake2s.h> |
| +#include <linux/types.h> |
| +#include <linux/string.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| +#include <linux/init.h> |
| +#include <linux/bug.h> |
| +#include <asm/unaligned.h> |
| + |
| +static const u8 blake2s_sigma[10][16] = { /* row r lists the m[] indices that G() reads during ROUND(r) */ |
| + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, |
| + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, |
| + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, |
| + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, |
| + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, |
| + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, |
| + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, |
| + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, |
| + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, |
| + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, |
| +}; |
| + |
| +static inline void blake2s_increment_counter(struct blake2s_state *state, |
| + const u32 inc) |
| +{ |
| + state->t[0] += inc; /* low word; may wrap around */ |
| + state->t[1] += (state->t[0] < inc); /* carry: t[0] wrapped iff it is now smaller than inc */ |
| +} |
| + |
| +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, |
| + size_t nblocks, const u32 inc) |
| +{ |
| + u32 m[16]; /* message words of the block being processed */ |
| + u32 v[16]; /* working vector mixed by the rounds */ |
| + int i; |
| + |
| + WARN_ON(IS_ENABLED(DEBUG) && /* inc is added once per block, so multi-block calls must use full blocks */ |
| + (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); |
| + |
| + while (nblocks > 0) { |
| + blake2s_increment_counter(state, inc); /* counter is advanced before it is mixed into v[12..13] */ |
| + memcpy(m, block, BLAKE2S_BLOCK_SIZE); |
| + le32_to_cpu_array(m, ARRAY_SIZE(m)); /* block bytes are interpreted as little-endian 32-bit words */ |
| + memcpy(v, state->h, 32); /* v[0..7] = h[0..7], i.e. 8 * sizeof(u32) bytes */ |
| + v[ 8] = BLAKE2S_IV0; |
| + v[ 9] = BLAKE2S_IV1; |
| + v[10] = BLAKE2S_IV2; |
| + v[11] = BLAKE2S_IV3; |
| + v[12] = BLAKE2S_IV4 ^ state->t[0]; /* v[12..13] fold in the input counter */ |
| + v[13] = BLAKE2S_IV5 ^ state->t[1]; |
| + v[14] = BLAKE2S_IV6 ^ state->f[0]; /* v[14..15] fold in the flag words */ |
| + v[15] = BLAKE2S_IV7 ^ state->f[1]; |
| + |
| +#define G(r, i, a, b, c, d) do { \ |
| + a += b + m[blake2s_sigma[r][2 * i + 0]]; \ |
| + d = ror32(d ^ a, 16); \ |
| + c += d; \ |
| + b = ror32(b ^ c, 12); \ |
| + a += b + m[blake2s_sigma[r][2 * i + 1]]; \ |
| + d = ror32(d ^ a, 8); \ |
| + c += d; \ |
| + b = ror32(b ^ c, 7); \ |
| +} while (0) |
| + |
| +#define ROUND(r) do { \ |
| + G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ |
| + G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ |
| + G(r, 2, v[2], v[ 6], v[10], v[14]); \ |
| + G(r, 3, v[3], v[ 7], v[11], v[15]); \ |
| + G(r, 4, v[0], v[ 5], v[10], v[15]); \ |
| + G(r, 5, v[1], v[ 6], v[11], v[12]); \ |
| + G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ |
| + G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ |
| +} while (0) |
| + ROUND(0); /* ten rounds, one per blake2s_sigma row */ |
| + ROUND(1); |
| + ROUND(2); |
| + ROUND(3); |
| + ROUND(4); |
| + ROUND(5); |
| + ROUND(6); |
| + ROUND(7); |
| + ROUND(8); |
| + ROUND(9); |
| + |
| +#undef G |
| +#undef ROUND |
| + |
| + for (i = 0; i < 8; ++i) |
| + state->h[i] ^= v[i] ^ v[i + 8]; /* fold both halves of v back into the chaining value */ |
| + |
| + block += BLAKE2S_BLOCK_SIZE; /* advance to the next input block */ |
| + --nblocks; |
| + } |
| +} |
| + |
| +EXPORT_SYMBOL(blake2s_compress_generic); |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("BLAKE2s hash function"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c |
| new file mode 100644 |
| index 000000000000..79ef404a990d |
| |
| |
| @@ -0,0 +1,622 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/blake2s.h> |
| +#include <linux/string.h> |
| + |
| +/* |
| + * blake2s_testvecs[] generated with the program below (using libb2-dev and |
| + * libssl-dev [OpenSSL]) |
| + * |
| + * #include <blake2.h> |
| + * #include <stdint.h> |
| + * #include <stdio.h> |
| + * |
| + * #include <openssl/evp.h> |
| + * #include <openssl/hmac.h> |
| + * |
| + * #define BLAKE2S_TESTVEC_COUNT 256 |
| + * |
| + * static void print_vec(const uint8_t vec[], int len) |
| + * { |
| + * int i; |
| + * |
| + * printf(" { "); |
| + * for (i = 0; i < len; i++) { |
| + * if (i && (i % 12) == 0) |
| + * printf("\n "); |
| + * printf("0x%02x, ", vec[i]); |
| + * } |
| + * printf("},\n"); |
| + * } |
| + * |
| + * int main(void) |
| + * { |
| + * uint8_t key[BLAKE2S_KEYBYTES]; |
| + * uint8_t buf[BLAKE2S_TESTVEC_COUNT]; |
| + * uint8_t hash[BLAKE2S_OUTBYTES]; |
| + * int i, j; |
| + * |
| + * key[0] = key[1] = 1; |
| + * for (i = 2; i < BLAKE2S_KEYBYTES; ++i) |
| + * key[i] = key[i - 2] + key[i - 1]; |
| + * |
| + * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) |
| + * buf[i] = (uint8_t)i; |
| + * |
| + * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); |
| + * |
| + * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) { |
| + * int outlen = 1 + i % BLAKE2S_OUTBYTES; |
| + * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1); |
| + * |
| + * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i, |
| + * keylen); |
| + * print_vec(hash, outlen); |
| + * } |
| + * printf("};\n\n"); |
| + * |
| + * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); |
| + * |
| + * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); |
| + * print_vec(hash, BLAKE2S_OUTBYTES); |
| + * |
| + * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); |
| + * print_vec(hash, BLAKE2S_OUTBYTES); |
| + * |
| + * printf("};\n"); |
| + * |
| + * return 0; |
| + * } |
| + */ |
| +static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { |
| + { 0xa1, }, |
| + { 0x7c, 0x89, }, |
| + { 0x74, 0x0e, 0xd4, }, |
| + { 0x47, 0x0c, 0x21, 0x15, }, |
| + { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, }, |
| + { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, }, |
| + { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, }, |
| + { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, }, |
| + { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, }, |
| + { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, }, |
| + { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, }, |
| + { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, }, |
| + { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda, |
| + 0xb7, }, |
| + { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25, |
| + 0x52, 0x3e, }, |
| + { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc, |
| + 0x57, 0xe2, 0x27, }, |
| + { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6, |
| + 0x47, 0x2a, 0xc7, 0xf5, }, |
| + { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43, |
| + 0xe7, 0x72, 0x08, 0xec, 0xbe, }, |
| + { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96, |
| + 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, }, |
| + { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e, |
| + 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, }, |
| + { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d, |
| + 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, }, |
| + { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86, |
| + 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, }, |
| + { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41, |
| + 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, }, |
| + { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22, |
| + 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, }, |
| + { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00, |
| + 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, }, |
| + { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9, |
| + 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13, |
| + 0xd1, }, |
| + { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05, |
| + 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07, |
| + 0x01, 0x3e, }, |
| + { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74, |
| + 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b, |
| + 0xec, 0x13, 0xed, }, |
| + { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7, |
| + 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37, |
| + 0x72, 0x67, 0x09, 0xfc, }, |
| + { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38, |
| + 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c, |
| + 0x95, 0x29, 0x19, 0xf2, 0x4b, }, |
| + { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22, |
| + 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30, |
| + 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, }, |
| + { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e, |
| + 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd, |
| + 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, }, |
| + { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe, |
| + 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61, |
| + 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, }, |
| + { 0x0a, }, |
| + { 0x6e, 0xd4, }, |
| + { 0x64, 0xe9, 0xd1, }, |
| + { 0x30, 0xdd, 0x71, 0xef, }, |
| + { 0x11, 0xb5, 0x0c, 0x87, 0xc9, }, |
| + { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, }, |
| + { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, }, |
| + { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, }, |
| + { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, }, |
| + { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, }, |
| + { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, }, |
| + { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, }, |
| + { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf, |
| + 0xe2, }, |
| + { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64, |
| + 0x7e, 0xb0, }, |
| + { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51, |
| + 0x98, 0x0a, 0x8d, }, |
| + { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04, |
| + 0xf1, 0xc3, 0x94, 0xd8, }, |
| + { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7, |
| + 0x2c, 0xe8, 0x8f, 0xc7, 0x34, }, |
| + { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41, |
| + 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, }, |
| + { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81, |
| + 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, }, |
| + { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4, |
| + 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, }, |
| + { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a, |
| + 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, }, |
| + { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb, |
| + 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, }, |
| + { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9, |
| + 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, }, |
| + { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9, |
| + 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, }, |
| + { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e, |
| + 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42, |
| + 0xe3, }, |
| + { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9, |
| + 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e, |
| + 0xe3, 0x90, }, |
| + { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3, |
| + 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10, |
| + 0x58, 0x06, 0x9a, }, |
| + { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5, |
| + 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d, |
| + 0x53, 0x03, 0x1a, 0x39, }, |
| + { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0, |
| + 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5, |
| + 0xd4, 0x36, 0xb1, 0xac, 0x73, }, |
| + { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59, |
| + 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90, |
| + 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, }, |
| + { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28, |
| + 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75, |
| + 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, }, |
| + { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6, |
| + 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77, |
| + 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, }, |
| + { 0x9d, }, |
| + { 0x9d, 0x7d, }, |
| + { 0xfd, 0xc3, 0xda, }, |
| + { 0xe8, 0x82, 0xcd, 0x21, }, |
| + { 0xc3, 0x1d, 0x42, 0x4c, 0x74, }, |
| + { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, }, |
| + { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, }, |
| + { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, }, |
| + { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, }, |
| + { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, }, |
| + { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, }, |
| + { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, }, |
| + { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3, |
| + 0x63, }, |
| + { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f, |
| + 0xe6, 0xa9, }, |
| + { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a, |
| + 0xf6, 0x0e, 0x20, }, |
| + { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39, |
| + 0x18, 0x3e, 0xc7, 0xc3, }, |
| + { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c, |
| + 0x92, 0xfc, 0x7f, 0x34, 0x41, }, |
| + { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb, |
| + 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, }, |
| + { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96, |
| + 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, }, |
| + { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6, |
| + 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, }, |
| + { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba, |
| + 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, }, |
| + { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0, |
| + 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, }, |
| + { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d, |
| + 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, }, |
| + { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59, |
| + 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, }, |
| + { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89, |
| + 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e, |
| + 0xaf, }, |
| + { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc, |
| + 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7, |
| + 0xb1, 0x1d, }, |
| + { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9, |
| + 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf, |
| + 0xee, 0x6a, 0xbe, }, |
| + { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8, |
| + 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd, |
| + 0x5c, 0xef, 0xa3, 0x25, }, |
| + { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6, |
| + 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8, |
| + 0x4b, 0x0e, 0xe3, 0x94, 0x9f, }, |
| + { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc, |
| + 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf, |
| + 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, }, |
| + { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76, |
| + 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46, |
| + 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, }, |
| + { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02, |
| + 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3, |
| + 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, }, |
| + { 0x02, }, |
| + { 0x52, 0xa8, }, |
| + { 0x38, 0x25, 0x0d, }, |
| + { 0xe3, 0x04, 0xd4, 0x92, }, |
| + { 0x97, 0xdb, 0xf7, 0x81, 0xca, }, |
| + { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, }, |
| + { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, }, |
| + { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, }, |
| + { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, }, |
| + { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, }, |
| + { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, }, |
| + { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, }, |
| + { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa, |
| + 0xd3, }, |
| + { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c, |
| + 0xb7, 0x9a, }, |
| + { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31, |
| + 0x3d, 0x47, 0x3d, }, |
| + { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0, |
| + 0x24, 0xf0, 0x17, 0xc0, }, |
| + { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76, |
| + 0xd2, 0xfd, 0x0c, 0x47, 0x40, }, |
| + { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae, |
| + 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, }, |
| + { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee, |
| + 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, }, |
| + { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d, |
| + 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, }, |
| + { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61, |
| + 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, }, |
| + { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd, |
| + 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, }, |
| + { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5, |
| + 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, }, |
| + { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17, |
| + 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, }, |
| + { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c, |
| + 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c, |
| + 0xae, }, |
| + { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5, |
| + 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d, |
| + 0x59, 0x00, }, |
| + { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e, |
| + 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c, |
| + 0x5a, 0x2c, 0x3b, }, |
| + { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda, |
| + 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c, |
| + 0x10, 0x07, 0x2a, 0xc4, }, |
| + { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14, |
| + 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf, |
| + 0xee, 0xa1, 0x74, 0xd6, 0x71, }, |
| + { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33, |
| + 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff, |
| + 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, }, |
| + { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c, |
| + 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44, |
| + 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, }, |
| + { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff, |
| + 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23, |
| + 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, }, |
| + { 0xbe, }, |
| + { 0x17, 0x6c, }, |
| + { 0x1a, 0x42, 0x4f, }, |
| + { 0xba, 0xaf, 0xb7, 0x65, }, |
| + { 0xc2, 0x63, 0x43, 0x6a, 0xea, }, |
| + { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, }, |
| + { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, }, |
| + { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, }, |
| + { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, }, |
| + { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, }, |
| + { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, }, |
| + { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, }, |
| + { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92, |
| + 0xe9, }, |
| + { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d, |
| + 0xc4, 0xb3, }, |
| + { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60, |
| + 0xd7, 0xd3, 0x5e, }, |
| + { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16, |
| + 0xaf, 0x39, 0xbd, 0x92, }, |
| + { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10, |
| + 0xd9, 0xa7, 0x66, 0x27, 0xcf, }, |
| + { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02, |
| + 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, }, |
| + { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd, |
| + 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, }, |
| + { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3, |
| + 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, }, |
| + { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f, |
| + 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, }, |
| + { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51, |
| + 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, }, |
| + { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3, |
| + 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, }, |
| + { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda, |
| + 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, }, |
| + { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6, |
| + 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a, |
| + 0x26, }, |
| + { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24, |
| + 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35, |
| + 0xf4, 0x1d, }, |
| + { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9, |
| + 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46, |
| + 0x4e, 0x29, 0x26, }, |
| + { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09, |
| + 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98, |
| + 0x1a, 0x5b, 0xff, 0xc9, }, |
| + { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2, |
| + 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69, |
| + 0xbf, 0xf6, 0xbe, 0x3c, 0x40, }, |
| + { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31, |
| + 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20, |
| + 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, }, |
| + { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4, |
| + 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30, |
| + 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, }, |
| + { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0, |
| + 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5, |
| + 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, }, |
| + { 0x1f, }, |
| + { 0x82, 0x60, }, |
| + { 0xcc, 0xe3, 0x08, }, |
| + { 0x56, 0x17, 0xe4, 0x59, }, |
| + { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, }, |
| + { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, }, |
| + { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, }, |
| + { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, }, |
| + { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, }, |
| + { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, }, |
| + { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, }, |
| + { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, }, |
| + { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc, |
| + 0x5b, }, |
| + { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0, |
| + 0x90, 0x48, }, |
| + { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60, |
| + 0x04, 0xd9, 0xd5, }, |
| + { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47, |
| + 0xe5, 0x31, 0x06, 0x70, }, |
| + { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c, |
| + 0x70, 0x25, 0xdb, 0x33, 0x27, }, |
| + { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a, |
| + 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, }, |
| + { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3, |
| + 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, }, |
| + { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7, |
| + 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, }, |
| + { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac, |
| + 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, }, |
| + { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98, |
| + 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, }, |
| + { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11, |
| + 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, }, |
| + { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a, |
| + 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, }, |
| + { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41, |
| + 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70, |
| + 0x88, }, |
| + { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4, |
| + 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3, |
| + 0xc6, 0xbb, }, |
| + { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55, |
| + 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5, |
| + 0x55, 0xfd, 0x4f, }, |
| + { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e, |
| + 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2, |
| + 0x42, 0x64, 0x3b, 0x1a, }, |
| + { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95, |
| + 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5, |
| + 0x1d, 0x60, 0x8f, 0x8a, 0x1d, }, |
| + { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4, |
| + 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d, |
| + 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, }, |
| + { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b, |
| + 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3, |
| + 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, }, |
| + { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf, |
| + 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f, |
| + 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, }, |
| + { 0x60, }, |
| + { 0x24, 0x26, }, |
| + { 0x47, 0xeb, 0xc9, }, |
| + { 0x4a, 0xd0, 0xbc, 0xf0, }, |
| + { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, }, |
| + { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, }, |
| + { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, }, |
| + { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, }, |
| + { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, }, |
| + { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, }, |
| + { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, }, |
| + { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, }, |
| + { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91, |
| + 0x8d, }, |
| + { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33, |
| + 0xbf, 0xa0, }, |
| + { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c, |
| + 0xd5, 0x4b, 0xed, }, |
| + { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44, |
| + 0xc8, 0x24, 0x0a, 0xb7, }, |
| + { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75, |
| + 0xb2, 0x15, 0xd1, 0xb1, 0x10, }, |
| + { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35, |
| + 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, }, |
| + { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9, |
| + 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, }, |
| + { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47, |
| + 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, }, |
| + { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35, |
| + 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, }, |
| + { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1, |
| + 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, }, |
| + { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21, |
| + 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, }, |
| + { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4, |
| + 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, }, |
| + { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7, |
| + 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7, |
| + 0xd3, }, |
| + { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb, |
| + 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16, |
| + 0xa6, 0xd6, }, |
| + { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80, |
| + 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88, |
| + 0x55, 0xd1, 0xa9, }, |
| + { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54, |
| + 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46, |
| + 0xef, 0xb0, 0x00, 0x8d, }, |
| + { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8, |
| + 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94, |
| + 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, }, |
| + { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4, |
| + 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67, |
| + 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, }, |
| + { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02, |
| + 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04, |
| + 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, }, |
| + { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28, |
| + 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca, |
| + 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, }, |
| + { 0x7e, }, |
| + { 0x1e, 0x21, }, |
| + { 0x26, 0xd3, 0xdd, }, |
| + { 0x2c, 0xd4, 0xb3, 0x3d, }, |
| + { 0x86, 0x7b, 0x76, 0x3c, 0xf0, }, |
| + { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, }, |
| + { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, }, |
| + { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, }, |
| + { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, }, |
| + { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, }, |
| + { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, }, |
| + { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, }, |
| + { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77, |
| + 0x66, }, |
| + { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31, |
| + 0x55, 0x66, }, |
| + { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12, |
| + 0x43, 0xbf, 0xa1, }, |
| + { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e, |
| + 0x95, 0x8a, 0xc5, 0xed, }, |
| + { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef, |
| + 0xf6, 0x2b, 0x3a, 0xba, 0x60, }, |
| + { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2, |
| + 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, }, |
| + { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b, |
| + 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, }, |
| + { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94, |
| + 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, }, |
| + { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b, |
| + 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, }, |
| + { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf, |
| + 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, }, |
| + { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8, |
| + 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, }, |
| + { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea, |
| + 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, }, |
| + { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2, |
| + 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0, |
| + 0xd7, }, |
| + { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b, |
| + 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd, |
| + 0xb6, 0xef, }, |
| + { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6, |
| + 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57, |
| + 0xef, 0xf8, 0x53, }, |
| + { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46, |
| + 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89, |
| + 0x89, 0xfd, 0xf7, 0x99, }, |
| + { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03, |
| + 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a, |
| + 0xa9, 0x8f, 0x2b, 0x59, 0xfb, }, |
| + { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb, |
| + 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d, |
| + 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, }, |
| + { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0, |
| + 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d, |
| + 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, }, |
| + { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c, |
| + 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92, |
| + 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, |
| +}; |
| + |
| +static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { /* expected blake2s256_hmac() digests compared in blake2s_selftest() */ |
| + { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, /* [0]: in = selftest buf, key = selftest key */ |
| + 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, |
| + 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, |
| + { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, /* [1]: roles swapped, in = selftest key, key = selftest buf */ |
| + 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, |
| + 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, |
| +}; |
| + |
| +bool __init blake2s_selftest(void) /* known-answer self-test; returns true only if every comparison matched */ |
| +{ |
| + u8 key[BLAKE2S_KEY_SIZE]; |
| + u8 buf[ARRAY_SIZE(blake2s_testvecs)]; /* one input byte per test vector */ |
| + u8 hash[BLAKE2S_HASH_SIZE]; |
| + struct blake2s_state state; |
| + bool success = true; |
| + int i, l; |
| + |
| + key[0] = key[1] = 1; /* seeds the additive sequence built by the loop below */ |
| + for (i = 2; i < sizeof(key); ++i) |
| + key[i] = key[i - 2] + key[i - 1]; /* sum of the two previous bytes, stored as u8 */ |
| + |
| + for (i = 0; i < sizeof(buf); ++i) |
| + buf[i] = (u8)i; |
| + |
| + for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) { /* i is bumped first, then l is reduced modulo the new i, so 0 <= l < i */ |
| + int outlen = 1 + i % BLAKE2S_HASH_SIZE; /* cycles through 1..BLAKE2S_HASH_SIZE */ |
| + int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1); /* cycles through 0..BLAKE2S_KEY_SIZE; 0 selects the unkeyed init below */ |
| + |
| + blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i, /* key argument points at the last keylen bytes of key[]; NOTE(review): argument order of blake2s() is not visible here, confirm against its declaration */ |
| + keylen); |
| + if (memcmp(hash, blake2s_testvecs[i], outlen)) { /* only the first outlen bytes are compared */ |
| + pr_err("blake2s self-test %d: FAIL\n", i + 1); |
| + success = false; |
| + } |
| + |
| + if (!keylen) |
| + blake2s_init(&state, outlen); |
| + else |
| + blake2s_init_key(&state, outlen, |
| + key + BLAKE2S_KEY_SIZE - keylen, |
| + keylen); |
| + |
| + blake2s_update(&state, buf, l); /* same i bytes of buf, fed in two pieces split at offset l */ |
| + blake2s_update(&state, buf + l, i - l); |
| + blake2s_final(&state, hash); |
| + if (memcmp(hash, blake2s_testvecs[i], outlen)) { /* checked against the same expected vector as the one-shot call above */ |
| + pr_err("blake2s init/update/final self-test %d: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| + |
| + if (success) { /* the HMAC checks are skipped once any vector above has failed */ |
| + blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); |
| + success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); |
| + |
| + blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); /* buf and key swap roles for the second vector */ |
| + success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); |
| + |
| + if (!success) |
| + pr_err("blake2s256_hmac self-test: FAIL\n"); |
| + } |
| + |
| + return success; |
| +} |
| diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c |
| new file mode 100644 |
| index 000000000000..41025a30c524 |
| |
| |
| @@ -0,0 +1,126 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is an implementation of the BLAKE2s hash and PRF functions. |
| + * |
| + * Information: https://blake2.net/ |
| + * |
| + */ |
| + |
| +#include <crypto/internal/blake2s.h> |
| +#include <linux/types.h> |
| +#include <linux/string.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| +#include <linux/init.h> |
| +#include <linux/bug.h> |
| +#include <asm/unaligned.h> |
| + |
| +bool blake2s_selftest(void); |
| + |
| +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) /* absorb inlen bytes from in into the running hash state */ |
| +{ |
| + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; /* free bytes left in state->buf */ |
| + |
| + if (unlikely(!inlen)) |
| + return; |
| + if (inlen > fill) { /* strictly greater: input that only just fills state->buf stays buffered */ |
| + memcpy(state->buf + state->buflen, in, fill); |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) |
| + blake2s_compress_arch(state, state->buf, 1, |
| + BLAKE2S_BLOCK_SIZE); |
| + else |
| + blake2s_compress_generic(state, state->buf, 1, |
| + BLAKE2S_BLOCK_SIZE); |
| + state->buflen = 0; |
| + in += fill; |
| + inlen -= fill; |
| + } |
| + if (inlen > BLAKE2S_BLOCK_SIZE) { /* reachable only with an empty buffer: the branch above has just reset buflen to 0 */ |
| + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); |
| + /* Hash one less (full) block than strictly possible */ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) |
| + blake2s_compress_arch(state, in, nblocks - 1, |
| + BLAKE2S_BLOCK_SIZE); |
| + else |
| + blake2s_compress_generic(state, in, nblocks - 1, |
| + BLAKE2S_BLOCK_SIZE); |
| + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); |
| + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); /* leaves between 1 and BLAKE2S_BLOCK_SIZE bytes */ |
| + } |
| + memcpy(state->buf + state->buflen, in, inlen); /* remainder is buffered for a later update or for blake2s_final() */ |
| + state->buflen += inlen; |
| +} |
| +EXPORT_SYMBOL(blake2s_update); |
| + |
| +void blake2s_final(struct blake2s_state *state, u8 *out) /* compress the buffered tail, copy state->outlen digest bytes to out, then wipe state */ |
| +{ |
| + WARN_ON(IS_ENABLED(DEBUG) && !out); /* warning only, and only when DEBUG is enabled; out is still passed to memcpy() below */ |
| + blake2s_set_lastblock(state); |
| + memset(state->buf + state->buflen, 0, |
| + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) |
| + blake2s_compress_arch(state, state->buf, 1, state->buflen); /* last argument is buflen here, not BLAKE2S_BLOCK_SIZE as in blake2s_update() */ |
| + else |
| + blake2s_compress_generic(state, state->buf, 1, state->buflen); |
| + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); /* presumably converts h[] to little-endian in place before it is copied out as bytes */ |
| + memcpy(out, state->h, state->outlen); |
| + memzero_explicit(state, sizeof(*state)); /* whole state is cleared, so it must be re-initialised before reuse */ |
| +} |
| +EXPORT_SYMBOL(blake2s_final); |
| + |
| +void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, |
| + const size_t keylen) /* HMAC construction over BLAKE2s; writes BLAKE2S_HASH_SIZE bytes to out */ |
| +{ |
| + struct blake2s_state state; |
| + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; /* zero-initialised, so a short key ends up zero-padded to a full block */ |
| + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); |
| + int i; |
| + |
| + if (keylen > BLAKE2S_BLOCK_SIZE) { /* a key longer than one block is replaced by its BLAKE2S_HASH_SIZE-byte digest */ |
| + blake2s_init(&state, BLAKE2S_HASH_SIZE); |
| + blake2s_update(&state, key, keylen); |
| + blake2s_final(&state, x_key); |
| + } else |
| + memcpy(x_key, key, keylen); |
| + |
| + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) |
| + x_key[i] ^= 0x36; /* inner pad mask */ |
| + |
| + blake2s_init(&state, BLAKE2S_HASH_SIZE); /* inner hash: masked key block followed by the message */ |
| + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); |
| + blake2s_update(&state, in, inlen); |
| + blake2s_final(&state, i_hash); |
| + |
| + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) |
| + x_key[i] ^= 0x5c ^ 0x36; /* removes the 0x36 mask and applies the 0x5c outer mask in one step */ |
| + |
| + blake2s_init(&state, BLAKE2S_HASH_SIZE); /* outer hash: re-masked key block followed by the inner digest */ |
| + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); |
| + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); |
| + blake2s_final(&state, i_hash); /* i_hash is reused to receive the outer digest */ |
| + |
| + memcpy(out, i_hash, BLAKE2S_HASH_SIZE); |
| + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); /* scrub key-derived material from the stack */ |
| + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); |
| +} |
| +EXPORT_SYMBOL(blake2s256_hmac); |
| + |
| +static int __init mod_init(void) /* module init: runs blake2s_selftest() unless CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is enabled */ |
| +{ |
| + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && |
| + WARN_ON(!blake2s_selftest())) |
| + return -ENODEV; /* self-test reported a failure */ |
| + return 0; |
| +} |
| + |
| +static void __exit mod_exit(void) /* intentionally empty: mod_init() acquires nothing that would need releasing */ |
| +{ |
| +} |
| + |
| +module_init(mod_init); |
| +module_exit(mod_exit); |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("BLAKE2s hash function"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| -- |
| 2.18.2 |
| |
| |
| From 9d6b3f8f7dd184396b6dff0fb80d53f631b81dfe Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:29 +0100 |
| Subject: [PATCH 022/100] crypto: testmgr - add test cases for Blake2s |
| |
| commit 17e1df67023a5c9ccaeb5de8bf5b88f63127ecf7 upstream. |
| |
| As suggested by Eric for the Blake2b implementation contributed by |
| David, introduce a set of test vectors for Blake2s covering different |
| digest and key sizes. |
| |
| blake2s-128 blake2s-160 blake2s-224 blake2s-256 |
| --------------------------------------------------- |
| len=0 | klen=0 klen=1 klen=16 klen=32 |
| len=1 | klen=16 klen=32 klen=0 klen=1 |
| len=7 | klen=32 klen=0 klen=1 klen=16 |
| len=15 | klen=1 klen=16 klen=32 klen=0 |
| len=64 | klen=0 klen=1 klen=16 klen=32 |
| len=247 | klen=16 klen=32 klen=0 klen=1 |
| len=256 | klen=32 klen=0 klen=1 klen=16 |
| |
| Cc: David Sterba <dsterba@suse.com> |
| Cc: Eric Biggers <ebiggers@google.com> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/testmgr.c | 24 +++++ |
| crypto/testmgr.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++ |
| 2 files changed, 275 insertions(+) |
| |
| diff --git a/crypto/testmgr.c b/crypto/testmgr.c |
| index 7473c5bc06b1..711390861f71 100644 |
| |
| |
| @@ -4034,6 +4034,30 @@ static const struct alg_test_desc alg_test_descs[] = { |
| .alg = "authenc(hmac(sha512),rfc3686(ctr(aes)))", |
| .test = alg_test_null, |
| .fips_allowed = 1, |
| + }, { |
| + .alg = "blake2s-128", |
| + .test = alg_test_hash, |
| + .suite = { |
| + .hash = __VECS(blakes2s_128_tv_template) |
| + } |
| + }, { |
| + .alg = "blake2s-160", |
| + .test = alg_test_hash, |
| + .suite = { |
| + .hash = __VECS(blakes2s_160_tv_template) |
| + } |
| + }, { |
| + .alg = "blake2s-224", |
| + .test = alg_test_hash, |
| + .suite = { |
| + .hash = __VECS(blakes2s_224_tv_template) |
| + } |
| + }, { |
| + .alg = "blake2s-256", |
| + .test = alg_test_hash, |
| + .suite = { |
| + .hash = __VECS(blakes2s_256_tv_template) |
| + } |
| }, { |
| .alg = "cbc(aes)", |
| .test = alg_test_skcipher, |
| diff --git a/crypto/testmgr.h b/crypto/testmgr.h |
| index ef7d21f39d4a..102fcad54966 100644 |
| |
| |
| @@ -31567,4 +31567,255 @@ static const struct aead_testvec essiv_hmac_sha256_aes_cbc_tv_temp[] = { |
| }, |
| }; |
| |
| +static const char blake2_ordered_sequence[] = |
| + "\x00\x01\x02\x03\x04\x05\x06\x07" |
| + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" |
| + "\x10\x11\x12\x13\x14\x15\x16\x17" |
| + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" |
| + "\x20\x21\x22\x23\x24\x25\x26\x27" |
| + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" |
| + "\x30\x31\x32\x33\x34\x35\x36\x37" |
| + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" |
| + "\x40\x41\x42\x43\x44\x45\x46\x47" |
| + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" |
| + "\x50\x51\x52\x53\x54\x55\x56\x57" |
| + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" |
| + "\x60\x61\x62\x63\x64\x65\x66\x67" |
| + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" |
| + "\x70\x71\x72\x73\x74\x75\x76\x77" |
| + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" |
| + "\x80\x81\x82\x83\x84\x85\x86\x87" |
| + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" |
| + "\x90\x91\x92\x93\x94\x95\x96\x97" |
| + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" |
| + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" |
| + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" |
| + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" |
| + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" |
| + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" |
| + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" |
| + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" |
| + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" |
| + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" |
| + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" |
| + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" |
| + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; |
| + |
| +static const struct hash_testvec blakes2s_128_tv_template[] = {{ |
| + .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01, |
| + 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, }, |
| +}, { |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 64, |
| + .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01, |
| + 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 1, |
| + .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82, |
| + 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 7, |
| + .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd, |
| + 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 15, |
| + .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2, |
| + 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 247, |
| + .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe, |
| + 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 256, |
| + .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6, |
| + 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, }, |
| +}}; |
| + |
| +static const struct hash_testvec blakes2s_160_tv_template[] = {{ |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 7, |
| + .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e, |
| + 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62, |
| + 0xe3, 0xf2, 0x84, 0xff, }, |
| +}, { |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 256, |
| + .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2, |
| + 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1, |
| + 0x9b, 0x2d, 0x35, 0x05, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3, |
| + 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1, |
| + 0x79, 0x65, 0x32, 0x93, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 1, |
| + .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71, |
| + 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef, |
| + 0xa2, 0x3a, 0x56, 0x9c, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 15, |
| + .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19, |
| + 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34, |
| + 0x83, 0x39, 0x0f, 0x30, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 64, |
| + .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5, |
| + 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d, |
| + 0xac, 0xa6, 0x81, 0x63, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 247, |
| + .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01, |
| + 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10, |
| + 0x0a, 0xf6, 0x73, 0xe8, }, |
| +}}; |
| + |
| +static const struct hash_testvec blakes2s_224_tv_template[] = {{ |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 1, |
| + .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91, |
| + 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12, |
| + 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc, |
| + 0x48, 0x21, 0x97, 0xbb, }, |
| +}, { |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 247, |
| + .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e, |
| + 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4, |
| + 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc, |
| + 0x2b, 0xa4, 0xd5, 0xf6, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f, |
| + 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3, |
| + 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32, |
| + 0xa7, 0x19, 0xfc, 0xb8, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 7, |
| + .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76, |
| + 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6, |
| + 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8, |
| + 0x7b, 0x45, 0xfe, 0x05, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 15, |
| + .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36, |
| + 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43, |
| + 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e, |
| + 0x25, 0xab, 0xc5, 0x02, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 64, |
| + .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7, |
| + 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c, |
| + 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93, |
| + 0x6a, 0x31, 0x83, 0xb5, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 256, |
| + .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86, |
| + 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2, |
| + 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45, |
| + 0xb3, 0xd7, 0xec, 0xcc, }, |
| +}}; |
| + |
| +static const struct hash_testvec blakes2s_256_tv_template[] = {{ |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 15, |
| + .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21, |
| + 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67, |
| + 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04, |
| + 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b, |
| + 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b, |
| + 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a, |
| + 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 1, |
| + .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03, |
| + 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18, |
| + 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b, |
| + 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 7, |
| + .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03, |
| + 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15, |
| + 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34, |
| + 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, }, |
| +}, { |
| + .ksize = 32, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 64, |
| + .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66, |
| + 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26, |
| + 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab, |
| + 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, }, |
| +}, { |
| + .ksize = 1, |
| + .key = "B", |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 247, |
| + .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84, |
| + 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66, |
| + 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0, |
| + 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, }, |
| +}, { |
| + .ksize = 16, |
| + .key = blake2_ordered_sequence, |
| + .plaintext = blake2_ordered_sequence, |
| + .psize = 256, |
| + .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b, |
| + 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1, |
| + 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed, |
| + 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, }, |
| +}}; |
| + |
| #endif /* _CRYPTO_TESTMGR_H */ |
| -- |
| 2.18.2 |
| |
| |
| From ae1e1578754f22af47a32fde440803ad6f4c96d2 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:30 +0100 |
| Subject: [PATCH 023/100] crypto: blake2s - implement generic shash driver |
| |
| commit 7f9b0880925f1f9d7d59504ea0892d2ae9cfc233 upstream. |
| |
| Wire up our newly added Blake2s implementation via the shash API. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/Kconfig | 18 ++++ |
| crypto/Makefile | 1 + |
| crypto/blake2s_generic.c | 171 ++++++++++++++++++++++++++++++ |
| include/crypto/internal/blake2s.h | 5 + |
| 4 files changed, 195 insertions(+) |
| create mode 100644 crypto/blake2s_generic.c |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 0be5b4092f18..81c8a4059afc 100644 |
| |
| |
| @@ -639,6 +639,24 @@ config CRYPTO_XXHASH |
| xxHash non-cryptographic hash algorithm. Extremely fast, working at |
| speeds close to RAM limits. |
| |
| +config CRYPTO_BLAKE2S |
| + tristate "BLAKE2s digest algorithm" |
| + select CRYPTO_LIB_BLAKE2S_GENERIC |
| + select CRYPTO_HASH |
| + help |
| + Implementation of the cryptographic hash function BLAKE2s, |
| + optimized for 8-32bit platforms. It can produce digests of any size |
| + between 1 and 32 bytes. The keyed hash is also implemented. |
| + |
| + This module provides the following algorithms: |
| + |
| + - blake2s-128 |
| + - blake2s-160 |
| + - blake2s-224 |
| + - blake2s-256 |
| + |
| + See https://blake2.net for further information. |
| + |
| config CRYPTO_CRCT10DIF |
| tristate "CRCT10DIF algorithm" |
| select CRYPTO_HASH |
| diff --git a/crypto/Makefile b/crypto/Makefile |
| index aa740c8492b9..fd27edea7c8e 100644 |
| |
| |
| @@ -74,6 +74,7 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o |
| obj-$(CONFIG_CRYPTO_WP512) += wp512.o |
| CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 |
| obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o |
| +obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o |
| obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o |
| obj-$(CONFIG_CRYPTO_ECB) += ecb.o |
| obj-$(CONFIG_CRYPTO_CBC) += cbc.o |
| diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c |
| new file mode 100644 |
| index 000000000000..ed0c74640470 |
| |
| |
| @@ -0,0 +1,171 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/internal/blake2s.h> |
| +#include <crypto/internal/simd.h> |
| +#include <crypto/internal/hash.h> |
| + |
| +#include <linux/types.h> |
| +#include <linux/jump_label.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| + |
| +static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, |
| + unsigned int keylen) /* copy a 1..BLAKE2S_KEY_SIZE byte key into the tfm context; returns 0 or -EINVAL */ |
| +{ |
| + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); |
| + |
| + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { /* an empty key is rejected, as is one longer than BLAKE2S_KEY_SIZE */ |
| + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
| + return -EINVAL; |
| + } |
| + |
| + memcpy(tctx->key, key, keylen); |
| + tctx->keylen = keylen; /* a non-zero keylen makes crypto_blake2s_init() take the keyed path */ |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_init(struct shash_desc *desc) |
| +{ |
| + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + const int outlen = crypto_shash_digestsize(desc->tfm); |
| + |
| + if (tctx->keylen) |
| + blake2s_init_key(state, outlen, tctx->key, tctx->keylen); |
| + else |
| + blake2s_init(state, outlen); |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, |
| + unsigned int inlen) |
| +{ |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; |
| + |
| + if (unlikely(!inlen)) |
| + return 0; |
| + if (inlen > fill) { |
| + memcpy(state->buf + state->buflen, in, fill); |
| + blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); |
| + state->buflen = 0; |
| + in += fill; |
| + inlen -= fill; |
| + } |
| + if (inlen > BLAKE2S_BLOCK_SIZE) { |
| + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); |
| + /* Hash one less (full) block than strictly possible */ |
| + blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); |
| + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); |
| + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); |
| + } |
| + memcpy(state->buf + state->buflen, in, inlen); |
| + state->buflen += inlen; |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) |
| +{ |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + |
| + blake2s_set_lastblock(state); |
| + memset(state->buf + state->buflen, 0, |
| + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ |
| + blake2s_compress_generic(state, state->buf, 1, state->buflen); |
| + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); |
| + memcpy(out, state->h, state->outlen); |
| + memzero_explicit(state, sizeof(*state)); |
| + |
| + return 0; |
| +} |
| + |
| +static struct shash_alg blake2s_algs[] = {{ |
| + .base.cra_name = "blake2s-128", |
| + .base.cra_driver_name = "blake2s-128-generic", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_128_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-160", |
| + .base.cra_driver_name = "blake2s-160-generic", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_160_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-224", |
| + .base.cra_driver_name = "blake2s-224-generic", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_224_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-256", |
| + .base.cra_driver_name = "blake2s-256-generic", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_256_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}}; |
| + |
| +static int __init blake2s_mod_init(void) |
| +{ |
| + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| +} |
| + |
| +static void __exit blake2s_mod_exit(void) |
| +{ |
| + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| +} |
| + |
| +subsys_initcall(blake2s_mod_init); |
| +module_exit(blake2s_mod_exit); |
| + |
| +MODULE_ALIAS_CRYPTO("blake2s-128"); |
| +MODULE_ALIAS_CRYPTO("blake2s-128-generic"); |
| +MODULE_ALIAS_CRYPTO("blake2s-160"); |
| +MODULE_ALIAS_CRYPTO("blake2s-160-generic"); |
| +MODULE_ALIAS_CRYPTO("blake2s-224"); |
| +MODULE_ALIAS_CRYPTO("blake2s-224-generic"); |
| +MODULE_ALIAS_CRYPTO("blake2s-256"); |
| +MODULE_ALIAS_CRYPTO("blake2s-256-generic"); |
| +MODULE_LICENSE("GPL v2"); |
| diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h |
| index 941693effc7d..74ff77032e52 100644 |
| |
| |
| @@ -5,6 +5,11 @@ |
| |
| #include <crypto/blake2s.h> |
| |
| +struct blake2s_tfm_ctx { |
| + u8 key[BLAKE2S_KEY_SIZE]; |
| + unsigned int keylen; |
| +}; |
| + |
| void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, |
| size_t nblocks, const u32 inc); |
| |
| -- |
| 2.18.2 |
| |
| |
| From 5d9e7e09d015f1cc5d3ae6d1e4553f11ef79dca1 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:31 +0100 |
| Subject: [PATCH 024/100] crypto: blake2s - x86_64 SIMD implementation |
| |
| commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream. |
| |
| These implementations from Samuel Neves support AVX and AVX-512VL. |
| Originally this used AVX-512F, but Skylake thermal throttling made |
| AVX-512VL more attractive and possible to do with negligible difference. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Samuel Neves <sneves@dei.uc.pt> |
| Co-developed-by: Samuel Neves <sneves@dei.uc.pt> |
| [ardb: move to arch/x86/crypto, wire into lib/crypto framework] |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/Makefile | 2 + |
| arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++ |
| arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++ |
| crypto/Kconfig | 6 + |
| 4 files changed, 499 insertions(+) |
| create mode 100644 arch/x86/crypto/blake2s-core.S |
| create mode 100644 arch/x86/crypto/blake2s-glue.c |
| |
| diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile |
| index 759b1a927826..922c8ecfa00f 100644 |
| |
| |
| @@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes) |
| obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o |
| obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o |
| obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o |
| + obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o |
| endif |
| |
| # These modules require assembler to support AVX2. |
| @@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o |
| aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o |
| |
| nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o |
| +blake2s-x86_64-y := blake2s-core.o blake2s-glue.o |
| |
| ifeq ($(avx_supported),yes) |
| camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ |
| diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S |
| new file mode 100644 |
| index 000000000000..8591938eee26 |
| |
| |
| @@ -0,0 +1,258 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. |
| + */ |
| + |
| +#include <linux/linkage.h> |
| + |
| +.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 |
| +.align 32 |
| +IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 |
| + .octa 0x5BE0CD191F83D9AB9B05688C510E527F |
| +.section .rodata.cst16.ROT16, "aM", @progbits, 16 |
| +.align 16 |
| +ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 |
| +.section .rodata.cst16.ROR328, "aM", @progbits, 16 |
| +.align 16 |
| +ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 |
| +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 |
| +.align 64 |
| +SIGMA: |
| +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 |
| +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 |
| +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 |
| +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 |
| +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 |
| +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 |
| +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 |
| +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 |
| +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 |
| +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 |
| +#ifdef CONFIG_AS_AVX512 |
| +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 |
| +.align 64 |
| +SIGMA2: |
| +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 |
| +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 |
| +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 |
| +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 |
| +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 |
| +.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 |
| +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 |
| +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 |
| +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 |
| +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 |
| +#endif /* CONFIG_AS_AVX512 */ |
| + |
| +.text |
| +#ifdef CONFIG_AS_SSSE3 |
| +ENTRY(blake2s_compress_ssse3) |
| + testq %rdx,%rdx |
| + je .Lendofloop |
| + movdqu (%rdi),%xmm0 |
| + movdqu 0x10(%rdi),%xmm1 |
| + movdqa ROT16(%rip),%xmm12 |
| + movdqa ROR328(%rip),%xmm13 |
| + movdqu 0x20(%rdi),%xmm14 |
| + movq %rcx,%xmm15 |
| + leaq SIGMA+0xa0(%rip),%r8 |
| + jmp .Lbeginofloop |
| + .align 32 |
| +.Lbeginofloop: |
| + movdqa %xmm0,%xmm10 |
| + movdqa %xmm1,%xmm11 |
| + paddq %xmm15,%xmm14 |
| + movdqa IV(%rip),%xmm2 |
| + movdqa %xmm14,%xmm3 |
| + pxor IV+0x10(%rip),%xmm3 |
| + leaq SIGMA(%rip),%rcx |
| +.Lroundloop: |
| + movzbl (%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm4 |
| + movzbl 0x1(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm5 |
| + movzbl 0x2(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm6 |
| + movzbl 0x3(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm7 |
| + punpckldq %xmm5,%xmm4 |
| + punpckldq %xmm7,%xmm6 |
| + punpcklqdq %xmm6,%xmm4 |
| + paddd %xmm4,%xmm0 |
| + paddd %xmm1,%xmm0 |
| + pxor %xmm0,%xmm3 |
| + pshufb %xmm12,%xmm3 |
| + paddd %xmm3,%xmm2 |
| + pxor %xmm2,%xmm1 |
| + movdqa %xmm1,%xmm8 |
| + psrld $0xc,%xmm1 |
| + pslld $0x14,%xmm8 |
| + por %xmm8,%xmm1 |
| + movzbl 0x4(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm5 |
| + movzbl 0x5(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm6 |
| + movzbl 0x6(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm7 |
| + movzbl 0x7(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm4 |
| + punpckldq %xmm6,%xmm5 |
| + punpckldq %xmm4,%xmm7 |
| + punpcklqdq %xmm7,%xmm5 |
| + paddd %xmm5,%xmm0 |
| + paddd %xmm1,%xmm0 |
| + pxor %xmm0,%xmm3 |
| + pshufb %xmm13,%xmm3 |
| + paddd %xmm3,%xmm2 |
| + pxor %xmm2,%xmm1 |
| + movdqa %xmm1,%xmm8 |
| + psrld $0x7,%xmm1 |
| + pslld $0x19,%xmm8 |
| + por %xmm8,%xmm1 |
| + pshufd $0x93,%xmm0,%xmm0 |
| + pshufd $0x4e,%xmm3,%xmm3 |
| + pshufd $0x39,%xmm2,%xmm2 |
| + movzbl 0x8(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm6 |
| + movzbl 0x9(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm7 |
| + movzbl 0xa(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm4 |
| + movzbl 0xb(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm5 |
| + punpckldq %xmm7,%xmm6 |
| + punpckldq %xmm5,%xmm4 |
| + punpcklqdq %xmm4,%xmm6 |
| + paddd %xmm6,%xmm0 |
| + paddd %xmm1,%xmm0 |
| + pxor %xmm0,%xmm3 |
| + pshufb %xmm12,%xmm3 |
| + paddd %xmm3,%xmm2 |
| + pxor %xmm2,%xmm1 |
| + movdqa %xmm1,%xmm8 |
| + psrld $0xc,%xmm1 |
| + pslld $0x14,%xmm8 |
| + por %xmm8,%xmm1 |
| + movzbl 0xc(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm7 |
| + movzbl 0xd(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm4 |
| + movzbl 0xe(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm5 |
| + movzbl 0xf(%rcx),%eax |
| + movd (%rsi,%rax,4),%xmm6 |
| + punpckldq %xmm4,%xmm7 |
| + punpckldq %xmm6,%xmm5 |
| + punpcklqdq %xmm5,%xmm7 |
| + paddd %xmm7,%xmm0 |
| + paddd %xmm1,%xmm0 |
| + pxor %xmm0,%xmm3 |
| + pshufb %xmm13,%xmm3 |
| + paddd %xmm3,%xmm2 |
| + pxor %xmm2,%xmm1 |
| + movdqa %xmm1,%xmm8 |
| + psrld $0x7,%xmm1 |
| + pslld $0x19,%xmm8 |
| + por %xmm8,%xmm1 |
| + pshufd $0x39,%xmm0,%xmm0 |
| + pshufd $0x4e,%xmm3,%xmm3 |
| + pshufd $0x93,%xmm2,%xmm2 |
| + addq $0x10,%rcx |
| + cmpq %r8,%rcx |
| + jnz .Lroundloop |
| + pxor %xmm2,%xmm0 |
| + pxor %xmm3,%xmm1 |
| + pxor %xmm10,%xmm0 |
| + pxor %xmm11,%xmm1 |
| + addq $0x40,%rsi |
| + decq %rdx |
| + jnz .Lbeginofloop |
| + movdqu %xmm0,(%rdi) |
| + movdqu %xmm1,0x10(%rdi) |
| + movdqu %xmm14,0x20(%rdi) |
| +.Lendofloop: |
| + ret |
| +ENDPROC(blake2s_compress_ssse3) |
| +#endif /* CONFIG_AS_SSSE3 */ |
| + |
| +#ifdef CONFIG_AS_AVX512 |
| +ENTRY(blake2s_compress_avx512) |
| + vmovdqu (%rdi),%xmm0 |
| + vmovdqu 0x10(%rdi),%xmm1 |
| + vmovdqu 0x20(%rdi),%xmm4 |
| + vmovq %rcx,%xmm5 |
| + vmovdqa IV(%rip),%xmm14 |
| + vmovdqa IV+16(%rip),%xmm15 |
| + jmp .Lblake2s_compress_avx512_mainloop |
| +.align 32 |
| +.Lblake2s_compress_avx512_mainloop: |
| + vmovdqa %xmm0,%xmm10 |
| + vmovdqa %xmm1,%xmm11 |
| + vpaddq %xmm5,%xmm4,%xmm4 |
| + vmovdqa %xmm14,%xmm2 |
| + vpxor %xmm15,%xmm4,%xmm3 |
| + vmovdqu (%rsi),%ymm6 |
| + vmovdqu 0x20(%rsi),%ymm7 |
| + addq $0x40,%rsi |
| + leaq SIGMA2(%rip),%rax |
| + movb $0xa,%cl |
| +.Lblake2s_compress_avx512_roundloop: |
| + addq $0x40,%rax |
| + vmovdqa -0x40(%rax),%ymm8 |
| + vmovdqa -0x20(%rax),%ymm9 |
| + vpermi2d %ymm7,%ymm6,%ymm8 |
| + vpermi2d %ymm7,%ymm6,%ymm9 |
| + vmovdqa %ymm8,%ymm6 |
| + vmovdqa %ymm9,%ymm7 |
| + vpaddd %xmm8,%xmm0,%xmm0 |
| + vpaddd %xmm1,%xmm0,%xmm0 |
| + vpxor %xmm0,%xmm3,%xmm3 |
| + vprord $0x10,%xmm3,%xmm3 |
| + vpaddd %xmm3,%xmm2,%xmm2 |
| + vpxor %xmm2,%xmm1,%xmm1 |
| + vprord $0xc,%xmm1,%xmm1 |
| + vextracti128 $0x1,%ymm8,%xmm8 |
| + vpaddd %xmm8,%xmm0,%xmm0 |
| + vpaddd %xmm1,%xmm0,%xmm0 |
| + vpxor %xmm0,%xmm3,%xmm3 |
| + vprord $0x8,%xmm3,%xmm3 |
| + vpaddd %xmm3,%xmm2,%xmm2 |
| + vpxor %xmm2,%xmm1,%xmm1 |
| + vprord $0x7,%xmm1,%xmm1 |
| + vpshufd $0x93,%xmm0,%xmm0 |
| + vpshufd $0x4e,%xmm3,%xmm3 |
| + vpshufd $0x39,%xmm2,%xmm2 |
| + vpaddd %xmm9,%xmm0,%xmm0 |
| + vpaddd %xmm1,%xmm0,%xmm0 |
| + vpxor %xmm0,%xmm3,%xmm3 |
| + vprord $0x10,%xmm3,%xmm3 |
| + vpaddd %xmm3,%xmm2,%xmm2 |
| + vpxor %xmm2,%xmm1,%xmm1 |
| + vprord $0xc,%xmm1,%xmm1 |
| + vextracti128 $0x1,%ymm9,%xmm9 |
| + vpaddd %xmm9,%xmm0,%xmm0 |
| + vpaddd %xmm1,%xmm0,%xmm0 |
| + vpxor %xmm0,%xmm3,%xmm3 |
| + vprord $0x8,%xmm3,%xmm3 |
| + vpaddd %xmm3,%xmm2,%xmm2 |
| + vpxor %xmm2,%xmm1,%xmm1 |
| + vprord $0x7,%xmm1,%xmm1 |
| + vpshufd $0x39,%xmm0,%xmm0 |
| + vpshufd $0x4e,%xmm3,%xmm3 |
| + vpshufd $0x93,%xmm2,%xmm2 |
| + decb %cl |
| + jne .Lblake2s_compress_avx512_roundloop |
| + vpxor %xmm10,%xmm0,%xmm0 |
| + vpxor %xmm11,%xmm1,%xmm1 |
| + vpxor %xmm2,%xmm0,%xmm0 |
| + vpxor %xmm3,%xmm1,%xmm1 |
| + decq %rdx |
| + jne .Lblake2s_compress_avx512_mainloop |
| + vmovdqu %xmm0,(%rdi) |
| + vmovdqu %xmm1,0x10(%rdi) |
| + vmovdqu %xmm4,0x20(%rdi) |
| + vzeroupper |
| + retq |
| +ENDPROC(blake2s_compress_avx512) |
| +#endif /* CONFIG_AS_AVX512 */ |
| diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c |
| new file mode 100644 |
| index 000000000000..4a37ba7cdbe5 |
| |
| |
| @@ -0,0 +1,233 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/internal/blake2s.h> |
| +#include <crypto/internal/simd.h> |
| +#include <crypto/internal/hash.h> |
| + |
| +#include <linux/types.h> |
| +#include <linux/jump_label.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| + |
| +#include <asm/cpufeature.h> |
| +#include <asm/fpu/api.h> |
| +#include <asm/processor.h> |
| +#include <asm/simd.h> |
| + |
| +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, |
| + const u8 *block, const size_t nblocks, |
| + const u32 inc); |
| +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, |
| + const u8 *block, const size_t nblocks, |
| + const u32 inc); |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); |
| + |
| +void blake2s_compress_arch(struct blake2s_state *state, |
| + const u8 *block, size_t nblocks, |
| + const u32 inc) |
| +{ |
| + /* SIMD disables preemption, so relax after processing each page. */ |
| + BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); |
| + |
| + if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { |
| + blake2s_compress_generic(state, block, nblocks, inc); |
| + return; |
| + } |
| + |
| + for (;;) { |
| + const size_t blocks = min_t(size_t, nblocks, |
| + PAGE_SIZE / BLAKE2S_BLOCK_SIZE); |
| + |
| + kernel_fpu_begin(); |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && |
| + static_branch_likely(&blake2s_use_avx512)) |
| + blake2s_compress_avx512(state, block, blocks, inc); |
| + else |
| + blake2s_compress_ssse3(state, block, blocks, inc); |
| + kernel_fpu_end(); |
| + |
| + nblocks -= blocks; |
| + if (!nblocks) |
| + break; |
| + block += blocks * BLAKE2S_BLOCK_SIZE; |
| + } |
| +} |
| +EXPORT_SYMBOL(blake2s_compress_arch); |
| + |
| +static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, |
| + unsigned int keylen) |
| +{ |
| + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); |
| + |
| + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { |
| + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
| + return -EINVAL; |
| + } |
| + |
| + memcpy(tctx->key, key, keylen); |
| + tctx->keylen = keylen; |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_init(struct shash_desc *desc) |
| +{ |
| + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + const int outlen = crypto_shash_digestsize(desc->tfm); |
| + |
| + if (tctx->keylen) |
| + blake2s_init_key(state, outlen, tctx->key, tctx->keylen); |
| + else |
| + blake2s_init(state, outlen); |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, |
| + unsigned int inlen) |
| +{ |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; |
| + |
| + if (unlikely(!inlen)) |
| + return 0; |
| + if (inlen > fill) { |
| + memcpy(state->buf + state->buflen, in, fill); |
| + blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); |
| + state->buflen = 0; |
| + in += fill; |
| + inlen -= fill; |
| + } |
| + if (inlen > BLAKE2S_BLOCK_SIZE) { |
| + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); |
| + /* Hash one less (full) block than strictly possible */ |
| + blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); |
| + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); |
| + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); |
| + } |
| + memcpy(state->buf + state->buflen, in, inlen); |
| + state->buflen += inlen; |
| + |
| + return 0; |
| +} |
| + |
| +static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) |
| +{ |
| + struct blake2s_state *state = shash_desc_ctx(desc); |
| + |
| + blake2s_set_lastblock(state); |
| + memset(state->buf + state->buflen, 0, |
| + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ |
| + blake2s_compress_arch(state, state->buf, 1, state->buflen); |
| + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); |
| + memcpy(out, state->h, state->outlen); |
| + memzero_explicit(state, sizeof(*state)); |
| + |
| + return 0; |
| +} |
| + |
| +static struct shash_alg blake2s_algs[] = {{ |
| + .base.cra_name = "blake2s-128", |
| + .base.cra_driver_name = "blake2s-128-x86", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_128_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-160", |
| + .base.cra_driver_name = "blake2s-160-x86", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_160_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-224", |
| + .base.cra_driver_name = "blake2s-224-x86", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_224_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}, { |
| + .base.cra_name = "blake2s-256", |
| + .base.cra_driver_name = "blake2s-256-x86", |
| + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, |
| + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), |
| + .base.cra_priority = 200, |
| + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, |
| + .base.cra_module = THIS_MODULE, |
| + |
| + .digestsize = BLAKE2S_256_HASH_SIZE, |
| + .setkey = crypto_blake2s_setkey, |
| + .init = crypto_blake2s_init, |
| + .update = crypto_blake2s_update, |
| + .final = crypto_blake2s_final, |
| + .descsize = sizeof(struct blake2s_state), |
| +}}; |
| + |
| +static int __init blake2s_mod_init(void) |
| +{ |
| + if (!boot_cpu_has(X86_FEATURE_SSSE3)) |
| + return 0; |
| + |
| + static_branch_enable(&blake2s_use_ssse3); |
| + |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && |
| + boot_cpu_has(X86_FEATURE_AVX) && |
| + boot_cpu_has(X86_FEATURE_AVX2) && |
| + boot_cpu_has(X86_FEATURE_AVX512F) && |
| + boot_cpu_has(X86_FEATURE_AVX512VL) && |
| + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | |
| + XFEATURE_MASK_AVX512, NULL)) |
| + static_branch_enable(&blake2s_use_avx512); |
| + |
| + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| +} |
| + |
| +static void __exit blake2s_mod_exit(void) |
| +{ |
| + if (boot_cpu_has(X86_FEATURE_SSSE3)) |
| + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| +} |
| + |
| +module_init(blake2s_mod_init); |
| +module_exit(blake2s_mod_exit); |
| + |
| +MODULE_ALIAS_CRYPTO("blake2s-128"); |
| +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); |
| +MODULE_ALIAS_CRYPTO("blake2s-160"); |
| +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); |
| +MODULE_ALIAS_CRYPTO("blake2s-224"); |
| +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); |
| +MODULE_ALIAS_CRYPTO("blake2s-256"); |
| +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); |
| +MODULE_LICENSE("GPL v2"); |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 81c8a4059afc..8fd3954bf64c 100644 |
| |
| |
| @@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S |
| |
| See https://blake2.net for further information. |
| |
| +config CRYPTO_BLAKE2S_X86 |
| + tristate "BLAKE2s digest algorithm (x86 accelerated version)" |
| + depends on X86 && 64BIT |
| + select CRYPTO_LIB_BLAKE2S_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_BLAKE2S |
| + |
| config CRYPTO_CRCT10DIF |
| tristate "CRCT10DIF algorithm" |
| select CRYPTO_HASH |
| -- |
| 2.18.2 |
| |
| |
| From 854d7c4e760cb5841345ca2b90a451dedf784deb Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:32 +0100 |
| Subject: [PATCH 025/100] crypto: curve25519 - generic C library |
| implementations |
| |
| commit 0ed42a6f431e930b2e8fae21955406e09fe75d70 upstream. |
| |
| This contains two formally verified C implementations of the Curve25519 |
| scalar multiplication function, one for 32-bit systems, and one for |
| 64-bit systems whose compiler supports efficient 128-bit integer types. |
| Not only are these implementations formally verified, but they are also |
| the fastest available C implementations. They have been modified to be |
| friendly to kernel space and to be generally less horrendous looking, |
| but still an effort has been made to retain their formally verified |
| characteristic, and so the C might look slightly unidiomatic. |
| |
| The 64-bit version comes from HACL*: https://github.com/project-everest/hacl-star |
| The 32-bit version comes from Fiat: https://github.com/mit-plv/fiat-crypto |
| |
| Information: https://cr.yp.to/ecdh.html |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| [ardb: - move from lib/zinc to lib/crypto |
| - replace .c #includes with Kconfig based object selection |
| - drop simd handling and simplify support for per-arch versions ] |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/crypto/curve25519.h | 71 +++ |
| lib/crypto/Kconfig | 25 + |
| lib/crypto/Makefile | 5 + |
| lib/crypto/curve25519-fiat32.c | 864 +++++++++++++++++++++++++++++++++ |
| lib/crypto/curve25519-hacl64.c | 788 ++++++++++++++++++++++++++++++ |
| lib/crypto/curve25519.c | 25 + |
| 6 files changed, 1778 insertions(+) |
| create mode 100644 include/crypto/curve25519.h |
| create mode 100644 lib/crypto/curve25519-fiat32.c |
| create mode 100644 lib/crypto/curve25519-hacl64.c |
| create mode 100644 lib/crypto/curve25519.c |
| |
| diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h |
| new file mode 100644 |
| index 000000000000..4e6dc840b159 |
| |
| |
| @@ -0,0 +1,71 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef CURVE25519_H |
| +#define CURVE25519_H |
| + |
| +#include <crypto/algapi.h> // For crypto_memneq. |
| +#include <linux/types.h> |
| +#include <linux/random.h> |
| + |
| +enum curve25519_lengths { |
| + CURVE25519_KEY_SIZE = 32 |
| +}; |
| + |
| +extern const u8 curve25519_null_point[]; |
| +extern const u8 curve25519_base_point[]; |
| + |
| +void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], |
| + const u8 scalar[CURVE25519_KEY_SIZE], |
| + const u8 point[CURVE25519_KEY_SIZE]); |
| + |
| +void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], |
| + const u8 scalar[CURVE25519_KEY_SIZE], |
| + const u8 point[CURVE25519_KEY_SIZE]); |
| + |
| +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE]); |
| + |
| +static inline |
| +bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE], |
| + const u8 basepoint[CURVE25519_KEY_SIZE]) |
| +{ |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) |
| + curve25519_arch(mypublic, secret, basepoint); |
| + else |
| + curve25519_generic(mypublic, secret, basepoint); |
| + return crypto_memneq(mypublic, curve25519_null_point, |
| + CURVE25519_KEY_SIZE); |
| +} |
| + |
| +static inline bool |
| +__must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE]) |
| +{ |
| + if (unlikely(!crypto_memneq(secret, curve25519_null_point, |
| + CURVE25519_KEY_SIZE))) |
| + return false; |
| + |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) |
| + curve25519_base_arch(pub, secret); |
| + else |
| + curve25519_generic(pub, secret, curve25519_base_point); |
| + return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); |
| +} |
| + |
| +static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) |
| +{ |
| + secret[0] &= 248; |
| + secret[31] = (secret[31] & 127) | 64; |
| +} |
| + |
| +static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]) |
| +{ |
| + get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); |
| + curve25519_clamp_secret(secret); |
| +} |
| + |
| +#endif /* CURVE25519_H */ |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 7ad98b624e55..b1d830dc1c9e 100644 |
| |
| |
| @@ -59,6 +59,31 @@ config CRYPTO_LIB_CHACHA |
| by either the generic implementation or an arch-specific one, if one |
| is available and enabled. |
| |
| +config CRYPTO_ARCH_HAVE_LIB_CURVE25519 |
| + tristate |
| + help |
| + Declares whether the architecture provides an arch-specific |
| + accelerated implementation of the Curve25519 library interface, |
| + either builtin or as a module. |
| + |
| +config CRYPTO_LIB_CURVE25519_GENERIC |
| + tristate |
| + help |
| + This symbol can be depended upon by arch implementations of the |
| + Curve25519 library interface that require the generic code as a |
| + fallback, e.g., for SIMD implementations. If no arch specific |
| + implementation is enabled, this implementation serves the users |
| + of CRYPTO_LIB_CURVE25519. |
| + |
| +config CRYPTO_LIB_CURVE25519 |
| + tristate "Curve25519 scalar multiplication library" |
| + depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 |
| + select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n |
| + help |
| + Enable the Curve25519 library interface. This interface may be |
| + fulfilled by either the generic implementation or an arch-specific |
| + one, if one is available and enabled. |
| + |
| config CRYPTO_LIB_DES |
| tristate |
| |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 8ca66b5f9807..273c55d5e147 100644 |
| |
| |
| @@ -16,6 +16,11 @@ libblake2s-generic-y += blake2s-generic.o |
| obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o |
| libblake2s-y += blake2s.o |
| |
| +obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o |
| +libcurve25519-y := curve25519-fiat32.o |
| +libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o |
| +libcurve25519-y += curve25519.o |
| + |
| obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| libdes-y := des.o |
| |
| diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c |
| new file mode 100644 |
| index 000000000000..1c455207341d |
| |
| |
| @@ -0,0 +1,864 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2016 The fiat-crypto Authors. |
| + * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is a machine-generated formally verified implementation of Curve25519 |
| + * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally |
| + * machine generated, it has been tweaked to be suitable for use in the kernel. |
| + * It is optimized for 32-bit machines and machines that cannot work efficiently |
| + * with 128-bit integer types. |
| + */ |
| + |
| +#include <asm/unaligned.h> |
| +#include <crypto/curve25519.h> |
| +#include <linux/string.h> |
| + |
| +/* fe means field element. Here the field is \Z/(2^255-19). An element t, |
| + * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 |
| + * t[3]+2^102 t[4]+...+2^230 t[9]. |
| + * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc. |
| + * Multiplication and carrying produce fe from fe_loose. |
| + */ |
| +typedef struct fe { u32 v[10]; } fe; |
| + |
| +/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc |
| + * Addition and subtraction produce fe_loose from (fe, fe). |
| + */ |
| +typedef struct fe_loose { u32 v[10]; } fe_loose; |
| + |
| +static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s) |
| +{ |
| + /* Ignores top bit of s. */ |
| + u32 a0 = get_unaligned_le32(s); |
| + u32 a1 = get_unaligned_le32(s+4); |
| + u32 a2 = get_unaligned_le32(s+8); |
| + u32 a3 = get_unaligned_le32(s+12); |
| + u32 a4 = get_unaligned_le32(s+16); |
| + u32 a5 = get_unaligned_le32(s+20); |
| + u32 a6 = get_unaligned_le32(s+24); |
| + u32 a7 = get_unaligned_le32(s+28); |
| + h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */ |
| + h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */ |
| + h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */ |
| + h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */ |
| + h[4] = (a3>> 6); /* (32- 6) = 26 */ |
| + h[5] = a4&((1<<25)-1); /* 25 */ |
| + h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */ |
| + h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */ |
| + h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */ |
| + h[9] = (a7>> 6)&((1<<25)-1); /* 25 */ |
| +} |
| + |
| +static __always_inline void fe_frombytes(fe *h, const u8 *s) |
| +{ |
| + fe_frombytes_impl(h->v, s); |
| +} |
| + |
| +static __always_inline u8 /*bool*/ |
| +addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) |
| +{ |
| + /* This function extracts 25 bits of result and 1 bit of carry |
| + * (26 total), so a 32-bit intermediate is sufficient. |
| + */ |
| + u32 x = a + b + c; |
| + *low = x & ((1 << 25) - 1); |
| + return (x >> 25) & 1; |
| +} |
| + |
| +static __always_inline u8 /*bool*/ |
| +addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) |
| +{ |
| + /* This function extracts 26 bits of result and 1 bit of carry |
| + * (27 total), so a 32-bit intermediate is sufficient. |
| + */ |
| + u32 x = a + b + c; |
| + *low = x & ((1 << 26) - 1); |
| + return (x >> 26) & 1; |
| +} |
| + |
| +static __always_inline u8 /*bool*/ |
| +subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) |
| +{ |
| + /* This function extracts 25 bits of result and 1 bit of borrow |
| + * (26 total), so a 32-bit intermediate is sufficient. |
| + */ |
| + u32 x = a - b - c; |
| + *low = x & ((1 << 25) - 1); |
| + return x >> 31; |
| +} |
| + |
| +static __always_inline u8 /*bool*/ |
| +subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) |
| +{ |
| + /* This function extracts 26 bits of result and 1 bit of borrow |
| + * (27 total), so a 32-bit intermediate is sufficient. |
| + */ |
| + u32 x = a - b - c; |
| + *low = x & ((1 << 26) - 1); |
| + return x >> 31; |
| +} |
| + |
| +static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz) |
| +{ |
| + t = -!!t; /* all set if nonzero, 0 if 0 */ |
| + return (t&nz) | ((~t)&z); |
| +} |
| + |
| +static __always_inline void fe_freeze(u32 out[10], const u32 in1[10]) |
| +{ |
| + { const u32 x17 = in1[9]; |
| + { const u32 x18 = in1[8]; |
| + { const u32 x16 = in1[7]; |
| + { const u32 x14 = in1[6]; |
| + { const u32 x12 = in1[5]; |
| + { const u32 x10 = in1[4]; |
| + { const u32 x8 = in1[3]; |
| + { const u32 x6 = in1[2]; |
| + { const u32 x4 = in1[1]; |
| + { const u32 x2 = in1[0]; |
| + { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); |
| + { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23); |
| + { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26); |
| + { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29); |
| + { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32); |
| + { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35); |
| + { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38); |
| + { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41); |
| + { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44); |
| + { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47); |
| + { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff); |
| + { u32 x50 = (x49 & 0x3ffffed); |
| + { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52); |
| + { u32 x54 = (x49 & 0x1ffffff); |
| + { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56); |
| + { u32 x58 = (x49 & 0x3ffffff); |
| + { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60); |
| + { u32 x62 = (x49 & 0x1ffffff); |
| + { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64); |
| + { u32 x66 = (x49 & 0x3ffffff); |
| + { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68); |
| + { u32 x70 = (x49 & 0x1ffffff); |
| + { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72); |
| + { u32 x74 = (x49 & 0x3ffffff); |
| + { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76); |
| + { u32 x78 = (x49 & 0x1ffffff); |
| + { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80); |
| + { u32 x82 = (x49 & 0x3ffffff); |
| + { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84); |
| + { u32 x86 = (x49 & 0x1ffffff); |
| + { u32 x88; addcarryx_u25(x85, x47, x86, &x88); |
| + out[0] = x52; |
| + out[1] = x56; |
| + out[2] = x60; |
| + out[3] = x64; |
| + out[4] = x68; |
| + out[5] = x72; |
| + out[6] = x76; |
| + out[7] = x80; |
| + out[8] = x84; |
| + out[9] = x88; |
| + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +static __always_inline void fe_tobytes(u8 s[32], const fe *f) |
| +{ |
| + u32 h[10]; |
| + fe_freeze(h, f->v); |
| + s[0] = h[0] >> 0; |
| + s[1] = h[0] >> 8; |
| + s[2] = h[0] >> 16; |
| + s[3] = (h[0] >> 24) | (h[1] << 2); |
| + s[4] = h[1] >> 6; |
| + s[5] = h[1] >> 14; |
| + s[6] = (h[1] >> 22) | (h[2] << 3); |
| + s[7] = h[2] >> 5; |
| + s[8] = h[2] >> 13; |
| + s[9] = (h[2] >> 21) | (h[3] << 5); |
| + s[10] = h[3] >> 3; |
| + s[11] = h[3] >> 11; |
| + s[12] = (h[3] >> 19) | (h[4] << 6); |
| + s[13] = h[4] >> 2; |
| + s[14] = h[4] >> 10; |
| + s[15] = h[4] >> 18; |
| + s[16] = h[5] >> 0; |
| + s[17] = h[5] >> 8; |
| + s[18] = h[5] >> 16; |
| + s[19] = (h[5] >> 24) | (h[6] << 1); |
| + s[20] = h[6] >> 7; |
| + s[21] = h[6] >> 15; |
| + s[22] = (h[6] >> 23) | (h[7] << 3); |
| + s[23] = h[7] >> 5; |
| + s[24] = h[7] >> 13; |
| + s[25] = (h[7] >> 21) | (h[8] << 4); |
| + s[26] = h[8] >> 4; |
| + s[27] = h[8] >> 12; |
| + s[28] = (h[8] >> 20) | (h[9] << 6); |
| + s[29] = h[9] >> 2; |
| + s[30] = h[9] >> 10; |
| + s[31] = h[9] >> 18; |
| +} |
| + |
| +/* h = f */ |
| +static __always_inline void fe_copy(fe *h, const fe *f) |
| +{ |
| + memmove(h, f, sizeof(u32) * 10); |
| +} |
| + |
| +static __always_inline void fe_copy_lt(fe_loose *h, const fe *f) |
| +{ |
| + memmove(h, f, sizeof(u32) * 10); |
| +} |
| + |
| +/* h = 0 */ |
| +static __always_inline void fe_0(fe *h) |
| +{ |
| + memset(h, 0, sizeof(u32) * 10); |
| +} |
| + |
| +/* h = 1 */ |
| +static __always_inline void fe_1(fe *h) |
| +{ |
| + memset(h, 0, sizeof(u32) * 10); |
| + h->v[0] = 1; |
| +} |
| + |
| +static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +{ |
| + { const u32 x20 = in1[9]; |
| + { const u32 x21 = in1[8]; |
| + { const u32 x19 = in1[7]; |
| + { const u32 x17 = in1[6]; |
| + { const u32 x15 = in1[5]; |
| + { const u32 x13 = in1[4]; |
| + { const u32 x11 = in1[3]; |
| + { const u32 x9 = in1[2]; |
| + { const u32 x7 = in1[1]; |
| + { const u32 x5 = in1[0]; |
| + { const u32 x38 = in2[9]; |
| + { const u32 x39 = in2[8]; |
| + { const u32 x37 = in2[7]; |
| + { const u32 x35 = in2[6]; |
| + { const u32 x33 = in2[5]; |
| + { const u32 x31 = in2[4]; |
| + { const u32 x29 = in2[3]; |
| + { const u32 x27 = in2[2]; |
| + { const u32 x25 = in2[1]; |
| + { const u32 x23 = in2[0]; |
| + out[0] = (x5 + x23); |
| + out[1] = (x7 + x25); |
| + out[2] = (x9 + x27); |
| + out[3] = (x11 + x29); |
| + out[4] = (x13 + x31); |
| + out[5] = (x15 + x33); |
| + out[6] = (x17 + x35); |
| + out[7] = (x19 + x37); |
| + out[8] = (x21 + x39); |
| + out[9] = (x20 + x38); |
| + }}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +/* h = f + g |
| + * Can overlap h with f or g. |
| + */ |
| +static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) |
| +{ |
| + fe_add_impl(h->v, f->v, g->v); |
| +} |
| + |
| +static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +{ |
| + { const u32 x20 = in1[9]; |
| + { const u32 x21 = in1[8]; |
| + { const u32 x19 = in1[7]; |
| + { const u32 x17 = in1[6]; |
| + { const u32 x15 = in1[5]; |
| + { const u32 x13 = in1[4]; |
| + { const u32 x11 = in1[3]; |
| + { const u32 x9 = in1[2]; |
| + { const u32 x7 = in1[1]; |
| + { const u32 x5 = in1[0]; |
| + { const u32 x38 = in2[9]; |
| + { const u32 x39 = in2[8]; |
| + { const u32 x37 = in2[7]; |
| + { const u32 x35 = in2[6]; |
| + { const u32 x33 = in2[5]; |
| + { const u32 x31 = in2[4]; |
| + { const u32 x29 = in2[3]; |
| + { const u32 x27 = in2[2]; |
| + { const u32 x25 = in2[1]; |
| + { const u32 x23 = in2[0]; |
| + out[0] = ((0x7ffffda + x5) - x23); |
| + out[1] = ((0x3fffffe + x7) - x25); |
| + out[2] = ((0x7fffffe + x9) - x27); |
| + out[3] = ((0x3fffffe + x11) - x29); |
| + out[4] = ((0x7fffffe + x13) - x31); |
| + out[5] = ((0x3fffffe + x15) - x33); |
| + out[6] = ((0x7fffffe + x17) - x35); |
| + out[7] = ((0x3fffffe + x19) - x37); |
| + out[8] = ((0x7fffffe + x21) - x39); |
| + out[9] = ((0x3fffffe + x20) - x38); |
| + }}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +/* h = f - g |
| + * Can overlap h with f or g. |
| + */ |
| +static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) |
| +{ |
| + fe_sub_impl(h->v, f->v, g->v); |
| +} |
| + |
| +static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +{ |
| + { const u32 x20 = in1[9]; |
| + { const u32 x21 = in1[8]; |
| + { const u32 x19 = in1[7]; |
| + { const u32 x17 = in1[6]; |
| + { const u32 x15 = in1[5]; |
| + { const u32 x13 = in1[4]; |
| + { const u32 x11 = in1[3]; |
| + { const u32 x9 = in1[2]; |
| + { const u32 x7 = in1[1]; |
| + { const u32 x5 = in1[0]; |
| + { const u32 x38 = in2[9]; |
| + { const u32 x39 = in2[8]; |
| + { const u32 x37 = in2[7]; |
| + { const u32 x35 = in2[6]; |
| + { const u32 x33 = in2[5]; |
| + { const u32 x31 = in2[4]; |
| + { const u32 x29 = in2[3]; |
| + { const u32 x27 = in2[2]; |
| + { const u32 x25 = in2[1]; |
| + { const u32 x23 = in2[0]; |
| + { u64 x40 = ((u64)x23 * x5); |
| + { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); |
| + { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); |
| + { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); |
| + { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); |
| + { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); |
| + { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); |
| + { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); |
| + { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5)); |
| + { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); |
| + { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); |
| + { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); |
| + { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); |
| + { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); |
| + { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); |
| + { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); |
| + { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); |
| + { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); |
| + { u64 x58 = ((u64)(0x2 * x38) * x20); |
| + { u64 x59 = (x48 + (x58 << 0x4)); |
| + { u64 x60 = (x59 + (x58 << 0x1)); |
| + { u64 x61 = (x60 + x58); |
| + { u64 x62 = (x47 + (x57 << 0x4)); |
| + { u64 x63 = (x62 + (x57 << 0x1)); |
| + { u64 x64 = (x63 + x57); |
| + { u64 x65 = (x46 + (x56 << 0x4)); |
| + { u64 x66 = (x65 + (x56 << 0x1)); |
| + { u64 x67 = (x66 + x56); |
| + { u64 x68 = (x45 + (x55 << 0x4)); |
| + { u64 x69 = (x68 + (x55 << 0x1)); |
| + { u64 x70 = (x69 + x55); |
| + { u64 x71 = (x44 + (x54 << 0x4)); |
| + { u64 x72 = (x71 + (x54 << 0x1)); |
| + { u64 x73 = (x72 + x54); |
| + { u64 x74 = (x43 + (x53 << 0x4)); |
| + { u64 x75 = (x74 + (x53 << 0x1)); |
| + { u64 x76 = (x75 + x53); |
| + { u64 x77 = (x42 + (x52 << 0x4)); |
| + { u64 x78 = (x77 + (x52 << 0x1)); |
| + { u64 x79 = (x78 + x52); |
| + { u64 x80 = (x41 + (x51 << 0x4)); |
| + { u64 x81 = (x80 + (x51 << 0x1)); |
| + { u64 x82 = (x81 + x51); |
| + { u64 x83 = (x40 + (x50 << 0x4)); |
| + { u64 x84 = (x83 + (x50 << 0x1)); |
| + { u64 x85 = (x84 + x50); |
| + { u64 x86 = (x85 >> 0x1a); |
| + { u32 x87 = ((u32)x85 & 0x3ffffff); |
| + { u64 x88 = (x86 + x82); |
| + { u64 x89 = (x88 >> 0x19); |
| + { u32 x90 = ((u32)x88 & 0x1ffffff); |
| + { u64 x91 = (x89 + x79); |
| + { u64 x92 = (x91 >> 0x1a); |
| + { u32 x93 = ((u32)x91 & 0x3ffffff); |
| + { u64 x94 = (x92 + x76); |
| + { u64 x95 = (x94 >> 0x19); |
| + { u32 x96 = ((u32)x94 & 0x1ffffff); |
| + { u64 x97 = (x95 + x73); |
| + { u64 x98 = (x97 >> 0x1a); |
| + { u32 x99 = ((u32)x97 & 0x3ffffff); |
| + { u64 x100 = (x98 + x70); |
| + { u64 x101 = (x100 >> 0x19); |
| + { u32 x102 = ((u32)x100 & 0x1ffffff); |
| + { u64 x103 = (x101 + x67); |
| + { u64 x104 = (x103 >> 0x1a); |
| + { u32 x105 = ((u32)x103 & 0x3ffffff); |
| + { u64 x106 = (x104 + x64); |
| + { u64 x107 = (x106 >> 0x19); |
| + { u32 x108 = ((u32)x106 & 0x1ffffff); |
| + { u64 x109 = (x107 + x61); |
| + { u64 x110 = (x109 >> 0x1a); |
| + { u32 x111 = ((u32)x109 & 0x3ffffff); |
| + { u64 x112 = (x110 + x49); |
| + { u64 x113 = (x112 >> 0x19); |
| + { u32 x114 = ((u32)x112 & 0x1ffffff); |
| + { u64 x115 = (x87 + (0x13 * x113)); |
| + { u32 x116 = (u32) (x115 >> 0x1a); |
| + { u32 x117 = ((u32)x115 & 0x3ffffff); |
| + { u32 x118 = (x116 + x90); |
| + { u32 x119 = (x118 >> 0x19); |
| + { u32 x120 = (x118 & 0x1ffffff); |
| + out[0] = x117; |
| + out[1] = x120; |
| + out[2] = (x119 + x93); |
| + out[3] = x96; |
| + out[4] = x99; |
| + out[5] = x102; |
| + out[6] = x105; |
| + out[7] = x108; |
| + out[8] = x111; |
| + out[9] = x114; |
| + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g) |
| +{ |
| + fe_mul_impl(h->v, f->v, g->v); |
| +} |
| + |
| +static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) |
| +{ |
| + fe_mul_impl(h->v, f->v, g->v); |
| +} |
| + |
| +static __always_inline void |
| +fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) |
| +{ |
| + fe_mul_impl(h->v, f->v, g->v); |
| +} |
| + |
| +static void fe_sqr_impl(u32 out[10], const u32 in1[10]) |
| +{ |
| + { const u32 x17 = in1[9]; |
| + { const u32 x18 = in1[8]; |
| + { const u32 x16 = in1[7]; |
| + { const u32 x14 = in1[6]; |
| + { const u32 x12 = in1[5]; |
| + { const u32 x10 = in1[4]; |
| + { const u32 x8 = in1[3]; |
| + { const u32 x6 = in1[2]; |
| + { const u32 x4 = in1[1]; |
| + { const u32 x2 = in1[0]; |
| + { u64 x19 = ((u64)x2 * x2); |
| + { u64 x20 = ((u64)(0x2 * x2) * x4); |
| + { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6))); |
| + { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8))); |
| + { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10)); |
| + { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12))); |
| + { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12))); |
| + { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16))); |
| + { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12)))))); |
| + { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17))); |
| + { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17))))); |
| + { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17))); |
| + { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17)))))); |
| + { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17))); |
| + { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17))); |
| + { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17))); |
| + { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17)); |
| + { u64 x36 = ((u64)(0x2 * x18) * x17); |
| + { u64 x37 = ((u64)(0x2 * x17) * x17); |
| + { u64 x38 = (x27 + (x37 << 0x4)); |
| + { u64 x39 = (x38 + (x37 << 0x1)); |
| + { u64 x40 = (x39 + x37); |
| + { u64 x41 = (x26 + (x36 << 0x4)); |
| + { u64 x42 = (x41 + (x36 << 0x1)); |
| + { u64 x43 = (x42 + x36); |
| + { u64 x44 = (x25 + (x35 << 0x4)); |
| + { u64 x45 = (x44 + (x35 << 0x1)); |
| + { u64 x46 = (x45 + x35); |
| + { u64 x47 = (x24 + (x34 << 0x4)); |
| + { u64 x48 = (x47 + (x34 << 0x1)); |
| + { u64 x49 = (x48 + x34); |
| + { u64 x50 = (x23 + (x33 << 0x4)); |
| + { u64 x51 = (x50 + (x33 << 0x1)); |
| + { u64 x52 = (x51 + x33); |
| + { u64 x53 = (x22 + (x32 << 0x4)); |
| + { u64 x54 = (x53 + (x32 << 0x1)); |
| + { u64 x55 = (x54 + x32); |
| + { u64 x56 = (x21 + (x31 << 0x4)); |
| + { u64 x57 = (x56 + (x31 << 0x1)); |
| + { u64 x58 = (x57 + x31); |
| + { u64 x59 = (x20 + (x30 << 0x4)); |
| + { u64 x60 = (x59 + (x30 << 0x1)); |
| + { u64 x61 = (x60 + x30); |
| + { u64 x62 = (x19 + (x29 << 0x4)); |
| + { u64 x63 = (x62 + (x29 << 0x1)); |
| + { u64 x64 = (x63 + x29); |
| + { u64 x65 = (x64 >> 0x1a); |
| + { u32 x66 = ((u32)x64 & 0x3ffffff); |
| + { u64 x67 = (x65 + x61); |
| + { u64 x68 = (x67 >> 0x19); |
| + { u32 x69 = ((u32)x67 & 0x1ffffff); |
| + { u64 x70 = (x68 + x58); |
| + { u64 x71 = (x70 >> 0x1a); |
| + { u32 x72 = ((u32)x70 & 0x3ffffff); |
| + { u64 x73 = (x71 + x55); |
| + { u64 x74 = (x73 >> 0x19); |
| + { u32 x75 = ((u32)x73 & 0x1ffffff); |
| + { u64 x76 = (x74 + x52); |
| + { u64 x77 = (x76 >> 0x1a); |
| + { u32 x78 = ((u32)x76 & 0x3ffffff); |
| + { u64 x79 = (x77 + x49); |
| + { u64 x80 = (x79 >> 0x19); |
| + { u32 x81 = ((u32)x79 & 0x1ffffff); |
| + { u64 x82 = (x80 + x46); |
| + { u64 x83 = (x82 >> 0x1a); |
| + { u32 x84 = ((u32)x82 & 0x3ffffff); |
| + { u64 x85 = (x83 + x43); |
| + { u64 x86 = (x85 >> 0x19); |
| + { u32 x87 = ((u32)x85 & 0x1ffffff); |
| + { u64 x88 = (x86 + x40); |
| + { u64 x89 = (x88 >> 0x1a); |
| + { u32 x90 = ((u32)x88 & 0x3ffffff); |
| + { u64 x91 = (x89 + x28); |
| + { u64 x92 = (x91 >> 0x19); |
| + { u32 x93 = ((u32)x91 & 0x1ffffff); |
| + { u64 x94 = (x66 + (0x13 * x92)); |
| + { u32 x95 = (u32) (x94 >> 0x1a); |
| + { u32 x96 = ((u32)x94 & 0x3ffffff); |
| + { u32 x97 = (x95 + x69); |
| + { u32 x98 = (x97 >> 0x19); |
| + { u32 x99 = (x97 & 0x1ffffff); |
| + out[0] = x96; |
| + out[1] = x99; |
| + out[2] = (x98 + x72); |
| + out[3] = x75; |
| + out[4] = x78; |
| + out[5] = x81; |
| + out[6] = x84; |
| + out[7] = x87; |
| + out[8] = x90; |
| + out[9] = x93; |
| + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +static __always_inline void fe_sq_tl(fe *h, const fe_loose *f) |
| +{ |
| + fe_sqr_impl(h->v, f->v); |
| +} |
| + |
| +static __always_inline void fe_sq_tt(fe *h, const fe *f) |
| +{ |
| + fe_sqr_impl(h->v, f->v); |
| +} |
| + |
| +static __always_inline void fe_loose_invert(fe *out, const fe_loose *z) |
| +{ |
| + fe t0; |
| + fe t1; |
| + fe t2; |
| + fe t3; |
| + int i; |
| + |
| + fe_sq_tl(&t0, z); |
| + fe_sq_tt(&t1, &t0); |
| + for (i = 1; i < 2; ++i) |
| + fe_sq_tt(&t1, &t1); |
| + fe_mul_tlt(&t1, z, &t1); |
| + fe_mul_ttt(&t0, &t0, &t1); |
| + fe_sq_tt(&t2, &t0); |
| + fe_mul_ttt(&t1, &t1, &t2); |
| + fe_sq_tt(&t2, &t1); |
| + for (i = 1; i < 5; ++i) |
| + fe_sq_tt(&t2, &t2); |
| + fe_mul_ttt(&t1, &t2, &t1); |
| + fe_sq_tt(&t2, &t1); |
| + for (i = 1; i < 10; ++i) |
| + fe_sq_tt(&t2, &t2); |
| + fe_mul_ttt(&t2, &t2, &t1); |
| + fe_sq_tt(&t3, &t2); |
| + for (i = 1; i < 20; ++i) |
| + fe_sq_tt(&t3, &t3); |
| + fe_mul_ttt(&t2, &t3, &t2); |
| + fe_sq_tt(&t2, &t2); |
| + for (i = 1; i < 10; ++i) |
| + fe_sq_tt(&t2, &t2); |
| + fe_mul_ttt(&t1, &t2, &t1); |
| + fe_sq_tt(&t2, &t1); |
| + for (i = 1; i < 50; ++i) |
| + fe_sq_tt(&t2, &t2); |
| + fe_mul_ttt(&t2, &t2, &t1); |
| + fe_sq_tt(&t3, &t2); |
| + for (i = 1; i < 100; ++i) |
| + fe_sq_tt(&t3, &t3); |
| + fe_mul_ttt(&t2, &t3, &t2); |
| + fe_sq_tt(&t2, &t2); |
| + for (i = 1; i < 50; ++i) |
| + fe_sq_tt(&t2, &t2); |
| + fe_mul_ttt(&t1, &t2, &t1); |
| + fe_sq_tt(&t1, &t1); |
| + for (i = 1; i < 5; ++i) |
| + fe_sq_tt(&t1, &t1); |
| + fe_mul_ttt(out, &t1, &t0); |
| +} |
| + |
| +static __always_inline void fe_invert(fe *out, const fe *z) |
| +{ |
| + fe_loose l; |
| + fe_copy_lt(&l, z); |
| + fe_loose_invert(out, &l); |
| +} |
| + |
| +/* Replace (f,g) with (g,f) if b == 1; |
| + * replace (f,g) with (f,g) if b == 0. |
| + * |
| + * Preconditions: b in {0,1} |
| + */ |
| +static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b) |
| +{ |
| + unsigned i; |
| + b = 0 - b; |
| + for (i = 0; i < 10; i++) { |
| + u32 x = f->v[i] ^ g->v[i]; |
| + x &= b; |
| + f->v[i] ^= x; |
| + g->v[i] ^= x; |
| + } |
| +} |
| + |
| +/* NOTE: based on fiat-crypto fe_mul, edited for in2 = (121666, 0, ..., 0). */ |
| +static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10]) |
| +{ |
| + { const u32 x20 = in1[9]; |
| + { const u32 x21 = in1[8]; |
| + { const u32 x19 = in1[7]; |
| + { const u32 x17 = in1[6]; |
| + { const u32 x15 = in1[5]; |
| + { const u32 x13 = in1[4]; |
| + { const u32 x11 = in1[3]; |
| + { const u32 x9 = in1[2]; |
| + { const u32 x7 = in1[1]; |
| + { const u32 x5 = in1[0]; |
| + { const u32 x38 = 0; |
| + { const u32 x39 = 0; |
| + { const u32 x37 = 0; |
| + { const u32 x35 = 0; |
| + { const u32 x33 = 0; |
| + { const u32 x31 = 0; |
| + { const u32 x29 = 0; |
| + { const u32 x27 = 0; |
| + { const u32 x25 = 0; |
| + { const u32 x23 = 121666; |
| + { u64 x40 = ((u64)x23 * x5); |
| + { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); |
| + { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); |
| + { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); |
| + { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); |
| + { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); |
| + { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); |
| + { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); |
| + { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5)); |
| + { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); |
| + { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); |
| + { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); |
| + { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); |
| + { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); |
| + { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); |
| + { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); |
| + { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); |
| + { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); |
| + { u64 x58 = ((u64)(0x2 * x38) * x20); |
| + { u64 x59 = (x48 + (x58 << 0x4)); |
| + { u64 x60 = (x59 + (x58 << 0x1)); |
| + { u64 x61 = (x60 + x58); |
| + { u64 x62 = (x47 + (x57 << 0x4)); |
| + { u64 x63 = (x62 + (x57 << 0x1)); |
| + { u64 x64 = (x63 + x57); |
| + { u64 x65 = (x46 + (x56 << 0x4)); |
| + { u64 x66 = (x65 + (x56 << 0x1)); |
| + { u64 x67 = (x66 + x56); |
| + { u64 x68 = (x45 + (x55 << 0x4)); |
| + { u64 x69 = (x68 + (x55 << 0x1)); |
| + { u64 x70 = (x69 + x55); |
| + { u64 x71 = (x44 + (x54 << 0x4)); |
| + { u64 x72 = (x71 + (x54 << 0x1)); |
| + { u64 x73 = (x72 + x54); |
| + { u64 x74 = (x43 + (x53 << 0x4)); |
| + { u64 x75 = (x74 + (x53 << 0x1)); |
| + { u64 x76 = (x75 + x53); |
| + { u64 x77 = (x42 + (x52 << 0x4)); |
| + { u64 x78 = (x77 + (x52 << 0x1)); |
| + { u64 x79 = (x78 + x52); |
| + { u64 x80 = (x41 + (x51 << 0x4)); |
| + { u64 x81 = (x80 + (x51 << 0x1)); |
| + { u64 x82 = (x81 + x51); |
| + { u64 x83 = (x40 + (x50 << 0x4)); |
| + { u64 x84 = (x83 + (x50 << 0x1)); |
| + { u64 x85 = (x84 + x50); |
| + { u64 x86 = (x85 >> 0x1a); |
| + { u32 x87 = ((u32)x85 & 0x3ffffff); |
| + { u64 x88 = (x86 + x82); |
| + { u64 x89 = (x88 >> 0x19); |
| + { u32 x90 = ((u32)x88 & 0x1ffffff); |
| + { u64 x91 = (x89 + x79); |
| + { u64 x92 = (x91 >> 0x1a); |
| + { u32 x93 = ((u32)x91 & 0x3ffffff); |
| + { u64 x94 = (x92 + x76); |
| + { u64 x95 = (x94 >> 0x19); |
| + { u32 x96 = ((u32)x94 & 0x1ffffff); |
| + { u64 x97 = (x95 + x73); |
| + { u64 x98 = (x97 >> 0x1a); |
| + { u32 x99 = ((u32)x97 & 0x3ffffff); |
| + { u64 x100 = (x98 + x70); |
| + { u64 x101 = (x100 >> 0x19); |
| + { u32 x102 = ((u32)x100 & 0x1ffffff); |
| + { u64 x103 = (x101 + x67); |
| + { u64 x104 = (x103 >> 0x1a); |
| + { u32 x105 = ((u32)x103 & 0x3ffffff); |
| + { u64 x106 = (x104 + x64); |
| + { u64 x107 = (x106 >> 0x19); |
| + { u32 x108 = ((u32)x106 & 0x1ffffff); |
| + { u64 x109 = (x107 + x61); |
| + { u64 x110 = (x109 >> 0x1a); |
| + { u32 x111 = ((u32)x109 & 0x3ffffff); |
| + { u64 x112 = (x110 + x49); |
| + { u64 x113 = (x112 >> 0x19); |
| + { u32 x114 = ((u32)x112 & 0x1ffffff); |
| + { u64 x115 = (x87 + (0x13 * x113)); |
| + { u32 x116 = (u32) (x115 >> 0x1a); |
| + { u32 x117 = ((u32)x115 & 0x3ffffff); |
| + { u32 x118 = (x116 + x90); |
| + { u32 x119 = (x118 >> 0x19); |
| + { u32 x120 = (x118 & 0x1ffffff); |
| + out[0] = x117; |
| + out[1] = x120; |
| + out[2] = (x119 + x93); |
| + out[3] = x96; |
| + out[4] = x99; |
| + out[5] = x102; |
| + out[6] = x105; |
| + out[7] = x108; |
| + out[8] = x111; |
| + out[9] = x114; |
| + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} |
| +} |
| + |
| +static __always_inline void fe_mul121666(fe *h, const fe_loose *f) |
| +{ |
| + fe_mul_121666_impl(h->v, f->v); |
| +} |
| + |
| +void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], |
| + const u8 scalar[CURVE25519_KEY_SIZE], |
| + const u8 point[CURVE25519_KEY_SIZE]) |
| +{ |
| + fe x1, x2, z2, x3, z3; |
| + fe_loose x2l, z2l, x3l; |
| + unsigned swap = 0; |
| + int pos; |
| + u8 e[32]; |
| + |
| + memcpy(e, scalar, 32); |
| + curve25519_clamp_secret(e); |
| + |
| + /* The following implementation was transcribed to Coq and proven to |
| + * correspond to unary scalar multiplication in affine coordinates given |
| + * that x1 != 0 is the x coordinate of some point on the curve. It was |
| + * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives |
| + * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was |
| + * quantified over the underlying field, so it applies to Curve25519 |
| + * itself and the quadratic twist of Curve25519. It was not proven in |
| + * Coq that prime-field arithmetic correctly simulates extension-field |
| + * arithmetic on prime-field values. The decoding of the byte array |
| + * representation of e was not considered. |
| + * |
| + * Specification of Montgomery curves in affine coordinates: |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27> |
| + * |
| + * Proof that these form a group that is isomorphic to a Weierstrass |
| + * curve: |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35> |
| + * |
| + * Coq transcription and correctness proof of the loop |
| + * (where scalarbits=255): |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118> |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278> |
| + * preconditions: 0 <= e < 2^255 (not necessarily e < order), |
| + * fe_invert(0) = 0 |
| + */ |
| + fe_frombytes(&x1, point); |
| + fe_1(&x2); |
| + fe_0(&z2); |
| + fe_copy(&x3, &x1); |
| + fe_1(&z3); |
| + |
| + for (pos = 254; pos >= 0; --pos) { |
| + fe tmp0, tmp1; |
| + fe_loose tmp0l, tmp1l; |
| + /* loop invariant as of right before the test, for the case |
| + * where x1 != 0: |
| + * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 |
| + * is nonzero |
| + * let r := e >> (pos+1) in the following equalities of |
| + * projective points: |
| + * to_xz (r*P) === if swap then (x3, z3) else (x2, z2) |
| + * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3) |
| + * x1 is the nonzero x coordinate of the nonzero |
| + * point (r*P-(r+1)*P) |
| + */ |
| + unsigned b = 1 & (e[pos / 8] >> (pos & 7)); |
| + swap ^= b; |
| + fe_cswap(&x2, &x3, swap); |
| + fe_cswap(&z2, &z3, swap); |
| + swap = b; |
| + /* Coq transcription of ladderstep formula (called from |
| + * transcribed loop): |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89> |
| + * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131> |
| + * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217> |
| + * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147> |
| + */ |
| + fe_sub(&tmp0l, &x3, &z3); |
| + fe_sub(&tmp1l, &x2, &z2); |
| + fe_add(&x2l, &x2, &z2); |
| + fe_add(&z2l, &x3, &z3); |
| + fe_mul_tll(&z3, &tmp0l, &x2l); |
| + fe_mul_tll(&z2, &z2l, &tmp1l); |
| + fe_sq_tl(&tmp0, &tmp1l); |
| + fe_sq_tl(&tmp1, &x2l); |
| + fe_add(&x3l, &z3, &z2); |
| + fe_sub(&z2l, &z3, &z2); |
| + fe_mul_ttt(&x2, &tmp1, &tmp0); |
| + fe_sub(&tmp1l, &tmp1, &tmp0); |
| + fe_sq_tl(&z2, &z2l); |
| + fe_mul121666(&z3, &tmp1l); |
| + fe_sq_tl(&x3, &x3l); |
| + fe_add(&tmp0l, &tmp0, &z3); |
| + fe_mul_ttt(&z3, &x1, &z2); |
| + fe_mul_tll(&z2, &tmp1l, &tmp0l); |
| + } |
| + /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) |
| + * else (x2, z2) |
| + */ |
| + fe_cswap(&x2, &x3, swap); |
| + fe_cswap(&z2, &z3, swap); |
| + |
| + fe_invert(&z2, &z2); |
| + fe_mul_ttt(&x2, &x2, &z2); |
| + fe_tobytes(out, &x2); |
| + |
| + memzero_explicit(&x1, sizeof(x1)); |
| + memzero_explicit(&x2, sizeof(x2)); |
| + memzero_explicit(&z2, sizeof(z2)); |
| + memzero_explicit(&x3, sizeof(x3)); |
| + memzero_explicit(&z3, sizeof(z3)); |
| + memzero_explicit(&x2l, sizeof(x2l)); |
| + memzero_explicit(&z2l, sizeof(z2l)); |
| + memzero_explicit(&x3l, sizeof(x3l)); |
| + memzero_explicit(&e, sizeof(e)); |
| +} |
| diff --git a/lib/crypto/curve25519-hacl64.c b/lib/crypto/curve25519-hacl64.c |
| new file mode 100644 |
| index 000000000000..771d82dc5f14 |
| |
| |
| @@ -0,0 +1,788 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. |
| + * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is a machine-generated formally verified implementation of Curve25519 |
| + * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine |
| + * generated, it has been tweaked to be suitable for use in the kernel. It is |
| + * optimized for 64-bit machines that can efficiently work with 128-bit |
| + * integer types. |
| + */ |
| + |
| +#include <asm/unaligned.h> |
| +#include <crypto/curve25519.h> |
| +#include <linux/string.h> |
| + |
| +typedef __uint128_t u128; |
| + |
| +static __always_inline u64 u64_eq_mask(u64 a, u64 b) |
| +{ /* Branch-free equality test: returns ~0 if a == b, else 0. */ |
| + u64 x = a ^ b; |
| + u64 minus_x = ~x + (u64)1U; /* two's-complement negation of x */ |
| + u64 x_or_minus_x = x | minus_x; /* top bit is set iff x != 0 */ |
| + u64 xnx = x_or_minus_x >> (u32)63U; |
| + u64 c = xnx - (u64)1U; /* 1 becomes 0, 0 becomes all ones */ |
| + return c; |
| +} |
| + |
| +static __always_inline u64 u64_gte_mask(u64 a, u64 b) |
| +{ /* Branch-free comparison: returns ~0 if a >= b, else 0. */ |
| + u64 x = a; |
| + u64 y = b; |
| + u64 x_xor_y = x ^ y; |
| + u64 x_sub_y = x - y; |
| + u64 x_sub_y_xor_y = x_sub_y ^ y; |
| + u64 q = x_xor_y | x_sub_y_xor_y; |
| + u64 x_xor_q = x ^ q; |
| + u64 x_xor_q_ = x_xor_q >> (u32)63U; /* 1 iff a < b (unsigned) */ |
| + u64 c = x_xor_q_ - (u64)1U; /* 1 becomes 0, 0 becomes all ones */ |
| + return c; |
| +} |
| + |
| +static __always_inline void modulo_carry_top(u64 *b) |
| +{ /* Fold the bits of b[4] above bit 50 back into b[0], scaled by 19. */ |
| + u64 b4 = b[4]; |
| + u64 b0 = b[0]; |
| + u64 b4_ = b4 & 0x7ffffffffffffLLU; /* keep the low 51 bits */ |
| + u64 b0_ = b0 + 19 * (b4 >> 51); /* 2^255 = 19 (mod 2^255 - 19) */ |
| + b[4] = b4_; |
| + b[0] = b0_; |
| +} |
| + |
| +static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) |
| +{ /* Truncate each of the five 128-bit limbs of input to 64 bits in output. */ |
| + { |
| + u128 xi = input[0]; |
| + output[0] = ((u64)(xi)); /* high 64 bits are discarded */ |
| + } |
| + { |
| + u128 xi = input[1]; |
| + output[1] = ((u64)(xi)); |
| + } |
| + { |
| + u128 xi = input[2]; |
| + output[2] = ((u64)(xi)); |
| + } |
| + { |
| + u128 xi = input[3]; |
| + output[3] = ((u64)(xi)); |
| + } |
| + { |
| + u128 xi = input[4]; |
| + output[4] = ((u64)(xi)); |
| + } |
| +} |
| + |
| +static __always_inline void |
| +fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) |
| +{ /* output[i] += input[i] * s for the five limbs, accumulated in 128 bits. */ |
| + output[0] += (u128)input[0] * s; /* cast first so the product is 128-bit */ |
| + output[1] += (u128)input[1] * s; |
| + output[2] += (u128)input[2] * s; |
| + output[3] += (u128)input[3] * s; |
| + output[4] += (u128)input[4] * s; |
| +} |
| + |
| +static __always_inline void fproduct_carry_wide_(u128 *tmp) |
| +{ /* Carry chain over limbs 0..3: each keeps 51 bits, the rest moves up one limb. */ |
| + { |
| + u32 ctr = 0; |
| + u128 tctr = tmp[ctr]; |
| + u128 tctrp1 = tmp[ctr + 1]; |
| + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; /* low 51 bits stay */ |
| + u128 c = ((tctr) >> (51)); /* everything above is the carry */ |
| + tmp[ctr] = ((u128)(r0)); |
| + tmp[ctr + 1] = ((tctrp1) + (c)); |
| + } |
| + { |
| + u32 ctr = 1; |
| + u128 tctr = tmp[ctr]; |
| + u128 tctrp1 = tmp[ctr + 1]; |
| + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; |
| + u128 c = ((tctr) >> (51)); |
| + tmp[ctr] = ((u128)(r0)); |
| + tmp[ctr + 1] = ((tctrp1) + (c)); |
| + } |
| + |
| + { |
| + u32 ctr = 2; |
| + u128 tctr = tmp[ctr]; |
| + u128 tctrp1 = tmp[ctr + 1]; |
| + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; |
| + u128 c = ((tctr) >> (51)); |
| + tmp[ctr] = ((u128)(r0)); |
| + tmp[ctr + 1] = ((tctrp1) + (c)); |
| + } |
| + { |
| + u32 ctr = 3; |
| + u128 tctr = tmp[ctr]; |
| + u128 tctrp1 = tmp[ctr + 1]; |
| + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; |
| + u128 c = ((tctr) >> (51)); |
| + tmp[ctr] = ((u128)(r0)); |
| + tmp[ctr + 1] = ((tctrp1) + (c)); /* tmp[4] itself is not reduced here */ |
| + } |
| +} |
| + |
| +static __always_inline void fmul_shift_reduce(u64 *output) |
| +{ /* Move every limb up one slot; the old top limb re-enters slot 0 times 19. */ |
| + u64 tmp = output[4]; /* saved before the shifts below overwrite it */ |
| + u64 b0; |
| + { |
| + u32 ctr = 5 - 0 - 1; /* output[4] = output[3] */ |
| + u64 z = output[ctr - 1]; |
| + output[ctr] = z; |
| + } |
| + { |
| + u32 ctr = 5 - 1 - 1; /* output[3] = output[2] */ |
| + u64 z = output[ctr - 1]; |
| + output[ctr] = z; |
| + } |
| + { |
| + u32 ctr = 5 - 2 - 1; /* output[2] = output[1] */ |
| + u64 z = output[ctr - 1]; |
| + output[ctr] = z; |
| + } |
| + { |
| + u32 ctr = 5 - 3 - 1; /* output[1] = output[0] */ |
| + u64 z = output[ctr - 1]; |
| + output[ctr] = z; |
| + } |
| + output[0] = tmp; |
| + b0 = output[0]; |
| + output[0] = 19 * b0; /* wrap-around factor: 2^255 = 19 (mod 2^255 - 19) */ |
| +} |
| + |
| +static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, |
| + u64 *input21) |
| +{ /* Schoolbook product: output += input * input21[i], i = 0..4; input is clobbered. */ |
| + u32 i; |
| + u64 input2i; /* only used for the final limb; the blocks below shadow it */ |
| + { |
| + u64 input2i = input21[0]; |
| + fproduct_sum_scalar_multiplication_(output, input, input2i); |
| + fmul_shift_reduce(input); /* advance input one limb position for the next term */ |
| + } |
| + { |
| + u64 input2i = input21[1]; |
| + fproduct_sum_scalar_multiplication_(output, input, input2i); |
| + fmul_shift_reduce(input); |
| + } |
| + { |
| + u64 input2i = input21[2]; |
| + fproduct_sum_scalar_multiplication_(output, input, input2i); |
| + fmul_shift_reduce(input); |
| + } |
| + { |
| + u64 input2i = input21[3]; |
| + fproduct_sum_scalar_multiplication_(output, input, input2i); |
| + fmul_shift_reduce(input); |
| + } |
| + i = 4; |
| + input2i = input21[i]; |
| + fproduct_sum_scalar_multiplication_(output, input, input2i); /* last term: no shift afterwards */ |
| +} |
| + |
| +static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) |
| +{ /* output = input * input21 in the 5 x 51-bit limb form; output is written last. */ |
| + u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; /* private copy: the multiply clobbers it */ |
| + { |
| + u128 b4; |
| + u128 b0; |
| + u128 b4_; |
| + u128 b0_; |
| + u64 i0; |
| + u64 i1; |
| + u64 i0_; |
| + u64 i1_; |
| + u128 t[5] = { 0 }; /* 128-bit accumulators, must start at zero */ |
| + fmul_mul_shift_reduce_(t, tmp, input21); |
| + fproduct_carry_wide_(t); |
| + b4 = t[4]; |
| + b0 = t[0]; |
| + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); |
| + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* fold the top limb's overflow into limb 0 */ |
| + t[4] = b4_; |
| + t[0] = b0_; |
| + fproduct_copy_from_wide_(output, t); |
| + i0 = output[0]; |
| + i1 = output[1]; |
| + i0_ = i0 & 0x7ffffffffffffLLU; |
| + i1_ = i1 + (i0 >> 51); /* one more carry, limb 0 into limb 1 */ |
| + output[0] = i0_; |
| + output[1] = i1_; |
| + } |
| +} |
| + |
| +static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) |
| +{ /* tmp[0..4] = the five unreduced 128-bit column sums of output squared. */ |
| + u64 r0 = output[0]; |
| + u64 r1 = output[1]; |
| + u64 r2 = output[2]; |
| + u64 r3 = output[3]; |
| + u64 r4 = output[4]; |
| + u64 d0 = r0 * 2; /* doubled limbs cover the symmetric cross terms */ |
| + u64 d1 = r1 * 2; |
| + u64 d2 = r2 * 2 * 19; /* factors of 19 come from terms that wrap past the top limb */ |
| + u64 d419 = r4 * 19; |
| + u64 d4 = d419 * 2; |
| + u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + |
| + (((u128)(d2) * (r3)))); |
| + u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + |
| + (((u128)(r3 * 19) * (r3)))); |
| + u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + |
| + (((u128)(d4) * (r3)))); |
| + u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + |
| + (((u128)(r4) * (d419)))); |
| + u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + |
| + (((u128)(r2) * (r2)))); |
| + tmp[0] = s0; |
| + tmp[1] = s1; |
| + tmp[2] = s2; |
| + tmp[3] = s3; |
| + tmp[4] = s4; |
| +} |
| + |
| +static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) |
| +{ /* Square output in place; tmp is five u128 of caller-provided scratch. */ |
| + u128 b4; |
| + u128 b0; |
| + u128 b4_; |
| + u128 b0_; |
| + u64 i0; |
| + u64 i1; |
| + u64 i0_; |
| + u64 i1_; |
| + fsquare_fsquare__(tmp, output); |
| + fproduct_carry_wide_(tmp); |
| + b4 = tmp[4]; |
| + b0 = tmp[0]; |
| + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); |
| + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* fold the top limb's overflow into limb 0 */ |
| + tmp[4] = b4_; |
| + tmp[0] = b0_; |
| + fproduct_copy_from_wide_(output, tmp); |
| + i0 = output[0]; |
| + i1 = output[1]; |
| + i0_ = i0 & 0x7ffffffffffffLLU; |
| + i1_ = i1 + (i0 >> 51); /* one more carry, limb 0 into limb 1 */ |
| + output[0] = i0_; |
| + output[1] = i1_; |
| +} |
| + |
| +static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, |
| + u32 count1) |
| +{ /* Square output in place count1 times; tmp is scratch. */ |
| + u32 i; |
| + fsquare_fsquare_(tmp, output); /* unconditional: count1 == 0 still squares once */ |
| + for (i = 1; i < count1; ++i) |
| + fsquare_fsquare_(tmp, output); |
| +} |
| + |
| +static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, |
| + u32 count1) |
| +{ /* output = input squared count1 times (input is copied first, then squared in place). */ |
| + u128 t[5]; /* scratch; written by the squaring before it is read */ |
| + memcpy(output, input, 5 * sizeof(*input)); |
| + fsquare_fsquare_times_(output, t, count1); |
| +} |
| + |
| +static __always_inline void fsquare_fsquare_times_inplace(u64 *output, |
| + u32 count1) |
| +{ /* Same as fsquare_fsquare_times() but operates directly on output. */ |
| + u128 t[5]; /* scratch; written by the squaring before it is read */ |
| + fsquare_fsquare_times_(output, t, count1); |
| +} |
| + |
| +static __always_inline void crecip_crecip(u64 *out, u64 *z) |
| +{ /* out = z^(2^255 - 21), i.e. z^(p - 2): field inverse via a fixed square/multiply chain. */ |
| + u64 buf[20] = { 0 }; /* four 5-limb temporaries */ |
| + u64 *a0 = buf; |
| + u64 *t00 = buf + 5; |
| + u64 *b0 = buf + 10; |
| + u64 *t01; |
| + u64 *b1; |
| + u64 *c0; |
| + u64 *a; |
| + u64 *t0; |
| + u64 *b; |
| + u64 *c; |
| + fsquare_fsquare_times(a0, z, 1); /* a0 = z^2 */ |
| + fsquare_fsquare_times(t00, a0, 2); /* t00 = z^8 */ |
| + fmul_fmul(b0, t00, z); /* b0 = z^9 */ |
| + fmul_fmul(a0, b0, a0); /* a0 = z^11 */ |
| + fsquare_fsquare_times(t00, a0, 1); /* t00 = z^22 */ |
| + fmul_fmul(b0, t00, b0); /* b0 = z^(2^5 - 1) */ |
| + fsquare_fsquare_times(t00, b0, 5); |
| + t01 = buf + 5; /* same storage as t00 */ |
| + b1 = buf + 10; /* same storage as b0 */ |
| + c0 = buf + 15; |
| + fmul_fmul(b1, t01, b1); /* b1 = z^(2^10 - 1) */ |
| + fsquare_fsquare_times(t01, b1, 10); |
| + fmul_fmul(c0, t01, b1); /* c0 = z^(2^20 - 1) */ |
| + fsquare_fsquare_times(t01, c0, 20); |
| + fmul_fmul(t01, t01, c0); /* t01 = z^(2^40 - 1) */ |
| + fsquare_fsquare_times_inplace(t01, 10); |
| + fmul_fmul(b1, t01, b1); /* b1 = z^(2^50 - 1) */ |
| + fsquare_fsquare_times(t01, b1, 50); |
| + a = buf; /* same storage as a0 (z^11) */ |
| + t0 = buf + 5; |
| + b = buf + 10; |
| + c = buf + 15; |
| + fmul_fmul(c, t0, b); /* c = z^(2^100 - 1) */ |
| + fsquare_fsquare_times(t0, c, 100); |
| + fmul_fmul(t0, t0, c); /* t0 = z^(2^200 - 1) */ |
| + fsquare_fsquare_times_inplace(t0, 50); |
| + fmul_fmul(t0, t0, b); /* t0 = z^(2^250 - 1) */ |
| + fsquare_fsquare_times_inplace(t0, 5); /* t0 = z^(2^255 - 32) */ |
| + fmul_fmul(out, t0, a); /* out = z^(2^255 - 32 + 11) */ |
| +} |
| + |
| +static __always_inline void fsum(u64 *a, u64 *b) |
| +{ /* a += b, limb by limb; no carry propagation or reduction is done here. */ |
| + a[0] += b[0]; |
| + a[1] += b[1]; |
| + a[2] += b[2]; |
| + a[3] += b[3]; |
| + a[4] += b[4]; |
| +} |
| + |
| +static __always_inline void fdifference(u64 *a, u64 *b) |
| +{ /* a = b - a (note the operand order), limb by limb; b is not modified. */ |
| + u64 tmp[5] = { 0 }; |
| + u64 b0; |
| + u64 b1; |
| + u64 b2; |
| + u64 b3; |
| + u64 b4; |
| + memcpy(tmp, b, 5 * sizeof(*b)); |
| + b0 = tmp[0]; |
| + b1 = tmp[1]; |
| + b2 = tmp[2]; |
| + b3 = tmp[3]; |
| + b4 = tmp[4]; |
| + tmp[0] = b0 + 0x3fffffffffff68LLU; /* 8 * (2^51 - 19) */ |
| + tmp[1] = b1 + 0x3ffffffffffff8LLU; /* 8 * (2^51 - 1) */ |
| + tmp[2] = b2 + 0x3ffffffffffff8LLU; /* together the five constants are 8 * (2^255 - 19): */ |
| + tmp[3] = b3 + 0x3ffffffffffff8LLU; /* a multiple of the modulus, presumably added so the */ |
| + tmp[4] = b4 + 0x3ffffffffffff8LLU; /* per-limb subtractions below cannot wrap; input bounds not visible here */ |
| + { |
| + u64 xi = a[0]; |
| + u64 yi = tmp[0]; |
| + a[0] = yi - xi; |
| + } |
| + { |
| + u64 xi = a[1]; |
| + u64 yi = tmp[1]; |
| + a[1] = yi - xi; |
| + } |
| + { |
| + u64 xi = a[2]; |
| + u64 yi = tmp[2]; |
| + a[2] = yi - xi; |
| + } |
| + { |
| + u64 xi = a[3]; |
| + u64 yi = tmp[3]; |
| + a[3] = yi - xi; |
| + } |
| + { |
| + u64 xi = a[4]; |
| + u64 yi = tmp[4]; |
| + a[4] = yi - xi; |
| + } |
| +} |
| + |
| +static __always_inline void fscalar(u64 *output, u64 *b, u64 s) |
| +{ /* output = b * s for a 64-bit scalar s, with carries propagated and the top limb folded. */ |
| + u128 tmp[5]; /* every element is assigned below before use */ |
| + u128 b4; |
| + u128 b0; |
| + u128 b4_; |
| + u128 b0_; |
| + { |
| + u64 xi = b[0]; |
| + tmp[0] = ((u128)(xi) * (s)); |
| + } |
| + { |
| + u64 xi = b[1]; |
| + tmp[1] = ((u128)(xi) * (s)); |
| + } |
| + { |
| + u64 xi = b[2]; |
| + tmp[2] = ((u128)(xi) * (s)); |
| + } |
| + { |
| + u64 xi = b[3]; |
| + tmp[3] = ((u128)(xi) * (s)); |
| + } |
| + { |
| + u64 xi = b[4]; |
| + tmp[4] = ((u128)(xi) * (s)); |
| + } |
| + fproduct_carry_wide_(tmp); |
| + b4 = tmp[4]; |
| + b0 = tmp[0]; |
| + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); |
| + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* fold the top limb's overflow into limb 0 */ |
| + tmp[4] = b4_; |
| + tmp[0] = b0_; |
| + fproduct_copy_from_wide_(output, tmp); |
| +} |
| + |
| +static __always_inline void fmul(u64 *output, u64 *a, u64 *b) |
| +{ /* Thin wrapper: output = a * b via fmul_fmul(). */ |
| + fmul_fmul(output, a, b); |
| +} |
| + |
| +static __always_inline void crecip(u64 *output, u64 *input) |
| +{ /* Thin wrapper around crecip_crecip(), the inversion exponentiation chain. */ |
| + crecip_crecip(output, input); |
| +} |
| + |
| +static __always_inline void point_swap_conditional_step(u64 *a, u64 *b, |
| + u64 swap1, u32 ctr) |
| +{ /* Branch-free swap of a[ctr - 1] and b[ctr - 1]: swaps for swap1 == ~0, no-op for 0. */ |
| + u32 i = ctr - 1; /* ctr is 1-based */ |
| + u64 ai = a[i]; |
| + u64 bi = b[i]; |
| + u64 x = swap1 & (ai ^ bi); /* the difference when swapping, zero otherwise */ |
| + u64 ai1 = ai ^ x; |
| + u64 bi1 = bi ^ x; |
| + a[i] = ai1; |
| + b[i] = bi1; |
| +} |
| + |
| +static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) |
| +{ /* Apply the masked swap to all five limbs (indices 4 down to 0). */ |
| + point_swap_conditional_step(a, b, swap1, 5); |
| + point_swap_conditional_step(a, b, swap1, 4); |
| + point_swap_conditional_step(a, b, swap1, 3); |
| + point_swap_conditional_step(a, b, swap1, 2); |
| + point_swap_conditional_step(a, b, swap1, 1); |
| +} |
| + |
| +static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) |
| +{ /* Swap the 10-limb points a and b iff iswap is 1; callers pass 0 or 1. */ |
| + u64 swap1 = 0 - iswap; /* 0 stays 0, 1 becomes all ones */ |
| + point_swap_conditional5(a, b, swap1); /* first five limbs */ |
| + point_swap_conditional5(a + 5, b + 5, swap1); /* second five limbs */ |
| +} |
| + |
| +static __always_inline void point_copy(u64 *output, u64 *input) |
| +{ /* Copy a 10-limb point as two 5-limb halves. */ |
| + memcpy(output, input, 5 * sizeof(*input)); |
| + memcpy(output + 5, input + 5, 5 * sizeof(*input)); |
| +} |
| + |
| +static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, |
| + u64 *pq, u64 *qmqp) |
| +{ /* One ladder step on (x, z) pairs: writes pp and ppq; p and pq are clobbered. */ |
| + u64 *qx = qmqp; /* only the first five limbs of qmqp are read */ |
| + u64 *x2 = pp; |
| + u64 *z2 = pp + 5; |
| + u64 *x3 = ppq; |
| + u64 *z3 = ppq + 5; |
| + u64 *x = p; |
| + u64 *z = p + 5; |
| + u64 *xprime = pq; |
| + u64 *zprime = pq + 5; |
| + u64 buf[40] = { 0 }; /* eight 5-limb temporaries; several names below alias one slot */ |
| + u64 *origx = buf; |
| + u64 *origxprime0 = buf + 5; |
| + u64 *xxprime0; |
| + u64 *zzprime0; |
| + u64 *origxprime; |
| + xxprime0 = buf + 25; |
| + zzprime0 = buf + 30; |
| + memcpy(origx, x, 5 * sizeof(*x)); |
| + fsum(x, z); /* x = x + z */ |
| + fdifference(z, origx); /* z = (original x) - z */ |
| + memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); |
| + fsum(xprime, zprime); /* x' = x' + z' */ |
| + fdifference(zprime, origxprime0); /* z' = (original x') - z' */ |
| + fmul(xxprime0, xprime, z); |
| + fmul(zzprime0, x, zprime); |
| + origxprime = buf + 5; /* reuses the origxprime0 slot */ |
| + { |
| + u64 *xx0; |
| + u64 *zz0; |
| + u64 *xxprime; |
| + u64 *zzprime; |
| + u64 *zzzprime; |
| + xx0 = buf + 15; |
| + zz0 = buf + 20; |
| + xxprime = buf + 25; /* same storage as xxprime0 */ |
| + zzprime = buf + 30; /* same storage as zzprime0 */ |
| + zzzprime = buf + 35; |
| + memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); |
| + fsum(xxprime, zzprime); |
| + fdifference(zzprime, origxprime); |
| + fsquare_fsquare_times(x3, xxprime, 1); /* x3 = (sum of the two products)^2 */ |
| + fsquare_fsquare_times(zzzprime, zzprime, 1); |
| + fmul(z3, zzzprime, qx); /* z3 = (difference of the two products)^2 * qx */ |
| + fsquare_fsquare_times(xx0, x, 1); |
| + fsquare_fsquare_times(zz0, z, 1); |
| + { |
| + u64 *zzz; |
| + u64 *xx; |
| + u64 *zz; |
| + u64 scalar; |
| + zzz = buf + 10; |
| + xx = buf + 15; /* same storage as xx0 */ |
| + zz = buf + 20; /* same storage as zz0 */ |
| + fmul(x2, xx, zz); /* x2 = xx * zz */ |
| + fdifference(zz, xx); /* zz = xx - zz */ |
| + scalar = 121665; /* presumably (A - 2) / 4 for curve constant A = 486662 */ |
| + fscalar(zzz, zz, scalar); |
| + fsum(zzz, xx); |
| + fmul(z2, zzz, zz); /* z2 = (121665 * zz + xx) * zz, with zz the difference above */ |
| + } |
| + } |
| +} |
| + |
| +static __always_inline void |
| +ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, |
| + u64 *q, u8 byt) |
| +{ /* One ladder step driven by the top bit of byt; results land in nq2/nqpq2. */ |
| + u64 bit0 = (u64)(byt >> 7); /* most significant bit of byt: 0 or 1 */ |
| + u64 bit; |
| + point_swap_conditional(nq, nqpq, bit0); /* swap the inputs if the bit is set */ |
| + addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); /* clobbers nq and nqpq */ |
| + bit = (u64)(byt >> 7); /* same bit as bit0 */ |
| + point_swap_conditional(nq2, nqpq2, bit); /* swap the outputs by the same bit */ |
| +} |
| + |
| +static __always_inline void |
| +ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, |
| + u64 *nqpq2, u64 *q, u8 byt) |
| +{ /* Two ladder steps for the top two bits of byt; results end up back in nq/nqpq. */ |
| + u8 byt1; |
| + ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); |
| + byt1 = byt << 1; /* bring the next bit to the top */ |
| + ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); /* buffer roles exchanged */ |
| +} |
| + |
| +static __always_inline void |
| +ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, |
| + u64 *q, u8 byt, u32 i) |
| +{ /* Run i double steps, consuming the bits of byt two at a time from the top. */ |
| + while (i--) { |
| + ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, |
| + nqpq2, q, byt); |
| + byt <<= 2; /* discard the two bits just processed */ |
| + } |
| +} |
| + |
| +static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, |
| + u64 *nqpq, u64 *nq2, |
| + u64 *nqpq2, u64 *q, |
| + u32 i) |
| +{ /* Walk the scalar bytes n1[i - 1] down to n1[0], most significant bit first. */ |
| + while (i--) { |
| + u8 byte = n1[i]; |
| + ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, |
| + byte, 4); /* 4 double steps = 8 bits */ |
| + } |
| +} |
| + |
| +static void ladder_cmult(u64 *result, u8 *n1, u64 *q) |
| +{ /* Ladder over the 32-byte scalar n1 starting from point q; writes 10 limbs (x, z) to result. */ |
| + u64 point_buf[40] = { 0 }; /* four 10-limb points; NOTE(review): not wiped with memzero_explicit() on exit */ |
| + u64 *nq = point_buf; |
| + u64 *nqpq = point_buf + 10; |
| + u64 *nq2 = point_buf + 20; |
| + u64 *nqpq2 = point_buf + 30; |
| + point_copy(nqpq, q); |
| + nq[0] = 1; /* nq starts as x = 1, z = 0 (the rest of point_buf is zero) */ |
| + ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); |
| + point_copy(result, nq); |
| +} |
| + |
| +static __always_inline void format_fexpand(u64 *output, const u8 *input) |
| +{ /* Unpack 32 little-endian bytes into five 51-bit limbs; bit 255 of the input is dropped. */ |
| + const u8 *x00 = input + 6; /* overlapping 8-byte windows, one per limb */ |
| + const u8 *x01 = input + 12; |
| + const u8 *x02 = input + 19; |
| + const u8 *x0 = input + 24; |
| + u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; |
| + i0 = get_unaligned_le64(input); |
| + i1 = get_unaligned_le64(x00); |
| + i2 = get_unaligned_le64(x01); |
| + i3 = get_unaligned_le64(x02); |
| + i4 = get_unaligned_le64(x0); |
| + output0 = i0 & 0x7ffffffffffffLLU; /* bits 0..50 */ |
| + output1 = i1 >> 3 & 0x7ffffffffffffLLU; /* 6 * 8 + 3 = bit 51 onwards */ |
| + output2 = i2 >> 6 & 0x7ffffffffffffLLU; /* 12 * 8 + 6 = bit 102 onwards */ |
| + output3 = i3 >> 1 & 0x7ffffffffffffLLU; /* 19 * 8 + 1 = bit 153 onwards */ |
| + output4 = i4 >> 12 & 0x7ffffffffffffLLU; /* 24 * 8 + 12 = bit 204 onwards */ |
| + output[0] = output0; |
| + output[1] = output1; |
| + output[2] = output2; |
| + output[3] = output3; |
| + output[4] = output4; |
| +} |
| + |
| +static __always_inline void format_fcontract_first_carry_pass(u64 *input) |
| +{ /* Ripple carries from limb 0 up to limb 4, in place; limbs 0..3 end up below 2^51. */ |
| + u64 t0 = input[0]; |
| + u64 t1 = input[1]; |
| + u64 t2 = input[2]; |
| + u64 t3 = input[3]; |
| + u64 t4 = input[4]; |
| + u64 t1_ = t1 + (t0 >> 51); |
| + u64 t0_ = t0 & 0x7ffffffffffffLLU; |
| + u64 t2_ = t2 + (t1_ >> 51); |
| + u64 t1__ = t1_ & 0x7ffffffffffffLLU; |
| + u64 t3_ = t3 + (t2_ >> 51); |
| + u64 t2__ = t2_ & 0x7ffffffffffffLLU; |
| + u64 t4_ = t4 + (t3_ >> 51); /* limb 4 absorbs the last carry and is not masked here */ |
| + u64 t3__ = t3_ & 0x7ffffffffffffLLU; |
| + input[0] = t0_; |
| + input[1] = t1__; |
| + input[2] = t2__; |
| + input[3] = t3__; |
| + input[4] = t4_; |
| +} |
| + |
| +static __always_inline void format_fcontract_first_carry_full(u64 *input) |
| +{ /* Carry pass over limbs 0..4, then fold limb 4's overflow into limb 0. */ |
| + format_fcontract_first_carry_pass(input); |
| + modulo_carry_top(input); |
| +} |
| + |
| +static __always_inline void format_fcontract_second_carry_pass(u64 *input) |
| +{ /* Same statements as format_fcontract_first_carry_pass(): ripple carries from limb 0 to 4. */ |
| + u64 t0 = input[0]; |
| + u64 t1 = input[1]; |
| + u64 t2 = input[2]; |
| + u64 t3 = input[3]; |
| + u64 t4 = input[4]; |
| + u64 t1_ = t1 + (t0 >> 51); |
| + u64 t0_ = t0 & 0x7ffffffffffffLLU; |
| + u64 t2_ = t2 + (t1_ >> 51); |
| + u64 t1__ = t1_ & 0x7ffffffffffffLLU; |
| + u64 t3_ = t3 + (t2_ >> 51); |
| + u64 t2__ = t2_ & 0x7ffffffffffffLLU; |
| + u64 t4_ = t4 + (t3_ >> 51); /* limb 4 absorbs the last carry and is not masked here */ |
| + u64 t3__ = t3_ & 0x7ffffffffffffLLU; |
| + input[0] = t0_; |
| + input[1] = t1__; |
| + input[2] = t2__; |
| + input[3] = t3__; |
| + input[4] = t4_; |
| +} |
| + |
| +static __always_inline void format_fcontract_second_carry_full(u64 *input) |
| +{ /* Second carry pass and top fold, plus one extra carry from limb 0 into limb 1. */ |
| + u64 i0; |
| + u64 i1; |
| + u64 i0_; |
| + u64 i1_; |
| + format_fcontract_second_carry_pass(input); |
| + modulo_carry_top(input); |
| + i0 = input[0]; |
| + i1 = input[1]; |
| + i0_ = i0 & 0x7ffffffffffffLLU; |
| + i1_ = i1 + (i0 >> 51); /* the fold above may have pushed limb 0 past 51 bits */ |
| + input[0] = i0_; |
| + input[1] = i1_; |
| +} |
| + |
| +static __always_inline void format_fcontract_trim(u64 *input) |
| +{ /* Branch-free final reduction: subtract 2^255 - 19 once when the limb test below matches. */ |
| + u64 a0 = input[0]; |
| + u64 a1 = input[1]; |
| + u64 a2 = input[2]; |
| + u64 a3 = input[3]; |
| + u64 a4 = input[4]; |
| + u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); /* a0 >= 2^51 - 19 */ |
| + u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); /* a1..a4 == 2^51 - 1 */ |
| + u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); |
| + u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); |
| + u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); |
| + u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; /* ~0 only if every test passed */ |
| + u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); /* subtracts the modulus limbs, or zero */ |
| + u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); |
| + u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); |
| + u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); |
| + u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); |
| + input[0] = a0_; |
| + input[1] = a1_; |
| + input[2] = a2_; |
| + input[3] = a3_; |
| + input[4] = a4_; |
| +} |
| + |
| +static __always_inline void format_fcontract_store(u8 *output, u64 *input) |
| +{ /* Pack five 51-bit limbs into 32 bytes as four little-endian 64-bit words. */ |
| + u64 t0 = input[0]; |
| + u64 t1 = input[1]; |
| + u64 t2 = input[2]; |
| + u64 t3 = input[3]; |
| + u64 t4 = input[4]; |
| + u64 o0 = t1 << 51 | t0; /* t0 plus the low 13 bits of t1 */ |
| + u64 o1 = t2 << 38 | t1 >> 13; /* rest of t1 plus the low 26 bits of t2 */ |
| + u64 o2 = t3 << 25 | t2 >> 26; /* rest of t2 plus the low 39 bits of t3 */ |
| + u64 o3 = t4 << 12 | t3 >> 39; /* rest of t3 plus t4 */ |
| + u8 *b0 = output; |
| + u8 *b1 = output + 8; |
| + u8 *b2 = output + 16; |
| + u8 *b3 = output + 24; |
| + put_unaligned_le64(o0, b0); |
| + put_unaligned_le64(o1, b1); |
| + put_unaligned_le64(o2, b2); |
| + put_unaligned_le64(o3, b3); |
| +} |
| + |
| +static __always_inline void format_fcontract(u8 *output, u64 *input) |
| +{ /* Reduce input (modified in place) and serialise it to 32 bytes at output. */ |
| + format_fcontract_first_carry_full(input); |
| + format_fcontract_second_carry_full(input); |
| + format_fcontract_trim(input); /* conditional subtraction of the modulus */ |
| + format_fcontract_store(output, input); |
| +} |
| + |
| +static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) |
| +{ /* Writes the 32-byte encoding of x * crecip(z) for point = (x, z) into scalar. */ |
| + u64 *x = point; |
| + u64 *z = point + 5; |
| + u64 buf[10] __aligned(32) = { 0 }; /* NOTE(review): not wiped with memzero_explicit() on exit */ |
| + u64 *zmone = buf; /* receives crecip(z) */ |
| + u64 *sc = buf + 5; |
| + crecip(zmone, z); |
| + fmul(sc, x, zmone); |
| + format_fcontract(scalar, sc); /* despite the name, scalar is the output byte buffer */ |
| +} |
| + |
| +void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE], |
| + const u8 basepoint[CURVE25519_KEY_SIZE]) |
| +{ /* mypublic = encoded x-coordinate of clamp(secret) * basepoint; locals are wiped on exit. */ |
| + u64 buf0[10] __aligned(32) = { 0 }; |
| + u64 *x0 = buf0; |
| + u64 *z = buf0 + 5; |
| + u64 *q; |
| + format_fexpand(x0, basepoint); |
| + z[0] = 1; /* input point in (x, z) form with z = 1 */ |
| + q = buf0; |
| + { |
| + u8 e[32] __aligned(32) = { 0 }; |
| + u8 *scalar; |
| + memcpy(e, secret, 32); /* work on a copy; the caller's secret is const */ |
| + curve25519_clamp_secret(e); /* defined outside this file */ |
| + scalar = e; |
| + { |
| + u64 buf[15] = { 0 }; /* only the first 10 words are used by the calls below */ |
| + u64 *nq = buf; |
| + u64 *x = nq; |
| + x[0] = 1; /* overwritten by ladder_cmult(), which copies its result over nq */ |
| + ladder_cmult(nq, scalar, q); |
| + format_scalar_of_point(mypublic, nq); |
| + memzero_explicit(buf, sizeof(buf)); |
| + } |
| + memzero_explicit(e, sizeof(e)); |
| + } |
| + memzero_explicit(buf0, sizeof(buf0)); |
| +} |
| diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c |
| new file mode 100644 |
| index 000000000000..0106bebe6900 |
| |
| |
| @@ -0,0 +1,25 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is an implementation of the Curve25519 ECDH algorithm, using either |
| + * a 32-bit implementation or a 64-bit implementation with 128-bit integers, |
| + * depending on what is supported by the target compiler. |
| + * |
| + * Information: https://cr.yp.to/ecdh.html |
| + */ |
| + |
| +#include <crypto/curve25519.h> |
| +#include <linux/module.h> |
| +#include <linux/init.h> |
| + |
| +const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; /* all bytes zero */ |
| +const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; /* first byte 9, remaining bytes zero */ |
| + |
| +EXPORT_SYMBOL(curve25519_null_point); |
| +EXPORT_SYMBOL(curve25519_base_point); |
| +EXPORT_SYMBOL(curve25519_generic); /* not defined in this file; provided by one of the implementation files */ |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("Curve25519 scalar multiplication"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| -- |
| 2.18.2 |
| |
| |
| From b85ad2b88fc06d997f8f142222d9f8159cd5d2a2 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:33 +0100 |
| Subject: [PATCH 026/100] crypto: curve25519 - add kpp selftest |
| |
| commit f613457a7af085728297bef71233c37faf3c01b1 upstream. |
| |
| In preparation of introducing KPP implementations of Curve25519, import |
| the set of test cases proposed by the Zinc patch set, but converted to |
| the KPP format. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/testmgr.c | 6 + |
| crypto/testmgr.h | 1225 ++++++++++++++++++++++++++++++++++++++++++++++ |
| 2 files changed, 1231 insertions(+) |
| |
| diff --git a/crypto/testmgr.c b/crypto/testmgr.c |
| index 711390861f71..57ab993b9ad2 100644 |
| |
| |
| @@ -4295,6 +4295,12 @@ static const struct alg_test_desc alg_test_descs[] = { |
| .alg = "cts(cbc(paes))", |
| .test = alg_test_null, |
| .fips_allowed = 1, |
| + }, { |
| + .alg = "curve25519", |
| + .test = alg_test_kpp, |
| + .suite = { |
| + .kpp = __VECS(curve25519_tv_template) |
| + } |
| }, { |
| .alg = "deflate", |
| .test = alg_test_comp, |
| diff --git a/crypto/testmgr.h b/crypto/testmgr.h |
| index 102fcad54966..5d132ae996b4 100644 |
| |
| |
| @@ -1030,6 +1030,1231 @@ static const struct kpp_testvec dh_tv_template[] = { |
| } |
| }; |
| |
| +static const struct kpp_testvec curve25519_tv_template[] = { |
| +{ |
| + .secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, |
| + 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, |
| + 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, |
| + 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, |
| + .b_public = (u8[32]){ 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, |
| + 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, |
| + 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, |
| + 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, |
| + .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, |
| + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, |
| + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, |
| + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, |
| + 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, |
| + 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, |
| + 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, |
| + .b_public = (u8[32]){ 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, |
| + 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, |
| + 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, |
| + 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, |
| + .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, |
| + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, |
| + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, |
| + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 1 }, |
| + .b_public = (u8[32]){ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, |
| + 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, |
| + 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, |
| + 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 1 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, |
| + 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, |
| + 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, |
| + 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, |
| + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, |
| + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, |
| + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, |
| + .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, |
| + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, |
| + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, |
| + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, |
| + .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, |
| + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, |
| + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, |
| + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, |
| + .expected_ss = (u8[32]){ 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, |
| + 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, |
| + 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, |
| + 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +{ |
| + .secret = (u8[32]){ 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, |
| + .expected_ss = (u8[32]){ 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, |
| + 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, |
| + 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, |
| + 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - normal case */ |
| +{ |
| + .secret = (u8[32]){ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, |
| + 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, |
| + 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, |
| + 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, |
| + .b_public = (u8[32]){ 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, |
| + 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, |
| + 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, |
| + 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, |
| + .expected_ss = (u8[32]){ 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, |
| + 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, |
| + 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, |
| + 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, |
| + 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, |
| + 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, |
| + 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, |
| + .b_public = (u8[32]){ 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, |
| + 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, |
| + 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, |
| + 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, |
| + .expected_ss = (u8[32]){ 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, |
| + 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, |
| + 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, |
| + 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key on twist */ |
| +{ |
| + .secret = (u8[32]){ 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, |
| + 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, |
| + 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, |
| + 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, |
| + .b_public = (u8[32]){ 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, |
| + 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, |
| + 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, |
| + 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, |
| + .expected_ss = (u8[32]){ 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, |
| + 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, |
| + 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, |
| + 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, |
| + 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, |
| + 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, |
| + 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, |
| + .b_public = (u8[32]){ 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, |
| + 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, |
| + 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, |
| + 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, |
| + .expected_ss = (u8[32]){ 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, |
| + 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, |
| + 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, |
| + 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, |
| + 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, |
| + 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, |
| + 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, |
| + .b_public = (u8[32]){ 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, |
| + 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, |
| + 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, |
| + 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, |
| + .expected_ss = (u8[32]){ 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, |
| + 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, |
| + 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, |
| + 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key on twist */ |
| +{ |
| + .secret = (u8[32]){ 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, |
| + 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, |
| + 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, |
| + 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, |
| + .b_public = (u8[32]){ 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, |
| + 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, |
| + 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, |
| + 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, |
| + .expected_ss = (u8[32]){ 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, |
| + 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, |
| + 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, |
| + 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, |
| + 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, |
| + 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, |
| + 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, |
| + .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, |
| + 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, |
| + 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, |
| + 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, |
| + 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, |
| + 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, |
| + 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, |
| + .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, |
| + 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, |
| + 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, |
| + 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, |
| + 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, |
| + 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, |
| + 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, |
| + 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, |
| + 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, |
| + 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, |
| + 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, |
| + 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, |
| + 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, |
| + 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, |
| + 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, |
| + .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, |
| + 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, |
| + 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, |
| + 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, |
| + 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, |
| + 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, |
| + 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, |
| + 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, |
| + 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, |
| + .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, |
| + 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, |
| + 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, |
| + 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case on twist */ |
| +{ |
| + .secret = (u8[32]){ 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, |
| + 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, |
| + 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, |
| + 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, |
| + .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, |
| + 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, |
| + 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, |
| + 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, |
| + 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, |
| + 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, |
| + 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, |
| + .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, |
| + 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, |
| + 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, |
| + 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, |
| + 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, |
| + 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, |
| + 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, |
| + 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, |
| + 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, |
| + 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, |
| + 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, |
| + 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, |
| + 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, |
| + .expected_ss = (u8[32]){ 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, |
| + 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, |
| + 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, |
| + 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, |
| + 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, |
| + 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, |
| + 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, |
| + 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, |
| + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, |
| + 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, |
| + .expected_ss = (u8[32]){ 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, |
| + 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, |
| + 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, |
| + 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, |
| + 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, |
| + 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, |
| + 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .expected_ss = (u8[32]){ 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, |
| + 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, |
| + 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, |
| + 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, |
| + 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, |
| + 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, |
| + 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, |
| + 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, |
| + 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, |
| + 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for public key */ |
| +{ |
| + .secret = (u8[32]){ 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, |
| + 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, |
| + 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, |
| + 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, |
| + .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, |
| + 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, |
| + 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, |
| + 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, |
| + 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, |
| + 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, |
| + 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, |
| + .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, |
| + 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, |
| + 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, |
| + 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, |
| + 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, |
| + 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, |
| + 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, |
| + .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, |
| + 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, |
| + 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, |
| + 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, |
| + 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, |
| + 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, |
| + 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, |
| + .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, |
| + 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, |
| + 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, |
| + 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, |
| + 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, |
| + 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, |
| + 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .expected_ss = (u8[32]){ 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, |
| + 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, |
| + 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, |
| + 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, |
| + 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, |
| + 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, |
| + 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, |
| + .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .expected_ss = (u8[32]){ 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, |
| + 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, |
| + 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, |
| + 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, |
| + 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, |
| + 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, |
| + 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, |
| + .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .expected_ss = (u8[32]){ 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, |
| + 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, |
| + 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, |
| + 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, |
| + 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, |
| + 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, |
| + 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, |
| + .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .expected_ss = (u8[32]){ 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, |
| + 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, |
| + 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, |
| + 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, |
| + 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, |
| + 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, |
| + 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, |
| + .b_public = (u8[32]){ 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, |
| + 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, |
| + 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, |
| + 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, |
| + 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, |
| + 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, |
| + 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, |
| + .b_public = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, |
| + 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, |
| + 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, |
| + 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, |
| + 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, |
| + 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, |
| + 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, |
| + .b_public = (u8[32]){ 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, |
| + 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, |
| + 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, |
| + 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, |
| + 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, |
| + 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, |
| + 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, |
| + .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, |
| + 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, |
| + 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, |
| + 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, |
| + 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, |
| + 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, |
| + 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, |
| + .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, |
| + 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, |
| + 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, |
| + 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, |
| + 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, |
| + 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, |
| + 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, |
| + .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, |
| + 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, |
| + 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, |
| + 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, |
| + 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, |
| + 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, |
| + 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, |
| + .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, |
| + 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, |
| + 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, |
| + 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, |
| + 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, |
| + 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, |
| + 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, |
| + .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, |
| + 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, |
| + 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, |
| + 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - public key >= p */ |
| +{ |
| + .secret = (u8[32]){ 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, |
| + 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, |
| + 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, |
| + 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, |
| + .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .expected_ss = (u8[32]){ 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, |
| + 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, |
| + 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, |
| + 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - RFC 7748 */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, |
| + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, |
| + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, |
| + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, |
| + .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, |
| + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, |
| + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, |
| + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, |
| + .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, |
| + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, |
| + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, |
| + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - RFC 7748 */ |
| +{ |
| + .secret = (u8[32]){ 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, |
| + 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, |
| + 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, |
| + 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, |
| + .b_public = (u8[32]){ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, |
| + 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, |
| + 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, |
| + 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, |
| + .expected_ss = (u8[32]){ 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, |
| + 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, |
| + 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, |
| + 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, |
| + 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, |
| + 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, |
| + 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, |
| + .expected_ss = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, |
| + 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, |
| + 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, |
| + 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, |
| + .expected_ss = (u8[32]){ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, |
| + 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, |
| + 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, |
| + 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, |
| + .expected_ss = (u8[32]){ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, |
| + 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, |
| + 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, |
| + 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, |
| + .expected_ss = (u8[32]){ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, |
| + 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, |
| + 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, |
| + 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, |
| + .expected_ss = (u8[32]){ 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, |
| + 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, |
| + 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, |
| + 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, |
| + .expected_ss = (u8[32]){ 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, |
| + 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, |
| + 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, |
| + 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, |
| + .expected_ss = (u8[32]){ 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, |
| + 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, |
| + 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, |
| + 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, |
| + .expected_ss = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, |
| + 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, |
| + 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, |
| + 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, |
| + .expected_ss = (u8[32]){ 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, |
| + 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, |
| + 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, |
| + 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, |
| + .expected_ss = (u8[32]){ 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, |
| + 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, |
| + 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, |
| + 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, |
| + .expected_ss = (u8[32]){ 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, |
| + 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, |
| + 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, |
| + 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, |
| + .expected_ss = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, |
| + 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, |
| + 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, |
| + 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, |
| + .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - edge case for shared secret */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .b_public = (u8[32]){ 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, |
| + 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, |
| + 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, |
| + 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, |
| + .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - checking for overflow */ |
| +{ |
| + .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .b_public = (u8[32]){ 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, |
| + 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, |
| + 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, |
| + 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, |
| + .expected_ss = (u8[32]){ 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, |
| + 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, |
| + 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, |
| + 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - checking for overflow */ |
| +{ |
| + .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .b_public = (u8[32]){ 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, |
| + 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, |
| + 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, |
| + 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, |
| + .expected_ss = (u8[32]){ 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, |
| + 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, |
| + 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, |
| + 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - checking for overflow */ |
| +{ |
| + .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .b_public = (u8[32]){ 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, |
| + 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, |
| + 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, |
| + 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, |
| + .expected_ss = (u8[32]){ 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, |
| + 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, |
| + 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, |
| + 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - checking for overflow */ |
| +{ |
| + .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .b_public = (u8[32]){ 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, |
| + 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, |
| + 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, |
| + 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, |
| + .expected_ss = (u8[32]){ 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, |
| + 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, |
| + 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, |
| + 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - checking for overflow */ |
| +{ |
| + .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .b_public = (u8[32]){ 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, |
| + 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, |
| + 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, |
| + 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, |
| + .expected_ss = (u8[32]){ 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, |
| + 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, |
| + 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, |
| + 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - private key == -1 (mod order) */ |
| +{ |
| + .secret = (u8[32]){ 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, |
| + 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, |
| + .b_public = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, |
| + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, |
| + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, |
| + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, |
| + .expected_ss = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, |
| + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, |
| + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, |
| + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +}, |
| +/* wycheproof - private key == 1 (mod order) on twist */ |
| +{ |
| + .secret = (u8[32]){ 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, |
| + 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, |
| + .b_public = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, |
| + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, |
| + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, |
| + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, |
| + .expected_ss = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, |
| + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, |
| + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, |
| + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, |
| + .secret_size = 32, |
| + .b_public_size = 32, |
| + .expected_ss_size = 32, |
| + |
| +} |
| +}; |
| + |
| static const struct kpp_testvec ecdh_tv_template[] = { |
| { |
| #ifndef CONFIG_CRYPTO_FIPS |
| -- |
| 2.18.2 |
| |
| |
| From 19e6bba9e31ff0202097c57d184ba73eebf01980 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:34 +0100 |
| Subject: [PATCH 027/100] crypto: curve25519 - implement generic KPP driver |
| |
| commit ee772cb641135739c1530647391d5a04c39db192 upstream. |
| |
| Expose the generic Curve25519 library via the crypto API KPP interface. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/Kconfig | 5 +++ |
| crypto/Makefile | 1 + |
| crypto/curve25519-generic.c | 90 +++++++++++++++++++++++++++++++++++++ |
| 3 files changed, 96 insertions(+) |
| create mode 100644 crypto/curve25519-generic.c |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index 8fd3954bf64c..a3fc859830c1 100644 |
| |
| |
| @@ -264,6 +264,11 @@ config CRYPTO_ECRDSA |
| standard algorithms (called GOST algorithms). Only signature verification |
| is implemented. |
| |
| +config CRYPTO_CURVE25519 |
| + tristate "Curve25519 algorithm" |
| + select CRYPTO_KPP |
| + select CRYPTO_LIB_CURVE25519_GENERIC |
| + |
| comment "Authenticated Encryption with Associated Data" |
| |
| config CRYPTO_CCM |
| diff --git a/crypto/Makefile b/crypto/Makefile |
| index fd27edea7c8e..4e7a0a8f7e35 100644 |
| |
| |
| @@ -167,6 +167,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o |
| obj-$(CONFIG_CRYPTO_OFB) += ofb.o |
| obj-$(CONFIG_CRYPTO_ECC) += ecc.o |
| obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o |
| +obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o |
| |
| ecdh_generic-y += ecdh.o |
| ecdh_generic-y += ecdh_helper.o |
| diff --git a/crypto/curve25519-generic.c b/crypto/curve25519-generic.c |
| new file mode 100644 |
| index 000000000000..bd88fd571393 |
| |
| |
| @@ -0,0 +1,90 @@ |
| +// SPDX-License-Identifier: GPL-2.0-or-later |
| + |
| +#include <crypto/curve25519.h> |
| +#include <crypto/internal/kpp.h> |
| +#include <crypto/kpp.h> |
| +#include <linux/module.h> |
| +#include <linux/scatterlist.h> |
| + |
| +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, |
| + unsigned int len) |
| +{ |
| + u8 *secret = kpp_tfm_ctx(tfm); |
| + |
| + if (!len) |
| + curve25519_generate_secret(secret); |
| + else if (len == CURVE25519_KEY_SIZE && |
| + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) |
| + memcpy(secret, buf, CURVE25519_KEY_SIZE); |
| + else |
| + return -EINVAL; |
| + return 0; |
| +} |
| + |
| +static int curve25519_compute_value(struct kpp_request *req) |
| +{ |
| + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); |
| + const u8 *secret = kpp_tfm_ctx(tfm); |
| + u8 public_key[CURVE25519_KEY_SIZE]; |
| + u8 buf[CURVE25519_KEY_SIZE]; |
| + int copied, nbytes; |
| + u8 const *bp; |
| + |
| + if (req->src) { |
| + copied = sg_copy_to_buffer(req->src, |
| + sg_nents_for_len(req->src, |
| + CURVE25519_KEY_SIZE), |
| + public_key, CURVE25519_KEY_SIZE); |
| + if (copied != CURVE25519_KEY_SIZE) |
| + return -EINVAL; |
| + bp = public_key; |
| + } else { |
| + bp = curve25519_base_point; |
| + } |
| + |
| + curve25519_generic(buf, secret, bp); |
| + |
| + /* might want less than we've got */ |
| + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); |
| + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, |
| + nbytes), |
| + buf, nbytes); |
| + if (copied != nbytes) |
| + return -EINVAL; |
| + return 0; |
| +} |
| + |
| +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) |
| +{ |
| + return CURVE25519_KEY_SIZE; |
| +} |
| + |
| +static struct kpp_alg curve25519_alg = { |
| + .base.cra_name = "curve25519", |
| + .base.cra_driver_name = "curve25519-generic", |
| + .base.cra_priority = 100, |
| + .base.cra_module = THIS_MODULE, |
| + .base.cra_ctxsize = CURVE25519_KEY_SIZE, |
| + |
| + .set_secret = curve25519_set_secret, |
| + .generate_public_key = curve25519_compute_value, |
| + .compute_shared_secret = curve25519_compute_value, |
| + .max_size = curve25519_max_size, |
| +}; |
| + |
| +static int curve25519_init(void) |
| +{ |
| + return crypto_register_kpp(&curve25519_alg); |
| +} |
| + |
| +static void curve25519_exit(void) |
| +{ |
| + crypto_unregister_kpp(&curve25519_alg); |
| +} |
| + |
| +subsys_initcall(curve25519_init); |
| +module_exit(curve25519_exit); |
| + |
| +MODULE_ALIAS_CRYPTO("curve25519"); |
| +MODULE_ALIAS_CRYPTO("curve25519-generic"); |
| +MODULE_LICENSE("GPL"); |
| -- |
| 2.18.2 |
| |
| |
| From 9564b9cc289e700a1cbdf08d1ab3f81ae442db9f Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:35 +0100 |
| Subject: [PATCH 028/100] crypto: lib/curve25519 - work around Clang stack |
| spilling issue |
| |
| commit 660bb8e1f833ea63185fe80fde847e3e42f18e3b upstream. |
| |
| Arnd reports that the 32-bit generic library code for Curve25519 ends |
| up using an excessive amount of stack space when built with Clang: |
| |
| lib/crypto/curve25519-fiat32.c:756:6: error: stack frame size |
| of 1384 bytes in function 'curve25519_generic' |
| [-Werror,-Wframe-larger-than=] |
| |
| Let's give some hints to the compiler regarding which routines should |
| not be inlined, to prevent it from running out of registers and spilling |
| to the stack. The resulting code performs identically under both GCC |
| and Clang, and makes the warning go away. |
| |
| Suggested-by: Arnd Bergmann <arnd@arndb.de> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/curve25519-fiat32.c | 10 +++++----- |
| 1 file changed, 5 insertions(+), 5 deletions(-) |
| |
| diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c |
| index 1c455207341d..2fde0ec33dbd 100644 |
| |
| |
| @@ -223,7 +223,7 @@ static __always_inline void fe_1(fe *h) |
| h->v[0] = 1; |
| } |
| |
| -static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| { |
| { const u32 x20 = in1[9]; |
| { const u32 x21 = in1[8]; |
| @@ -266,7 +266,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) |
| fe_add_impl(h->v, f->v, g->v); |
| } |
| |
| -static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| { |
| { const u32 x20 = in1[9]; |
| { const u32 x21 = in1[8]; |
| @@ -309,7 +309,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) |
| fe_sub_impl(h->v, f->v, g->v); |
| } |
| |
| -static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| +static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) |
| { |
| { const u32 x20 = in1[9]; |
| { const u32 x21 = in1[8]; |
| @@ -441,7 +441,7 @@ fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) |
| fe_mul_impl(h->v, f->v, g->v); |
| } |
| |
| -static void fe_sqr_impl(u32 out[10], const u32 in1[10]) |
| +static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10]) |
| { |
| { const u32 x17 = in1[9]; |
| { const u32 x18 = in1[8]; |
| @@ -619,7 +619,7 @@ static __always_inline void fe_invert(fe *out, const fe *z) |
| * |
| * Preconditions: b in {0,1} |
| */ |
| -static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b) |
| +static noinline void fe_cswap(fe *f, fe *g, unsigned int b) |
| { |
| unsigned i; |
| b = 0 - b; |
| -- |
| 2.18.2 |
| |
| |
| From 02e323f7bc2dc013b79e8cb4d742c8f827598824 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:36 +0100 |
| Subject: [PATCH 029/100] crypto: curve25519 - x86_64 library and KPP |
| implementations |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit bb611bdfd6be34d9f822c73305fcc83720499d38 upstream. |
| |
| This implementation is the fastest available x86_64 implementation, and |
| unlike Sandy2x, it doesn't require use of the floating point registers at |
| all. Instead it makes use of BMI2 and ADX, available on recent |
| microarchitectures. The implementation was written by Armando |
| Faz-Hernández with contributions (upstream) from Samuel Neves and me, |
| in addition to further changes in the kernel implementation from us. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Samuel Neves <sneves@dei.uc.pt> |
| Co-developed-by: Samuel Neves <sneves@dei.uc.pt> |
| [ardb: - move to arch/x86/crypto |
| - wire into lib/crypto framework |
| - implement crypto API KPP hooks ] |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/Makefile | 1 + |
| arch/x86/crypto/curve25519-x86_64.c | 2475 +++++++++++++++++++++++++++ |
| crypto/Kconfig | 6 + |
| 3 files changed, 2482 insertions(+) |
| create mode 100644 arch/x86/crypto/curve25519-x86_64.c |
| |
| diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile |
| index 922c8ecfa00f..958440eae27e 100644 |
| |
| |
| @@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o |
| |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o |
| +obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o |
| |
| # These modules require assembler to support AVX. |
| ifeq ($(avx_supported),yes) |
| diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c |
| new file mode 100644 |
| index 000000000000..a52a3fb15727 |
| |
| |
| @@ -0,0 +1,2475 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
| +/* |
| + * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. |
| + * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/curve25519.h> |
| +#include <crypto/internal/kpp.h> |
| + |
| +#include <linux/types.h> |
| +#include <linux/jump_label.h> |
| +#include <linux/kernel.h> |
| +#include <linux/module.h> |
| + |
| +#include <asm/cpufeature.h> |
| +#include <asm/processor.h> |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); |
| + |
| +enum { NUM_WORDS_ELTFP25519 = 4 }; |
| +typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; |
| +typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; |
| + |
| +#define mul_eltfp25519_1w_adx(c, a, b) do { \ |
| + mul_256x256_integer_adx(m.buffer, a, b); \ |
| + red_eltfp25519_1w_adx(c, m.buffer); \ |
| +} while (0) |
| + |
| +#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ |
| + mul_256x256_integer_bmi2(m.buffer, a, b); \ |
| + red_eltfp25519_1w_bmi2(c, m.buffer); \ |
| +} while (0) |
| + |
| +#define sqr_eltfp25519_1w_adx(a) do { \ |
| + sqr_256x256_integer_adx(m.buffer, a); \ |
| + red_eltfp25519_1w_adx(a, m.buffer); \ |
| +} while (0) |
| + |
| +#define sqr_eltfp25519_1w_bmi2(a) do { \ |
| + sqr_256x256_integer_bmi2(m.buffer, a); \ |
| + red_eltfp25519_1w_bmi2(a, m.buffer); \ |
| +} while (0) |
| + |
| +#define mul_eltfp25519_2w_adx(c, a, b) do { \ |
| + mul2_256x256_integer_adx(m.buffer, a, b); \ |
| + red_eltfp25519_2w_adx(c, m.buffer); \ |
| +} while (0) |
| + |
| +#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ |
| + mul2_256x256_integer_bmi2(m.buffer, a, b); \ |
| + red_eltfp25519_2w_bmi2(c, m.buffer); \ |
| +} while (0) |
| + |
| +#define sqr_eltfp25519_2w_adx(a) do { \ |
| + sqr2_256x256_integer_adx(m.buffer, a); \ |
| + red_eltfp25519_2w_adx(a, m.buffer); \ |
| +} while (0) |
| + |
| +#define sqr_eltfp25519_2w_bmi2(a) do { \ |
| + sqr2_256x256_integer_bmi2(m.buffer, a); \ |
| + red_eltfp25519_2w_bmi2(a, m.buffer); \ |
| +} while (0) |
| + |
| +#define sqrn_eltfp25519_1w_adx(a, times) do { \ |
| + int ____counter = (times); \ |
| + while (____counter-- > 0) \ |
| + sqr_eltfp25519_1w_adx(a); \ |
| +} while (0) |
| + |
| +#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ |
| + int ____counter = (times); \ |
| + while (____counter-- > 0) \ |
| + sqr_eltfp25519_1w_bmi2(a); \ |
| +} while (0) |
| + |
| +#define copy_eltfp25519_1w(C, A) do { \ |
| + (C)[0] = (A)[0]; \ |
| + (C)[1] = (A)[1]; \ |
| + (C)[2] = (A)[2]; \ |
| + (C)[3] = (A)[3]; \ |
| +} while (0) |
| + |
| +#define setzero_eltfp25519_1w(C) do { \ |
| + (C)[0] = 0; \ |
| + (C)[1] = 0; \ |
| + (C)[2] = 0; \ |
| + (C)[3] = 0; \ |
| +} while (0) |
| +/* |
| + * table_ladder_8k - precomputed read-only constants, 32-byte aligned. |
| + * |
| + * Declared as 252 * NUM_WORDS_ELTFP25519 u64 words. The initializer lists |
| + * four words per numbered entry (the numeric comments count entries from 1), |
| + * so NUM_WORDS_ELTFP25519 is presumably 4 - TODO confirm against its |
| + * definition. With that value each entry is 32 bytes and the whole table is |
| + * 252 * 32 = 8064 bytes, which matches the "8k" in the name. |
| + * |
| + * NOTE(review): neither the generator nor the consumer of these values is |
| + * visible in this part of the file; going by the name they look like |
| + * precomputed field elements for a ladder-style scalar multiplication - |
| + * confirm against the code that indexes the table. The values must be kept |
| + * bit-exact; do not edit or reformat them by hand. |
| + */ |
| +__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { |
| + /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, |
| + 0xffffffffffffffffUL, 0x5fffffffffffffffUL, |
| + /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, |
| + 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, |
| + /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, |
| + 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, |
| + /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, |
| + 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, |
| + /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, |
| + 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, |
| + /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, |
| + 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, |
| + /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, |
| + 0xc1c20d06231f7614UL, 0x2938218da274f972UL, |
| + /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, |
| + 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, |
| + /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, |
| + 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, |
| + /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, |
| + 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, |
| + /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, |
| + 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, |
| + /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, |
| + 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, |
| + /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, |
| + 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, |
| + /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, |
| + 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, |
| + /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, |
| + 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, |
| + /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, |
| + 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, |
| + /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, |
| + 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, |
| + /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, |
| + 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, |
| + /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, |
| + 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, |
| + /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, |
| + 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, |
| + /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, |
| + 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, |
| + /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, |
| + 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, |
| + /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, |
| + 0x23758739f630a257UL, 0x295a407a01a78580UL, |
| + /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, |
| + 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, |
| + /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, |
| + 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, |
| + /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, |
| + 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, |
| + /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, |
| + 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, |
| + /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, |
| + 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, |
| + /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, |
| + 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, |
| + /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, |
| + 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, |
| + /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, |
| + 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, |
| + /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, |
| + 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, |
| + /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, |
| + 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, |
| + /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, |
| + 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, |
| + /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, |
| + 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, |
| + /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, |
| + 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, |
| + /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, |
| + 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, |
| + /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, |
| + 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, |
| + /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, |
| + 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, |
| + /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, |
| + 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, |
| + /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, |
| + 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, |
| + /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, |
| + 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, |
| + /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, |
| + 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, |
| + /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, |
| + 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, |
| + /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, |
| + 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, |
| + /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, |
| + 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, |
| + /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, |
| + 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, |
| + /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, |
| + 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, |
| + /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, |
| + 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, |
| + /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, |
| + 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, |
| + /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, |
| + 0xc189218075e91436UL, 0x6d9284169b3b8484UL, |
| + /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, |
| + 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, |
| + /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, |
| + 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, |
| + /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, |
| + 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, |
| + /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, |
| + 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, |
| + /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, |
| + 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, |
| + /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, |
| + 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, |
| + /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, |
| + 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, |
| + /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, |
| + 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, |
| + /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, |
| + 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, |
| + /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, |
| + 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, |
| + /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, |
| + 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, |
| + /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, |
| + 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, |
| + /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, |
| + 0x25232973322dbef4UL, 0x445dc4758c17f770UL, |
| + /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, |
| + 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, |
| + /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, |
| + 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, |
| + /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, |
| + 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, |
| + /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, |
| + 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, |
| + /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, |
| + 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, |
| + /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, |
| + 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, |
| + /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, |
| + 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, |
| + /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, |
| + 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, |
| + /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, |
| + 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, |
| + /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, |
| + 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, |
| + /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, |
| + 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, |
| + /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, |
| + 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, |
| + /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, |
| + 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, |
| + /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, |
| + 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, |
| + /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, |
| + 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, |
| + /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, |
| + 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, |
| + /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, |
| + 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, |
| + /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, |
| + 0x894d1d855ae52359UL, 0x68e122157b743d69UL, |
| + /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, |
| + 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, |
| + /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, |
| + 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, |
| + /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, |
| + 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, |
| + /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, |
| + 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, |
| + /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, |
| + 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, |
| + /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, |
| + 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, |
| + /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, |
| + 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, |
| + /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, |
| + 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, |
| + /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, |
| + 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, |
| + /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, |
| + 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, |
| + /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, |
| + 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, |
| + /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, |
| + 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, |
| + /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, |
| + 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, |
| + /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, |
| + 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, |
| + /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, |
| + 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, |
| + /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, |
| + 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, |
| + /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, |
| + 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, |
| + /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, |
| + 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, |
| + /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, |
| + 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, |
| + /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, |
| + 0x4a497962066e6043UL, 0x705b3aab41355b44UL, |
| + /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, |
| + 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, |
| + /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, |
| + 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, |
| + /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, |
| + 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, |
| + /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, |
| + 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, |
| + /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, |
| + 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, |
| + /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, |
| + 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, |
| + /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, |
| + 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, |
| + /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, |
| + 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, |
| + /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, |
| + 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, |
| + /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, |
| + 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, |
| + /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, |
| + 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, |
| + /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, |
| + 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, |
| + /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, |
| + 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, |
| + /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, |
| + 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, |
| + /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, |
| + 0x508e862f121692fcUL, 0x3a81907fa093c291UL, |
| + /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, |
| + 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, |
| + /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, |
| + 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, |
| + /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, |
| + 0xe488de11d761e352UL, 0x0e878a01a085545cUL, |
| + /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, |
| + 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, |
| + /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, |
| + 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, |
| + /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, |
| + 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, |
| + /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, |
| + 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, |
| + /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, |
| + 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, |
| + /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, |
| + 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, |
| + /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, |
| + 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, |
| + /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, |
| + 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, |
| + /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, |
| + 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, |
| + /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, |
| + 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, |
| + /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, |
| + 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, |
| + /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, |
| + 0x266fd5809208f294UL, 0x5c847085619a26b9UL, |
| + /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, |
| + 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, |
| + /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, |
| + 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, |
| + /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, |
| + 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, |
| + /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, |
| + 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, |
| + /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, |
| + 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, |
| + /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, |
| + 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, |
| + /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, |
| + 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, |
| + /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, |
| + 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, |
| + /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, |
| + 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, |
| + /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, |
| + 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, |
| + /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, |
| + 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, |
| + /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, |
| + 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, |
| + /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, |
| + 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, |
| + /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, |
| + 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, |
| + /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, |
| + 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, |
| + /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, |
| + 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, |
| + /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, |
| + 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, |
| + /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, |
| + 0x52d17436309d4253UL, 0x356f97e13efae576UL, |
| + /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, |
| + 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, |
| + /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, |
| + 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, |
| + /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, |
| + 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, |
| + /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, |
| + 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, |
| + /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, |
| + 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, |
| + /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, |
| + 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, |
| + /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, |
| + 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, |
| + /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, |
| + 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, |
| + /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, |
| + 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, |
| + /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, |
| + 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, |
| + /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, |
| + 0x497d723f802e88e1UL, 0x30684dea602f408dUL, |
| + /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, |
| + 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, |
| + /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, |
| + 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, |
| + /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, |
| + 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, |
| + /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, |
| + 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, |
| + /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, |
| + 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, |
| + /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, |
| + 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, |
| + /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, |
| + 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, |
| + /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, |
| + 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, |
| + /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, |
| + 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, |
| + /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, |
| + 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, |
| + /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, |
| + 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, |
| + /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, |
| + 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, |
| + /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, |
| + 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, |
| + /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, |
| + 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, |
| + /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, |
| + 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, |
| + /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, |
| + 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, |
| + /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, |
| + 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, |
| + /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, |
| + 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, |
| + /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, |
| + 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, |
| + /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, |
| + 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, |
| + /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, |
| + 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, |
| + /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, |
| + 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, |
| + /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, |
| + 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, |
| + /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, |
| + 0x81004b71e33cc191UL, 0x44e6be345122803cUL, |
| + /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, |
| + 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, |
| + /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, |
| + 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, |
| + /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, |
| + 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, |
| + /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, |
| + 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, |
| + /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, |
| + 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, |
| + /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, |
| + 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, |
| + /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, |
| + 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, |
| + /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, |
| + 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, |
| + /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, |
| + 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, |
| + /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, |
| + 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, |
| + /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, |
| + 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, |
| + /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, |
| + 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, |
| + /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, |
| + 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, |
| + /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, |
| + 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, |
| + /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, |
| + 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, |
| + /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, |
| + 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, |
| + /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, |
| + 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, |
| + /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, |
| + 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, |
| + /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, |
| + 0x33979624f0e917beUL, 0x2c018dc527356b30UL, |
| + /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, |
| + 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, |
| + /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, |
| + 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, |
| + /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, |
| + 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, |
| + /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, |
| + 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, |
| + /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, |
| + 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, |
| + /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, |
| + 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, |
| + /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, |
| + 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, |
| + /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, |
| + 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, |
| + /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, |
| + 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, |
| + /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, |
| + 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, |
| + /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, |
| + 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, |
| + /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, |
| + 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, |
| + /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, |
| + 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, |
| + /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, |
| + 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, |
| + /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, |
| + 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, |
| + /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, |
| + 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, |
| + /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, |
| + 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, |
| + /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, |
| + 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, |
| + /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, |
| + 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, |
| + /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, |
| + 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, |
| + /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, |
| + 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, |
| + /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, |
| + 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, |
| + /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, |
| + 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, |
| + /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, |
| + 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, |
| + /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, |
| + 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, |
| + /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, |
| + 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, |
| + /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, |
| + 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, |
| + /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, |
| + 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, |
| + /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, |
| + 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, |
| + /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, |
| + 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, |
| + /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, |
| + 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, |
| + /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, |
| + 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, |
| + /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, |
| + 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, |
| + /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, |
| + 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, |
| + /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, |
| + 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, |
| + /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, |
| + 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, |
| + /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, |
| + 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, |
| + /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, |
| + 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, |
| + /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, |
| + 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, |
| + /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, |
| + 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, |
| + /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, |
| + 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, |
| + /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, |
| + 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, |
| + /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, |
| + 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, |
| + /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, |
| + 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, |
| + /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, |
| + 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, |
| + /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, |
| + 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, |
| + /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, |
| + 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, |
| + /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, |
| + 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL |
| +}; |
| + |
| +/* |
| + * mul2_256x256_integer_adx - two independent 256x256 -> 512-bit multiplies, |
| + * written with mulx plus the adcx/adox carry instructions. |
| + * |
| + * c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] |
| + * a is two 256-bit integers: a0[0:3] and a1[4:7] |
| + * b is two 256-bit integers: b0[0:3] and b1[4:7] |
| + * |
| + * Indices count 64-bit words: the asm reads a and b at byte offsets 0..56 |
| + * and writes c at byte offsets 0..120. The full 16-word result is stored; |
| + * no reduction is done here. |
| + * |
| + * Each product is computed row by row. For a row, %rdx holds one word of a |
| + * and four mulx instructions multiply it by the four words of b. The adox |
| + * (OF) chain adds each product's low half onto the previous product's high |
| + * half; the adcx (CF) chain adds the row into the running column sums kept |
| + * in %rax, %rbx, %rcx and %r15. %r14 is zeroed once at the top and only ever |
| + * read afterwards, so "adox/adcx %r14, reg" just folds a pending carry in. |
| + * The "xorl %r10d, %r10d" instructions are there for their flag effect |
| + * (xor clears CF and OF); %r10 itself is overwritten by the next mulx. |
| + * |
| + * In the second half the inline comments call the operands C[i] and D[i]; |
| + * these are a[4 + i] and b[4 + i]. |
| + * |
| + * c must not overlap a or b: words of c are stored while later words of a |
| + * and b are still to be read. |
| + * |
| + * There are no output operands; results reach c through memory only, which |
| + * is what the "memory" clobber covers. The instructions used need a CPU with |
| + * BMI2 (mulx) and ADX (adcx/adox); nothing here checks for that, so the |
| + * caller presumably does - TODO confirm at the call site. |
| + */ |
| +static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, |
| + const u64 *const b) |
| +{ |
| + asm volatile( |
| + "xorl %%r14d, %%r14d ;" |
| + "movq (%1), %%rdx; " /* A[0] */ |
| + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "movq %%r8, (%0) ;" |
| + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| + "adox %%r10, %%r15 ;" |
| + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| + "adox %%r8, %%rax ;" |
| + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| + "adox %%r10, %%rbx ;" |
| + /* top word of the row: fold in the pending OF carry */ |
| + "adox %%r14, %%rcx ;" |
| + /* |
| + * NOTE(review): unlike the rows below, this row has no flag-clearing |
| + * xorl. CF and OF look clear here anyway (xorl above; the adox just |
| + * before only adds a carry into a high product word) - confirm. |
| + */ |
| + "movq 8(%1), %%rdx; " /* A[1] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| + "adox %%r15, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rax ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%rbx ;" |
| + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%rcx ;" |
| + /* top word of the row: fold in the pending OF and CF carries */ |
| + "adox %%r14, %%r15 ;" |
| + "adcx %%r14, %%r15 ;" |
| + |
| + "movq 16(%1), %%rdx; " /* A[2] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| + "adox %%rax, %%r8 ;" |
| + "movq %%r8, 16(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rbx ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%rcx ;" |
| + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%r15 ;" |
| + /* top word of the row: fold in the pending OF and CF carries */ |
| + "adox %%r14, %%rax ;" |
| + "adcx %%r14, %%rax ;" |
| + |
| + "movq 24(%1), %%rdx; " /* A[3] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| + "adox %%rbx, %%r8 ;" |
| + "movq %%r8, 24(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rcx ;" |
| + "movq %%rcx, 32(%0) ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%rax ;" |
| + "movq %%rax, 48(%0) ;" |
| + /* last word of the first product: fold in OF and CF, store c[7] */ |
| + "adox %%r14, %%rbx ;" |
| + "adcx %%r14, %%rbx ;" |
| + "movq %%rbx, 56(%0) ;" |
| + /* second product: a[4..7] * b[4..7] -> c[8..15], same schedule */ |
| + "movq 32(%1), %%rdx; " /* C[0] */ |
| + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "movq %%r8, 64(%0);" |
| + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ |
| + "adox %%r10, %%r15 ;" |
| + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ |
| + "adox %%r8, %%rax ;" |
| + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ |
| + "adox %%r10, %%rbx ;" |
| + /* top word of the row: fold in the pending OF carry */ |
| + "adox %%r14, %%rcx ;" |
| + |
| + "movq 40(%1), %%rdx; " /* C[1] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ |
| + "adox %%r15, %%r8 ;" |
| + "movq %%r8, 72(%0);" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rax ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%rbx ;" |
| + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%rcx ;" |
| + /* top word of the row: fold in the pending OF and CF carries */ |
| + "adox %%r14, %%r15 ;" |
| + "adcx %%r14, %%r15 ;" |
| + |
| + "movq 48(%1), %%rdx; " /* C[2] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ |
| + "adox %%rax, %%r8 ;" |
| + "movq %%r8, 80(%0);" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rbx ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%rcx ;" |
| + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%r15 ;" |
| + /* top word of the row: fold in the pending OF and CF carries */ |
| + "adox %%r14, %%rax ;" |
| + "adcx %%r14, %%rax ;" |
| + |
| + "movq 56(%1), %%rdx; " /* C[3] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ |
| + "adox %%rbx, %%r8 ;" |
| + "movq %%r8, 88(%0);" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ |
| + "adox %%r10, %%r9 ;" |
| + "adcx %%r9, %%rcx ;" |
| + "movq %%rcx, 96(%0) ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ |
| + "adox %%r8, %%r11 ;" |
| + "adcx %%r11, %%r15 ;" |
| + "movq %%r15, 104(%0) ;" |
| + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ |
| + "adox %%r10, %%r13 ;" |
| + "adcx %%r13, %%rax ;" |
| + "movq %%rax, 112(%0) ;" |
| + /* last word of the second product: fold in OF and CF, store c[15] */ |
| + "adox %%r14, %%rbx ;" |
| + "adcx %%r14, %%rbx ;" |
| + "movq %%rbx, 120(%0) ;" |
| + : |
| + : "r"(c), "r"(a), "r"(b) |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11", "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, |
| + const u64 *const b) |
| +{ /* Two independent 256x256-bit products using mulx with one add/adc carry chain: c[0..7] = a[0..3] * b[0..3] and c[8..15] = a[4..7] * b[4..7] (64-bit words, least significant first). No modular reduction. c is stored to before a and b are fully read, so it must not overlap them. */ |
| + asm volatile( |
| + "movq (%1), %%rdx; " /* A[0] */ |
| + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| + "movq %%r8, (%0) ;" |
| + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| + "addq %%r10, %%r15 ;" |
| + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| + "adcq %%r8, %%rax ;" |
| + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| + "adcq %%r10, %%rbx ;" |
| + /******************************************/ |
| + "adcq $0, %%rcx ;" |
| + |
| + "movq 8(%1), %%rdx; " /* A[1] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| + "addq %%r15, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%r15 ;" |
| + |
| + "addq %%r9, %%rax ;" /* fold row A[1]*B into the running sum */ |
| + "adcq %%r11, %%rbx ;" |
| + "adcq %%r13, %%rcx ;" |
| + "adcq $0, %%r15 ;" |
| + |
| + "movq 16(%1), %%rdx; " /* A[2] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| + "addq %%rax, %%r8 ;" |
| + "movq %%r8, 16(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rax ;" |
| + |
| + "addq %%r9, %%rbx ;" /* fold row A[2]*B into the running sum */ |
| + "adcq %%r11, %%rcx ;" |
| + "adcq %%r13, %%r15 ;" |
| + "adcq $0, %%rax ;" |
| + |
| + "movq 24(%1), %%rdx; " /* A[3] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| + "addq %%rbx, %%r8 ;" |
| + "movq %%r8, 24(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rbx ;" |
| + |
| + "addq %%r9, %%rcx ;" /* fold the last row and store the upper four words */ |
| + "movq %%rcx, 32(%0) ;" |
| + "adcq %%r11, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "adcq %%r13, %%rax ;" |
| + "movq %%rax, 48(%0) ;" |
| + "adcq $0, %%rbx ;" |
| + "movq %%rbx, 56(%0) ;" |
| + |
| + "movq 32(%1), %%rdx; " /* C[0]; second product: C = a[4..7], D = b[4..7] */ |
| + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ |
| + "movq %%r8, 64(%0) ;" |
| + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ |
| + "addq %%r10, %%r15 ;" |
| + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ |
| + "adcq %%r8, %%rax ;" |
| + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ |
| + "adcq %%r10, %%rbx ;" |
| + /******************************************/ |
| + "adcq $0, %%rcx ;" |
| + |
| + "movq 40(%1), %%rdx; " /* C[1] */ |
| + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ |
| + "addq %%r15, %%r8 ;" |
| + "movq %%r8, 72(%0) ;" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%r15 ;" |
| + |
| + "addq %%r9, %%rax ;" |
| + "adcq %%r11, %%rbx ;" |
| + "adcq %%r13, %%rcx ;" |
| + "adcq $0, %%r15 ;" |
| + |
| + "movq 48(%1), %%rdx; " /* C[2] */ |
| + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ |
| + "addq %%rax, %%r8 ;" |
| + "movq %%r8, 80(%0) ;" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rax ;" |
| + |
| + "addq %%r9, %%rbx ;" |
| + "adcq %%r11, %%rcx ;" |
| + "adcq %%r13, %%r15 ;" |
| + "adcq $0, %%rax ;" |
| + |
| + "movq 56(%1), %%rdx; " /* C[3] */ |
| + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ |
| + "addq %%rbx, %%r8 ;" |
| + "movq %%r8, 88(%0) ;" |
| + "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rbx ;" |
| + |
| + "addq %%r9, %%rcx ;" |
| + "movq %%rcx, 96(%0) ;" |
| + "adcq %%r11, %%r15 ;" |
| + "movq %%r15, 104(%0) ;" |
| + "adcq %%r13, %%rax ;" |
| + "movq %%rax, 112(%0) ;" |
| + "adcq $0, %%rbx ;" |
| + "movq %%rbx, 120(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11", "%r13", "%r15"); |
| +} |
| + |
| +static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
| +{ /* Two independent 256-bit squarings using mulx/adcx/adox: c[0..7] = a[0..3]^2 and c[8..15] = a[4..7]^2 (64-bit words, least significant first). The cross products A[i]*A[j], i != j, are summed once and doubled, then the squares A[i]^2 are added. No modular reduction. c is stored to while a is still being read, so it must not overlap a. */ |
| + asm volatile( |
| + "movq (%1), %%rdx ;" /* A[0] */ |
| + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ |
| + "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */ |
| + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ |
| + "adcx %%r14, %%r9 ;" |
| + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ |
| + "adcx %%rax, %%r10 ;" |
| + "movq 24(%1), %%rdx ;" /* A[3] */ |
| + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ |
| + "adcx %%rcx, %%r11 ;" |
| + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ |
| + "adcx %%rax, %%rbx ;" |
| + "movq 8(%1), %%rdx ;" /* A[1] */ |
| + "adcx %%r15, %%r13 ;" |
| + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ |
| + "movq $0, %%r14 ;" /* mov (not xor) so CF is preserved */ |
| + /******************************************/ |
| + "adcx %%r15, %%r14 ;" |
| + |
| + "xorl %%r15d, %%r15d;" /* restart both carry chains */ |
| + "adox %%rax, %%r10 ;" /* OF chain: add A[2]*A[1] into the cross-product sum */ |
| + "adcx %%r8, %%r8 ;" /* CF chain: double the cross-product sum word by word */ |
| + "adox %%rcx, %%r11 ;" |
| + "adcx %%r9, %%r9 ;" |
| + "adox %%r15, %%rbx ;" |
| + "adcx %%r10, %%r10 ;" |
| + "adox %%r15, %%r13 ;" |
| + "adcx %%r11, %%r11 ;" |
| + "adox %%r15, %%r14 ;" |
| + "adcx %%rbx, %%rbx ;" |
| + "adcx %%r13, %%r13 ;" |
| + "adcx %%r14, %%r14 ;" |
| + |
| + "movq (%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ |
| + /*******************/ |
| + "movq %%rax, 0(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "movq 8(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 16(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 24(%0) ;" |
| + "movq 16(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 32(%0) ;" |
| + "adcq %%rcx, %%rbx ;" |
| + "movq %%rbx, 40(%0) ;" |
| + "movq 24(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 48(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 56(%0) ;" |
| + |
| + |
| + "movq 32(%1), %%rdx ;" /* B[0]; second squaring: B = a[4..7] */ |
| + "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ |
| + "xorl %%r15d, %%r15d;" |
| + "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ |
| + "adcx %%r14, %%r9 ;" |
| + "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ |
| + "adcx %%rax, %%r10 ;" |
| + "movq 56(%1), %%rdx ;" /* B[3] */ |
| + "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ |
| + "adcx %%rcx, %%r11 ;" |
| + "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ |
| + "adcx %%rax, %%rbx ;" |
| + "movq 40(%1), %%rdx ;" /* B[1] */ |
| + "adcx %%r15, %%r13 ;" |
| + "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ |
| + "movq $0, %%r14 ;" |
| + /******************************************/ |
| + "adcx %%r15, %%r14 ;" |
| + |
| + "xorl %%r15d, %%r15d;" |
| + "adox %%rax, %%r10 ;" |
| + "adcx %%r8, %%r8 ;" |
| + "adox %%rcx, %%r11 ;" |
| + "adcx %%r9, %%r9 ;" |
| + "adox %%r15, %%rbx ;" |
| + "adcx %%r10, %%r10 ;" |
| + "adox %%r15, %%r13 ;" |
| + "adcx %%r11, %%r11 ;" |
| + "adox %%r15, %%r14 ;" |
| + "adcx %%rbx, %%rbx ;" |
| + "adcx %%r13, %%r13 ;" |
| + "adcx %%r14, %%r14 ;" |
| + |
| + "movq 32(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ |
| + /*******************/ |
| + "movq %%rax, 64(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 72(%0) ;" |
| + "movq 40(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 80(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 88(%0) ;" |
| + "movq 48(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 96(%0) ;" |
| + "adcq %%rcx, %%rbx ;" |
| + "movq %%rbx, 104(%0) ;" |
| + "movq 56(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 112(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 120(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11", "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) |
| +{ /* Two independent 256-bit squarings using mulx with add/adc and shld (no adcx/adox): c[0..7] = a[0..3]^2 and c[8..15] = a[4..7]^2 (64-bit words, least significant first). Cross products are summed, doubled with a shld chain, then the squares A[i]^2 are added. No modular reduction. c is stored to while a is still being read, so it must not overlap a. */ |
| + asm volatile( |
| + "movq 8(%1), %%rdx ;" /* A[1] */ |
| + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ |
| + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ |
| + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ |
| + |
| + "movq 16(%1), %%rdx ;" /* A[2] */ |
| + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ |
| + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2]; high half overwrites rdx */ |
| + |
| + "addq %%rax, %%r9 ;" |
| + "adcq %%rdx, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq %%r14, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "movq $0, %%r14 ;" /* mov (not xor) so CF is preserved */ |
| + "adcq $0, %%r14 ;" |
| + |
| + "movq (%1), %%rdx ;" /* A[0] */ |
| + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ |
| + |
| + "addq %%rax, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq $0, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "adcq $0, %%r14 ;" |
| + |
| + "shldq $1, %%r13, %%r14 ;" /* double the 7-word cross-product sum, top word first */ |
| + "shldq $1, %%r15, %%r13 ;" |
| + "shldq $1, %%r11, %%r15 ;" |
| + "shldq $1, %%r10, %%r11 ;" |
| + "shldq $1, %%r9, %%r10 ;" |
| + "shldq $1, %%r8, %%r9 ;" |
| + "shlq $1, %%r8 ;" |
| + |
| + /*******************/ |
| + "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2; rdx still holds A[0] from the load above */ |
| + /*******************/ |
| + "movq %%rax, 0(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "movq 8(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 16(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 24(%0) ;" |
| + "movq 16(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 32(%0) ;" |
| + "adcq %%rcx, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "movq 24(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 48(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 56(%0) ;" |
| + |
| + "movq 40(%1), %%rdx ;" /* B[1]; second squaring: B = a[4..7] */ |
| + "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ |
| + "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ |
| + "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ |
| + |
| + "movq 48(%1), %%rdx ;" /* B[2] */ |
| + "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ |
| + "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ |
| + |
| + "addq %%rax, %%r9 ;" |
| + "adcq %%rdx, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq %%r14, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "movq $0, %%r14 ;" |
| + "adcq $0, %%r14 ;" |
| + |
| + "movq 32(%1), %%rdx ;" /* B[0] */ |
| + "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ |
| + |
| + "addq %%rax, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq $0, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "adcq $0, %%r14 ;" |
| + |
| + "shldq $1, %%r13, %%r14 ;" |
| + "shldq $1, %%r15, %%r13 ;" |
| + "shldq $1, %%r11, %%r15 ;" |
| + "shldq $1, %%r10, %%r11 ;" |
| + "shldq $1, %%r9, %%r10 ;" |
| + "shldq $1, %%r8, %%r9 ;" |
| + "shlq $1, %%r8 ;" |
| + |
| + /*******************/ |
| + "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2; rdx still holds B[0] */ |
| + /*******************/ |
| + "movq %%rax, 64(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 72(%0) ;" |
| + "movq 40(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 80(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 88(%0) ;" |
| + "movq 48(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 96(%0) ;" |
| + "adcq %%rcx, %%r15 ;" |
| + "movq %%r15, 104(%0) ;" |
| + "movq 56(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 112(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 120(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| + "%r11", "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) |
| +{ /* Folds two 512-bit values down to 256 bits each using mulx/adcx/adox: for a[0..7] and again for a[8..15], adds 38 times the upper four words to the lower four, then folds the overflow back in (again times 38). Results go to c[0..3] and c[4..7]. The value is preserved modulo 2^255-19; no final subtraction of the prime is done here. */ |
| + asm volatile( |
| + "movl $38, %%edx; " /* 2*c = 38 = 2^256 mod (2^255-19) */ |
| + "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ |
| + "xorl %%ebx, %%ebx ;" /* rbx = 0; also clears CF and OF */ |
| + "adox (%1), %%r8 ;" |
| + "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ |
| + "adcx %%r10, %%r9 ;" |
| + "adox 8(%1), %%r9 ;" |
| + "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ |
| + "adcx %%r11, %%r10 ;" |
| + "adox 16(%1), %%r10 ;" |
| + "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ |
| + "adcx %%rax, %%r11 ;" |
| + "adox 24(%1), %%r11 ;" |
| + /***************************************/ |
| + "adcx %%rbx, %%rcx ;" |
| + "adox %%rbx, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0, of=0 */ |
| + "adcx %%rcx, %%r8 ;" |
| + "adcx %%rbx, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcx %%rbx, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcx %%rbx, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%edx, %%ecx ;" /* a final carry out of 2^256 is worth another 38 */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + |
| + "mulx 96(%1), %%r8, %%r10; " /* c*C[4] of the second value a[8..15]; rdx is still 38 */ |
| + "xorl %%ebx, %%ebx ;" |
| + "adox 64(%1), %%r8 ;" |
| + "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ |
| + "adcx %%r10, %%r9 ;" |
| + "adox 72(%1), %%r9 ;" |
| + "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ |
| + "adcx %%r11, %%r10 ;" |
| + "adox 80(%1), %%r10 ;" |
| + "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ |
| + "adcx %%rax, %%r11 ;" |
| + "adox 88(%1), %%r11 ;" |
| + /****************************************/ |
| + "adcx %%rbx, %%rcx ;" |
| + "adox %%rbx, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0, of=0 */ |
| + "adcx %%rcx, %%r8 ;" |
| + "adcx %%rbx, %%r9 ;" |
| + "movq %%r9, 40(%0) ;" |
| + "adcx %%rbx, %%r10 ;" |
| + "movq %%r10, 48(%0) ;" |
| + "adcx %%rbx, %%r11 ;" |
| + "movq %%r11, 56(%0) ;" |
| + "mov $0, %%ecx ;" |
| + "cmovc %%edx, %%ecx ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 32(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11"); |
| +} |
| + |
| +static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) |
| +{ /* Folds two 512-bit values down to 256 bits each using mulx with add/adc (no adcx/adox): for a[0..7] and again for a[8..15], adds 38 times the upper four words to the lower four, then folds the overflow back in (again times 38). Results go to c[0..3] and c[4..7]. The value is preserved modulo 2^255-19; no final subtraction of the prime is done here. */ |
| + asm volatile( |
| + "movl $38, %%edx ; " /* 2*c = 38 = 2^256 mod (2^255-19) */ |
| + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| + "addq %%r10, %%r9 ;" |
| + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| + "adcq %%r11, %%r10 ;" |
| + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| + "adcq %%rax, %%r11 ;" |
| + /***************************************/ |
| + "adcq $0, %%rcx ;" |
| + "addq (%1), %%r8 ;" /* add the low four words C[0..3] */ |
| + "adcq 8(%1), %%r9 ;" |
| + "adcq 16(%1), %%r10 ;" |
| + "adcq 24(%1), %%r11 ;" |
| + "adcq $0, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0 */ |
| + "addq %%rcx, %%r8 ;" |
| + "adcq $0, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcq $0, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcq $0, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%edx, %%ecx ;" /* a final carry out of 2^256 is worth another 38 */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + |
| + "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] of the second value a[8..15]; rdx is still 38 */ |
| + "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| + "addq %%r10, %%r9 ;" |
| + "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ |
| + "adcq %%r11, %%r10 ;" |
| + "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| + "adcq %%rax, %%r11 ;" |
| + /****************************************/ |
| + "adcq $0, %%rcx ;" |
| + "addq 64(%1), %%r8 ;" |
| + "adcq 72(%1), %%r9 ;" |
| + "adcq 80(%1), %%r10 ;" |
| + "adcq 88(%1), %%r11 ;" |
| + "adcq $0, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0 */ |
| + "addq %%rcx, %%r8 ;" |
| + "adcq $0, %%r9 ;" |
| + "movq %%r9, 40(%0) ;" |
| + "adcq $0, %%r10 ;" |
| + "movq %%r10, 48(%0) ;" |
| + "adcq $0, %%r11 ;" |
| + "movq %%r11, 56(%0) ;" |
| + "mov $0, %%ecx ;" |
| + "cmovc %%edx, %%ecx ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 32(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| + "%r11"); |
| +} |
| + |
| +static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, |
| + const u64 *const b) |
| +{ /* 256x256-bit product using mulx/adcx/adox: c[0..7] = a[0..3] * b[0..3] (64-bit words, least significant first). No modular reduction. Partial sums are stored to c and read back between rows, and c is written before a and b are fully read, so c must not overlap a or b. */ |
| + asm volatile( |
| + "movq (%1), %%rdx; " /* A[0] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ |
| + "xorl %%r10d, %%r10d ;" /* only needed to clear CF and OF; r10 is overwritten below */ |
| + "movq %%r8, (%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ |
| + "adox %%r9, %%r10 ;" |
| + "movq %%r10, 8(%0) ;" |
| + "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ |
| + "adox %%r11, %%r15 ;" |
| + "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ |
| + "adox %%r13, %%r14 ;" |
| + "movq $0, %%rax ;" /* mov (not xor) so OF is preserved */ |
| + /******************************************/ |
| + "adox %%rdx, %%rax ;" |
| + |
| + "movq 8(%1), %%rdx; " /* A[1] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "adcx 8(%0), %%r8 ;" /* CF chain: add the previous partial sum (read back from c) */ |
| + "movq %%r8, 8(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| + "adox %%r9, %%r10 ;" /* OF chain: add the high half of the previous product */ |
| + "adcx %%r15, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ |
| + "adox %%r11, %%r15 ;" |
| + "adcx %%r14, %%r15 ;" |
| + "movq $0, %%r8 ;" |
| + "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ |
| + "adox %%r13, %%r14 ;" |
| + "adcx %%rax, %%r14 ;" |
| + "movq $0, %%rax ;" |
| + /******************************************/ |
| + "adox %%rdx, %%rax ;" |
| + "adcx %%r8, %%rax ;" |
| + |
| + "movq 16(%1), %%rdx; " /* A[2] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "adcx 16(%0), %%r8 ;" |
| + "movq %%r8, 16(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| + "adox %%r9, %%r10 ;" |
| + "adcx %%r15, %%r10 ;" |
| + "movq %%r10, 24(%0) ;" |
| + "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ |
| + "adox %%r11, %%r15 ;" |
| + "adcx %%r14, %%r15 ;" |
| + "movq $0, %%r8 ;" |
| + "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ |
| + "adox %%r13, %%r14 ;" |
| + "adcx %%rax, %%r14 ;" |
| + "movq $0, %%rax ;" |
| + /******************************************/ |
| + "adox %%rdx, %%rax ;" |
| + "adcx %%r8, %%rax ;" |
| + |
| + "movq 24(%1), %%rdx; " /* A[3] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| + "xorl %%r10d, %%r10d ;" |
| + "adcx 24(%0), %%r8 ;" |
| + "movq %%r8, 24(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| + "adox %%r9, %%r10 ;" |
| + "adcx %%r15, %%r10 ;" |
| + "movq %%r10, 32(%0) ;" |
| + "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ |
| + "adox %%r11, %%r15 ;" |
| + "adcx %%r14, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "movq $0, %%r8 ;" |
| + "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ |
| + "adox %%r13, %%r14 ;" |
| + "adcx %%rax, %%r14 ;" |
| + "movq %%r14, 48(%0) ;" |
| + "movq $0, %%rax ;" |
| + /******************************************/ |
| + "adox %%rdx, %%rax ;" |
| + "adcx %%r8, %%rax ;" |
| + "movq %%rax, 56(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", |
| + "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, |
| + const u64 *const b) |
| +{ /* 256x256-bit product using mulx with one add/adc carry chain (no adcx/adox): c[0..7] = a[0..3] * b[0..3] (64-bit words, least significant first). No modular reduction. c is stored to before a and b are fully read, so it must not overlap them. */ |
| + asm volatile( |
| + "movq (%1), %%rdx; " /* A[0] */ |
| + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| + "movq %%r8, (%0) ;" |
| + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| + "addq %%r10, %%r15 ;" |
| + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| + "adcq %%r8, %%rax ;" |
| + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| + "adcq %%r10, %%rbx ;" |
| + /******************************************/ |
| + "adcq $0, %%rcx ;" |
| + |
| + "movq 8(%1), %%rdx; " /* A[1] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| + "addq %%r15, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%r15 ;" |
| + |
| + "addq %%r9, %%rax ;" /* fold row A[1]*B into the running sum */ |
| + "adcq %%r11, %%rbx ;" |
| + "adcq %%r13, %%rcx ;" |
| + "adcq $0, %%r15 ;" |
| + |
| + "movq 16(%1), %%rdx; " /* A[2] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| + "addq %%rax, %%r8 ;" |
| + "movq %%r8, 16(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rax ;" |
| + |
| + "addq %%r9, %%rbx ;" /* fold row A[2]*B into the running sum */ |
| + "adcq %%r11, %%rcx ;" |
| + "adcq %%r13, %%r15 ;" |
| + "adcq $0, %%rax ;" |
| + |
| + "movq 24(%1), %%rdx; " /* A[3] */ |
| + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| + "addq %%rbx, %%r8 ;" |
| + "movq %%r8, 24(%0) ;" |
| + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| + "adcq %%r10, %%r9 ;" |
| + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| + "adcq %%r8, %%r11 ;" |
| + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| + "adcq %%r10, %%r13 ;" |
| + /******************************************/ |
| + "adcq $0, %%rbx ;" |
| + |
| + "addq %%r9, %%rcx ;" /* fold the last row and store the upper four words */ |
| + "movq %%rcx, 32(%0) ;" |
| + "adcq %%r11, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "adcq %%r13, %%rax ;" |
| + "movq %%rax, 48(%0) ;" |
| + "adcq $0, %%rbx ;" |
| + "movq %%rbx, 56(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11", "%r13", "%r15"); |
| +} |
| + |
| +static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) |
| +{ /* 256-bit squaring using mulx/adcx/adox: c[0..7] = a[0..3]^2 (64-bit words, least significant first). The cross products A[i]*A[j], i != j, are summed once and doubled, then the squares A[i]^2 are added. No modular reduction. c is stored to while a is still being read, so it must not overlap a. */ |
| + asm volatile( |
| + "movq (%1), %%rdx ;" /* A[0] */ |
| + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ |
| + "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */ |
| + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ |
| + "adcx %%r14, %%r9 ;" |
| + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ |
| + "adcx %%rax, %%r10 ;" |
| + "movq 24(%1), %%rdx ;" /* A[3] */ |
| + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ |
| + "adcx %%rcx, %%r11 ;" |
| + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ |
| + "adcx %%rax, %%rbx ;" |
| + "movq 8(%1), %%rdx ;" /* A[1] */ |
| + "adcx %%r15, %%r13 ;" |
| + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ |
| + "movq $0, %%r14 ;" /* mov (not xor) so CF is preserved */ |
| + /******************************************/ |
| + "adcx %%r15, %%r14 ;" |
| + |
| + "xorl %%r15d, %%r15d;" /* restart both carry chains */ |
| + "adox %%rax, %%r10 ;" /* OF chain: add A[2]*A[1] into the cross-product sum */ |
| + "adcx %%r8, %%r8 ;" /* CF chain: double the cross-product sum word by word */ |
| + "adox %%rcx, %%r11 ;" |
| + "adcx %%r9, %%r9 ;" |
| + "adox %%r15, %%rbx ;" |
| + "adcx %%r10, %%r10 ;" |
| + "adox %%r15, %%r13 ;" |
| + "adcx %%r11, %%r11 ;" |
| + "adox %%r15, %%r14 ;" |
| + "adcx %%rbx, %%rbx ;" |
| + "adcx %%r13, %%r13 ;" |
| + "adcx %%r14, %%r14 ;" |
| + |
| + "movq (%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ |
| + /*******************/ |
| + "movq %%rax, 0(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "movq 8(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 16(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 24(%0) ;" |
| + "movq 16(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 32(%0) ;" |
| + "adcq %%rcx, %%rbx ;" |
| + "movq %%rbx, 40(%0) ;" |
| + "movq 24(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 48(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 56(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11", "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) |
| +{ /* 256-bit squaring using mulx with add/adc and shld (no adcx/adox): c[0..7] = a[0..3]^2 (64-bit words, least significant first). Cross products are summed, doubled with a shld chain, then the squares A[i]^2 are added. No modular reduction. c is stored to while a is still being read, so it must not overlap a. */ |
| + asm volatile( |
| + "movq 8(%1), %%rdx ;" /* A[1] */ |
| + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ |
| + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ |
| + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ |
| + |
| + "movq 16(%1), %%rdx ;" /* A[2] */ |
| + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ |
| + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2]; high half overwrites rdx */ |
| + |
| + "addq %%rax, %%r9 ;" |
| + "adcq %%rdx, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq %%r14, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "movq $0, %%r14 ;" /* mov (not xor) so CF is preserved */ |
| + "adcq $0, %%r14 ;" |
| + |
| + "movq (%1), %%rdx ;" /* A[0] */ |
| + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ |
| + |
| + "addq %%rax, %%r10 ;" |
| + "adcq %%rcx, %%r11 ;" |
| + "adcq $0, %%r15 ;" |
| + "adcq $0, %%r13 ;" |
| + "adcq $0, %%r14 ;" |
| + |
| + "shldq $1, %%r13, %%r14 ;" /* double the 7-word cross-product sum, top word first */ |
| + "shldq $1, %%r15, %%r13 ;" |
| + "shldq $1, %%r11, %%r15 ;" |
| + "shldq $1, %%r10, %%r11 ;" |
| + "shldq $1, %%r9, %%r10 ;" |
| + "shldq $1, %%r8, %%r9 ;" |
| + "shlq $1, %%r8 ;" |
| + |
| + /*******************/ |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2; rdx still holds A[0] from the load above */ |
| + /*******************/ |
| + "movq %%rax, 0(%0) ;" |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, 8(%0) ;" |
| + "movq 8(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| + "adcq %%rax, %%r9 ;" |
| + "movq %%r9, 16(%0) ;" |
| + "adcq %%rcx, %%r10 ;" |
| + "movq %%r10, 24(%0) ;" |
| + "movq 16(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| + "adcq %%rax, %%r11 ;" |
| + "movq %%r11, 32(%0) ;" |
| + "adcq %%rcx, %%r15 ;" |
| + "movq %%r15, 40(%0) ;" |
| + "movq 24(%1), %%rdx ;" |
| + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| + "adcq %%rax, %%r13 ;" |
| + "movq %%r13, 48(%0) ;" |
| + "adcq %%rcx, %%r14 ;" |
| + "movq %%r14, 56(%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| + "%r11", "%r13", "%r14", "%r15"); |
| +} |
| + |
| +static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) |
| +{ /* Folds one 512-bit value a[0..7] down to 256 bits in c[0..3] using mulx/adcx/adox: adds 38 times the upper four words to the lower four, then folds the overflow back in (again times 38). The value is preserved modulo 2^255-19; no final subtraction of the prime is done here. */ |
| + asm volatile( |
| + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod (2^255-19) */ |
| + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| + "xorl %%ebx, %%ebx ;" /* rbx = 0; also clears CF and OF */ |
| + "adox (%1), %%r8 ;" |
| + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| + "adcx %%r10, %%r9 ;" |
| + "adox 8(%1), %%r9 ;" |
| + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| + "adcx %%r11, %%r10 ;" |
| + "adox 16(%1), %%r10 ;" |
| + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| + "adcx %%rax, %%r11 ;" |
| + "adox 24(%1), %%r11 ;" |
| + /***************************************/ |
| + "adcx %%rbx, %%rcx ;" |
| + "adox %%rbx, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0, of=0 */ |
| + "adcx %%rcx, %%r8 ;" |
| + "adcx %%rbx, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcx %%rbx, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcx %%rbx, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%edx, %%ecx ;" /* a final carry out of 2^256 is worth another 38 */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| + "%r10", "%r11"); |
| +} |
| + |
| +static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) |
| +{ /* Folds one 512-bit value a[0..7] down to 256 bits in c[0..3] using mulx with add/adc (no adcx/adox): adds 38 times the upper four words to the lower four, then folds the overflow back in (again times 38). The value is preserved modulo 2^255-19; no final subtraction of the prime is done here. */ |
| + asm volatile( |
| + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod (2^255-19) */ |
| + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| + "addq %%r10, %%r9 ;" |
| + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| + "adcq %%r11, %%r10 ;" |
| + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| + "adcq %%rax, %%r11 ;" |
| + /***************************************/ |
| + "adcq $0, %%rcx ;" |
| + "addq (%1), %%r8 ;" /* add the low four words C[0..3] */ |
| + "adcq 8(%1), %%r9 ;" |
| + "adcq 16(%1), %%r10 ;" |
| + "adcq 24(%1), %%r11 ;" |
| + "adcq $0, %%rcx ;" |
| + "imul %%rdx, %%rcx ;" /* c * overflow word; the product is small, so cf=0 */ |
| + "addq %%rcx, %%r8 ;" |
| + "adcq $0, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcq $0, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcq $0, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%edx, %%ecx ;" /* a final carry out of 2^256 is worth another 38 */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a) /* %0 = c, %1 = a */ |
| + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| + "%r11"); |
| +} |
| + |
| +static __always_inline void |
| +add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) |
| +{ /* c[0..3] = a[0..3] + b[0..3] as 256-bit integers, using adcx. A carry out of 2^256 is folded back in by adding 38 (2^256 mod 2^255-19), twice if the first fold carries again. The result is not brought below the prime here. */ |
| + asm volatile( |
| + "mov $38, %%eax ;" |
| + "xorl %%ecx, %%ecx ;" /* rcx = 0; also clears CF for the adcx chain */ |
| + "movq (%2), %%r8 ;" |
| + "adcx (%1), %%r8 ;" |
| + "movq 8(%2), %%r9 ;" |
| + "adcx 8(%1), %%r9 ;" |
| + "movq 16(%2), %%r10 ;" |
| + "adcx 16(%1), %%r10 ;" |
| + "movq 24(%2), %%r11 ;" |
| + "adcx 24(%1), %%r11 ;" |
| + "cmovc %%eax, %%ecx ;" /* rcx = 38 if the sum overflowed 2^256, else 0 */ |
| + "xorl %%eax, %%eax ;" /* rax = 0; clears CF for the second chain */ |
| + "adcx %%rcx, %%r8 ;" |
| + "adcx %%rax, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcx %%rax, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcx %%rax, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $38, %%ecx ;" /* mov leaves CF untouched for cmovc */ |
| + "cmovc %%ecx, %%eax ;" /* second fold if adding 38 carried out again */ |
| + "addq %%rax, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| +} |
| + |
| +static __always_inline void |
| +add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) |
| +{ /* c[0..3] = a[0..3] + b[0..3] as 256-bit integers, using plain add/adc. A carry out of 2^256 is folded back in by adding 38 (2^256 mod 2^255-19), twice if the first fold carries again. The result is not brought below the prime here. */ |
| + asm volatile( |
| + "mov $38, %%eax ;" |
| + "movq (%2), %%r8 ;" |
| + "addq (%1), %%r8 ;" |
| + "movq 8(%2), %%r9 ;" |
| + "adcq 8(%1), %%r9 ;" |
| + "movq 16(%2), %%r10 ;" |
| + "adcq 16(%1), %%r10 ;" |
| + "movq 24(%2), %%r11 ;" |
| + "adcq 24(%1), %%r11 ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%eax, %%ecx ;" /* rcx = 38 if the sum overflowed 2^256, else 0 */ |
| + "addq %%rcx, %%r8 ;" |
| + "adcq $0, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcq $0, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcq $0, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" |
| + "cmovc %%eax, %%ecx ;" /* second fold if adding 38 carried out again */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| +} |
| + |
| +static __always_inline void |
| +sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) |
| +{ /* c[0..3] = a[0..3] - b[0..3] as 256-bit integers, using sub/sbb. A borrow out of the top word is compensated by subtracting 38 (2^256 mod 2^255-19), twice if the first correction borrows again. The result is not brought below the prime here. */ |
| + asm volatile( |
| + "mov $38, %%eax ;" |
| + "movq (%1), %%r8 ;" |
| + "subq (%2), %%r8 ;" |
| + "movq 8(%1), %%r9 ;" |
| + "sbbq 8(%2), %%r9 ;" |
| + "movq 16(%1), %%r10 ;" |
| + "sbbq 16(%2), %%r10 ;" |
| + "movq 24(%1), %%r11 ;" |
| + "sbbq 24(%2), %%r11 ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF (the borrow) is preserved for cmovc */ |
| + "cmovc %%eax, %%ecx ;" /* rcx = 38 if the subtraction borrowed, else 0 */ |
| + "subq %%rcx, %%r8 ;" |
| + "sbbq $0, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "sbbq $0, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "sbbq $0, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" |
| + "cmovc %%eax, %%ecx ;" /* second correction if subtracting 38 borrowed again */ |
| + "subq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */ |
| + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| +} |
| + |
| +/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666: c[0..3] = a[0..3] * 121666, with the fifth (overflow) word folded back in times 38. The result is not brought below the prime here. */ |
| +static __always_inline void |
| +mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) |
| +{ |
| + const u64 a24 = 121666; |
| + asm volatile( |
| + "movq %2, %%rdx ;" /* rdx = a24, the implicit mulx multiplicand */ |
| + "mulx (%1), %%r8, %%r10 ;" |
| + "mulx 8(%1), %%r9, %%r11 ;" |
| + "addq %%r10, %%r9 ;" |
| + "mulx 16(%1), %%r10, %%rax ;" |
| + "adcq %%r11, %%r10 ;" |
| + "mulx 24(%1), %%r11, %%rcx ;" |
| + "adcq %%rax, %%r11 ;" |
| + /**************************/ |
| + "adcq $0, %%rcx ;" /* rcx = overflow word above 2^256 */ |
| + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ |
| + "imul %%rdx, %%rcx ;" /* 38 * overflow word */ |
| + "addq %%rcx, %%r8 ;" |
| + "adcq $0, %%r9 ;" |
| + "movq %%r9, 8(%0) ;" |
| + "adcq $0, %%r10 ;" |
| + "movq %%r10, 16(%0) ;" |
| + "adcq $0, %%r11 ;" |
| + "movq %%r11, 24(%0) ;" |
| + "mov $0, %%ecx ;" /* mov (not xor) so CF is preserved for cmovc */ |
| + "cmovc %%edx, %%ecx ;" /* a final carry out of 2^256 is worth another 38 */ |
| + "addq %%rcx, %%r8 ;" |
| + "movq %%r8, (%0) ;" |
| + : /* no outputs: results are stored through %0 (covered by the "memory" clobber) */ |
| + : "r"(c), "r"(a), "r"(a24) /* %0 = c, %1 = a, %2 = a24 */ |
| + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| + "%r11"); |
| +} |
| + |
| +static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) |
| +{ /* Field inversion by a fixed square-and-multiply chain, result in c. Exponent notes below assume sqrn_eltfp25519_1w_adx(x, n) squares x in place n times and mul_eltfp25519_1w_adx(r, x, y) sets r = x * y; both are defined outside this view - TODO confirm. Under that assumption c = a^(2^255 - 21) = a^(p - 2) for p = 2^255 - 19. NOTE(review): a is read again after c has been written, so c == a looks unsafe - confirm callers never alias. */ |
| + struct { |
| + eltfp25519_1w_buffer buffer; /* not referenced directly here; presumably scratch used by the mul/sqrn helpers - confirm */ |
| + eltfp25519_1w x0, x1, x2; |
| + } __aligned(32) m; |
| + u64 *T[4]; |
| + |
| + T[0] = m.x0; |
| + T[1] = c; /* x^(-1): the result accumulates directly in c */ |
| + T[2] = m.x1; |
| + T[3] = m.x2; |
| + |
| + copy_eltfp25519_1w(T[1], a); |
| + sqrn_eltfp25519_1w_adx(T[1], 1); /* T[1] = a^2 */ |
| + copy_eltfp25519_1w(T[2], T[1]); |
| + sqrn_eltfp25519_1w_adx(T[2], 2); /* T[2] = a^8 */ |
| + mul_eltfp25519_1w_adx(T[0], a, T[2]); /* T[0] = a^9 */ |
| + mul_eltfp25519_1w_adx(T[1], T[1], T[0]); /* T[1] = a^11 */ |
| + copy_eltfp25519_1w(T[2], T[1]); |
| + sqrn_eltfp25519_1w_adx(T[2], 1); /* T[2] = a^22 */ |
| + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); /* T[0] = a^(2^5 - 1) */ |
| + copy_eltfp25519_1w(T[2], T[0]); |
| + sqrn_eltfp25519_1w_adx(T[2], 5); |
| + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); /* T[0] = a^(2^10 - 1) */ |
| + copy_eltfp25519_1w(T[2], T[0]); |
| + sqrn_eltfp25519_1w_adx(T[2], 10); |
| + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); /* T[2] = a^(2^20 - 1) */ |
| + copy_eltfp25519_1w(T[3], T[2]); |
| + sqrn_eltfp25519_1w_adx(T[3], 20); |
| + mul_eltfp25519_1w_adx(T[3], T[3], T[2]); /* T[3] = a^(2^40 - 1) */ |
| + sqrn_eltfp25519_1w_adx(T[3], 10); |
| + mul_eltfp25519_1w_adx(T[3], T[3], T[0]); /* T[3] = a^(2^50 - 1) */ |
| + copy_eltfp25519_1w(T[0], T[3]); |
| + sqrn_eltfp25519_1w_adx(T[0], 50); |
| + mul_eltfp25519_1w_adx(T[0], T[0], T[3]); /* T[0] = a^(2^100 - 1) */ |
| + copy_eltfp25519_1w(T[2], T[0]); |
| + sqrn_eltfp25519_1w_adx(T[2], 100); |
| + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); /* T[2] = a^(2^200 - 1) */ |
| + sqrn_eltfp25519_1w_adx(T[2], 50); |
| + mul_eltfp25519_1w_adx(T[2], T[2], T[3]); /* T[2] = a^(2^250 - 1) */ |
| + sqrn_eltfp25519_1w_adx(T[2], 5); /* T[2] = a^(2^255 - 32) */ |
| + mul_eltfp25519_1w_adx(T[1], T[1], T[2]); /* T[1] = a^(2^255 - 21) */ |
| + |
| + memzero_explicit(&m, sizeof(m)); /* wipe the on-stack temporaries before returning */ |
| +} |
| + |
| +/* |
| + * Field inversion, "bmi2" flavour: c receives a^(-1). |
| + * |
| + * Same fixed chain of squarings and multiplications as the adx variant, |
| + * issued through the *_bmi2 helpers. Assuming sqrn squares in place n |
| + * times and mul stores the product of its last two arguments in the |
| + * first, the chain raises a to the power 2^255 - 21 = (2^255 - 19) - 2. |
| + * |
| + * T[] holds exactly four pointers: three scratch elements inside m and |
| + * the caller's output c. |
| + * NOTE(review): m.buffer is never named here; presumably the mul helpers |
| + * reach it as m.buffer - confirm before renaming m or its members. |
| + */ |
| +static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) |
| +{ |
| +	struct { |
| +		eltfp25519_1w_buffer buffer; |
| +		eltfp25519_1w x0, x1, x2; |
| +	} __aligned(32) m; |
| +	u64 *T[4]; |
| + |
| +	T[0] = m.x0; |
| +	T[1] = c; /* x^(-1) */ |
| +	T[2] = m.x1; |
| +	T[3] = m.x2; |
| + |
| +	copy_eltfp25519_1w(T[1], a); |
| +	sqrn_eltfp25519_1w_bmi2(T[1], 1);		/* T1 = a^2 */ |
| +	copy_eltfp25519_1w(T[2], T[1]); |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 2);		/* T2 = a^8 */ |
| +	mul_eltfp25519_1w_bmi2(T[0], a, T[2]);		/* T0 = a^9 */ |
| +	mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);	/* T1 = a^11 */ |
| +	copy_eltfp25519_1w(T[2], T[1]); |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 1);		/* T2 = a^22 */ |
| +	mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);	/* T0 = a^(2^5 - 1) */ |
| +	copy_eltfp25519_1w(T[2], T[0]); |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 5); |
| +	mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);	/* T0 = a^(2^10 - 1) */ |
| +	copy_eltfp25519_1w(T[2], T[0]); |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 10); |
| +	mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);	/* T2 = a^(2^20 - 1) */ |
| +	copy_eltfp25519_1w(T[3], T[2]); |
| +	sqrn_eltfp25519_1w_bmi2(T[3], 20); |
| +	mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);	/* T3 = a^(2^40 - 1) */ |
| +	sqrn_eltfp25519_1w_bmi2(T[3], 10); |
| +	mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);	/* T3 = a^(2^50 - 1) */ |
| +	copy_eltfp25519_1w(T[0], T[3]); |
| +	sqrn_eltfp25519_1w_bmi2(T[0], 50); |
| +	mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);	/* T0 = a^(2^100 - 1) */ |
| +	copy_eltfp25519_1w(T[2], T[0]); |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 100); |
| +	mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);	/* T2 = a^(2^200 - 1) */ |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 50); |
| +	mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);	/* T2 = a^(2^250 - 1) */ |
| +	sqrn_eltfp25519_1w_bmi2(T[2], 5);		/* T2 = a^(2^255 - 32) */ |
| +	mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);	/* T1 = a^(2^255 - 21) */ |
| + |
| +	/* wipe the intermediate powers held in m */ |
| +	memzero_explicit(&m, sizeof(m)); |
| +} |
| + |
| +/* Given c, a 256-bit number, fred_eltfp25519_1w updates c |
| + * with a number such that 0 <= C < 2**255-19. |
| + * |
| + * c points to four 64-bit words, least significant first (bit 255 is |
| + * bit 63 of c[3]). The four words are passed as read-write register |
| + * operands %0..%3; %4 (tmp0) carries the amount to add and then to |
| + * subtract, %5 (tmp1) stays 19 throughout. |
| + */ |
| +static __always_inline void fred_eltfp25519_1w(u64 *const c) |
| +{ |
| + u64 tmp0 = 38, tmp1 = 19; /* %4 starts as 38, %5 is the constant 19 */ |
| + asm volatile( |
| + "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ |
| + "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ |
| + |
| + /* Add either 19 or 38 to c */ |
| + "addq %4, %0 ;" |
| + "adcq $0, %1 ;" |
| + "adcq $0, %2 ;" |
| + "adcq $0, %3 ;" /* sign flag now mirrors bit 255 of the sum */ |
| + |
| + /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ |
| + "movl $0, %k4 ;" /* mov does not disturb the sign flag */ |
| + "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ |
| + "btrq $63, %3 ;" /* Clear bit 255 */ |
| + |
| + /* Subtract 19 if necessary */ |
| + "subq %4, %0 ;" |
| + "sbbq $0, %1 ;" |
| + "sbbq $0, %2 ;" |
| + "sbbq $0, %3 ;" |
| + |
| + : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), |
| + "+r"(tmp1) |
| + : |
| + : "memory", "cc"); |
| +} |
| +/* |
| + * Conditional swap of two four-word values. |
| + * |
| + * When bit is non-zero the words px[0..3] and py[0..3] are exchanged; |
| + * when bit is zero both are left as they were. The choice is made with |
| + * test + cmovnz on register operands; the asm contains no branch. |
| + */ |
| +static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) |
| +{ |
| + u64 temp; /* scratch register, operand %8 */ |
| + asm volatile( |
| + "test %9, %9 ;" /* ZF = (bit == 0); the mov/cmov below keep the flags */ |
| + "movq %0, %8 ;" /* temp = px[0] */ |
| + "cmovnzq %4, %0 ;" /* bit ? px[0] = py[0] */ |
| + "cmovnzq %8, %4 ;" /* bit ? py[0] = old px[0] */ |
| + "movq %1, %8 ;" |
| + "cmovnzq %5, %1 ;" |
| + "cmovnzq %8, %5 ;" |
| + "movq %2, %8 ;" |
| + "cmovnzq %6, %2 ;" |
| + "cmovnzq %8, %6 ;" |
| + "movq %3, %8 ;" |
| + "cmovnzq %7, %3 ;" |
| + "cmovnzq %8, %7 ;" |
| + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), |
| + "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), |
| + "=r"(temp) |
| + : "r"(bit) |
| + : "cc" |
| + ); |
| +} |
| +/* |
| + * Conditional copy of a four-word value. |
| + * |
| + * When bit is non-zero px[0..3] is overwritten with py[0..3]; when bit |
| + * is zero px keeps its contents. py is only read. Like cswap() this |
| + * uses test + cmovnz and contains no branch. |
| + */ |
| +static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) |
| +{ |
| + asm volatile( |
| + "test %4, %4 ;" /* ZF = (bit == 0) */ |
| + "cmovnzq %5, %0 ;" /* bit ? px[0] = py[0] */ |
| + "cmovnzq %6, %1 ;" |
| + "cmovnzq %7, %2 ;" |
| + "cmovnzq %8, %3 ;" |
| + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) |
| + : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) |
| + : "cc" |
| + ); |
| +} |
| +/* |
| + * Variable-point scalar multiplication, "adx" code path. |
| + * |
| + * Copies private_key and session_key into the local struct m, clamps |
| + * the private copy, masks the top bit of the point copy, then walks the |
| + * scalar from bit 254 down to bit 0. The projective result Qx/Qz is |
| + * turned into a single value with one inversion and one multiplication, |
| + * reduced by fred_eltfp25519_1w() and left in shared. m is wiped with |
| + * memzero_explicit() before returning. |
| + */ |
| +static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], |
| + const u8 private_key[CURVE25519_KEY_SIZE], |
| + const u8 session_key[CURVE25519_KEY_SIZE]) |
| +{ |
| + struct { |
| + u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* NOTE(review): never named below; presumably scratch used by the mul/sqr helpers - confirm */ |
| + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| + u64 workspace[6 * NUM_WORDS_ELTFP25519]; |
| + u8 session[CURVE25519_KEY_SIZE]; |
| + u8 private[CURVE25519_KEY_SIZE]; |
| + } __aligned(32) m; |
| + |
| + int i = 0, j = 0; |
| + u64 prev = 0; |
| + u64 *const X1 = (u64 *)m.session; |
| + u64 *const key = (u64 *)m.private; |
| + u64 *const Px = m.coordinates + 0; |
| + u64 *const Pz = m.coordinates + 4; |
| + u64 *const Qx = m.coordinates + 8; |
| + u64 *const Qz = m.coordinates + 12; |
| + u64 *const X2 = Qx; |
| + u64 *const Z2 = Qz; |
| + u64 *const X3 = Px; |
| + u64 *const Z3 = Pz; |
| + u64 *const X2Z2 = Qx; /* Qx and Qz viewed as one adjacent pair */ |
| + u64 *const X3Z3 = Px; /* Px and Pz viewed as one adjacent pair */ |
| + |
| + u64 *const A = m.workspace + 0; |
| + u64 *const B = m.workspace + 4; |
| + u64 *const D = m.workspace + 8; |
| + u64 *const C = m.workspace + 12; |
| + u64 *const DA = m.workspace + 16; |
| + u64 *const CB = m.workspace + 20; |
| + u64 *const AB = A; /* A and B are adjacent */ |
| + u64 *const DC = D; /* D and C are adjacent, in that order */ |
| + u64 *const DACB = DA; /* DA and CB are adjacent */ |
| + |
| + memcpy(m.private, private_key, sizeof(m.private)); |
| + memcpy(m.session, session_key, sizeof(m.session)); |
| + |
| + curve25519_clamp_secret(m.private); |
| + |
| + /* As in the draft: |
| + * When receiving such an array, implementations of curve25519 |
| + * MUST mask the most-significant bit in the final byte. This |
| + * is done to preserve compatibility with point formats which |
| + * reserve the sign bit for use in other protocols and to |
| + * increase resistance to implementation fingerprinting |
| + */ |
| + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; |
| + |
| + copy_eltfp25519_1w(Px, X1); |
| + setzero_eltfp25519_1w(Pz); |
| + setzero_eltfp25519_1w(Qx); |
| + setzero_eltfp25519_1w(Qz); |
| + |
| + Pz[0] = 1; |
| + Qx[0] = 1; |
| + |
| + /* main-loop */ |
| + prev = 0; |
| + j = 62; /* word 3 is entered at bit 62, i.e. scalar bit 254 */ |
| + for (i = 3; i >= 0; --i) { |
| + while (j >= 0) { |
| + u64 bit = (key[i] >> j) & 0x1; |
| + u64 swap = bit ^ prev; /* 1 when this bit differs from the previous one */ |
| + prev = bit; |
| + |
| + add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ |
| + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ |
| + add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ |
| + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ |
| + mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ |
| + |
| + cselect(swap, A, C); /* swap ? A = C : A unchanged */ |
| + cselect(swap, B, D); /* swap ? B = D : B unchanged */ |
| + |
| + sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ |
| + add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ |
| + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ |
| + sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */ |
| + |
| + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ |
| + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ |
| + |
| + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ |
| + add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+BB (X2 holds BB) */ |
| + mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ |
| + mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ |
| + --j; |
| + } |
| + j = 63; /* the remaining words are consumed from their top bit */ |
| + } |
| + |
| + inv_eltfp25519_1w_adx(A, Qz); /* A = Qz^(-1) */ |
| + mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); /* shared = Qx * Qz^(-1) */ |
| + fred_eltfp25519_1w((u64 *)shared); |
| + |
| + memzero_explicit(&m, sizeof(m)); |
| +} |
| +/* |
| + * Fixed-point scalar multiplication, "adx" code path. |
| + * |
| + * Copies private_key into the local struct m and clamps it, then walks |
| + * scalar bits 3..254 upwards, taking one four-word multiplier per bit |
| + * from table_ladder_8k. Three doubling steps follow the walk. The |
| + * projective result Ur1/Zr1 is converted with one inversion and one |
| + * multiplication, reduced by fred_eltfp25519_1w() and left in |
| + * session_key. m is wiped with memzero_explicit() before returning. |
| + */ |
| +static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], |
| + const u8 private_key[CURVE25519_KEY_SIZE]) |
| +{ |
| + struct { |
| + u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* NOTE(review): never named below; presumably scratch used by the mul/sqr helpers - confirm */ |
| + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| + u64 workspace[4 * NUM_WORDS_ELTFP25519]; |
| + u8 private[CURVE25519_KEY_SIZE]; |
| + } __aligned(32) m; |
| + |
| + const int ite[4] = { 64, 64, 64, 63 }; /* exclusive upper bound of j per key word; word 3 stops before bit 63 */ |
| + const int q = 3; /* first scalar bit visited, and number of trailing doublings */ |
| + u64 swap = 1; |
| + |
| + int i = 0, j = 0, k = 0; |
| + u64 *const key = (u64 *)m.private; |
| + u64 *const Ur1 = m.coordinates + 0; |
| + u64 *const Zr1 = m.coordinates + 4; |
| + u64 *const Ur2 = m.coordinates + 8; |
| + u64 *const Zr2 = m.coordinates + 12; |
| + |
| + u64 *const UZr1 = m.coordinates + 0; /* Ur1 and Zr1 as one adjacent pair */ |
| + u64 *const ZUr2 = m.coordinates + 8; /* the pair starting at Ur2 */ |
| + |
| + u64 *const A = m.workspace + 0; |
| + u64 *const B = m.workspace + 4; |
| + u64 *const C = m.workspace + 8; |
| + u64 *const D = m.workspace + 12; |
| + |
| + u64 *const AB = m.workspace + 0; /* A and B are adjacent */ |
| + u64 *const CD = m.workspace + 8; /* C and D are adjacent */ |
| + |
| + const u64 *const P = table_ladder_8k; |
| + |
| + memcpy(m.private, private_key, sizeof(m.private)); |
| + |
| + curve25519_clamp_secret(m.private); |
| + |
| + setzero_eltfp25519_1w(Ur1); |
| + setzero_eltfp25519_1w(Zr1); |
| + setzero_eltfp25519_1w(Zr2); |
| + Ur1[0] = 1; |
| + Zr1[0] = 1; |
| + Zr2[0] = 1; |
| + |
| + /* G-S */ |
| + Ur2[3] = 0x1eaecdeee27cab34UL; |
| + Ur2[2] = 0xadc7a0b9235d48e2UL; |
| + Ur2[1] = 0xbbf095ae14b2edf8UL; |
| + Ur2[0] = 0x7e94e1fec82faabdUL; |
| + |
| + /* main-loop */ |
| + j = q; |
| + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { |
| + while (j < ite[i]) { |
| + u64 bit = (key[i] >> j) & 0x1; |
| + k = (64 * i + j - q); /* table index: scalar bit number minus q */ |
| + swap = swap ^ bit; |
| + cswap(swap, Ur1, Ur2); |
| + cswap(swap, Zr1, Zr2); |
| + swap = bit; |
| + /* Addition */ |
| + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| + mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M*B, M = table entry k */ |
| + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ |
| + add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ |
| + sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ |
| + mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ |
| + ++j; |
| + } |
| + j = 0; /* later words are walked from bit 0 */ |
| + } |
| + |
| + /* Doubling */ |
| + for (i = 0; i < q; ++i) { |
| + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| + sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ |
| + copy_eltfp25519_1w(C, B); /* C = B */ |
| + sub_eltfp25519_1w(B, A, B); /* B = A-B */ |
| + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ |
| + add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ |
| + mul_eltfp25519_2w_adx(UZr1, AB, CD); /* [Ur1|Zr1] = [A|B]*[C|D] */ |
| + } |
| + |
| + /* Convert to affine coordinates */ |
| + inv_eltfp25519_1w_adx(A, Zr1); |
| + mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); |
| + fred_eltfp25519_1w((u64 *)session_key); |
| + |
| + memzero_explicit(&m, sizeof(m)); |
| +} |
| +/* |
| + * Variable-point scalar multiplication, "bmi2" code path. |
| + * |
| + * Same sequence of steps as curve25519_adx(), issued through the *_bmi2 |
| + * helpers: copy and clamp private_key, copy session_key and mask its top |
| + * bit, walk the scalar from bit 254 down to bit 0, then convert Qx/Qz |
| + * with one inversion and one multiplication, reduce with |
| + * fred_eltfp25519_1w() and leave the result in shared. m is wiped with |
| + * memzero_explicit() before returning. |
| + */ |
| +static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], |
| + const u8 private_key[CURVE25519_KEY_SIZE], |
| + const u8 session_key[CURVE25519_KEY_SIZE]) |
| +{ |
| + struct { |
| + u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* NOTE(review): never named below; presumably scratch used by the mul/sqr helpers - confirm */ |
| + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| + u64 workspace[6 * NUM_WORDS_ELTFP25519]; |
| + u8 session[CURVE25519_KEY_SIZE]; |
| + u8 private[CURVE25519_KEY_SIZE]; |
| + } __aligned(32) m; |
| + |
| + int i = 0, j = 0; |
| + u64 prev = 0; |
| + u64 *const X1 = (u64 *)m.session; |
| + u64 *const key = (u64 *)m.private; |
| + u64 *const Px = m.coordinates + 0; |
| + u64 *const Pz = m.coordinates + 4; |
| + u64 *const Qx = m.coordinates + 8; |
| + u64 *const Qz = m.coordinates + 12; |
| + u64 *const X2 = Qx; |
| + u64 *const Z2 = Qz; |
| + u64 *const X3 = Px; |
| + u64 *const Z3 = Pz; |
| + u64 *const X2Z2 = Qx; /* Qx and Qz viewed as one adjacent pair */ |
| + u64 *const X3Z3 = Px; /* Px and Pz viewed as one adjacent pair */ |
| + |
| + u64 *const A = m.workspace + 0; |
| + u64 *const B = m.workspace + 4; |
| + u64 *const D = m.workspace + 8; |
| + u64 *const C = m.workspace + 12; |
| + u64 *const DA = m.workspace + 16; |
| + u64 *const CB = m.workspace + 20; |
| + u64 *const AB = A; /* A and B are adjacent */ |
| + u64 *const DC = D; /* D and C are adjacent, in that order */ |
| + u64 *const DACB = DA; /* DA and CB are adjacent */ |
| + |
| + memcpy(m.private, private_key, sizeof(m.private)); |
| + memcpy(m.session, session_key, sizeof(m.session)); |
| + |
| + curve25519_clamp_secret(m.private); |
| + |
| + /* As in the draft: |
| + * When receiving such an array, implementations of curve25519 |
| + * MUST mask the most-significant bit in the final byte. This |
| + * is done to preserve compatibility with point formats which |
| + * reserve the sign bit for use in other protocols and to |
| + * increase resistance to implementation fingerprinting |
| + */ |
| + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; |
| + |
| + copy_eltfp25519_1w(Px, X1); |
| + setzero_eltfp25519_1w(Pz); |
| + setzero_eltfp25519_1w(Qx); |
| + setzero_eltfp25519_1w(Qz); |
| + |
| + Pz[0] = 1; |
| + Qx[0] = 1; |
| + |
| + /* main-loop */ |
| + prev = 0; |
| + j = 62; /* word 3 is entered at bit 62, i.e. scalar bit 254 */ |
| + for (i = 3; i >= 0; --i) { |
| + while (j >= 0) { |
| + u64 bit = (key[i] >> j) & 0x1; |
| + u64 swap = bit ^ prev; /* 1 when this bit differs from the previous one */ |
| + prev = bit; |
| + |
| + add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ |
| + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ |
| + add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ |
| + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ |
| + mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ |
| + |
| + cselect(swap, A, C); /* swap ? A = C : A unchanged */ |
| + cselect(swap, B, D); /* swap ? B = D : B unchanged */ |
| + |
| + sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ |
| + add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ |
| + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ |
| + sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */ |
| + |
| + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ |
| + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ |
| + |
| + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ |
| + add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+BB (X2 holds BB) */ |
| + mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ |
| + mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ |
| + --j; |
| + } |
| + j = 63; /* the remaining words are consumed from their top bit */ |
| + } |
| + |
| + inv_eltfp25519_1w_bmi2(A, Qz); /* A = Qz^(-1) */ |
| + mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); /* shared = Qx * Qz^(-1) */ |
| + fred_eltfp25519_1w((u64 *)shared); |
| + |
| + memzero_explicit(&m, sizeof(m)); |
| +} |
| +/* |
| + * Fixed-point scalar multiplication, "bmi2" code path. |
| + * |
| + * Same sequence of steps as curve25519_adx_base(), issued through the |
| + * *_bmi2 helpers: copy and clamp private_key, walk scalar bits 3..254 |
| + * upwards with one four-word multiplier per bit from table_ladder_8k, |
| + * run three doubling steps, then convert Ur1/Zr1 with one inversion and |
| + * one multiplication, reduce with fred_eltfp25519_1w() and leave the |
| + * result in session_key. m is wiped before returning. |
| + */ |
| +static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], |
| + const u8 private_key[CURVE25519_KEY_SIZE]) |
| +{ |
| + struct { |
| + u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* NOTE(review): never named below; presumably scratch used by the mul/sqr helpers - confirm */ |
| + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| + u64 workspace[4 * NUM_WORDS_ELTFP25519]; |
| + u8 private[CURVE25519_KEY_SIZE]; |
| + } __aligned(32) m; |
| + |
| + const int ite[4] = { 64, 64, 64, 63 }; /* exclusive upper bound of j per key word; word 3 stops before bit 63 */ |
| + const int q = 3; /* first scalar bit visited, and number of trailing doublings */ |
| + u64 swap = 1; |
| + |
| + int i = 0, j = 0, k = 0; |
| + u64 *const key = (u64 *)m.private; |
| + u64 *const Ur1 = m.coordinates + 0; |
| + u64 *const Zr1 = m.coordinates + 4; |
| + u64 *const Ur2 = m.coordinates + 8; |
| + u64 *const Zr2 = m.coordinates + 12; |
| + |
| + u64 *const UZr1 = m.coordinates + 0; /* Ur1 and Zr1 as one adjacent pair */ |
| + u64 *const ZUr2 = m.coordinates + 8; /* the pair starting at Ur2 */ |
| + |
| + u64 *const A = m.workspace + 0; |
| + u64 *const B = m.workspace + 4; |
| + u64 *const C = m.workspace + 8; |
| + u64 *const D = m.workspace + 12; |
| + |
| + u64 *const AB = m.workspace + 0; /* A and B are adjacent */ |
| + u64 *const CD = m.workspace + 8; /* C and D are adjacent */ |
| + |
| + const u64 *const P = table_ladder_8k; |
| + |
| + memcpy(m.private, private_key, sizeof(m.private)); |
| + |
| + curve25519_clamp_secret(m.private); |
| + |
| + setzero_eltfp25519_1w(Ur1); |
| + setzero_eltfp25519_1w(Zr1); |
| + setzero_eltfp25519_1w(Zr2); |
| + Ur1[0] = 1; |
| + Zr1[0] = 1; |
| + Zr2[0] = 1; |
| + |
| + /* G-S */ |
| + Ur2[3] = 0x1eaecdeee27cab34UL; |
| + Ur2[2] = 0xadc7a0b9235d48e2UL; |
| + Ur2[1] = 0xbbf095ae14b2edf8UL; |
| + Ur2[0] = 0x7e94e1fec82faabdUL; |
| + |
| + /* main-loop */ |
| + j = q; |
| + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { |
| + while (j < ite[i]) { |
| + u64 bit = (key[i] >> j) & 0x1; |
| + k = (64 * i + j - q); /* table index: scalar bit number minus q */ |
| + swap = swap ^ bit; |
| + cswap(swap, Ur1, Ur2); |
| + cswap(swap, Zr1, Zr2); |
| + swap = bit; |
| + /* Addition */ |
| + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| + mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M*B, M = table entry k */ |
| + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ |
| + add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ |
| + sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */ |
| + mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ |
| + ++j; |
| + } |
| + j = 0; /* later words are walked from bit 0 */ |
| + } |
| + |
| + /* Doubling */ |
| + for (i = 0; i < q; ++i) { |
| + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| + sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ |
| + copy_eltfp25519_1w(C, B); /* C = B */ |
| + sub_eltfp25519_1w(B, A, B); /* B = A-B */ |
| + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ |
| + add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ |
| + mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* [Ur1|Zr1] = [A|B]*[C|D] */ |
| + } |
| + |
| + /* Convert to affine coordinates */ |
| + inv_eltfp25519_1w_bmi2(A, Zr1); |
| + mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); |
| + fred_eltfp25519_1w((u64 *)session_key); |
| + |
| + memzero_explicit(&m, sizeof(m)); |
| +} |
| + |
| +/* |
| + * Library entry point for variable-point scalar multiplication. |
| + * |
| + * Hands the call to the first implementation whose static key is on, |
| + * trying ADX, then BMI2, and otherwise the generic C code. |
| + */ |
| +void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], |
| +		     const u8 secret[CURVE25519_KEY_SIZE], |
| +		     const u8 basepoint[CURVE25519_KEY_SIZE]) |
| +{ |
| +	if (static_branch_likely(&curve25519_use_adx)) { |
| +		curve25519_adx(mypublic, secret, basepoint); |
| +		return; |
| +	} |
| + |
| +	if (static_branch_likely(&curve25519_use_bmi2)) { |
| +		curve25519_bmi2(mypublic, secret, basepoint); |
| +		return; |
| +	} |
| + |
| +	curve25519_generic(mypublic, secret, basepoint); |
| +} |
| +EXPORT_SYMBOL(curve25519_arch); |
| + |
| +/* |
| + * Library entry point for fixed-point scalar multiplication. |
| + * |
| + * Tries the ADX and BMI2 table-driven variants in that order; with |
| + * neither static key on, the generic code is run against |
| + * curve25519_base_point. |
| + */ |
| +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], |
| +			  const u8 secret[CURVE25519_KEY_SIZE]) |
| +{ |
| +	if (static_branch_likely(&curve25519_use_adx)) { |
| +		curve25519_adx_base(pub, secret); |
| +		return; |
| +	} |
| + |
| +	if (static_branch_likely(&curve25519_use_bmi2)) { |
| +		curve25519_bmi2_base(pub, secret); |
| +		return; |
| +	} |
| + |
| +	curve25519_generic(pub, secret, curve25519_base_point); |
| +} |
| +EXPORT_SYMBOL(curve25519_base_arch); |
| + |
| +/* |
| + * KPP set_secret hook: fills the transform context with a private key. |
| + * |
| + * len == 0 asks for a freshly generated secret. Otherwise buf must be |
| + * exactly CURVE25519_KEY_SIZE bytes and must differ from |
| + * curve25519_null_point; it is then copied into the context. |
| + * |
| + * Returns 0 on success, -EINVAL for any other input. |
| + */ |
| +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, |
| +				 unsigned int len) |
| +{ |
| +	u8 *secret = kpp_tfm_ctx(tfm); |
| + |
| +	if (!len) { |
| +		curve25519_generate_secret(secret); |
| +		return 0; |
| +	} |
| + |
| +	/* the length is checked first, so buf is only read when it is full-size */ |
| +	if (len != CURVE25519_KEY_SIZE || |
| +	    !crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) |
| +		return -EINVAL; |
| + |
| +	memcpy(secret, buf, CURVE25519_KEY_SIZE); |
| +	return 0; |
| +} |
| + |
| +/* |
| + * KPP generate_public_key hook. |
| + * |
| + * Rejects requests that carry a source scatterlist, derives the public |
| + * key from the secret stored in the transform context and copies at |
| + * most CURVE25519_KEY_SIZE bytes of it into req->dst. |
| + * |
| + * Returns 0 on success, -EINVAL if req->src is set or fewer bytes than |
| + * requested could be copied out. |
| + */ |
| +static int curve25519_generate_public_key(struct kpp_request *req) |
| +{ |
| +	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); |
| +	const u8 *secret = kpp_tfm_ctx(tfm); |
| +	u8 buf[CURVE25519_KEY_SIZE]; |
| +	int copied, nbytes; |
| + |
| +	if (req->src) |
| +		return -EINVAL; |
| + |
| +	curve25519_base_arch(buf, secret); |
| + |
| +	/* might want less than we've got */ |
| +	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); |
| +	copied = sg_copy_from_buffer(req->dst, |
| +				     sg_nents_for_len(req->dst, nbytes), |
| +				     buf, nbytes); |
| + |
| +	return copied == nbytes ? 0 : -EINVAL; |
| +} |
| + |
| +/* |
| + * KPP compute_shared_secret hook. |
| + * |
| + * Reads a CURVE25519_KEY_SIZE-byte peer public key from req->src, |
| + * multiplies it by the secret stored in the transform context and |
| + * copies at most CURVE25519_KEY_SIZE bytes of the result into req->dst. |
| + * The on-stack copy of the shared secret is wiped before returning, in |
| + * line with the memzero_explicit() calls on secret scratch elsewhere in |
| + * this file. |
| + * |
| + * Returns 0 on success, -EINVAL if req->src is missing, too short, or |
| + * fewer bytes than requested could be copied out. |
| + */ |
| +static int curve25519_compute_shared_secret(struct kpp_request *req) |
| +{ |
| +	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); |
| +	const u8 *secret = kpp_tfm_ctx(tfm); |
| +	u8 public_key[CURVE25519_KEY_SIZE]; |
| +	u8 buf[CURVE25519_KEY_SIZE]; |
| +	int copied, nbytes; |
| + |
| +	if (!req->src) |
| +		return -EINVAL; |
| + |
| +	copied = sg_copy_to_buffer(req->src, |
| +				   sg_nents_for_len(req->src, |
| +						    CURVE25519_KEY_SIZE), |
| +				   public_key, CURVE25519_KEY_SIZE); |
| +	if (copied != CURVE25519_KEY_SIZE) |
| +		return -EINVAL; |
| + |
| +	curve25519_arch(buf, secret, public_key); |
| + |
| +	/* might want less than we've got */ |
| +	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); |
| +	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, |
| +								nbytes), |
| +				     buf, nbytes); |
| + |
| +	/* buf holds the shared secret: do not leave it behind on the stack */ |
| +	memzero_explicit(buf, sizeof(buf)); |
| + |
| +	if (copied != nbytes) |
| +		return -EINVAL; |
| +	return 0; |
| +} |
| + |
| +/* |
| + * KPP max_size hook: the output size is the same constant for every |
| + * transform, so tfm is not consulted. |
| + */ |
| +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) |
| +{ |
| +	return CURVE25519_KEY_SIZE; |
| +} |
| + |
| +/* |
| + * KPP algorithm descriptor registered by this module. The per-transform |
| + * context is CURVE25519_KEY_SIZE bytes; curve25519_set_secret() stores |
| + * the private key there. |
| + */ |
| +static struct kpp_alg curve25519_alg = { |
| +	.base = { |
| +		.cra_name		= "curve25519", |
| +		.cra_driver_name	= "curve25519-x86", |
| +		.cra_priority		= 200, |
| +		.cra_module		= THIS_MODULE, |
| +		.cra_ctxsize		= CURVE25519_KEY_SIZE, |
| +	}, |
| + |
| +	.set_secret		= curve25519_set_secret, |
| +	.generate_public_key	= curve25519_generate_public_key, |
| +	.compute_shared_secret	= curve25519_compute_shared_secret, |
| +	.max_size		= curve25519_max_size, |
| +}; |
| + |
| +/* |
| + * Select the accelerated code paths and register the KPP algorithm. |
| + * |
| + * Both accelerated ladders execute mulx, a BMI2 instruction: |
| + * curve25519_adx() calls mul_a24_eltfp25519_1w(), whose asm uses mulx. |
| + * BMI2 is therefore required for either path, and ADX is enabled on top |
| + * of it when the CPU also has ADX. The dispatchers test the ADX key |
| + * first, so a CPU with both features runs the ADX code. |
| + * |
| + * Without BMI2 nothing is enabled or registered and the module loads as |
| + * a no-op; the dispatchers then fall back to the generic code. |
| + * |
| + * Returns 0, or the error from crypto_register_kpp(). |
| + */ |
| +static int __init curve25519_mod_init(void) |
| +{ |
| +	if (!boot_cpu_has(X86_FEATURE_BMI2)) |
| +		return 0; |
| + |
| +	static_branch_enable(&curve25519_use_bmi2); |
| +	if (boot_cpu_has(X86_FEATURE_ADX)) |
| +		static_branch_enable(&curve25519_use_adx); |
| + |
| +	return crypto_register_kpp(&curve25519_alg); |
| +} |
| + |
| +/* |
| + * Undo curve25519_mod_init(): the algorithm was registered exactly when |
| + * the CPU has BMI2, so unregister under the same condition. |
| + */ |
| +static void __exit curve25519_mod_exit(void) |
| +{ |
| +	if (boot_cpu_has(X86_FEATURE_BMI2)) |
| +		crypto_unregister_kpp(&curve25519_alg); |
| +} |
| + |
| +module_init(curve25519_mod_init); /* load-time hook: feature detection and registration */ |
| +module_exit(curve25519_mod_exit); /* unload-time hook: unregistration */ |
| + |
| +MODULE_ALIAS_CRYPTO("curve25519"); /* same string as the cra_name of curve25519_alg */ |
| +MODULE_ALIAS_CRYPTO("curve25519-x86"); /* same string as the cra_driver_name of curve25519_alg */ |
| +MODULE_LICENSE("GPL v2"); |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index a3fc859830c1..b8b738bcc312 100644 |
| |
| |
| @@ -269,6 +269,12 @@ config CRYPTO_CURVE25519 |
| select CRYPTO_KPP |
| select CRYPTO_LIB_CURVE25519_GENERIC |
| |
| +config CRYPTO_CURVE25519_X86 |
| + tristate "x86_64 accelerated Curve25519 scalar multiplication library" |
| + depends on X86 && 64BIT |
| + select CRYPTO_LIB_CURVE25519_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 |
| + |
| comment "Authenticated Encryption with Associated Data" |
| |
| config CRYPTO_CCM |
| -- |
| 2.18.2 |
| |
| |
| From ef199be33340e0c41e7ae3bae7e90821fdff04dc Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:37 +0100 |
| Subject: [PATCH 030/100] crypto: arm/curve25519 - import Bernstein and |
| Schwabe's Curve25519 ARM implementation |
| |
| commit f0fb006b604f98e2309a30f34ef455ac734f7c1c upstream. |
| |
| This comes from Dan Bernstein and Peter Schwabe's public domain NEON |
| code, and is included here in raw form so that subsequent commits that |
| fix these up for the kernel can see how it has changed. This code does |
| have some entirely cosmetic formatting differences, adding indentation |
| and so forth, so that when we actually port it for use in the kernel in |
| the subsequent commit, it's obvious what's changed in the process. |
| |
| This code originates from SUPERCOP 20180818, available at |
| <https://bench.cr.yp.to/supercop.html>. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/curve25519-core.S | 2105 +++++++++++++++++++++++++++++ |
| 1 file changed, 2105 insertions(+) |
| create mode 100644 arch/arm/crypto/curve25519-core.S |
| |
| diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S |
| new file mode 100644 |
| index 000000000000..f33b85fef382 |
| |
| |
| @@ -0,0 +1,2105 @@ |
| +/* |
| + * Public domain code from Daniel J. Bernstein and Peter Schwabe, from |
| + * SUPERCOP's curve25519/neon2/scalarmult.s. |
| + */ |
| + |
| +.fpu neon |
| +.text |
| +.align 4 |
| +.global _crypto_scalarmult_curve25519_neon2 |
| +.global crypto_scalarmult_curve25519_neon2 |
| +.type _crypto_scalarmult_curve25519_neon2 STT_FUNC |
| +.type crypto_scalarmult_curve25519_neon2 STT_FUNC |
| + _crypto_scalarmult_curve25519_neon2: |
| + crypto_scalarmult_curve25519_neon2: |
| + vpush {q4, q5, q6, q7} |
| + mov r12, sp |
| + sub sp, sp, #736 |
| + and sp, sp, #0xffffffe0 |
| + strd r4, [sp, #0] |
| + strd r6, [sp, #8] |
| + strd r8, [sp, #16] |
| + strd r10, [sp, #24] |
| + str r12, [sp, #480] |
| + str r14, [sp, #484] |
| + mov r0, r0 |
| + mov r1, r1 |
| + mov r2, r2 |
| + add r3, sp, #32 |
| + ldr r4, =0 |
| + ldr r5, =254 |
| + vmov.i32 q0, #1 |
| + vshr.u64 q1, q0, #7 |
| + vshr.u64 q0, q0, #8 |
| + vmov.i32 d4, #19 |
| + vmov.i32 d5, #38 |
| + add r6, sp, #512 |
| + vst1.8 {d2-d3}, [r6, : 128] |
| + add r6, sp, #528 |
| + vst1.8 {d0-d1}, [r6, : 128] |
| + add r6, sp, #544 |
| + vst1.8 {d4-d5}, [r6, : 128] |
| + add r6, r3, #0 |
| + vmov.i32 q2, #0 |
| + vst1.8 {d4-d5}, [r6, : 128]! |
| + vst1.8 {d4-d5}, [r6, : 128]! |
| + vst1.8 d4, [r6, : 64] |
| + add r6, r3, #0 |
| + ldr r7, =960 |
| + sub r7, r7, #2 |
| + neg r7, r7 |
| + sub r7, r7, r7, LSL #7 |
| + str r7, [r6] |
| + add r6, sp, #704 |
| + vld1.8 {d4-d5}, [r1]! |
| + vld1.8 {d6-d7}, [r1] |
| + vst1.8 {d4-d5}, [r6, : 128]! |
| + vst1.8 {d6-d7}, [r6, : 128] |
| + sub r1, r6, #16 |
| + ldrb r6, [r1] |
| + and r6, r6, #248 |
| + strb r6, [r1] |
| + ldrb r6, [r1, #31] |
| + and r6, r6, #127 |
| + orr r6, r6, #64 |
| + strb r6, [r1, #31] |
| + vmov.i64 q2, #0xffffffff |
| + vshr.u64 q3, q2, #7 |
| + vshr.u64 q2, q2, #6 |
| + vld1.8 {d8}, [r2] |
| + vld1.8 {d10}, [r2] |
| + add r2, r2, #6 |
| + vld1.8 {d12}, [r2] |
| + vld1.8 {d14}, [r2] |
| + add r2, r2, #6 |
| + vld1.8 {d16}, [r2] |
| + add r2, r2, #4 |
| + vld1.8 {d18}, [r2] |
| + vld1.8 {d20}, [r2] |
| + add r2, r2, #6 |
| + vld1.8 {d22}, [r2] |
| + add r2, r2, #2 |
| + vld1.8 {d24}, [r2] |
| + vld1.8 {d26}, [r2] |
| + vshr.u64 q5, q5, #26 |
| + vshr.u64 q6, q6, #3 |
| + vshr.u64 q7, q7, #29 |
| + vshr.u64 q8, q8, #6 |
| + vshr.u64 q10, q10, #25 |
| + vshr.u64 q11, q11, #3 |
| + vshr.u64 q12, q12, #12 |
| + vshr.u64 q13, q13, #38 |
| + vand q4, q4, q2 |
| + vand q6, q6, q2 |
| + vand q8, q8, q2 |
| + vand q10, q10, q2 |
| + vand q2, q12, q2 |
| + vand q5, q5, q3 |
| + vand q7, q7, q3 |
| + vand q9, q9, q3 |
| + vand q11, q11, q3 |
| + vand q3, q13, q3 |
| + add r2, r3, #48 |
| + vadd.i64 q12, q4, q1 |
| + vadd.i64 q13, q10, q1 |
| + vshr.s64 q12, q12, #26 |
| + vshr.s64 q13, q13, #26 |
| + vadd.i64 q5, q5, q12 |
| + vshl.i64 q12, q12, #26 |
| + vadd.i64 q14, q5, q0 |
| + vadd.i64 q11, q11, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q15, q11, q0 |
| + vsub.i64 q4, q4, q12 |
| + vshr.s64 q12, q14, #25 |
| + vsub.i64 q10, q10, q13 |
| + vshr.s64 q13, q15, #25 |
| + vadd.i64 q6, q6, q12 |
| + vshl.i64 q12, q12, #25 |
| + vadd.i64 q14, q6, q1 |
| + vadd.i64 q2, q2, q13 |
| + vsub.i64 q5, q5, q12 |
| + vshr.s64 q12, q14, #26 |
| + vshl.i64 q13, q13, #25 |
| + vadd.i64 q14, q2, q1 |
| + vadd.i64 q7, q7, q12 |
| + vshl.i64 q12, q12, #26 |
| + vadd.i64 q15, q7, q0 |
| + vsub.i64 q11, q11, q13 |
| + vshr.s64 q13, q14, #26 |
| + vsub.i64 q6, q6, q12 |
| + vshr.s64 q12, q15, #25 |
| + vadd.i64 q3, q3, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q14, q3, q0 |
| + vadd.i64 q8, q8, q12 |
| + vshl.i64 q12, q12, #25 |
| + vadd.i64 q15, q8, q1 |
| + add r2, r2, #8 |
| + vsub.i64 q2, q2, q13 |
| + vshr.s64 q13, q14, #25 |
| + vsub.i64 q7, q7, q12 |
| + vshr.s64 q12, q15, #26 |
| + vadd.i64 q14, q13, q13 |
| + vadd.i64 q9, q9, q12 |
| + vtrn.32 d12, d14 |
| + vshl.i64 q12, q12, #26 |
| + vtrn.32 d13, d15 |
| + vadd.i64 q0, q9, q0 |
| + vadd.i64 q4, q4, q14 |
| + vst1.8 d12, [r2, : 64]! |
| + vshl.i64 q6, q13, #4 |
| + vsub.i64 q7, q8, q12 |
| + vshr.s64 q0, q0, #25 |
| + vadd.i64 q4, q4, q6 |
| + vadd.i64 q6, q10, q0 |
| + vshl.i64 q0, q0, #25 |
| + vadd.i64 q8, q6, q1 |
| + vadd.i64 q4, q4, q13 |
| + vshl.i64 q10, q13, #25 |
| + vadd.i64 q1, q4, q1 |
| + vsub.i64 q0, q9, q0 |
| + vshr.s64 q8, q8, #26 |
| + vsub.i64 q3, q3, q10 |
| + vtrn.32 d14, d0 |
| + vshr.s64 q1, q1, #26 |
| + vtrn.32 d15, d1 |
| + vadd.i64 q0, q11, q8 |
| + vst1.8 d14, [r2, : 64] |
| + vshl.i64 q7, q8, #26 |
| + vadd.i64 q5, q5, q1 |
| + vtrn.32 d4, d6 |
| + vshl.i64 q1, q1, #26 |
| + vtrn.32 d5, d7 |
| + vsub.i64 q3, q6, q7 |
| + add r2, r2, #16 |
| + vsub.i64 q1, q4, q1 |
| + vst1.8 d4, [r2, : 64] |
| + vtrn.32 d6, d0 |
| + vtrn.32 d7, d1 |
| + sub r2, r2, #8 |
| + vtrn.32 d2, d10 |
| + vtrn.32 d3, d11 |
| + vst1.8 d6, [r2, : 64] |
| + sub r2, r2, #24 |
| + vst1.8 d2, [r2, : 64] |
| + add r2, r3, #96 |
| + vmov.i32 q0, #0 |
| + vmov.i64 d2, #0xff |
| + vmov.i64 d3, #0 |
| + vshr.u32 q1, q1, #7 |
| + vst1.8 {d2-d3}, [r2, : 128]! |
| + vst1.8 {d0-d1}, [r2, : 128]! |
| + vst1.8 d0, [r2, : 64] |
| + add r2, r3, #144 |
| + vmov.i32 q0, #0 |
| + vst1.8 {d0-d1}, [r2, : 128]! |
| + vst1.8 {d0-d1}, [r2, : 128]! |
| + vst1.8 d0, [r2, : 64] |
| + add r2, r3, #240 |
| + vmov.i32 q0, #0 |
| + vmov.i64 d2, #0xff |
| + vmov.i64 d3, #0 |
| + vshr.u32 q1, q1, #7 |
| + vst1.8 {d2-d3}, [r2, : 128]! |
| + vst1.8 {d0-d1}, [r2, : 128]! |
| + vst1.8 d0, [r2, : 64] |
| + add r2, r3, #48 |
| + add r6, r3, #192 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d4}, [r2, : 64] |
| + vst1.8 {d0-d1}, [r6, : 128]! |
| + vst1.8 {d2-d3}, [r6, : 128]! |
| + vst1.8 d4, [r6, : 64] |
| +._mainloop: |
| + mov r2, r5, LSR #3 |
| + and r6, r5, #7 |
| + ldrb r2, [r1, r2] |
| + mov r2, r2, LSR r6 |
| + and r2, r2, #1 |
| + str r5, [sp, #488] |
| + eor r4, r4, r2 |
| + str r2, [sp, #492] |
| + neg r2, r4 |
| + add r4, r3, #96 |
| + add r5, r3, #192 |
| + add r6, r3, #144 |
| + vld1.8 {d8-d9}, [r4, : 128]! |
| + add r7, r3, #240 |
| + vld1.8 {d10-d11}, [r5, : 128]! |
| + veor q6, q4, q5 |
| + vld1.8 {d14-d15}, [r6, : 128]! |
| + vdup.i32 q8, r2 |
| + vld1.8 {d18-d19}, [r7, : 128]! |
| + veor q10, q7, q9 |
| + vld1.8 {d22-d23}, [r4, : 128]! |
| + vand q6, q6, q8 |
| + vld1.8 {d24-d25}, [r5, : 128]! |
| + vand q10, q10, q8 |
| + vld1.8 {d26-d27}, [r6, : 128]! |
| + veor q4, q4, q6 |
| + vld1.8 {d28-d29}, [r7, : 128]! |
| + veor q5, q5, q6 |
| + vld1.8 {d0}, [r4, : 64] |
| + veor q6, q7, q10 |
| + vld1.8 {d2}, [r5, : 64] |
| + veor q7, q9, q10 |
| + vld1.8 {d4}, [r6, : 64] |
| + veor q9, q11, q12 |
| + vld1.8 {d6}, [r7, : 64] |
| + veor q10, q0, q1 |
| + sub r2, r4, #32 |
| + vand q9, q9, q8 |
| + sub r4, r5, #32 |
| + vand q10, q10, q8 |
| + sub r5, r6, #32 |
| + veor q11, q11, q9 |
| + sub r6, r7, #32 |
| + veor q0, q0, q10 |
| + veor q9, q12, q9 |
| + veor q1, q1, q10 |
| + veor q10, q13, q14 |
| + veor q12, q2, q3 |
| + vand q10, q10, q8 |
| + vand q8, q12, q8 |
| + veor q12, q13, q10 |
| + veor q2, q2, q8 |
| + veor q10, q14, q10 |
| + veor q3, q3, q8 |
| + vadd.i32 q8, q4, q6 |
| + vsub.i32 q4, q4, q6 |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| + vadd.i32 q6, q11, q12 |
| + vst1.8 {d8-d9}, [r5, : 128]! |
| + vsub.i32 q4, q11, q12 |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| + vadd.i32 q6, q0, q2 |
| + vst1.8 {d8-d9}, [r5, : 128]! |
| + vsub.i32 q0, q0, q2 |
| + vst1.8 d12, [r2, : 64] |
| + vadd.i32 q2, q5, q7 |
| + vst1.8 d0, [r5, : 64] |
| + vsub.i32 q0, q5, q7 |
| + vst1.8 {d4-d5}, [r4, : 128]! |
| + vadd.i32 q2, q9, q10 |
| + vst1.8 {d0-d1}, [r6, : 128]! |
| + vsub.i32 q0, q9, q10 |
| + vst1.8 {d4-d5}, [r4, : 128]! |
| + vadd.i32 q2, q1, q3 |
| + vst1.8 {d0-d1}, [r6, : 128]! |
| + vsub.i32 q0, q1, q3 |
| + vst1.8 d4, [r4, : 64] |
| + vst1.8 d0, [r6, : 64] |
| + add r2, sp, #544 |
| + add r4, r3, #96 |
| + add r5, r3, #144 |
| + vld1.8 {d0-d1}, [r2, : 128] |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vld1.8 {d4-d5}, [r5, : 128]! |
| + vzip.i32 q1, q2 |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vld1.8 {d8-d9}, [r5, : 128]! |
| + vshl.i32 q5, q1, #1 |
| + vzip.i32 q3, q4 |
| + vshl.i32 q6, q2, #1 |
| + vld1.8 {d14}, [r4, : 64] |
| + vshl.i32 q8, q3, #1 |
| + vld1.8 {d15}, [r5, : 64] |
| + vshl.i32 q9, q4, #1 |
| + vmul.i32 d21, d7, d1 |
| + vtrn.32 d14, d15 |
| + vmul.i32 q11, q4, q0 |
| + vmul.i32 q0, q7, q0 |
| + vmull.s32 q12, d2, d2 |
| + vmlal.s32 q12, d11, d1 |
| + vmlal.s32 q12, d12, d0 |
| + vmlal.s32 q12, d13, d23 |
| + vmlal.s32 q12, d16, d22 |
| + vmlal.s32 q12, d7, d21 |
| + vmull.s32 q10, d2, d11 |
| + vmlal.s32 q10, d4, d1 |
| + vmlal.s32 q10, d13, d0 |
| + vmlal.s32 q10, d6, d23 |
| + vmlal.s32 q10, d17, d22 |
| + vmull.s32 q13, d10, d4 |
| + vmlal.s32 q13, d11, d3 |
| + vmlal.s32 q13, d13, d1 |
| + vmlal.s32 q13, d16, d0 |
| + vmlal.s32 q13, d17, d23 |
| + vmlal.s32 q13, d8, d22 |
| + vmull.s32 q1, d10, d5 |
| + vmlal.s32 q1, d11, d4 |
| + vmlal.s32 q1, d6, d1 |
| + vmlal.s32 q1, d17, d0 |
| + vmlal.s32 q1, d8, d23 |
| + vmull.s32 q14, d10, d6 |
| + vmlal.s32 q14, d11, d13 |
| + vmlal.s32 q14, d4, d4 |
| + vmlal.s32 q14, d17, d1 |
| + vmlal.s32 q14, d18, d0 |
| + vmlal.s32 q14, d9, d23 |
| + vmull.s32 q11, d10, d7 |
| + vmlal.s32 q11, d11, d6 |
| + vmlal.s32 q11, d12, d5 |
| + vmlal.s32 q11, d8, d1 |
| + vmlal.s32 q11, d19, d0 |
| + vmull.s32 q15, d10, d8 |
| + vmlal.s32 q15, d11, d17 |
| + vmlal.s32 q15, d12, d6 |
| + vmlal.s32 q15, d13, d5 |
| + vmlal.s32 q15, d19, d1 |
| + vmlal.s32 q15, d14, d0 |
| + vmull.s32 q2, d10, d9 |
| + vmlal.s32 q2, d11, d8 |
| + vmlal.s32 q2, d12, d7 |
| + vmlal.s32 q2, d13, d6 |
| + vmlal.s32 q2, d14, d1 |
| + vmull.s32 q0, d15, d1 |
| + vmlal.s32 q0, d10, d14 |
| + vmlal.s32 q0, d11, d19 |
| + vmlal.s32 q0, d12, d8 |
| + vmlal.s32 q0, d13, d17 |
| + vmlal.s32 q0, d6, d6 |
| + add r2, sp, #512 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmull.s32 q3, d16, d7 |
| + vmlal.s32 q3, d10, d15 |
| + vmlal.s32 q3, d11, d14 |
| + vmlal.s32 q3, d12, d9 |
| + vmlal.s32 q3, d13, d8 |
| + add r2, sp, #528 |
| + vld1.8 {d8-d9}, [r2, : 128] |
| + vadd.i64 q5, q12, q9 |
| + vadd.i64 q6, q15, q9 |
| + vshr.s64 q5, q5, #26 |
| + vshr.s64 q6, q6, #26 |
| + vadd.i64 q7, q10, q5 |
| + vshl.i64 q5, q5, #26 |
| + vadd.i64 q8, q7, q4 |
| + vadd.i64 q2, q2, q6 |
| + vshl.i64 q6, q6, #26 |
| + vadd.i64 q10, q2, q4 |
| + vsub.i64 q5, q12, q5 |
| + vshr.s64 q8, q8, #25 |
| + vsub.i64 q6, q15, q6 |
| + vshr.s64 q10, q10, #25 |
| + vadd.i64 q12, q13, q8 |
| + vshl.i64 q8, q8, #25 |
| + vadd.i64 q13, q12, q9 |
| + vadd.i64 q0, q0, q10 |
| + vsub.i64 q7, q7, q8 |
| + vshr.s64 q8, q13, #26 |
| + vshl.i64 q10, q10, #25 |
| + vadd.i64 q13, q0, q9 |
| + vadd.i64 q1, q1, q8 |
| + vshl.i64 q8, q8, #26 |
| + vadd.i64 q15, q1, q4 |
| + vsub.i64 q2, q2, q10 |
| + vshr.s64 q10, q13, #26 |
| + vsub.i64 q8, q12, q8 |
| + vshr.s64 q12, q15, #25 |
| + vadd.i64 q3, q3, q10 |
| + vshl.i64 q10, q10, #26 |
| + vadd.i64 q13, q3, q4 |
| + vadd.i64 q14, q14, q12 |
| + add r2, r3, #288 |
| + vshl.i64 q12, q12, #25 |
| + add r4, r3, #336 |
| + vadd.i64 q15, q14, q9 |
| + add r2, r2, #8 |
| + vsub.i64 q0, q0, q10 |
| + add r4, r4, #8 |
| + vshr.s64 q10, q13, #25 |
| + vsub.i64 q1, q1, q12 |
| + vshr.s64 q12, q15, #26 |
| + vadd.i64 q13, q10, q10 |
| + vadd.i64 q11, q11, q12 |
| + vtrn.32 d16, d2 |
| + vshl.i64 q12, q12, #26 |
| + vtrn.32 d17, d3 |
| + vadd.i64 q1, q11, q4 |
| + vadd.i64 q4, q5, q13 |
| + vst1.8 d16, [r2, : 64]! |
| + vshl.i64 q5, q10, #4 |
| + vst1.8 d17, [r4, : 64]! |
| + vsub.i64 q8, q14, q12 |
| + vshr.s64 q1, q1, #25 |
| + vadd.i64 q4, q4, q5 |
| + vadd.i64 q5, q6, q1 |
| + vshl.i64 q1, q1, #25 |
| + vadd.i64 q6, q5, q9 |
| + vadd.i64 q4, q4, q10 |
| + vshl.i64 q10, q10, #25 |
| + vadd.i64 q9, q4, q9 |
| + vsub.i64 q1, q11, q1 |
| + vshr.s64 q6, q6, #26 |
| + vsub.i64 q3, q3, q10 |
| + vtrn.32 d16, d2 |
| + vshr.s64 q9, q9, #26 |
| + vtrn.32 d17, d3 |
| + vadd.i64 q1, q2, q6 |
| + vst1.8 d16, [r2, : 64] |
| + vshl.i64 q2, q6, #26 |
| + vst1.8 d17, [r4, : 64] |
| + vadd.i64 q6, q7, q9 |
| + vtrn.32 d0, d6 |
| + vshl.i64 q7, q9, #26 |
| + vtrn.32 d1, d7 |
| + vsub.i64 q2, q5, q2 |
| + add r2, r2, #16 |
| + vsub.i64 q3, q4, q7 |
| + vst1.8 d0, [r2, : 64] |
| + add r4, r4, #16 |
| + vst1.8 d1, [r4, : 64] |
| + vtrn.32 d4, d2 |
| + vtrn.32 d5, d3 |
| + sub r2, r2, #8 |
| + sub r4, r4, #8 |
| + vtrn.32 d6, d12 |
| + vtrn.32 d7, d13 |
| + vst1.8 d4, [r2, : 64] |
| + vst1.8 d5, [r4, : 64] |
| + sub r2, r2, #24 |
| + sub r4, r4, #24 |
| + vst1.8 d6, [r2, : 64] |
| + vst1.8 d7, [r4, : 64] |
| + add r2, r3, #240 |
| + add r4, r3, #96 |
| + vld1.8 {d0-d1}, [r4, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vld1.8 {d4}, [r4, : 64] |
| + add r4, r3, #144 |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vtrn.32 q0, q3 |
| + vld1.8 {d8-d9}, [r4, : 128]! |
| + vshl.i32 q5, q0, #4 |
| + vtrn.32 q1, q4 |
| + vshl.i32 q6, q3, #4 |
| + vadd.i32 q5, q5, q0 |
| + vadd.i32 q6, q6, q3 |
| + vshl.i32 q7, q1, #4 |
| + vld1.8 {d5}, [r4, : 64] |
| + vshl.i32 q8, q4, #4 |
| + vtrn.32 d4, d5 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d18-d19}, [r2, : 128]! |
| + vshl.i32 q10, q2, #4 |
| + vld1.8 {d22-d23}, [r2, : 128]! |
| + vadd.i32 q10, q10, q2 |
| + vld1.8 {d24}, [r2, : 64] |
| + vadd.i32 q5, q5, q0 |
| + add r2, r3, #192 |
| + vld1.8 {d26-d27}, [r2, : 128]! |
| + vadd.i32 q6, q6, q3 |
| + vld1.8 {d28-d29}, [r2, : 128]! |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d25}, [r2, : 64] |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 q9, q13 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q5, q5, q0 |
| + vtrn.32 q11, q14 |
| + vadd.i32 q6, q6, q3 |
| + add r2, sp, #560 |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 d24, d25 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q6, q13, #1 |
| + add r2, sp, #576 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vshl.i32 q10, q14, #1 |
| + add r2, sp, #592 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q15, q12, #1 |
| + vadd.i32 q8, q8, q4 |
| + vext.32 d10, d31, d30, #0 |
| + vadd.i32 q7, q7, q1 |
| + add r2, sp, #608 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q8, d18, d5 |
| + vmlal.s32 q8, d26, d4 |
| + vmlal.s32 q8, d19, d9 |
| + vmlal.s32 q8, d27, d3 |
| + vmlal.s32 q8, d22, d8 |
| + vmlal.s32 q8, d28, d2 |
| + vmlal.s32 q8, d23, d7 |
| + vmlal.s32 q8, d29, d1 |
| + vmlal.s32 q8, d24, d6 |
| + vmlal.s32 q8, d25, d0 |
| + add r2, sp, #624 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q2, d18, d4 |
| + vmlal.s32 q2, d12, d9 |
| + vmlal.s32 q2, d13, d8 |
| + vmlal.s32 q2, d19, d3 |
| + vmlal.s32 q2, d22, d2 |
| + vmlal.s32 q2, d23, d1 |
| + vmlal.s32 q2, d24, d0 |
| + add r2, sp, #640 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vmull.s32 q7, d18, d9 |
| + vmlal.s32 q7, d26, d3 |
| + vmlal.s32 q7, d19, d8 |
| + vmlal.s32 q7, d27, d2 |
| + vmlal.s32 q7, d22, d7 |
| + vmlal.s32 q7, d28, d1 |
| + vmlal.s32 q7, d23, d6 |
| + vmlal.s32 q7, d29, d0 |
| + add r2, sp, #656 |
| + vst1.8 {d10-d11}, [r2, : 128] |
| + vmull.s32 q5, d18, d3 |
| + vmlal.s32 q5, d19, d2 |
| + vmlal.s32 q5, d22, d1 |
| + vmlal.s32 q5, d23, d0 |
| + vmlal.s32 q5, d12, d8 |
| + add r2, sp, #672 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q4, d18, d8 |
| + vmlal.s32 q4, d26, d2 |
| + vmlal.s32 q4, d19, d7 |
| + vmlal.s32 q4, d27, d1 |
| + vmlal.s32 q4, d22, d6 |
| + vmlal.s32 q4, d28, d0 |
| + vmull.s32 q8, d18, d7 |
| + vmlal.s32 q8, d26, d1 |
| + vmlal.s32 q8, d19, d6 |
| + vmlal.s32 q8, d27, d0 |
| + add r2, sp, #576 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q7, d24, d21 |
| + vmlal.s32 q7, d25, d20 |
| + vmlal.s32 q4, d23, d21 |
| + vmlal.s32 q4, d29, d20 |
| + vmlal.s32 q8, d22, d21 |
| + vmlal.s32 q8, d28, d20 |
| + vmlal.s32 q5, d24, d20 |
| + add r2, sp, #576 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q7, d18, d6 |
| + vmlal.s32 q7, d26, d0 |
| + add r2, sp, #656 |
| + vld1.8 {d30-d31}, [r2, : 128] |
| + vmlal.s32 q2, d30, d21 |
| + vmlal.s32 q7, d19, d21 |
| + vmlal.s32 q7, d27, d20 |
| + add r2, sp, #624 |
| + vld1.8 {d26-d27}, [r2, : 128] |
| + vmlal.s32 q4, d25, d27 |
| + vmlal.s32 q8, d29, d27 |
| + vmlal.s32 q8, d25, d26 |
| + vmlal.s32 q7, d28, d27 |
| + vmlal.s32 q7, d29, d26 |
| + add r2, sp, #608 |
| + vld1.8 {d28-d29}, [r2, : 128] |
| + vmlal.s32 q4, d24, d29 |
| + vmlal.s32 q8, d23, d29 |
| + vmlal.s32 q8, d24, d28 |
| + vmlal.s32 q7, d22, d29 |
| + vmlal.s32 q7, d23, d28 |
| + add r2, sp, #608 |
| + vst1.8 {d8-d9}, [r2, : 128] |
| + add r2, sp, #560 |
| + vld1.8 {d8-d9}, [r2, : 128] |
| + vmlal.s32 q7, d24, d9 |
| + vmlal.s32 q7, d25, d31 |
| + vmull.s32 q1, d18, d2 |
| + vmlal.s32 q1, d19, d1 |
| + vmlal.s32 q1, d22, d0 |
| + vmlal.s32 q1, d24, d27 |
| + vmlal.s32 q1, d23, d20 |
| + vmlal.s32 q1, d12, d7 |
| + vmlal.s32 q1, d13, d6 |
| + vmull.s32 q6, d18, d1 |
| + vmlal.s32 q6, d19, d0 |
| + vmlal.s32 q6, d23, d27 |
| + vmlal.s32 q6, d22, d20 |
| + vmlal.s32 q6, d24, d26 |
| + vmull.s32 q0, d18, d0 |
| + vmlal.s32 q0, d22, d27 |
| + vmlal.s32 q0, d23, d26 |
| + vmlal.s32 q0, d24, d31 |
| + vmlal.s32 q0, d19, d20 |
| + add r2, sp, #640 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q2, d18, d7 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d18, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d18, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d18, d28 |
| + vmlal.s32 q0, d19, d9 |
| + vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q6, d19, d28 |
| + add r2, sp, #592 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + add r2, sp, #512 |
| + vld1.8 {d22-d23}, [r2, : 128] |
| + vmlal.s32 q5, d19, d7 |
| + vmlal.s32 q0, d18, d21 |
| + vmlal.s32 q0, d19, d29 |
| + vmlal.s32 q6, d18, d6 |
| + add r2, sp, #528 |
| + vld1.8 {d6-d7}, [r2, : 128] |
| + vmlal.s32 q6, d19, d21 |
| + add r2, sp, #576 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q0, d30, d8 |
| + add r2, sp, #672 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q5, d30, d29 |
| + add r2, sp, #608 |
| + vld1.8 {d24-d25}, [r2, : 128] |
| + vmlal.s32 q1, d30, d28 |
| + vadd.i64 q13, q0, q11 |
| + vadd.i64 q14, q5, q11 |
| + vmlal.s32 q6, d30, d9 |
| + vshr.s64 q4, q13, #26 |
| + vshr.s64 q13, q14, #26 |
| + vadd.i64 q7, q7, q4 |
| + vshl.i64 q4, q4, #26 |
| + vadd.i64 q14, q7, q3 |
| + vadd.i64 q9, q9, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q15, q9, q3 |
| + vsub.i64 q0, q0, q4 |
| + vshr.s64 q4, q14, #25 |
| + vsub.i64 q5, q5, q13 |
| + vshr.s64 q13, q15, #25 |
| + vadd.i64 q6, q6, q4 |
| + vshl.i64 q4, q4, #25 |
| + vadd.i64 q14, q6, q11 |
| + vadd.i64 q2, q2, q13 |
| + vsub.i64 q4, q7, q4 |
| + vshr.s64 q7, q14, #26 |
| + vshl.i64 q13, q13, #25 |
| + vadd.i64 q14, q2, q11 |
| + vadd.i64 q8, q8, q7 |
| + vshl.i64 q7, q7, #26 |
| + vadd.i64 q15, q8, q3 |
| + vsub.i64 q9, q9, q13 |
| + vshr.s64 q13, q14, #26 |
| + vsub.i64 q6, q6, q7 |
| + vshr.s64 q7, q15, #25 |
| + vadd.i64 q10, q10, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q14, q10, q3 |
| + vadd.i64 q1, q1, q7 |
| + add r2, r3, #144 |
| + vshl.i64 q7, q7, #25 |
| + add r4, r3, #96 |
| + vadd.i64 q15, q1, q11 |
| + add r2, r2, #8 |
| + vsub.i64 q2, q2, q13 |
| + add r4, r4, #8 |
| + vshr.s64 q13, q14, #25 |
| + vsub.i64 q7, q8, q7 |
| + vshr.s64 q8, q15, #26 |
| + vadd.i64 q14, q13, q13 |
| + vadd.i64 q12, q12, q8 |
| + vtrn.32 d12, d14 |
| + vshl.i64 q8, q8, #26 |
| + vtrn.32 d13, d15 |
| + vadd.i64 q3, q12, q3 |
| + vadd.i64 q0, q0, q14 |
| + vst1.8 d12, [r2, : 64]! |
| + vshl.i64 q7, q13, #4 |
| + vst1.8 d13, [r4, : 64]! |
| + vsub.i64 q1, q1, q8 |
| + vshr.s64 q3, q3, #25 |
| + vadd.i64 q0, q0, q7 |
| + vadd.i64 q5, q5, q3 |
| + vshl.i64 q3, q3, #25 |
| + vadd.i64 q6, q5, q11 |
| + vadd.i64 q0, q0, q13 |
| + vshl.i64 q7, q13, #25 |
| + vadd.i64 q8, q0, q11 |
| + vsub.i64 q3, q12, q3 |
| + vshr.s64 q6, q6, #26 |
| + vsub.i64 q7, q10, q7 |
| + vtrn.32 d2, d6 |
| + vshr.s64 q8, q8, #26 |
| + vtrn.32 d3, d7 |
| + vadd.i64 q3, q9, q6 |
| + vst1.8 d2, [r2, : 64] |
| + vshl.i64 q6, q6, #26 |
| + vst1.8 d3, [r4, : 64] |
| + vadd.i64 q1, q4, q8 |
| + vtrn.32 d4, d14 |
| + vshl.i64 q4, q8, #26 |
| + vtrn.32 d5, d15 |
| + vsub.i64 q5, q5, q6 |
| + add r2, r2, #16 |
| + vsub.i64 q0, q0, q4 |
| + vst1.8 d4, [r2, : 64] |
| + add r4, r4, #16 |
| + vst1.8 d5, [r4, : 64] |
| + vtrn.32 d10, d6 |
| + vtrn.32 d11, d7 |
| + sub r2, r2, #8 |
| + sub r4, r4, #8 |
| + vtrn.32 d0, d2 |
| + vtrn.32 d1, d3 |
| + vst1.8 d10, [r2, : 64] |
| + vst1.8 d11, [r4, : 64] |
| + sub r2, r2, #24 |
| + sub r4, r4, #24 |
| + vst1.8 d0, [r2, : 64] |
| + vst1.8 d1, [r4, : 64] |
| + add r2, r3, #288 |
| + add r4, r3, #336 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vsub.i32 q0, q0, q1 |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d4-d5}, [r4, : 128]! |
| + vsub.i32 q1, q1, q2 |
| + add r5, r3, #240 |
| + vld1.8 {d4}, [r2, : 64] |
| + vld1.8 {d6}, [r4, : 64] |
| + vsub.i32 q2, q2, q3 |
| + vst1.8 {d0-d1}, [r5, : 128]! |
| + vst1.8 {d2-d3}, [r5, : 128]! |
| + vst1.8 d4, [r5, : 64] |
| + add r2, r3, #144 |
| + add r4, r3, #96 |
| + add r5, r3, #144 |
| + add r6, r3, #192 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vsub.i32 q2, q0, q1 |
| + vadd.i32 q0, q0, q1 |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vsub.i32 q4, q1, q3 |
| + vadd.i32 q1, q1, q3 |
| + vld1.8 {d6}, [r2, : 64] |
| + vld1.8 {d10}, [r4, : 64] |
| + vsub.i32 q6, q3, q5 |
| + vadd.i32 q3, q3, q5 |
| + vst1.8 {d4-d5}, [r5, : 128]! |
| + vst1.8 {d0-d1}, [r6, : 128]! |
| + vst1.8 {d8-d9}, [r5, : 128]! |
| + vst1.8 {d2-d3}, [r6, : 128]! |
| + vst1.8 d12, [r5, : 64] |
| + vst1.8 d6, [r6, : 64] |
| + add r2, r3, #0 |
| + add r4, r3, #240 |
| + vld1.8 {d0-d1}, [r4, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vld1.8 {d4}, [r4, : 64] |
| + add r4, r3, #336 |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vtrn.32 q0, q3 |
| + vld1.8 {d8-d9}, [r4, : 128]! |
| + vshl.i32 q5, q0, #4 |
| + vtrn.32 q1, q4 |
| + vshl.i32 q6, q3, #4 |
| + vadd.i32 q5, q5, q0 |
| + vadd.i32 q6, q6, q3 |
| + vshl.i32 q7, q1, #4 |
| + vld1.8 {d5}, [r4, : 64] |
| + vshl.i32 q8, q4, #4 |
| + vtrn.32 d4, d5 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d18-d19}, [r2, : 128]! |
| + vshl.i32 q10, q2, #4 |
| + vld1.8 {d22-d23}, [r2, : 128]! |
| + vadd.i32 q10, q10, q2 |
| + vld1.8 {d24}, [r2, : 64] |
| + vadd.i32 q5, q5, q0 |
| + add r2, r3, #288 |
| + vld1.8 {d26-d27}, [r2, : 128]! |
| + vadd.i32 q6, q6, q3 |
| + vld1.8 {d28-d29}, [r2, : 128]! |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d25}, [r2, : 64] |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 q9, q13 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q5, q5, q0 |
| + vtrn.32 q11, q14 |
| + vadd.i32 q6, q6, q3 |
| + add r2, sp, #560 |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 d24, d25 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q6, q13, #1 |
| + add r2, sp, #576 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vshl.i32 q10, q14, #1 |
| + add r2, sp, #592 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q15, q12, #1 |
| + vadd.i32 q8, q8, q4 |
| + vext.32 d10, d31, d30, #0 |
| + vadd.i32 q7, q7, q1 |
| + add r2, sp, #608 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q8, d18, d5 |
| + vmlal.s32 q8, d26, d4 |
| + vmlal.s32 q8, d19, d9 |
| + vmlal.s32 q8, d27, d3 |
| + vmlal.s32 q8, d22, d8 |
| + vmlal.s32 q8, d28, d2 |
| + vmlal.s32 q8, d23, d7 |
| + vmlal.s32 q8, d29, d1 |
| + vmlal.s32 q8, d24, d6 |
| + vmlal.s32 q8, d25, d0 |
| + add r2, sp, #624 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q2, d18, d4 |
| + vmlal.s32 q2, d12, d9 |
| + vmlal.s32 q2, d13, d8 |
| + vmlal.s32 q2, d19, d3 |
| + vmlal.s32 q2, d22, d2 |
| + vmlal.s32 q2, d23, d1 |
| + vmlal.s32 q2, d24, d0 |
| + add r2, sp, #640 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vmull.s32 q7, d18, d9 |
| + vmlal.s32 q7, d26, d3 |
| + vmlal.s32 q7, d19, d8 |
| + vmlal.s32 q7, d27, d2 |
| + vmlal.s32 q7, d22, d7 |
| + vmlal.s32 q7, d28, d1 |
| + vmlal.s32 q7, d23, d6 |
| + vmlal.s32 q7, d29, d0 |
| + add r2, sp, #656 |
| + vst1.8 {d10-d11}, [r2, : 128] |
| + vmull.s32 q5, d18, d3 |
| + vmlal.s32 q5, d19, d2 |
| + vmlal.s32 q5, d22, d1 |
| + vmlal.s32 q5, d23, d0 |
| + vmlal.s32 q5, d12, d8 |
| + add r2, sp, #672 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q4, d18, d8 |
| + vmlal.s32 q4, d26, d2 |
| + vmlal.s32 q4, d19, d7 |
| + vmlal.s32 q4, d27, d1 |
| + vmlal.s32 q4, d22, d6 |
| + vmlal.s32 q4, d28, d0 |
| + vmull.s32 q8, d18, d7 |
| + vmlal.s32 q8, d26, d1 |
| + vmlal.s32 q8, d19, d6 |
| + vmlal.s32 q8, d27, d0 |
| + add r2, sp, #576 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q7, d24, d21 |
| + vmlal.s32 q7, d25, d20 |
| + vmlal.s32 q4, d23, d21 |
| + vmlal.s32 q4, d29, d20 |
| + vmlal.s32 q8, d22, d21 |
| + vmlal.s32 q8, d28, d20 |
| + vmlal.s32 q5, d24, d20 |
| + add r2, sp, #576 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q7, d18, d6 |
| + vmlal.s32 q7, d26, d0 |
| + add r2, sp, #656 |
| + vld1.8 {d30-d31}, [r2, : 128] |
| + vmlal.s32 q2, d30, d21 |
| + vmlal.s32 q7, d19, d21 |
| + vmlal.s32 q7, d27, d20 |
| + add r2, sp, #624 |
| + vld1.8 {d26-d27}, [r2, : 128] |
| + vmlal.s32 q4, d25, d27 |
| + vmlal.s32 q8, d29, d27 |
| + vmlal.s32 q8, d25, d26 |
| + vmlal.s32 q7, d28, d27 |
| + vmlal.s32 q7, d29, d26 |
| + add r2, sp, #608 |
| + vld1.8 {d28-d29}, [r2, : 128] |
| + vmlal.s32 q4, d24, d29 |
| + vmlal.s32 q8, d23, d29 |
| + vmlal.s32 q8, d24, d28 |
| + vmlal.s32 q7, d22, d29 |
| + vmlal.s32 q7, d23, d28 |
| + add r2, sp, #608 |
| + vst1.8 {d8-d9}, [r2, : 128] |
| + add r2, sp, #560 |
| + vld1.8 {d8-d9}, [r2, : 128] |
| + vmlal.s32 q7, d24, d9 |
| + vmlal.s32 q7, d25, d31 |
| + vmull.s32 q1, d18, d2 |
| + vmlal.s32 q1, d19, d1 |
| + vmlal.s32 q1, d22, d0 |
| + vmlal.s32 q1, d24, d27 |
| + vmlal.s32 q1, d23, d20 |
| + vmlal.s32 q1, d12, d7 |
| + vmlal.s32 q1, d13, d6 |
| + vmull.s32 q6, d18, d1 |
| + vmlal.s32 q6, d19, d0 |
| + vmlal.s32 q6, d23, d27 |
| + vmlal.s32 q6, d22, d20 |
| + vmlal.s32 q6, d24, d26 |
| + vmull.s32 q0, d18, d0 |
| + vmlal.s32 q0, d22, d27 |
| + vmlal.s32 q0, d23, d26 |
| + vmlal.s32 q0, d24, d31 |
| + vmlal.s32 q0, d19, d20 |
| + add r2, sp, #640 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q2, d18, d7 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d18, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d18, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d18, d28 |
| + vmlal.s32 q0, d19, d9 |
| + vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q6, d19, d28 |
| + add r2, sp, #592 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + add r2, sp, #512 |
| + vld1.8 {d22-d23}, [r2, : 128] |
| + vmlal.s32 q5, d19, d7 |
| + vmlal.s32 q0, d18, d21 |
| + vmlal.s32 q0, d19, d29 |
| + vmlal.s32 q6, d18, d6 |
| + add r2, sp, #528 |
| + vld1.8 {d6-d7}, [r2, : 128] |
| + vmlal.s32 q6, d19, d21 |
| + add r2, sp, #576 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q0, d30, d8 |
| + add r2, sp, #672 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q5, d30, d29 |
| + add r2, sp, #608 |
| + vld1.8 {d24-d25}, [r2, : 128] |
| + vmlal.s32 q1, d30, d28 |
| + vadd.i64 q13, q0, q11 |
| + vadd.i64 q14, q5, q11 |
| + vmlal.s32 q6, d30, d9 |
| + vshr.s64 q4, q13, #26 |
| + vshr.s64 q13, q14, #26 |
| + vadd.i64 q7, q7, q4 |
| + vshl.i64 q4, q4, #26 |
| + vadd.i64 q14, q7, q3 |
| + vadd.i64 q9, q9, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q15, q9, q3 |
| + vsub.i64 q0, q0, q4 |
| + vshr.s64 q4, q14, #25 |
| + vsub.i64 q5, q5, q13 |
| + vshr.s64 q13, q15, #25 |
| + vadd.i64 q6, q6, q4 |
| + vshl.i64 q4, q4, #25 |
| + vadd.i64 q14, q6, q11 |
| + vadd.i64 q2, q2, q13 |
| + vsub.i64 q4, q7, q4 |
| + vshr.s64 q7, q14, #26 |
| + vshl.i64 q13, q13, #25 |
| + vadd.i64 q14, q2, q11 |
| + vadd.i64 q8, q8, q7 |
| + vshl.i64 q7, q7, #26 |
| + vadd.i64 q15, q8, q3 |
| + vsub.i64 q9, q9, q13 |
| + vshr.s64 q13, q14, #26 |
| + vsub.i64 q6, q6, q7 |
| + vshr.s64 q7, q15, #25 |
| + vadd.i64 q10, q10, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q14, q10, q3 |
| + vadd.i64 q1, q1, q7 |
| + add r2, r3, #288 |
| + vshl.i64 q7, q7, #25 |
| + add r4, r3, #96 |
| + vadd.i64 q15, q1, q11 |
| + add r2, r2, #8 |
| + vsub.i64 q2, q2, q13 |
| + add r4, r4, #8 |
| + vshr.s64 q13, q14, #25 |
| + vsub.i64 q7, q8, q7 |
| + vshr.s64 q8, q15, #26 |
| + vadd.i64 q14, q13, q13 |
| + vadd.i64 q12, q12, q8 |
| + vtrn.32 d12, d14 |
| + vshl.i64 q8, q8, #26 |
| + vtrn.32 d13, d15 |
| + vadd.i64 q3, q12, q3 |
| + vadd.i64 q0, q0, q14 |
| + vst1.8 d12, [r2, : 64]! |
| + vshl.i64 q7, q13, #4 |
| + vst1.8 d13, [r4, : 64]! |
| + vsub.i64 q1, q1, q8 |
| + vshr.s64 q3, q3, #25 |
| + vadd.i64 q0, q0, q7 |
| + vadd.i64 q5, q5, q3 |
| + vshl.i64 q3, q3, #25 |
| + vadd.i64 q6, q5, q11 |
| + vadd.i64 q0, q0, q13 |
| + vshl.i64 q7, q13, #25 |
| + vadd.i64 q8, q0, q11 |
| + vsub.i64 q3, q12, q3 |
| + vshr.s64 q6, q6, #26 |
| + vsub.i64 q7, q10, q7 |
| + vtrn.32 d2, d6 |
| + vshr.s64 q8, q8, #26 |
| + vtrn.32 d3, d7 |
| + vadd.i64 q3, q9, q6 |
| + vst1.8 d2, [r2, : 64] |
| + vshl.i64 q6, q6, #26 |
| + vst1.8 d3, [r4, : 64] |
| + vadd.i64 q1, q4, q8 |
| + vtrn.32 d4, d14 |
| + vshl.i64 q4, q8, #26 |
| + vtrn.32 d5, d15 |
| + vsub.i64 q5, q5, q6 |
| + add r2, r2, #16 |
| + vsub.i64 q0, q0, q4 |
| + vst1.8 d4, [r2, : 64] |
| + add r4, r4, #16 |
| + vst1.8 d5, [r4, : 64] |
| + vtrn.32 d10, d6 |
| + vtrn.32 d11, d7 |
| + sub r2, r2, #8 |
| + sub r4, r4, #8 |
| + vtrn.32 d0, d2 |
| + vtrn.32 d1, d3 |
| + vst1.8 d10, [r2, : 64] |
| + vst1.8 d11, [r4, : 64] |
| + sub r2, r2, #24 |
| + sub r4, r4, #24 |
| + vst1.8 d0, [r2, : 64] |
| + vst1.8 d1, [r4, : 64] |
| + add r2, sp, #544 |
| + add r4, r3, #144 |
| + add r5, r3, #192 |
| + vld1.8 {d0-d1}, [r2, : 128] |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vld1.8 {d4-d5}, [r5, : 128]! |
| + vzip.i32 q1, q2 |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vld1.8 {d8-d9}, [r5, : 128]! |
| + vshl.i32 q5, q1, #1 |
| + vzip.i32 q3, q4 |
| + vshl.i32 q6, q2, #1 |
| + vld1.8 {d14}, [r4, : 64] |
| + vshl.i32 q8, q3, #1 |
| + vld1.8 {d15}, [r5, : 64] |
| + vshl.i32 q9, q4, #1 |
| + vmul.i32 d21, d7, d1 |
| + vtrn.32 d14, d15 |
| + vmul.i32 q11, q4, q0 |
| + vmul.i32 q0, q7, q0 |
| + vmull.s32 q12, d2, d2 |
| + vmlal.s32 q12, d11, d1 |
| + vmlal.s32 q12, d12, d0 |
| + vmlal.s32 q12, d13, d23 |
| + vmlal.s32 q12, d16, d22 |
| + vmlal.s32 q12, d7, d21 |
| + vmull.s32 q10, d2, d11 |
| + vmlal.s32 q10, d4, d1 |
| + vmlal.s32 q10, d13, d0 |
| + vmlal.s32 q10, d6, d23 |
| + vmlal.s32 q10, d17, d22 |
| + vmull.s32 q13, d10, d4 |
| + vmlal.s32 q13, d11, d3 |
| + vmlal.s32 q13, d13, d1 |
| + vmlal.s32 q13, d16, d0 |
| + vmlal.s32 q13, d17, d23 |
| + vmlal.s32 q13, d8, d22 |
| + vmull.s32 q1, d10, d5 |
| + vmlal.s32 q1, d11, d4 |
| + vmlal.s32 q1, d6, d1 |
| + vmlal.s32 q1, d17, d0 |
| + vmlal.s32 q1, d8, d23 |
| + vmull.s32 q14, d10, d6 |
| + vmlal.s32 q14, d11, d13 |
| + vmlal.s32 q14, d4, d4 |
| + vmlal.s32 q14, d17, d1 |
| + vmlal.s32 q14, d18, d0 |
| + vmlal.s32 q14, d9, d23 |
| + vmull.s32 q11, d10, d7 |
| + vmlal.s32 q11, d11, d6 |
| + vmlal.s32 q11, d12, d5 |
| + vmlal.s32 q11, d8, d1 |
| + vmlal.s32 q11, d19, d0 |
| + vmull.s32 q15, d10, d8 |
| + vmlal.s32 q15, d11, d17 |
| + vmlal.s32 q15, d12, d6 |
| + vmlal.s32 q15, d13, d5 |
| + vmlal.s32 q15, d19, d1 |
| + vmlal.s32 q15, d14, d0 |
| + vmull.s32 q2, d10, d9 |
| + vmlal.s32 q2, d11, d8 |
| + vmlal.s32 q2, d12, d7 |
| + vmlal.s32 q2, d13, d6 |
| + vmlal.s32 q2, d14, d1 |
| + vmull.s32 q0, d15, d1 |
| + vmlal.s32 q0, d10, d14 |
| + vmlal.s32 q0, d11, d19 |
| + vmlal.s32 q0, d12, d8 |
| + vmlal.s32 q0, d13, d17 |
| + vmlal.s32 q0, d6, d6 |
| + add r2, sp, #512 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmull.s32 q3, d16, d7 |
| + vmlal.s32 q3, d10, d15 |
| + vmlal.s32 q3, d11, d14 |
| + vmlal.s32 q3, d12, d9 |
| + vmlal.s32 q3, d13, d8 |
| + add r2, sp, #528 |
| + vld1.8 {d8-d9}, [r2, : 128] |
| + vadd.i64 q5, q12, q9 |
| + vadd.i64 q6, q15, q9 |
| + vshr.s64 q5, q5, #26 |
| + vshr.s64 q6, q6, #26 |
| + vadd.i64 q7, q10, q5 |
| + vshl.i64 q5, q5, #26 |
| + vadd.i64 q8, q7, q4 |
| + vadd.i64 q2, q2, q6 |
| + vshl.i64 q6, q6, #26 |
| + vadd.i64 q10, q2, q4 |
| + vsub.i64 q5, q12, q5 |
| + vshr.s64 q8, q8, #25 |
| + vsub.i64 q6, q15, q6 |
| + vshr.s64 q10, q10, #25 |
| + vadd.i64 q12, q13, q8 |
| + vshl.i64 q8, q8, #25 |
| + vadd.i64 q13, q12, q9 |
| + vadd.i64 q0, q0, q10 |
| + vsub.i64 q7, q7, q8 |
| + vshr.s64 q8, q13, #26 |
| + vshl.i64 q10, q10, #25 |
| + vadd.i64 q13, q0, q9 |
| + vadd.i64 q1, q1, q8 |
| + vshl.i64 q8, q8, #26 |
| + vadd.i64 q15, q1, q4 |
| + vsub.i64 q2, q2, q10 |
| + vshr.s64 q10, q13, #26 |
| + vsub.i64 q8, q12, q8 |
| + vshr.s64 q12, q15, #25 |
| + vadd.i64 q3, q3, q10 |
| + vshl.i64 q10, q10, #26 |
| + vadd.i64 q13, q3, q4 |
| + vadd.i64 q14, q14, q12 |
| + add r2, r3, #144 |
| + vshl.i64 q12, q12, #25 |
| + add r4, r3, #192 |
| + vadd.i64 q15, q14, q9 |
| + add r2, r2, #8 |
| + vsub.i64 q0, q0, q10 |
| + add r4, r4, #8 |
| + vshr.s64 q10, q13, #25 |
| + vsub.i64 q1, q1, q12 |
| + vshr.s64 q12, q15, #26 |
| + vadd.i64 q13, q10, q10 |
| + vadd.i64 q11, q11, q12 |
| + vtrn.32 d16, d2 |
| + vshl.i64 q12, q12, #26 |
| + vtrn.32 d17, d3 |
| + vadd.i64 q1, q11, q4 |
| + vadd.i64 q4, q5, q13 |
| + vst1.8 d16, [r2, : 64]! |
| + vshl.i64 q5, q10, #4 |
| + vst1.8 d17, [r4, : 64]! |
| + vsub.i64 q8, q14, q12 |
| + vshr.s64 q1, q1, #25 |
| + vadd.i64 q4, q4, q5 |
| + vadd.i64 q5, q6, q1 |
| + vshl.i64 q1, q1, #25 |
| + vadd.i64 q6, q5, q9 |
| + vadd.i64 q4, q4, q10 |
| + vshl.i64 q10, q10, #25 |
| + vadd.i64 q9, q4, q9 |
| + vsub.i64 q1, q11, q1 |
| + vshr.s64 q6, q6, #26 |
| + vsub.i64 q3, q3, q10 |
| + vtrn.32 d16, d2 |
| + vshr.s64 q9, q9, #26 |
| + vtrn.32 d17, d3 |
| + vadd.i64 q1, q2, q6 |
| + vst1.8 d16, [r2, : 64] |
| + vshl.i64 q2, q6, #26 |
| + vst1.8 d17, [r4, : 64] |
| + vadd.i64 q6, q7, q9 |
| + vtrn.32 d0, d6 |
| + vshl.i64 q7, q9, #26 |
| + vtrn.32 d1, d7 |
| + vsub.i64 q2, q5, q2 |
| + add r2, r2, #16 |
| + vsub.i64 q3, q4, q7 |
| + vst1.8 d0, [r2, : 64] |
| + add r4, r4, #16 |
| + vst1.8 d1, [r4, : 64] |
| + vtrn.32 d4, d2 |
| + vtrn.32 d5, d3 |
| + sub r2, r2, #8 |
| + sub r4, r4, #8 |
| + vtrn.32 d6, d12 |
| + vtrn.32 d7, d13 |
| + vst1.8 d4, [r2, : 64] |
| + vst1.8 d5, [r4, : 64] |
| + sub r2, r2, #24 |
| + sub r4, r4, #24 |
| + vst1.8 d6, [r2, : 64] |
| + vst1.8 d7, [r4, : 64] |
| + add r2, r3, #336 |
| + add r4, r3, #288 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vadd.i32 q0, q0, q1 |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d4-d5}, [r4, : 128]! |
| + vadd.i32 q1, q1, q2 |
| + add r5, r3, #288 |
| + vld1.8 {d4}, [r2, : 64] |
| + vld1.8 {d6}, [r4, : 64] |
| + vadd.i32 q2, q2, q3 |
| + vst1.8 {d0-d1}, [r5, : 128]! |
| + vst1.8 {d2-d3}, [r5, : 128]! |
| + vst1.8 d4, [r5, : 64] |
| + add r2, r3, #48 |
| + add r4, r3, #144 |
| + vld1.8 {d0-d1}, [r4, : 128]! |
| + vld1.8 {d2-d3}, [r4, : 128]! |
| + vld1.8 {d4}, [r4, : 64] |
| + add r4, r3, #288 |
| + vld1.8 {d6-d7}, [r4, : 128]! |
| + vtrn.32 q0, q3 |
| + vld1.8 {d8-d9}, [r4, : 128]! |
| + vshl.i32 q5, q0, #4 |
| + vtrn.32 q1, q4 |
| + vshl.i32 q6, q3, #4 |
| + vadd.i32 q5, q5, q0 |
| + vadd.i32 q6, q6, q3 |
| + vshl.i32 q7, q1, #4 |
| + vld1.8 {d5}, [r4, : 64] |
| + vshl.i32 q8, q4, #4 |
| + vtrn.32 d4, d5 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d18-d19}, [r2, : 128]! |
| + vshl.i32 q10, q2, #4 |
| + vld1.8 {d22-d23}, [r2, : 128]! |
| + vadd.i32 q10, q10, q2 |
| + vld1.8 {d24}, [r2, : 64] |
| + vadd.i32 q5, q5, q0 |
| + add r2, r3, #240 |
| + vld1.8 {d26-d27}, [r2, : 128]! |
| + vadd.i32 q6, q6, q3 |
| + vld1.8 {d28-d29}, [r2, : 128]! |
| + vadd.i32 q8, q8, q4 |
| + vld1.8 {d25}, [r2, : 64] |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 q9, q13 |
| + vadd.i32 q7, q7, q1 |
| + vadd.i32 q5, q5, q0 |
| + vtrn.32 q11, q14 |
| + vadd.i32 q6, q6, q3 |
| + add r2, sp, #560 |
| + vadd.i32 q10, q10, q2 |
| + vtrn.32 d24, d25 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q6, q13, #1 |
| + add r2, sp, #576 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vshl.i32 q10, q14, #1 |
| + add r2, sp, #592 |
| + vst1.8 {d12-d13}, [r2, : 128] |
| + vshl.i32 q15, q12, #1 |
| + vadd.i32 q8, q8, q4 |
| + vext.32 d10, d31, d30, #0 |
| + vadd.i32 q7, q7, q1 |
| + add r2, sp, #608 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q8, d18, d5 |
| + vmlal.s32 q8, d26, d4 |
| + vmlal.s32 q8, d19, d9 |
| + vmlal.s32 q8, d27, d3 |
| + vmlal.s32 q8, d22, d8 |
| + vmlal.s32 q8, d28, d2 |
| + vmlal.s32 q8, d23, d7 |
| + vmlal.s32 q8, d29, d1 |
| + vmlal.s32 q8, d24, d6 |
| + vmlal.s32 q8, d25, d0 |
| + add r2, sp, #624 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q2, d18, d4 |
| + vmlal.s32 q2, d12, d9 |
| + vmlal.s32 q2, d13, d8 |
| + vmlal.s32 q2, d19, d3 |
| + vmlal.s32 q2, d22, d2 |
| + vmlal.s32 q2, d23, d1 |
| + vmlal.s32 q2, d24, d0 |
| + add r2, sp, #640 |
| + vst1.8 {d20-d21}, [r2, : 128] |
| + vmull.s32 q7, d18, d9 |
| + vmlal.s32 q7, d26, d3 |
| + vmlal.s32 q7, d19, d8 |
| + vmlal.s32 q7, d27, d2 |
| + vmlal.s32 q7, d22, d7 |
| + vmlal.s32 q7, d28, d1 |
| + vmlal.s32 q7, d23, d6 |
| + vmlal.s32 q7, d29, d0 |
| + add r2, sp, #656 |
| + vst1.8 {d10-d11}, [r2, : 128] |
| + vmull.s32 q5, d18, d3 |
| + vmlal.s32 q5, d19, d2 |
| + vmlal.s32 q5, d22, d1 |
| + vmlal.s32 q5, d23, d0 |
| + vmlal.s32 q5, d12, d8 |
| + add r2, sp, #672 |
| + vst1.8 {d16-d17}, [r2, : 128] |
| + vmull.s32 q4, d18, d8 |
| + vmlal.s32 q4, d26, d2 |
| + vmlal.s32 q4, d19, d7 |
| + vmlal.s32 q4, d27, d1 |
| + vmlal.s32 q4, d22, d6 |
| + vmlal.s32 q4, d28, d0 |
| + vmull.s32 q8, d18, d7 |
| + vmlal.s32 q8, d26, d1 |
| + vmlal.s32 q8, d19, d6 |
| + vmlal.s32 q8, d27, d0 |
| + add r2, sp, #576 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q7, d24, d21 |
| + vmlal.s32 q7, d25, d20 |
| + vmlal.s32 q4, d23, d21 |
| + vmlal.s32 q4, d29, d20 |
| + vmlal.s32 q8, d22, d21 |
| + vmlal.s32 q8, d28, d20 |
| + vmlal.s32 q5, d24, d20 |
| + add r2, sp, #576 |
| + vst1.8 {d14-d15}, [r2, : 128] |
| + vmull.s32 q7, d18, d6 |
| + vmlal.s32 q7, d26, d0 |
| + add r2, sp, #656 |
| + vld1.8 {d30-d31}, [r2, : 128] |
| + vmlal.s32 q2, d30, d21 |
| + vmlal.s32 q7, d19, d21 |
| + vmlal.s32 q7, d27, d20 |
| + add r2, sp, #624 |
| + vld1.8 {d26-d27}, [r2, : 128] |
| + vmlal.s32 q4, d25, d27 |
| + vmlal.s32 q8, d29, d27 |
| + vmlal.s32 q8, d25, d26 |
| + vmlal.s32 q7, d28, d27 |
| + vmlal.s32 q7, d29, d26 |
| + add r2, sp, #608 |
| + vld1.8 {d28-d29}, [r2, : 128] |
| + vmlal.s32 q4, d24, d29 |
| + vmlal.s32 q8, d23, d29 |
| + vmlal.s32 q8, d24, d28 |
| + vmlal.s32 q7, d22, d29 |
| + vmlal.s32 q7, d23, d28 |
| + add r2, sp, #608 |
| + vst1.8 {d8-d9}, [r2, : 128] |
| + add r2, sp, #560 |
| + vld1.8 {d8-d9}, [r2, : 128] |
| + vmlal.s32 q7, d24, d9 |
| + vmlal.s32 q7, d25, d31 |
| + vmull.s32 q1, d18, d2 |
| + vmlal.s32 q1, d19, d1 |
| + vmlal.s32 q1, d22, d0 |
| + vmlal.s32 q1, d24, d27 |
| + vmlal.s32 q1, d23, d20 |
| + vmlal.s32 q1, d12, d7 |
| + vmlal.s32 q1, d13, d6 |
| + vmull.s32 q6, d18, d1 |
| + vmlal.s32 q6, d19, d0 |
| + vmlal.s32 q6, d23, d27 |
| + vmlal.s32 q6, d22, d20 |
| + vmlal.s32 q6, d24, d26 |
| + vmull.s32 q0, d18, d0 |
| + vmlal.s32 q0, d22, d27 |
| + vmlal.s32 q0, d23, d26 |
| + vmlal.s32 q0, d24, d31 |
| + vmlal.s32 q0, d19, d20 |
| + add r2, sp, #640 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q2, d18, d7 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d18, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d18, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d18, d28 |
| + vmlal.s32 q0, d19, d9 |
| + vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q6, d19, d28 |
| + add r2, sp, #592 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + add r2, sp, #512 |
| + vld1.8 {d22-d23}, [r2, : 128] |
| + vmlal.s32 q5, d19, d7 |
| + vmlal.s32 q0, d18, d21 |
| + vmlal.s32 q0, d19, d29 |
| + vmlal.s32 q6, d18, d6 |
| + add r2, sp, #528 |
| + vld1.8 {d6-d7}, [r2, : 128] |
| + vmlal.s32 q6, d19, d21 |
| + add r2, sp, #576 |
| + vld1.8 {d18-d19}, [r2, : 128] |
| + vmlal.s32 q0, d30, d8 |
| + add r2, sp, #672 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vmlal.s32 q5, d30, d29 |
| + add r2, sp, #608 |
| + vld1.8 {d24-d25}, [r2, : 128] |
| + vmlal.s32 q1, d30, d28 |
| + vadd.i64 q13, q0, q11 |
| + vadd.i64 q14, q5, q11 |
| + vmlal.s32 q6, d30, d9 |
| + vshr.s64 q4, q13, #26 |
| + vshr.s64 q13, q14, #26 |
| + vadd.i64 q7, q7, q4 |
| + vshl.i64 q4, q4, #26 |
| + vadd.i64 q14, q7, q3 |
| + vadd.i64 q9, q9, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q15, q9, q3 |
| + vsub.i64 q0, q0, q4 |
| + vshr.s64 q4, q14, #25 |
| + vsub.i64 q5, q5, q13 |
| + vshr.s64 q13, q15, #25 |
| + vadd.i64 q6, q6, q4 |
| + vshl.i64 q4, q4, #25 |
| + vadd.i64 q14, q6, q11 |
| + vadd.i64 q2, q2, q13 |
| + vsub.i64 q4, q7, q4 |
| + vshr.s64 q7, q14, #26 |
| + vshl.i64 q13, q13, #25 |
| + vadd.i64 q14, q2, q11 |
| + vadd.i64 q8, q8, q7 |
| + vshl.i64 q7, q7, #26 |
| + vadd.i64 q15, q8, q3 |
| + vsub.i64 q9, q9, q13 |
| + vshr.s64 q13, q14, #26 |
| + vsub.i64 q6, q6, q7 |
| + vshr.s64 q7, q15, #25 |
| + vadd.i64 q10, q10, q13 |
| + vshl.i64 q13, q13, #26 |
| + vadd.i64 q14, q10, q3 |
| + vadd.i64 q1, q1, q7 |
| + add r2, r3, #240 |
| + vshl.i64 q7, q7, #25 |
| + add r4, r3, #144 |
| + vadd.i64 q15, q1, q11 |
| + add r2, r2, #8 |
| + vsub.i64 q2, q2, q13 |
| + add r4, r4, #8 |
| + vshr.s64 q13, q14, #25 |
| + vsub.i64 q7, q8, q7 |
| + vshr.s64 q8, q15, #26 |
| + vadd.i64 q14, q13, q13 |
| + vadd.i64 q12, q12, q8 |
| + vtrn.32 d12, d14 |
| + vshl.i64 q8, q8, #26 |
| + vtrn.32 d13, d15 |
| + vadd.i64 q3, q12, q3 |
| + vadd.i64 q0, q0, q14 |
| + vst1.8 d12, [r2, : 64]! |
| + vshl.i64 q7, q13, #4 |
| + vst1.8 d13, [r4, : 64]! |
| + vsub.i64 q1, q1, q8 |
| + vshr.s64 q3, q3, #25 |
| + vadd.i64 q0, q0, q7 |
| + vadd.i64 q5, q5, q3 |
| + vshl.i64 q3, q3, #25 |
| + vadd.i64 q6, q5, q11 |
| + vadd.i64 q0, q0, q13 |
| + vshl.i64 q7, q13, #25 |
| + vadd.i64 q8, q0, q11 |
| + vsub.i64 q3, q12, q3 |
| + vshr.s64 q6, q6, #26 |
| + vsub.i64 q7, q10, q7 |
| + vtrn.32 d2, d6 |
| + vshr.s64 q8, q8, #26 |
| + vtrn.32 d3, d7 |
| + vadd.i64 q3, q9, q6 |
| + vst1.8 d2, [r2, : 64] |
| + vshl.i64 q6, q6, #26 |
| + vst1.8 d3, [r4, : 64] |
| + vadd.i64 q1, q4, q8 |
| + vtrn.32 d4, d14 |
| + vshl.i64 q4, q8, #26 |
| + vtrn.32 d5, d15 |
| + vsub.i64 q5, q5, q6 |
| + add r2, r2, #16 |
| + vsub.i64 q0, q0, q4 |
| + vst1.8 d4, [r2, : 64] |
| + add r4, r4, #16 |
| + vst1.8 d5, [r4, : 64] |
| + vtrn.32 d10, d6 |
| + vtrn.32 d11, d7 |
| + sub r2, r2, #8 |
| + sub r4, r4, #8 |
| + vtrn.32 d0, d2 |
| + vtrn.32 d1, d3 |
| + vst1.8 d10, [r2, : 64] |
| + vst1.8 d11, [r4, : 64] |
| + sub r2, r2, #24 |
| + sub r4, r4, #24 |
| + vst1.8 d0, [r2, : 64] |
| + vst1.8 d1, [r4, : 64] |
| + ldr r2, [sp, #488] |
| + ldr r4, [sp, #492] |
| + subs r5, r2, #1 |
| + bge ._mainloop |
| + add r1, r3, #144 |
| + add r2, r3, #336 |
| + vld1.8 {d0-d1}, [r1, : 128]! |
| + vld1.8 {d2-d3}, [r1, : 128]! |
| + vld1.8 {d4}, [r1, : 64] |
| + vst1.8 {d0-d1}, [r2, : 128]! |
| + vst1.8 {d2-d3}, [r2, : 128]! |
| + vst1.8 d4, [r2, : 64] |
| + ldr r1, =0 |
| +._invertloop: |
| + add r2, r3, #144 |
| + ldr r4, =0 |
| + ldr r5, =2 |
| + cmp r1, #1 |
| + ldreq r5, =1 |
| + addeq r2, r3, #336 |
| + addeq r4, r3, #48 |
| + cmp r1, #2 |
| + ldreq r5, =1 |
| + addeq r2, r3, #48 |
| + cmp r1, #3 |
| + ldreq r5, =5 |
| + addeq r4, r3, #336 |
| + cmp r1, #4 |
| + ldreq r5, =10 |
| + cmp r1, #5 |
| + ldreq r5, =20 |
| + cmp r1, #6 |
| + ldreq r5, =10 |
| + addeq r2, r3, #336 |
| + addeq r4, r3, #336 |
| + cmp r1, #7 |
| + ldreq r5, =50 |
| + cmp r1, #8 |
| + ldreq r5, =100 |
| + cmp r1, #9 |
| + ldreq r5, =50 |
| + addeq r2, r3, #336 |
| + cmp r1, #10 |
| + ldreq r5, =5 |
| + addeq r2, r3, #48 |
| + cmp r1, #11 |
| + ldreq r5, =0 |
| + addeq r2, r3, #96 |
| + add r6, r3, #144 |
| + add r7, r3, #288 |
| + vld1.8 {d0-d1}, [r6, : 128]! |
| + vld1.8 {d2-d3}, [r6, : 128]! |
| + vld1.8 {d4}, [r6, : 64] |
| + vst1.8 {d0-d1}, [r7, : 128]! |
| + vst1.8 {d2-d3}, [r7, : 128]! |
| + vst1.8 d4, [r7, : 64] |
| + cmp r5, #0 |
| + beq ._skipsquaringloop |
| +._squaringloop: |
| + add r6, r3, #288 |
| + add r7, r3, #288 |
| + add r8, r3, #288 |
| + vmov.i32 q0, #19 |
| + vmov.i32 q1, #0 |
| + vmov.i32 q2, #1 |
| + vzip.i32 q1, q2 |
| + vld1.8 {d4-d5}, [r7, : 128]! |
| + vld1.8 {d6-d7}, [r7, : 128]! |
| + vld1.8 {d9}, [r7, : 64] |
| + vld1.8 {d10-d11}, [r6, : 128]! |
| + add r7, sp, #416 |
| + vld1.8 {d12-d13}, [r6, : 128]! |
| + vmul.i32 q7, q2, q0 |
| + vld1.8 {d8}, [r6, : 64] |
| + vext.32 d17, d11, d10, #1 |
| + vmul.i32 q9, q3, q0 |
| + vext.32 d16, d10, d8, #1 |
| + vshl.u32 q10, q5, q1 |
| + vext.32 d22, d14, d4, #1 |
| + vext.32 d24, d18, d6, #1 |
| + vshl.u32 q13, q6, q1 |
| + vshl.u32 d28, d8, d2 |
| + vrev64.i32 d22, d22 |
| + vmul.i32 d1, d9, d1 |
| + vrev64.i32 d24, d24 |
| + vext.32 d29, d8, d13, #1 |
| + vext.32 d0, d1, d9, #1 |
| + vrev64.i32 d0, d0 |
| + vext.32 d2, d9, d1, #1 |
| + vext.32 d23, d15, d5, #1 |
| + vmull.s32 q4, d20, d4 |
| + vrev64.i32 d23, d23 |
| + vmlal.s32 q4, d21, d1 |
| + vrev64.i32 d2, d2 |
| + vmlal.s32 q4, d26, d19 |
| + vext.32 d3, d5, d15, #1 |
| + vmlal.s32 q4, d27, d18 |
| + vrev64.i32 d3, d3 |
| + vmlal.s32 q4, d28, d15 |
| + vext.32 d14, d12, d11, #1 |
| + vmull.s32 q5, d16, d23 |
| + vext.32 d15, d13, d12, #1 |
| + vmlal.s32 q5, d17, d4 |
| + vst1.8 d8, [r7, : 64]! |
| + vmlal.s32 q5, d14, d1 |
| + vext.32 d12, d9, d8, #0 |
| + vmlal.s32 q5, d15, d19 |
| + vmov.i64 d13, #0 |
| + vmlal.s32 q5, d29, d18 |
| + vext.32 d25, d19, d7, #1 |
| + vmlal.s32 q6, d20, d5 |
| + vrev64.i32 d25, d25 |
| + vmlal.s32 q6, d21, d4 |
| + vst1.8 d11, [r7, : 64]! |
| + vmlal.s32 q6, d26, d1 |
| + vext.32 d9, d10, d10, #0 |
| + vmlal.s32 q6, d27, d19 |
| + vmov.i64 d8, #0 |
| + vmlal.s32 q6, d28, d18 |
| + vmlal.s32 q4, d16, d24 |
| + vmlal.s32 q4, d17, d5 |
| + vmlal.s32 q4, d14, d4 |
| + vst1.8 d12, [r7, : 64]! |
| + vmlal.s32 q4, d15, d1 |
| + vext.32 d10, d13, d12, #0 |
| + vmlal.s32 q4, d29, d19 |
| + vmov.i64 d11, #0 |
| + vmlal.s32 q5, d20, d6 |
| + vmlal.s32 q5, d21, d5 |
| + vmlal.s32 q5, d26, d4 |
| + vext.32 d13, d8, d8, #0 |
| + vmlal.s32 q5, d27, d1 |
| + vmov.i64 d12, #0 |
| + vmlal.s32 q5, d28, d19 |
| + vst1.8 d9, [r7, : 64]! |
| + vmlal.s32 q6, d16, d25 |
| + vmlal.s32 q6, d17, d6 |
| + vst1.8 d10, [r7, : 64] |
| + vmlal.s32 q6, d14, d5 |
| + vext.32 d8, d11, d10, #0 |
| + vmlal.s32 q6, d15, d4 |
| + vmov.i64 d9, #0 |
| + vmlal.s32 q6, d29, d1 |
| + vmlal.s32 q4, d20, d7 |
| + vmlal.s32 q4, d21, d6 |
| + vmlal.s32 q4, d26, d5 |
| + vext.32 d11, d12, d12, #0 |
| + vmlal.s32 q4, d27, d4 |
| + vmov.i64 d10, #0 |
| + vmlal.s32 q4, d28, d1 |
| + vmlal.s32 q5, d16, d0 |
| + sub r6, r7, #32 |
| + vmlal.s32 q5, d17, d7 |
| + vmlal.s32 q5, d14, d6 |
| + vext.32 d30, d9, d8, #0 |
| + vmlal.s32 q5, d15, d5 |
| + vld1.8 {d31}, [r6, : 64]! |
| + vmlal.s32 q5, d29, d4 |
| + vmlal.s32 q15, d20, d0 |
| + vext.32 d0, d6, d18, #1 |
| + vmlal.s32 q15, d21, d25 |
| + vrev64.i32 d0, d0 |
| + vmlal.s32 q15, d26, d24 |
| + vext.32 d1, d7, d19, #1 |
| + vext.32 d7, d10, d10, #0 |
| + vmlal.s32 q15, d27, d23 |
| + vrev64.i32 d1, d1 |
| + vld1.8 {d6}, [r6, : 64] |
| + vmlal.s32 q15, d28, d22 |
| + vmlal.s32 q3, d16, d4 |
| + add r6, r6, #24 |
| + vmlal.s32 q3, d17, d2 |
| + vext.32 d4, d31, d30, #0 |
| + vmov d17, d11 |
| + vmlal.s32 q3, d14, d1 |
| + vext.32 d11, d13, d13, #0 |
| + vext.32 d13, d30, d30, #0 |
| + vmlal.s32 q3, d15, d0 |
| + vext.32 d1, d8, d8, #0 |
| + vmlal.s32 q3, d29, d3 |
| + vld1.8 {d5}, [r6, : 64] |
| + sub r6, r6, #16 |
| + vext.32 d10, d6, d6, #0 |
| + vmov.i32 q1, #0xffffffff |
| + vshl.i64 q4, q1, #25 |
| + add r7, sp, #512 |
| + vld1.8 {d14-d15}, [r7, : 128] |
| + vadd.i64 q9, q2, q7 |
| + vshl.i64 q1, q1, #26 |
| + vshr.s64 q10, q9, #26 |
| + vld1.8 {d0}, [r6, : 64]! |
| + vadd.i64 q5, q5, q10 |
| + vand q9, q9, q1 |
| + vld1.8 {d16}, [r6, : 64]! |
| + add r6, sp, #528 |
| + vld1.8 {d20-d21}, [r6, : 128] |
| + vadd.i64 q11, q5, q10 |
| + vsub.i64 q2, q2, q9 |
| + vshr.s64 q9, q11, #25 |
| + vext.32 d12, d5, d4, #0 |
| + vand q11, q11, q4 |
| + vadd.i64 q0, q0, q9 |
| + vmov d19, d7 |
| + vadd.i64 q3, q0, q7 |
| + vsub.i64 q5, q5, q11 |
| + vshr.s64 q11, q3, #26 |
| + vext.32 d18, d11, d10, #0 |
| + vand q3, q3, q1 |
| + vadd.i64 q8, q8, q11 |
| + vadd.i64 q11, q8, q10 |
| + vsub.i64 q0, q0, q3 |
| + vshr.s64 q3, q11, #25 |
| + vand q11, q11, q4 |
| + vadd.i64 q3, q6, q3 |
| + vadd.i64 q6, q3, q7 |
| + vsub.i64 q8, q8, q11 |
| + vshr.s64 q11, q6, #26 |
| + vand q6, q6, q1 |
| + vadd.i64 q9, q9, q11 |
| + vadd.i64 d25, d19, d21 |
| + vsub.i64 q3, q3, q6 |
| + vshr.s64 d23, d25, #25 |
| + vand q4, q12, q4 |
| + vadd.i64 d21, d23, d23 |
| + vshl.i64 d25, d23, #4 |
| + vadd.i64 d21, d21, d23 |
| + vadd.i64 d25, d25, d21 |
| + vadd.i64 d4, d4, d25 |
| + vzip.i32 q0, q8 |
| + vadd.i64 d12, d4, d14 |
| + add r6, r8, #8 |
| + vst1.8 d0, [r6, : 64] |
| + vsub.i64 d19, d19, d9 |
| + add r6, r6, #16 |
| + vst1.8 d16, [r6, : 64] |
| + vshr.s64 d22, d12, #26 |
| + vand q0, q6, q1 |
| + vadd.i64 d10, d10, d22 |
| + vzip.i32 q3, q9 |
| + vsub.i64 d4, d4, d0 |
| + sub r6, r6, #8 |
| + vst1.8 d6, [r6, : 64] |
| + add r6, r6, #16 |
| + vst1.8 d18, [r6, : 64] |
| + vzip.i32 q2, q5 |
| + sub r6, r6, #32 |
| + vst1.8 d4, [r6, : 64] |
| + subs r5, r5, #1 |
| + bhi ._squaringloop |
| +._skipsquaringloop: |
| + mov r2, r2 |
| + add r5, r3, #288 |
| + add r6, r3, #144 |
| + vmov.i32 q0, #19 |
| + vmov.i32 q1, #0 |
| + vmov.i32 q2, #1 |
| + vzip.i32 q1, q2 |
| + vld1.8 {d4-d5}, [r5, : 128]! |
| + vld1.8 {d6-d7}, [r5, : 128]! |
| + vld1.8 {d9}, [r5, : 64] |
| + vld1.8 {d10-d11}, [r2, : 128]! |
| + add r5, sp, #416 |
| + vld1.8 {d12-d13}, [r2, : 128]! |
| + vmul.i32 q7, q2, q0 |
| + vld1.8 {d8}, [r2, : 64] |
| + vext.32 d17, d11, d10, #1 |
| + vmul.i32 q9, q3, q0 |
| + vext.32 d16, d10, d8, #1 |
| + vshl.u32 q10, q5, q1 |
| + vext.32 d22, d14, d4, #1 |
| + vext.32 d24, d18, d6, #1 |
| + vshl.u32 q13, q6, q1 |
| + vshl.u32 d28, d8, d2 |
| + vrev64.i32 d22, d22 |
| + vmul.i32 d1, d9, d1 |
| + vrev64.i32 d24, d24 |
| + vext.32 d29, d8, d13, #1 |
| + vext.32 d0, d1, d9, #1 |
| + vrev64.i32 d0, d0 |
| + vext.32 d2, d9, d1, #1 |
| + vext.32 d23, d15, d5, #1 |
| + vmull.s32 q4, d20, d4 |
| + vrev64.i32 d23, d23 |
| + vmlal.s32 q4, d21, d1 |
| + vrev64.i32 d2, d2 |
| + vmlal.s32 q4, d26, d19 |
| + vext.32 d3, d5, d15, #1 |
| + vmlal.s32 q4, d27, d18 |
| + vrev64.i32 d3, d3 |
| + vmlal.s32 q4, d28, d15 |
| + vext.32 d14, d12, d11, #1 |
| + vmull.s32 q5, d16, d23 |
| + vext.32 d15, d13, d12, #1 |
| + vmlal.s32 q5, d17, d4 |
| + vst1.8 d8, [r5, : 64]! |
| + vmlal.s32 q5, d14, d1 |
| + vext.32 d12, d9, d8, #0 |
| + vmlal.s32 q5, d15, d19 |
| + vmov.i64 d13, #0 |
| + vmlal.s32 q5, d29, d18 |
| + vext.32 d25, d19, d7, #1 |
| + vmlal.s32 q6, d20, d5 |
| + vrev64.i32 d25, d25 |
| + vmlal.s32 q6, d21, d4 |
| + vst1.8 d11, [r5, : 64]! |
| + vmlal.s32 q6, d26, d1 |
| + vext.32 d9, d10, d10, #0 |
| + vmlal.s32 q6, d27, d19 |
| + vmov.i64 d8, #0 |
| + vmlal.s32 q6, d28, d18 |
| + vmlal.s32 q4, d16, d24 |
| + vmlal.s32 q4, d17, d5 |
| + vmlal.s32 q4, d14, d4 |
| + vst1.8 d12, [r5, : 64]! |
| + vmlal.s32 q4, d15, d1 |
| + vext.32 d10, d13, d12, #0 |
| + vmlal.s32 q4, d29, d19 |
| + vmov.i64 d11, #0 |
| + vmlal.s32 q5, d20, d6 |
| + vmlal.s32 q5, d21, d5 |
| + vmlal.s32 q5, d26, d4 |
| + vext.32 d13, d8, d8, #0 |
| + vmlal.s32 q5, d27, d1 |
| + vmov.i64 d12, #0 |
| + vmlal.s32 q5, d28, d19 |
| + vst1.8 d9, [r5, : 64]! |
| + vmlal.s32 q6, d16, d25 |
| + vmlal.s32 q6, d17, d6 |
| + vst1.8 d10, [r5, : 64] |
| + vmlal.s32 q6, d14, d5 |
| + vext.32 d8, d11, d10, #0 |
| + vmlal.s32 q6, d15, d4 |
| + vmov.i64 d9, #0 |
| + vmlal.s32 q6, d29, d1 |
| + vmlal.s32 q4, d20, d7 |
| + vmlal.s32 q4, d21, d6 |
| + vmlal.s32 q4, d26, d5 |
| + vext.32 d11, d12, d12, #0 |
| + vmlal.s32 q4, d27, d4 |
| + vmov.i64 d10, #0 |
| + vmlal.s32 q4, d28, d1 |
| + vmlal.s32 q5, d16, d0 |
| + sub r2, r5, #32 |
| + vmlal.s32 q5, d17, d7 |
| + vmlal.s32 q5, d14, d6 |
| + vext.32 d30, d9, d8, #0 |
| + vmlal.s32 q5, d15, d5 |
| + vld1.8 {d31}, [r2, : 64]! |
| + vmlal.s32 q5, d29, d4 |
| + vmlal.s32 q15, d20, d0 |
| + vext.32 d0, d6, d18, #1 |
| + vmlal.s32 q15, d21, d25 |
| + vrev64.i32 d0, d0 |
| + vmlal.s32 q15, d26, d24 |
| + vext.32 d1, d7, d19, #1 |
| + vext.32 d7, d10, d10, #0 |
| + vmlal.s32 q15, d27, d23 |
| + vrev64.i32 d1, d1 |
| + vld1.8 {d6}, [r2, : 64] |
| + vmlal.s32 q15, d28, d22 |
| + vmlal.s32 q3, d16, d4 |
| + add r2, r2, #24 |
| + vmlal.s32 q3, d17, d2 |
| + vext.32 d4, d31, d30, #0 |
| + vmov d17, d11 |
| + vmlal.s32 q3, d14, d1 |
| + vext.32 d11, d13, d13, #0 |
| + vext.32 d13, d30, d30, #0 |
| + vmlal.s32 q3, d15, d0 |
| + vext.32 d1, d8, d8, #0 |
| + vmlal.s32 q3, d29, d3 |
| + vld1.8 {d5}, [r2, : 64] |
| + sub r2, r2, #16 |
| + vext.32 d10, d6, d6, #0 |
| + vmov.i32 q1, #0xffffffff |
| + vshl.i64 q4, q1, #25 |
| + add r5, sp, #512 |
| + vld1.8 {d14-d15}, [r5, : 128] |
| + vadd.i64 q9, q2, q7 |
| + vshl.i64 q1, q1, #26 |
| + vshr.s64 q10, q9, #26 |
| + vld1.8 {d0}, [r2, : 64]! |
| + vadd.i64 q5, q5, q10 |
| + vand q9, q9, q1 |
| + vld1.8 {d16}, [r2, : 64]! |
| + add r2, sp, #528 |
| + vld1.8 {d20-d21}, [r2, : 128] |
| + vadd.i64 q11, q5, q10 |
| + vsub.i64 q2, q2, q9 |
| + vshr.s64 q9, q11, #25 |
| + vext.32 d12, d5, d4, #0 |
| + vand q11, q11, q4 |
| + vadd.i64 q0, q0, q9 |
| + vmov d19, d7 |
| + vadd.i64 q3, q0, q7 |
| + vsub.i64 q5, q5, q11 |
| + vshr.s64 q11, q3, #26 |
| + vext.32 d18, d11, d10, #0 |
| + vand q3, q3, q1 |
| + vadd.i64 q8, q8, q11 |
| + vadd.i64 q11, q8, q10 |
| + vsub.i64 q0, q0, q3 |
| + vshr.s64 q3, q11, #25 |
| + vand q11, q11, q4 |
| + vadd.i64 q3, q6, q3 |
| + vadd.i64 q6, q3, q7 |
| + vsub.i64 q8, q8, q11 |
| + vshr.s64 q11, q6, #26 |
| + vand q6, q6, q1 |
| + vadd.i64 q9, q9, q11 |
| + vadd.i64 d25, d19, d21 |
| + vsub.i64 q3, q3, q6 |
| + vshr.s64 d23, d25, #25 |
| + vand q4, q12, q4 |
| + vadd.i64 d21, d23, d23 |
| + vshl.i64 d25, d23, #4 |
| + vadd.i64 d21, d21, d23 |
| + vadd.i64 d25, d25, d21 |
| + vadd.i64 d4, d4, d25 |
| + vzip.i32 q0, q8 |
| + vadd.i64 d12, d4, d14 |
| + add r2, r6, #8 |
| + vst1.8 d0, [r2, : 64] |
| + vsub.i64 d19, d19, d9 |
| + add r2, r2, #16 |
| + vst1.8 d16, [r2, : 64] |
| + vshr.s64 d22, d12, #26 |
| + vand q0, q6, q1 |
| + vadd.i64 d10, d10, d22 |
| + vzip.i32 q3, q9 |
| + vsub.i64 d4, d4, d0 |
| + sub r2, r2, #8 |
| + vst1.8 d6, [r2, : 64] |
| + add r2, r2, #16 |
| + vst1.8 d18, [r2, : 64] |
| + vzip.i32 q2, q5 |
| + sub r2, r2, #32 |
| + vst1.8 d4, [r2, : 64] |
| + cmp r4, #0 |
| + beq ._skippostcopy |
| + add r2, r3, #144 |
| + mov r4, r4 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d4}, [r2, : 64] |
| + vst1.8 {d0-d1}, [r4, : 128]! |
| + vst1.8 {d2-d3}, [r4, : 128]! |
| + vst1.8 d4, [r4, : 64] |
| +._skippostcopy: |
| + cmp r1, #1 |
| + bne ._skipfinalcopy |
| + add r2, r3, #288 |
| + add r4, r3, #144 |
| + vld1.8 {d0-d1}, [r2, : 128]! |
| + vld1.8 {d2-d3}, [r2, : 128]! |
| + vld1.8 {d4}, [r2, : 64] |
| + vst1.8 {d0-d1}, [r4, : 128]! |
| + vst1.8 {d2-d3}, [r4, : 128]! |
| + vst1.8 d4, [r4, : 64] |
| +._skipfinalcopy: |
| + add r1, r1, #1 |
| + cmp r1, #12 |
| + blo ._invertloop |
| + add r1, r3, #144 |
| + ldr r2, [r1], #4 |
| + ldr r3, [r1], #4 |
| + ldr r4, [r1], #4 |
| + ldr r5, [r1], #4 |
| + ldr r6, [r1], #4 |
| + ldr r7, [r1], #4 |
| + ldr r8, [r1], #4 |
| + ldr r9, [r1], #4 |
| + ldr r10, [r1], #4 |
| + ldr r1, [r1] |
| + add r11, r1, r1, LSL #4 |
| + add r11, r11, r1, LSL #1 |
| + add r11, r11, #16777216 |
| + mov r11, r11, ASR #25 |
| + add r11, r11, r2 |
| + mov r11, r11, ASR #26 |
| + add r11, r11, r3 |
| + mov r11, r11, ASR #25 |
| + add r11, r11, r4 |
| + mov r11, r11, ASR #26 |
| + add r11, r11, r5 |
| + mov r11, r11, ASR #25 |
| + add r11, r11, r6 |
| + mov r11, r11, ASR #26 |
| + add r11, r11, r7 |
| + mov r11, r11, ASR #25 |
| + add r11, r11, r8 |
| + mov r11, r11, ASR #26 |
| + add r11, r11, r9 |
| + mov r11, r11, ASR #25 |
| + add r11, r11, r10 |
| + mov r11, r11, ASR #26 |
| + add r11, r11, r1 |
| + mov r11, r11, ASR #25 |
| + add r2, r2, r11 |
| + add r2, r2, r11, LSL #1 |
| + add r2, r2, r11, LSL #4 |
| + mov r11, r2, ASR #26 |
| + add r3, r3, r11 |
| + sub r2, r2, r11, LSL #26 |
| + mov r11, r3, ASR #25 |
| + add r4, r4, r11 |
| + sub r3, r3, r11, LSL #25 |
| + mov r11, r4, ASR #26 |
| + add r5, r5, r11 |
| + sub r4, r4, r11, LSL #26 |
| + mov r11, r5, ASR #25 |
| + add r6, r6, r11 |
| + sub r5, r5, r11, LSL #25 |
| + mov r11, r6, ASR #26 |
| + add r7, r7, r11 |
| + sub r6, r6, r11, LSL #26 |
| + mov r11, r7, ASR #25 |
| + add r8, r8, r11 |
| + sub r7, r7, r11, LSL #25 |
| + mov r11, r8, ASR #26 |
| + add r9, r9, r11 |
| + sub r8, r8, r11, LSL #26 |
| + mov r11, r9, ASR #25 |
| + add r10, r10, r11 |
| + sub r9, r9, r11, LSL #25 |
| + mov r11, r10, ASR #26 |
| + add r1, r1, r11 |
| + sub r10, r10, r11, LSL #26 |
| + mov r11, r1, ASR #25 |
| + sub r1, r1, r11, LSL #25 |
| + add r2, r2, r3, LSL #26 |
| + mov r3, r3, LSR #6 |
| + add r3, r3, r4, LSL #19 |
| + mov r4, r4, LSR #13 |
| + add r4, r4, r5, LSL #13 |
| + mov r5, r5, LSR #19 |
| + add r5, r5, r6, LSL #6 |
| + add r6, r7, r8, LSL #25 |
| + mov r7, r8, LSR #7 |
| + add r7, r7, r9, LSL #19 |
| + mov r8, r9, LSR #13 |
| + add r8, r8, r10, LSL #12 |
| + mov r9, r10, LSR #20 |
| + add r1, r9, r1, LSL #6 |
| + str r2, [r0], #4 |
| + str r3, [r0], #4 |
| + str r4, [r0], #4 |
| + str r5, [r0], #4 |
| + str r6, [r0], #4 |
| + str r7, [r0], #4 |
| + str r8, [r0], #4 |
| + str r1, [r0] |
| + ldrd r4, [sp, #0] |
| + ldrd r6, [sp, #8] |
| + ldrd r8, [sp, #16] |
| + ldrd r10, [sp, #24] |
| + ldr r12, [sp, #480] |
| + ldr r14, [sp, #484] |
| + ldr r0, =0 |
| + mov sp, r12 |
| + vpop {q4, q5, q6, q7} |
| + bx lr |
| -- |
| 2.18.2 |
| |
| |
| From 69668393c502f10dbd39d1c89f312b3bdce30763 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 8 Nov 2019 13:22:38 +0100 |
| Subject: [PATCH 031/100] crypto: arm/curve25519 - wire up NEON implementation |
| |
| commit d8f1308a025fc7e00414194ed742d5f05a21e13c upstream. |
| |
| This ports the SUPERCOP implementation for usage in kernel space. In |
| addition to the usual header, macro, and style changes required for |
| kernel space, it makes a few small changes to the code: |
| |
| - The stack alignment is relaxed to 16 bytes. |
| - Superfluous mov statements have been removed. |
| - ldr for constants has been replaced with movw. |
| - ldreq has been replaced with moveq. |
| - The str epilogue has been made more idiomatic. |
| - SIMD registers are not pushed and popped at the beginning and end. |
| - The prologue and epilogue have been made idiomatic. |
| - A hole has been removed from the stack, saving 32 bytes. |
| - We write back the base register whenever possible for vld1.8. |
| - Some multiplications have been reordered for better A7 performance. |
| |
| There are more opportunities for cleanup, since this code is from qhasm, |
| which doesn't always do the most opportune thing. But even prior to |
| extensive hand optimizations, this code delivers significant performance |
| improvements (given in get_cycles() per call): |
| |
| ----------- ------------- |
| | generic C | this commit | |
| ------------ ----------- ------------- |
| | Cortex-A7 | 49136 | 22395 | |
| ------------ ----------- ------------- |
| | Cortex-A17 | 17326 | 4983 | |
| ------------ ----------- ------------- |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| [ardb: - move to arch/arm/crypto |
| - wire into lib/crypto framework |
| - implement crypto API KPP hooks ] |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/Kconfig | 6 + |
| arch/arm/crypto/Makefile | 2 + |
| arch/arm/crypto/curve25519-core.S | 347 +++++++++++++----------------- |
| arch/arm/crypto/curve25519-glue.c | 127 +++++++++++ |
| 4 files changed, 287 insertions(+), 195 deletions(-) |
| create mode 100644 arch/arm/crypto/curve25519-glue.c |
| |
| diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig |
| index 2e8a9289bded..61fa7e4aa8f9 100644 |
| |
| |
| @@ -141,4 +141,10 @@ config CRYPTO_NHPOLY1305_NEON |
| depends on KERNEL_MODE_NEON |
| select CRYPTO_NHPOLY1305 |
| |
| +config CRYPTO_CURVE25519_NEON |
| + tristate "NEON accelerated Curve25519 scalar multiplication library" |
| + depends on KERNEL_MODE_NEON |
| + select CRYPTO_LIB_CURVE25519_GENERIC |
| + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 |
| + |
| endif |
| diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile |
| index 4f6a8a81dabc..7700385cec9f 100644 |
| |
| |
| @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o |
| obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o |
| obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o |
| +obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o |
| |
| ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o |
| ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o |
| @@ -58,6 +59,7 @@ chacha-neon-y := chacha-scalar-core.o chacha-glue.o |
| chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o |
| poly1305-arm-y := poly1305-core.o poly1305-glue.o |
| nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o |
| +curve25519-neon-y := curve25519-core.o curve25519-glue.o |
| |
| ifdef REGENERATE_ARM_CRYPTO |
| quiet_cmd_perl = PERL $@ |
| diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S |
| index f33b85fef382..be18af52e7dc 100644 |
| |
| |
| @@ -1,43 +1,35 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| /* |
| - * Public domain code from Daniel J. Bernstein and Peter Schwabe, from |
| - * SUPERCOP's curve25519/neon2/scalarmult.s. |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This |
| + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been |
| + * manually reworked for use in kernel space. |
| */ |
| |
| -.fpu neon |
| +#include <linux/linkage.h> |
| + |
| .text |
| +.fpu neon |
| +.arch armv7-a |
| .align 4 |
| -.global _crypto_scalarmult_curve25519_neon2 |
| -.global crypto_scalarmult_curve25519_neon2 |
| -.type _crypto_scalarmult_curve25519_neon2 STT_FUNC |
| -.type crypto_scalarmult_curve25519_neon2 STT_FUNC |
| - _crypto_scalarmult_curve25519_neon2: |
| - crypto_scalarmult_curve25519_neon2: |
| - vpush {q4, q5, q6, q7} |
| - mov r12, sp |
| - sub sp, sp, #736 |
| - and sp, sp, #0xffffffe0 |
| - strd r4, [sp, #0] |
| - strd r6, [sp, #8] |
| - strd r8, [sp, #16] |
| - strd r10, [sp, #24] |
| - str r12, [sp, #480] |
| - str r14, [sp, #484] |
| - mov r0, r0 |
| - mov r1, r1 |
| - mov r2, r2 |
| - add r3, sp, #32 |
| - ldr r4, =0 |
| - ldr r5, =254 |
| + |
| +ENTRY(curve25519_neon) |
| + push {r4-r11, lr} |
| + mov ip, sp |
| + sub r3, sp, #704 |
| + and r3, r3, #0xfffffff0 |
| + mov sp, r3 |
| + movw r4, #0 |
| + movw r5, #254 |
| vmov.i32 q0, #1 |
| vshr.u64 q1, q0, #7 |
| vshr.u64 q0, q0, #8 |
| vmov.i32 d4, #19 |
| vmov.i32 d5, #38 |
| - add r6, sp, #512 |
| - vst1.8 {d2-d3}, [r6, : 128] |
| - add r6, sp, #528 |
| - vst1.8 {d0-d1}, [r6, : 128] |
| - add r6, sp, #544 |
| + add r6, sp, #480 |
| + vst1.8 {d2-d3}, [r6, : 128]! |
| + vst1.8 {d0-d1}, [r6, : 128]! |
| vst1.8 {d4-d5}, [r6, : 128] |
| add r6, r3, #0 |
| vmov.i32 q2, #0 |
| @@ -45,12 +37,12 @@ |
| vst1.8 {d4-d5}, [r6, : 128]! |
| vst1.8 d4, [r6, : 64] |
| add r6, r3, #0 |
| - ldr r7, =960 |
| + movw r7, #960 |
| sub r7, r7, #2 |
| neg r7, r7 |
| sub r7, r7, r7, LSL #7 |
| str r7, [r6] |
| - add r6, sp, #704 |
| + add r6, sp, #672 |
| vld1.8 {d4-d5}, [r1]! |
| vld1.8 {d6-d7}, [r1] |
| vst1.8 {d4-d5}, [r6, : 128]! |
| @@ -212,15 +204,15 @@ |
| vst1.8 {d0-d1}, [r6, : 128]! |
| vst1.8 {d2-d3}, [r6, : 128]! |
| vst1.8 d4, [r6, : 64] |
| -._mainloop: |
| +.Lmainloop: |
| mov r2, r5, LSR #3 |
| and r6, r5, #7 |
| ldrb r2, [r1, r2] |
| mov r2, r2, LSR r6 |
| and r2, r2, #1 |
| - str r5, [sp, #488] |
| + str r5, [sp, #456] |
| eor r4, r4, r2 |
| - str r2, [sp, #492] |
| + str r2, [sp, #460] |
| neg r2, r4 |
| add r4, r3, #96 |
| add r5, r3, #192 |
| @@ -291,7 +283,7 @@ |
| vsub.i32 q0, q1, q3 |
| vst1.8 d4, [r4, : 64] |
| vst1.8 d0, [r6, : 64] |
| - add r2, sp, #544 |
| + add r2, sp, #512 |
| add r4, r3, #96 |
| add r5, r3, #144 |
| vld1.8 {d0-d1}, [r2, : 128] |
| @@ -361,14 +353,13 @@ |
| vmlal.s32 q0, d12, d8 |
| vmlal.s32 q0, d13, d17 |
| vmlal.s32 q0, d6, d6 |
| - add r2, sp, #512 |
| - vld1.8 {d18-d19}, [r2, : 128] |
| + add r2, sp, #480 |
| + vld1.8 {d18-d19}, [r2, : 128]! |
| vmull.s32 q3, d16, d7 |
| vmlal.s32 q3, d10, d15 |
| vmlal.s32 q3, d11, d14 |
| vmlal.s32 q3, d12, d9 |
| vmlal.s32 q3, d13, d8 |
| - add r2, sp, #528 |
| vld1.8 {d8-d9}, [r2, : 128] |
| vadd.i64 q5, q12, q9 |
| vadd.i64 q6, q15, q9 |
| @@ -502,22 +493,19 @@ |
| vadd.i32 q5, q5, q0 |
| vtrn.32 q11, q14 |
| vadd.i32 q6, q6, q3 |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vadd.i32 q10, q10, q2 |
| vtrn.32 d24, d25 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q6, q13, #1 |
| - add r2, sp, #576 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vshl.i32 q10, q14, #1 |
| - add r2, sp, #592 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q15, q12, #1 |
| vadd.i32 q8, q8, q4 |
| vext.32 d10, d31, d30, #0 |
| vadd.i32 q7, q7, q1 |
| - add r2, sp, #608 |
| - vst1.8 {d16-d17}, [r2, : 128] |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| vmull.s32 q8, d18, d5 |
| vmlal.s32 q8, d26, d4 |
| vmlal.s32 q8, d19, d9 |
| @@ -528,8 +516,7 @@ |
| vmlal.s32 q8, d29, d1 |
| vmlal.s32 q8, d24, d6 |
| vmlal.s32 q8, d25, d0 |
| - add r2, sp, #624 |
| - vst1.8 {d14-d15}, [r2, : 128] |
| + vst1.8 {d14-d15}, [r2, : 128]! |
| vmull.s32 q2, d18, d4 |
| vmlal.s32 q2, d12, d9 |
| vmlal.s32 q2, d13, d8 |
| @@ -537,8 +524,7 @@ |
| vmlal.s32 q2, d22, d2 |
| vmlal.s32 q2, d23, d1 |
| vmlal.s32 q2, d24, d0 |
| - add r2, sp, #640 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vmull.s32 q7, d18, d9 |
| vmlal.s32 q7, d26, d3 |
| vmlal.s32 q7, d19, d8 |
| @@ -547,14 +533,12 @@ |
| vmlal.s32 q7, d28, d1 |
| vmlal.s32 q7, d23, d6 |
| vmlal.s32 q7, d29, d0 |
| - add r2, sp, #656 |
| - vst1.8 {d10-d11}, [r2, : 128] |
| + vst1.8 {d10-d11}, [r2, : 128]! |
| vmull.s32 q5, d18, d3 |
| vmlal.s32 q5, d19, d2 |
| vmlal.s32 q5, d22, d1 |
| vmlal.s32 q5, d23, d0 |
| vmlal.s32 q5, d12, d8 |
| - add r2, sp, #672 |
| vst1.8 {d16-d17}, [r2, : 128] |
| vmull.s32 q4, d18, d8 |
| vmlal.s32 q4, d26, d2 |
| @@ -566,7 +550,7 @@ |
| vmlal.s32 q8, d26, d1 |
| vmlal.s32 q8, d19, d6 |
| vmlal.s32 q8, d27, d0 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q7, d24, d21 |
| vmlal.s32 q7, d25, d20 |
| @@ -575,32 +559,30 @@ |
| vmlal.s32 q8, d22, d21 |
| vmlal.s32 q8, d28, d20 |
| vmlal.s32 q5, d24, d20 |
| - add r2, sp, #576 |
| vst1.8 {d14-d15}, [r2, : 128] |
| vmull.s32 q7, d18, d6 |
| vmlal.s32 q7, d26, d0 |
| - add r2, sp, #656 |
| + add r2, sp, #624 |
| vld1.8 {d30-d31}, [r2, : 128] |
| vmlal.s32 q2, d30, d21 |
| vmlal.s32 q7, d19, d21 |
| vmlal.s32 q7, d27, d20 |
| - add r2, sp, #624 |
| + add r2, sp, #592 |
| vld1.8 {d26-d27}, [r2, : 128] |
| vmlal.s32 q4, d25, d27 |
| vmlal.s32 q8, d29, d27 |
| vmlal.s32 q8, d25, d26 |
| vmlal.s32 q7, d28, d27 |
| vmlal.s32 q7, d29, d26 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d28-d29}, [r2, : 128] |
| vmlal.s32 q4, d24, d29 |
| vmlal.s32 q8, d23, d29 |
| vmlal.s32 q8, d24, d28 |
| vmlal.s32 q7, d22, d29 |
| vmlal.s32 q7, d23, d28 |
| - add r2, sp, #608 |
| vst1.8 {d8-d9}, [r2, : 128] |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vld1.8 {d8-d9}, [r2, : 128] |
| vmlal.s32 q7, d24, d9 |
| vmlal.s32 q7, d25, d31 |
| @@ -621,36 +603,36 @@ |
| vmlal.s32 q0, d23, d26 |
| vmlal.s32 q0, d24, d31 |
| vmlal.s32 q0, d19, d20 |
| - add r2, sp, #640 |
| + add r2, sp, #608 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q2, d18, d7 |
| - vmlal.s32 q2, d19, d6 |
| vmlal.s32 q5, d18, d6 |
| - vmlal.s32 q5, d19, d21 |
| vmlal.s32 q1, d18, d21 |
| - vmlal.s32 q1, d19, d29 |
| vmlal.s32 q0, d18, d28 |
| - vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d19, d28 |
| - add r2, sp, #592 |
| + add r2, sp, #560 |
| vld1.8 {d18-d19}, [r2, : 128] |
| - add r2, sp, #512 |
| + add r2, sp, #480 |
| vld1.8 {d22-d23}, [r2, : 128] |
| vmlal.s32 q5, d19, d7 |
| vmlal.s32 q0, d18, d21 |
| vmlal.s32 q0, d19, d29 |
| vmlal.s32 q6, d18, d6 |
| - add r2, sp, #528 |
| + add r2, sp, #496 |
| vld1.8 {d6-d7}, [r2, : 128] |
| vmlal.s32 q6, d19, d21 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q0, d30, d8 |
| - add r2, sp, #672 |
| + add r2, sp, #640 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q5, d30, d29 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d24-d25}, [r2, : 128] |
| vmlal.s32 q1, d30, d28 |
| vadd.i64 q13, q0, q11 |
| @@ -823,22 +805,19 @@ |
| vadd.i32 q5, q5, q0 |
| vtrn.32 q11, q14 |
| vadd.i32 q6, q6, q3 |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vadd.i32 q10, q10, q2 |
| vtrn.32 d24, d25 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q6, q13, #1 |
| - add r2, sp, #576 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vshl.i32 q10, q14, #1 |
| - add r2, sp, #592 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q15, q12, #1 |
| vadd.i32 q8, q8, q4 |
| vext.32 d10, d31, d30, #0 |
| vadd.i32 q7, q7, q1 |
| - add r2, sp, #608 |
| - vst1.8 {d16-d17}, [r2, : 128] |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| vmull.s32 q8, d18, d5 |
| vmlal.s32 q8, d26, d4 |
| vmlal.s32 q8, d19, d9 |
| @@ -849,8 +828,7 @@ |
| vmlal.s32 q8, d29, d1 |
| vmlal.s32 q8, d24, d6 |
| vmlal.s32 q8, d25, d0 |
| - add r2, sp, #624 |
| - vst1.8 {d14-d15}, [r2, : 128] |
| + vst1.8 {d14-d15}, [r2, : 128]! |
| vmull.s32 q2, d18, d4 |
| vmlal.s32 q2, d12, d9 |
| vmlal.s32 q2, d13, d8 |
| @@ -858,8 +836,7 @@ |
| vmlal.s32 q2, d22, d2 |
| vmlal.s32 q2, d23, d1 |
| vmlal.s32 q2, d24, d0 |
| - add r2, sp, #640 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vmull.s32 q7, d18, d9 |
| vmlal.s32 q7, d26, d3 |
| vmlal.s32 q7, d19, d8 |
| @@ -868,15 +845,13 @@ |
| vmlal.s32 q7, d28, d1 |
| vmlal.s32 q7, d23, d6 |
| vmlal.s32 q7, d29, d0 |
| - add r2, sp, #656 |
| - vst1.8 {d10-d11}, [r2, : 128] |
| + vst1.8 {d10-d11}, [r2, : 128]! |
| vmull.s32 q5, d18, d3 |
| vmlal.s32 q5, d19, d2 |
| vmlal.s32 q5, d22, d1 |
| vmlal.s32 q5, d23, d0 |
| vmlal.s32 q5, d12, d8 |
| - add r2, sp, #672 |
| - vst1.8 {d16-d17}, [r2, : 128] |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| vmull.s32 q4, d18, d8 |
| vmlal.s32 q4, d26, d2 |
| vmlal.s32 q4, d19, d7 |
| @@ -887,7 +862,7 @@ |
| vmlal.s32 q8, d26, d1 |
| vmlal.s32 q8, d19, d6 |
| vmlal.s32 q8, d27, d0 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q7, d24, d21 |
| vmlal.s32 q7, d25, d20 |
| @@ -896,32 +871,30 @@ |
| vmlal.s32 q8, d22, d21 |
| vmlal.s32 q8, d28, d20 |
| vmlal.s32 q5, d24, d20 |
| - add r2, sp, #576 |
| vst1.8 {d14-d15}, [r2, : 128] |
| vmull.s32 q7, d18, d6 |
| vmlal.s32 q7, d26, d0 |
| - add r2, sp, #656 |
| + add r2, sp, #624 |
| vld1.8 {d30-d31}, [r2, : 128] |
| vmlal.s32 q2, d30, d21 |
| vmlal.s32 q7, d19, d21 |
| vmlal.s32 q7, d27, d20 |
| - add r2, sp, #624 |
| + add r2, sp, #592 |
| vld1.8 {d26-d27}, [r2, : 128] |
| vmlal.s32 q4, d25, d27 |
| vmlal.s32 q8, d29, d27 |
| vmlal.s32 q8, d25, d26 |
| vmlal.s32 q7, d28, d27 |
| vmlal.s32 q7, d29, d26 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d28-d29}, [r2, : 128] |
| vmlal.s32 q4, d24, d29 |
| vmlal.s32 q8, d23, d29 |
| vmlal.s32 q8, d24, d28 |
| vmlal.s32 q7, d22, d29 |
| vmlal.s32 q7, d23, d28 |
| - add r2, sp, #608 |
| vst1.8 {d8-d9}, [r2, : 128] |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vld1.8 {d8-d9}, [r2, : 128] |
| vmlal.s32 q7, d24, d9 |
| vmlal.s32 q7, d25, d31 |
| @@ -942,36 +915,36 @@ |
| vmlal.s32 q0, d23, d26 |
| vmlal.s32 q0, d24, d31 |
| vmlal.s32 q0, d19, d20 |
| - add r2, sp, #640 |
| + add r2, sp, #608 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q2, d18, d7 |
| - vmlal.s32 q2, d19, d6 |
| vmlal.s32 q5, d18, d6 |
| - vmlal.s32 q5, d19, d21 |
| vmlal.s32 q1, d18, d21 |
| - vmlal.s32 q1, d19, d29 |
| vmlal.s32 q0, d18, d28 |
| - vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d19, d28 |
| - add r2, sp, #592 |
| + add r2, sp, #560 |
| vld1.8 {d18-d19}, [r2, : 128] |
| - add r2, sp, #512 |
| + add r2, sp, #480 |
| vld1.8 {d22-d23}, [r2, : 128] |
| vmlal.s32 q5, d19, d7 |
| vmlal.s32 q0, d18, d21 |
| vmlal.s32 q0, d19, d29 |
| vmlal.s32 q6, d18, d6 |
| - add r2, sp, #528 |
| + add r2, sp, #496 |
| vld1.8 {d6-d7}, [r2, : 128] |
| vmlal.s32 q6, d19, d21 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q0, d30, d8 |
| - add r2, sp, #672 |
| + add r2, sp, #640 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q5, d30, d29 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d24-d25}, [r2, : 128] |
| vmlal.s32 q1, d30, d28 |
| vadd.i64 q13, q0, q11 |
| @@ -1069,7 +1042,7 @@ |
| sub r4, r4, #24 |
| vst1.8 d0, [r2, : 64] |
| vst1.8 d1, [r4, : 64] |
| - add r2, sp, #544 |
| + add r2, sp, #512 |
| add r4, r3, #144 |
| add r5, r3, #192 |
| vld1.8 {d0-d1}, [r2, : 128] |
| @@ -1139,14 +1112,13 @@ |
| vmlal.s32 q0, d12, d8 |
| vmlal.s32 q0, d13, d17 |
| vmlal.s32 q0, d6, d6 |
| - add r2, sp, #512 |
| - vld1.8 {d18-d19}, [r2, : 128] |
| + add r2, sp, #480 |
| + vld1.8 {d18-d19}, [r2, : 128]! |
| vmull.s32 q3, d16, d7 |
| vmlal.s32 q3, d10, d15 |
| vmlal.s32 q3, d11, d14 |
| vmlal.s32 q3, d12, d9 |
| vmlal.s32 q3, d13, d8 |
| - add r2, sp, #528 |
| vld1.8 {d8-d9}, [r2, : 128] |
| vadd.i64 q5, q12, q9 |
| vadd.i64 q6, q15, q9 |
| @@ -1295,22 +1267,19 @@ |
| vadd.i32 q5, q5, q0 |
| vtrn.32 q11, q14 |
| vadd.i32 q6, q6, q3 |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vadd.i32 q10, q10, q2 |
| vtrn.32 d24, d25 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q6, q13, #1 |
| - add r2, sp, #576 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vshl.i32 q10, q14, #1 |
| - add r2, sp, #592 |
| - vst1.8 {d12-d13}, [r2, : 128] |
| + vst1.8 {d12-d13}, [r2, : 128]! |
| vshl.i32 q15, q12, #1 |
| vadd.i32 q8, q8, q4 |
| vext.32 d10, d31, d30, #0 |
| vadd.i32 q7, q7, q1 |
| - add r2, sp, #608 |
| - vst1.8 {d16-d17}, [r2, : 128] |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| vmull.s32 q8, d18, d5 |
| vmlal.s32 q8, d26, d4 |
| vmlal.s32 q8, d19, d9 |
| @@ -1321,8 +1290,7 @@ |
| vmlal.s32 q8, d29, d1 |
| vmlal.s32 q8, d24, d6 |
| vmlal.s32 q8, d25, d0 |
| - add r2, sp, #624 |
| - vst1.8 {d14-d15}, [r2, : 128] |
| + vst1.8 {d14-d15}, [r2, : 128]! |
| vmull.s32 q2, d18, d4 |
| vmlal.s32 q2, d12, d9 |
| vmlal.s32 q2, d13, d8 |
| @@ -1330,8 +1298,7 @@ |
| vmlal.s32 q2, d22, d2 |
| vmlal.s32 q2, d23, d1 |
| vmlal.s32 q2, d24, d0 |
| - add r2, sp, #640 |
| - vst1.8 {d20-d21}, [r2, : 128] |
| + vst1.8 {d20-d21}, [r2, : 128]! |
| vmull.s32 q7, d18, d9 |
| vmlal.s32 q7, d26, d3 |
| vmlal.s32 q7, d19, d8 |
| @@ -1340,15 +1307,13 @@ |
| vmlal.s32 q7, d28, d1 |
| vmlal.s32 q7, d23, d6 |
| vmlal.s32 q7, d29, d0 |
| - add r2, sp, #656 |
| - vst1.8 {d10-d11}, [r2, : 128] |
| + vst1.8 {d10-d11}, [r2, : 128]! |
| vmull.s32 q5, d18, d3 |
| vmlal.s32 q5, d19, d2 |
| vmlal.s32 q5, d22, d1 |
| vmlal.s32 q5, d23, d0 |
| vmlal.s32 q5, d12, d8 |
| - add r2, sp, #672 |
| - vst1.8 {d16-d17}, [r2, : 128] |
| + vst1.8 {d16-d17}, [r2, : 128]! |
| vmull.s32 q4, d18, d8 |
| vmlal.s32 q4, d26, d2 |
| vmlal.s32 q4, d19, d7 |
| @@ -1359,7 +1324,7 @@ |
| vmlal.s32 q8, d26, d1 |
| vmlal.s32 q8, d19, d6 |
| vmlal.s32 q8, d27, d0 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q7, d24, d21 |
| vmlal.s32 q7, d25, d20 |
| @@ -1368,32 +1333,30 @@ |
| vmlal.s32 q8, d22, d21 |
| vmlal.s32 q8, d28, d20 |
| vmlal.s32 q5, d24, d20 |
| - add r2, sp, #576 |
| vst1.8 {d14-d15}, [r2, : 128] |
| vmull.s32 q7, d18, d6 |
| vmlal.s32 q7, d26, d0 |
| - add r2, sp, #656 |
| + add r2, sp, #624 |
| vld1.8 {d30-d31}, [r2, : 128] |
| vmlal.s32 q2, d30, d21 |
| vmlal.s32 q7, d19, d21 |
| vmlal.s32 q7, d27, d20 |
| - add r2, sp, #624 |
| + add r2, sp, #592 |
| vld1.8 {d26-d27}, [r2, : 128] |
| vmlal.s32 q4, d25, d27 |
| vmlal.s32 q8, d29, d27 |
| vmlal.s32 q8, d25, d26 |
| vmlal.s32 q7, d28, d27 |
| vmlal.s32 q7, d29, d26 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d28-d29}, [r2, : 128] |
| vmlal.s32 q4, d24, d29 |
| vmlal.s32 q8, d23, d29 |
| vmlal.s32 q8, d24, d28 |
| vmlal.s32 q7, d22, d29 |
| vmlal.s32 q7, d23, d28 |
| - add r2, sp, #608 |
| vst1.8 {d8-d9}, [r2, : 128] |
| - add r2, sp, #560 |
| + add r2, sp, #528 |
| vld1.8 {d8-d9}, [r2, : 128] |
| vmlal.s32 q7, d24, d9 |
| vmlal.s32 q7, d25, d31 |
| @@ -1414,36 +1377,36 @@ |
| vmlal.s32 q0, d23, d26 |
| vmlal.s32 q0, d24, d31 |
| vmlal.s32 q0, d19, d20 |
| - add r2, sp, #640 |
| + add r2, sp, #608 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q2, d18, d7 |
| - vmlal.s32 q2, d19, d6 |
| vmlal.s32 q5, d18, d6 |
| - vmlal.s32 q5, d19, d21 |
| vmlal.s32 q1, d18, d21 |
| - vmlal.s32 q1, d19, d29 |
| vmlal.s32 q0, d18, d28 |
| - vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d18, d29 |
| + vmlal.s32 q2, d19, d6 |
| + vmlal.s32 q5, d19, d21 |
| + vmlal.s32 q1, d19, d29 |
| + vmlal.s32 q0, d19, d9 |
| vmlal.s32 q6, d19, d28 |
| - add r2, sp, #592 |
| + add r2, sp, #560 |
| vld1.8 {d18-d19}, [r2, : 128] |
| - add r2, sp, #512 |
| + add r2, sp, #480 |
| vld1.8 {d22-d23}, [r2, : 128] |
| vmlal.s32 q5, d19, d7 |
| vmlal.s32 q0, d18, d21 |
| vmlal.s32 q0, d19, d29 |
| vmlal.s32 q6, d18, d6 |
| - add r2, sp, #528 |
| + add r2, sp, #496 |
| vld1.8 {d6-d7}, [r2, : 128] |
| vmlal.s32 q6, d19, d21 |
| - add r2, sp, #576 |
| + add r2, sp, #544 |
| vld1.8 {d18-d19}, [r2, : 128] |
| vmlal.s32 q0, d30, d8 |
| - add r2, sp, #672 |
| + add r2, sp, #640 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vmlal.s32 q5, d30, d29 |
| - add r2, sp, #608 |
| + add r2, sp, #576 |
| vld1.8 {d24-d25}, [r2, : 128] |
| vmlal.s32 q1, d30, d28 |
| vadd.i64 q13, q0, q11 |
| @@ -1541,10 +1504,10 @@ |
| sub r4, r4, #24 |
| vst1.8 d0, [r2, : 64] |
| vst1.8 d1, [r4, : 64] |
| - ldr r2, [sp, #488] |
| - ldr r4, [sp, #492] |
| + ldr r2, [sp, #456] |
| + ldr r4, [sp, #460] |
| subs r5, r2, #1 |
| - bge ._mainloop |
| + bge .Lmainloop |
| add r1, r3, #144 |
| add r2, r3, #336 |
| vld1.8 {d0-d1}, [r1, : 128]! |
| @@ -1553,41 +1516,41 @@ |
| vst1.8 {d0-d1}, [r2, : 128]! |
| vst1.8 {d2-d3}, [r2, : 128]! |
| vst1.8 d4, [r2, : 64] |
| - ldr r1, =0 |
| -._invertloop: |
| + movw r1, #0 |
| +.Linvertloop: |
| add r2, r3, #144 |
| - ldr r4, =0 |
| - ldr r5, =2 |
| + movw r4, #0 |
| + movw r5, #2 |
| cmp r1, #1 |
| - ldreq r5, =1 |
| + moveq r5, #1 |
| addeq r2, r3, #336 |
| addeq r4, r3, #48 |
| cmp r1, #2 |
| - ldreq r5, =1 |
| + moveq r5, #1 |
| addeq r2, r3, #48 |
| cmp r1, #3 |
| - ldreq r5, =5 |
| + moveq r5, #5 |
| addeq r4, r3, #336 |
| cmp r1, #4 |
| - ldreq r5, =10 |
| + moveq r5, #10 |
| cmp r1, #5 |
| - ldreq r5, =20 |
| + moveq r5, #20 |
| cmp r1, #6 |
| - ldreq r5, =10 |
| + moveq r5, #10 |
| addeq r2, r3, #336 |
| addeq r4, r3, #336 |
| cmp r1, #7 |
| - ldreq r5, =50 |
| + moveq r5, #50 |
| cmp r1, #8 |
| - ldreq r5, =100 |
| + moveq r5, #100 |
| cmp r1, #9 |
| - ldreq r5, =50 |
| + moveq r5, #50 |
| addeq r2, r3, #336 |
| cmp r1, #10 |
| - ldreq r5, =5 |
| + moveq r5, #5 |
| addeq r2, r3, #48 |
| cmp r1, #11 |
| - ldreq r5, =0 |
| + moveq r5, #0 |
| addeq r2, r3, #96 |
| add r6, r3, #144 |
| add r7, r3, #288 |
| @@ -1598,8 +1561,8 @@ |
| vst1.8 {d2-d3}, [r7, : 128]! |
| vst1.8 d4, [r7, : 64] |
| cmp r5, #0 |
| - beq ._skipsquaringloop |
| -._squaringloop: |
| + beq .Lskipsquaringloop |
| +.Lsquaringloop: |
| add r6, r3, #288 |
| add r7, r3, #288 |
| add r8, r3, #288 |
| @@ -1611,7 +1574,7 @@ |
| vld1.8 {d6-d7}, [r7, : 128]! |
| vld1.8 {d9}, [r7, : 64] |
| vld1.8 {d10-d11}, [r6, : 128]! |
| - add r7, sp, #416 |
| + add r7, sp, #384 |
| vld1.8 {d12-d13}, [r6, : 128]! |
| vmul.i32 q7, q2, q0 |
| vld1.8 {d8}, [r6, : 64] |
| @@ -1726,7 +1689,7 @@ |
| vext.32 d10, d6, d6, #0 |
| vmov.i32 q1, #0xffffffff |
| vshl.i64 q4, q1, #25 |
| - add r7, sp, #512 |
| + add r7, sp, #480 |
| vld1.8 {d14-d15}, [r7, : 128] |
| vadd.i64 q9, q2, q7 |
| vshl.i64 q1, q1, #26 |
| @@ -1735,7 +1698,7 @@ |
| vadd.i64 q5, q5, q10 |
| vand q9, q9, q1 |
| vld1.8 {d16}, [r6, : 64]! |
| - add r6, sp, #528 |
| + add r6, sp, #496 |
| vld1.8 {d20-d21}, [r6, : 128] |
| vadd.i64 q11, q5, q10 |
| vsub.i64 q2, q2, q9 |
| @@ -1789,8 +1752,8 @@ |
| sub r6, r6, #32 |
| vst1.8 d4, [r6, : 64] |
| subs r5, r5, #1 |
| - bhi ._squaringloop |
| -._skipsquaringloop: |
| + bhi .Lsquaringloop |
| +.Lskipsquaringloop: |
| mov r2, r2 |
| add r5, r3, #288 |
| add r6, r3, #144 |
| @@ -1802,7 +1765,7 @@ |
| vld1.8 {d6-d7}, [r5, : 128]! |
| vld1.8 {d9}, [r5, : 64] |
| vld1.8 {d10-d11}, [r2, : 128]! |
| - add r5, sp, #416 |
| + add r5, sp, #384 |
| vld1.8 {d12-d13}, [r2, : 128]! |
| vmul.i32 q7, q2, q0 |
| vld1.8 {d8}, [r2, : 64] |
| @@ -1917,7 +1880,7 @@ |
| vext.32 d10, d6, d6, #0 |
| vmov.i32 q1, #0xffffffff |
| vshl.i64 q4, q1, #25 |
| - add r5, sp, #512 |
| + add r5, sp, #480 |
| vld1.8 {d14-d15}, [r5, : 128] |
| vadd.i64 q9, q2, q7 |
| vshl.i64 q1, q1, #26 |
| @@ -1926,7 +1889,7 @@ |
| vadd.i64 q5, q5, q10 |
| vand q9, q9, q1 |
| vld1.8 {d16}, [r2, : 64]! |
| - add r2, sp, #528 |
| + add r2, sp, #496 |
| vld1.8 {d20-d21}, [r2, : 128] |
| vadd.i64 q11, q5, q10 |
| vsub.i64 q2, q2, q9 |
| @@ -1980,7 +1943,7 @@ |
| sub r2, r2, #32 |
| vst1.8 d4, [r2, : 64] |
| cmp r4, #0 |
| - beq ._skippostcopy |
| + beq .Lskippostcopy |
| add r2, r3, #144 |
| mov r4, r4 |
| vld1.8 {d0-d1}, [r2, : 128]! |
| @@ -1989,9 +1952,9 @@ |
| vst1.8 {d0-d1}, [r4, : 128]! |
| vst1.8 {d2-d3}, [r4, : 128]! |
| vst1.8 d4, [r4, : 64] |
| -._skippostcopy: |
| +.Lskippostcopy: |
| cmp r1, #1 |
| - bne ._skipfinalcopy |
| + bne .Lskipfinalcopy |
| add r2, r3, #288 |
| add r4, r3, #144 |
| vld1.8 {d0-d1}, [r2, : 128]! |
| @@ -2000,10 +1963,10 @@ |
| vst1.8 {d0-d1}, [r4, : 128]! |
| vst1.8 {d2-d3}, [r4, : 128]! |
| vst1.8 d4, [r4, : 64] |
| -._skipfinalcopy: |
| +.Lskipfinalcopy: |
| add r1, r1, #1 |
| cmp r1, #12 |
| - blo ._invertloop |
| + blo .Linvertloop |
| add r1, r3, #144 |
| ldr r2, [r1], #4 |
| ldr r3, [r1], #4 |
| @@ -2085,21 +2048,15 @@ |
| add r8, r8, r10, LSL #12 |
| mov r9, r10, LSR #20 |
| add r1, r9, r1, LSL #6 |
| - str r2, [r0], #4 |
| - str r3, [r0], #4 |
| - str r4, [r0], #4 |
| - str r5, [r0], #4 |
| - str r6, [r0], #4 |
| - str r7, [r0], #4 |
| - str r8, [r0], #4 |
| - str r1, [r0] |
| - ldrd r4, [sp, #0] |
| - ldrd r6, [sp, #8] |
| - ldrd r8, [sp, #16] |
| - ldrd r10, [sp, #24] |
| - ldr r12, [sp, #480] |
| - ldr r14, [sp, #484] |
| - ldr r0, =0 |
| - mov sp, r12 |
| - vpop {q4, q5, q6, q7} |
| - bx lr |
| + str r2, [r0] |
| + str r3, [r0, #4] |
| + str r4, [r0, #8] |
| + str r5, [r0, #12] |
| + str r6, [r0, #16] |
| + str r7, [r0, #20] |
| + str r8, [r0, #24] |
| + str r1, [r0, #28] |
| + movw r0, #0 |
| + mov sp, ip |
| + pop {r4-r11, pc} |
| +ENDPROC(curve25519_neon) |
| diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c |
| new file mode 100644 |
| index 000000000000..2e9e12d2f642 |
| |
| |
| @@ -0,0 +1,127 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This |
| + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been |
| + * manually reworked for use in kernel space. |
| + */ |
| + |
| +#include <asm/hwcap.h> |
| +#include <asm/neon.h> |
| +#include <asm/simd.h> |
| +#include <crypto/internal/kpp.h> |
| +#include <crypto/internal/simd.h> |
| +#include <linux/types.h> |
| +#include <linux/module.h> |
| +#include <linux/init.h> |
| +#include <linux/jump_label.h> |
| +#include <crypto/curve25519.h> |
| + |
| +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE], |
| + const u8 basepoint[CURVE25519_KEY_SIZE]); |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
| + |
| +void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], |
| + const u8 scalar[CURVE25519_KEY_SIZE], |
| + const u8 point[CURVE25519_KEY_SIZE]) |
| +{ |
| + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { |
| + kernel_neon_begin(); |
| + curve25519_neon(out, scalar, point); |
| + kernel_neon_end(); |
| + } else { |
| + curve25519_generic(out, scalar, point); |
| + } |
| +} |
| +EXPORT_SYMBOL(curve25519_arch); |
| + |
| +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, |
| + unsigned int len) |
| +{ |
| + u8 *secret = kpp_tfm_ctx(tfm); |
| + |
| + if (!len) |
| + curve25519_generate_secret(secret); |
| + else if (len == CURVE25519_KEY_SIZE && |
| + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) |
| + memcpy(secret, buf, CURVE25519_KEY_SIZE); |
| + else |
| + return -EINVAL; |
| + return 0; |
| +} |
| + |
| +static int curve25519_compute_value(struct kpp_request *req) |
| +{ |
| + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); |
| + const u8 *secret = kpp_tfm_ctx(tfm); |
| + u8 public_key[CURVE25519_KEY_SIZE]; |
| + u8 buf[CURVE25519_KEY_SIZE]; |
| + int copied, nbytes; |
| + u8 const *bp; |
| + |
| + if (req->src) { |
| + copied = sg_copy_to_buffer(req->src, |
| + sg_nents_for_len(req->src, |
| + CURVE25519_KEY_SIZE), |
| + public_key, CURVE25519_KEY_SIZE); |
| + if (copied != CURVE25519_KEY_SIZE) |
| + return -EINVAL; |
| + bp = public_key; |
| + } else { |
| + bp = curve25519_base_point; |
| + } |
| + |
| + curve25519_arch(buf, secret, bp); |
| + |
| + /* might want less than we've got */ |
| + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); |
| + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, |
| + nbytes), |
| + buf, nbytes); |
| + if (copied != nbytes) |
| + return -EINVAL; |
| + return 0; |
| +} |
| + |
| +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) |
| +{ |
| + return CURVE25519_KEY_SIZE; |
| +} |
| + |
| +static struct kpp_alg curve25519_alg = { |
| + .base.cra_name = "curve25519", |
| + .base.cra_driver_name = "curve25519-neon", |
| + .base.cra_priority = 200, |
| + .base.cra_module = THIS_MODULE, |
| + .base.cra_ctxsize = CURVE25519_KEY_SIZE, |
| + |
| + .set_secret = curve25519_set_secret, |
| + .generate_public_key = curve25519_compute_value, |
| + .compute_shared_secret = curve25519_compute_value, |
| + .max_size = curve25519_max_size, |
| +}; |
| + |
| +static int __init mod_init(void) |
| +{ |
| + if (elf_hwcap & HWCAP_NEON) { |
| + static_branch_enable(&have_neon); |
| + return crypto_register_kpp(&curve25519_alg); |
| + } |
| + return 0; |
| +} |
| + |
| +static void __exit mod_exit(void) |
| +{ |
| + if (elf_hwcap & HWCAP_NEON) |
| + crypto_unregister_kpp(&curve25519_alg); |
| +} |
| + |
| +module_init(mod_init); |
| +module_exit(mod_exit); |
| + |
| +MODULE_ALIAS_CRYPTO("curve25519"); |
| +MODULE_ALIAS_CRYPTO("curve25519-neon"); |
| +MODULE_LICENSE("GPL v2"); |
| -- |
| 2.18.2 |
| |
| |
| From 3c53ca73117d858f90c1a41d8e78a6b5d569d2e6 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:39 +0100 |
| Subject: [PATCH 032/100] crypto: chacha20poly1305 - import construction and |
| selftest from Zinc |
| |
| commit ed20078b7e3331e82828be357147af6a3282e4ce upstream. |
| |
| This incorporates the chacha20poly1305 from the Zinc library, retaining |
| the library interface, but replacing the implementation with calls into |
| the code that already existed in the kernel's crypto API. |
| |
| Note that this library API does not implement RFC7539 fully, given that |
| it is limited to 64-bit nonces. (The 96-bit nonce version, which existed |
| only as part of the selftest, has been removed, along with the 96-bit nonce |
| test vectors, which exercised only the selftest and not the library itself.) |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/crypto/chacha20poly1305.h | 37 + |
| lib/crypto/Kconfig | 7 + |
| lib/crypto/Makefile | 4 + |
| lib/crypto/chacha20poly1305-selftest.c | 7348 ++++++++++++++++++++++++ |
| lib/crypto/chacha20poly1305.c | 219 + |
| 5 files changed, 7615 insertions(+) |
| create mode 100644 include/crypto/chacha20poly1305.h |
| create mode 100644 lib/crypto/chacha20poly1305-selftest.c |
| create mode 100644 lib/crypto/chacha20poly1305.c |
| |
| diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h |
| new file mode 100644 |
| index 000000000000..ad3b1de58df8 |
| |
| |
| @@ -0,0 +1,37 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef __CHACHA20POLY1305_H |
| +#define __CHACHA20POLY1305_H |
| + |
| +#include <linux/types.h> |
| + |
| +enum chacha20poly1305_lengths { |
| + XCHACHA20POLY1305_NONCE_SIZE = 24, |
| + CHACHA20POLY1305_KEY_SIZE = 32, |
| + CHACHA20POLY1305_AUTHTAG_SIZE = 16 |
| +}; |
| + |
| +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| +bool __must_check |
| +chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| +bool __must_check xchacha20poly1305_decrypt( |
| + u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, |
| + const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| +#endif /* __CHACHA20POLY1305_H */ |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index b1d830dc1c9e..0b2c4fce26d9 100644 |
| |
| |
| @@ -119,5 +119,12 @@ config CRYPTO_LIB_POLY1305 |
| by either the generic implementation or an arch-specific one, if one |
| is available and enabled. |
| |
| +config CRYPTO_LIB_CHACHA20POLY1305 |
| + tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" |
| + depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA |
| + depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 |
| + select CRYPTO_LIB_CHACHA |
| + select CRYPTO_LIB_POLY1305 |
| + |
| config CRYPTO_LIB_SHA256 |
| tristate |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 273c55d5e147..34a701ab8b92 100644 |
| |
| |
| @@ -16,6 +16,9 @@ libblake2s-generic-y += blake2s-generic.o |
| obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o |
| libblake2s-y += blake2s.o |
| |
| +obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o |
| +libchacha20poly1305-y += chacha20poly1305.o |
| + |
| obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o |
| libcurve25519-y := curve25519-fiat32.o |
| libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o |
| @@ -32,4 +35,5 @@ libsha256-y := sha256.o |
| |
| ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) |
| libblake2s-y += blake2s-selftest.o |
| +libchacha20poly1305-y += chacha20poly1305-selftest.o |
| endif |
| diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c |
| new file mode 100644 |
| index 000000000000..d1ed0f27cfdb |
| |
| |
| @@ -0,0 +1,7348 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/chacha20poly1305.h> |
| +#include <crypto/poly1305.h> |
| + |
| +#include <asm/unaligned.h> |
| +#include <linux/bug.h> |
| +#include <linux/init.h> |
| +#include <linux/mm.h> |
| +#include <linux/kernel.h> |
| +#include <linux/slab.h> |
| + |
| +struct chacha20poly1305_testvec { |
| + const u8 *input, *output, *assoc, *nonce, *key; |
| + size_t ilen, alen, nlen; |
| + bool failure; |
| +}; |
| + |
| +/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539 |
| + * 2.8.2. The ones after that were generated by reference implementations. The |
| + * final marked ones are taken from wycheproof, but we only do these for the |
| + * encrypt side, because mostly we're stressing the primitives rather than the |
| + * actual chapoly construction. |
| + */ |
| + |
| +static const u8 enc_input001[] __initconst = { |
| + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, |
| + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, |
| + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, |
| + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, |
| + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, |
| + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, |
| + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, |
| + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, |
| + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, |
| + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, |
| + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, |
| + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, |
| + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, |
| + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, |
| + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, |
| + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, |
| + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, |
| + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, |
| + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, |
| + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, |
| + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, |
| + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, |
| + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, |
| + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, |
| + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, |
| + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, |
| + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, |
| + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, |
| + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, |
| + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, |
| + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, |
| + 0x9d |
| +}; |
| +static const u8 enc_output001[] __initconst = { |
| + 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, |
| + 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, |
| + 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, |
| + 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, |
| + 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, |
| + 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, |
| + 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, |
| + 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, |
| + 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, |
| + 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, |
| + 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, |
| + 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, |
| + 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, |
| + 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, |
| + 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, |
| + 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, |
| + 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, |
| + 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, |
| + 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, |
| + 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, |
| + 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, |
| + 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, |
| + 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, |
| + 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, |
| + 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, |
| + 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, |
| + 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, |
| + 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, |
| + 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, |
| + 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, |
| + 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, |
| + 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, |
| + 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, |
| + 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, |
| + 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, |
| + 0x38 |
| +}; |
| +static const u8 enc_assoc001[] __initconst = { |
| + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x4e, 0x91 |
| +}; |
| +static const u8 enc_nonce001[] __initconst = { |
| + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 |
| +}; |
| +static const u8 enc_key001[] __initconst = { |
| + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, |
| + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, |
| + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, |
| + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 |
| +}; |
| + |
| +static const u8 enc_input002[] __initconst = { }; |
| +static const u8 enc_output002[] __initconst = { |
| + 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, |
| + 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 |
| +}; |
| +static const u8 enc_assoc002[] __initconst = { }; |
| +static const u8 enc_nonce002[] __initconst = { |
| + 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e |
| +}; |
| +static const u8 enc_key002[] __initconst = { |
| + 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, |
| + 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, |
| + 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, |
| + 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 |
| +}; |
| + |
| +static const u8 enc_input003[] __initconst = { }; |
| +static const u8 enc_output003[] __initconst = { |
| + 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, |
| + 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 |
| +}; |
| +static const u8 enc_assoc003[] __initconst = { |
| + 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b |
| +}; |
| +static const u8 enc_nonce003[] __initconst = { |
| + 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d |
| +}; |
| +static const u8 enc_key003[] __initconst = { |
| + 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, |
| + 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, |
| + 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, |
| + 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d |
| +}; |
| + |
| +static const u8 enc_input004[] __initconst = { |
| + 0xa4 |
| +}; |
| +static const u8 enc_output004[] __initconst = { |
| + 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, |
| + 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, |
| + 0x89 |
| +}; |
| +static const u8 enc_assoc004[] __initconst = { |
| + 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 |
| +}; |
| +static const u8 enc_nonce004[] __initconst = { |
| + 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 |
| +}; |
| +static const u8 enc_key004[] __initconst = { |
| + 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, |
| + 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, |
| + 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, |
| + 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e |
| +}; |
| + |
| +static const u8 enc_input005[] __initconst = { |
| + 0x2d |
| +}; |
| +static const u8 enc_output005[] __initconst = { |
| + 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, |
| + 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, |
| + 0xac |
| +}; |
| +static const u8 enc_assoc005[] __initconst = { }; |
| +static const u8 enc_nonce005[] __initconst = { |
| + 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 |
| +}; |
| +static const u8 enc_key005[] __initconst = { |
| + 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, |
| + 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, |
| + 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, |
| + 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 |
| +}; |
| + |
| +static const u8 enc_input006[] __initconst = { |
| + 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, |
| + 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, |
| + 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, |
| + 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, |
| + 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, |
| + 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, |
| + 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, |
| + 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, |
| + 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, |
| + 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, |
| + 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, |
| + 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, |
| + 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, |
| + 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, |
| + 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, |
| + 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, |
| + 0x8f |
| +}; |
| +static const u8 enc_output006[] __initconst = { |
| + 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, |
| + 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, |
| + 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, |
| + 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, |
| + 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, |
| + 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, |
| + 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc, |
| + 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, |
| + 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, |
| + 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, |
| + 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, |
| + 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, |
| + 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, |
| + 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, |
| + 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, |
| + 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, |
| + 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, |
| + 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, |
| + 0xeb |
| +}; |
| +static const u8 enc_assoc006[] __initconst = { |
| + 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b |
| +}; |
| +static const u8 enc_nonce006[] __initconst = { |
| + 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c |
| +}; |
| +static const u8 enc_key006[] __initconst = { |
| + 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, |
| + 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, |
| + 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, |
| + 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 |
| +}; |
| + |
| +static const u8 enc_input007[] __initconst = { |
| + 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, |
| + 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, |
| + 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, |
| + 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, |
| + 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, |
| + 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, |
| + 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, |
| + 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, |
| + 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, |
| + 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, |
| + 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, |
| + 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, |
| + 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, |
| + 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, |
| + 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, |
| + 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, |
| + 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, |
| + 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80, |
| + 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, |
| + 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, |
| + 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, |
| + 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, |
| + 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, |
| + 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, |
| + 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, |
| + 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, |
| + 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, |
| + 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, |
| + 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, |
| + 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, |
| + 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, |
| + 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 |
| +}; |
| +static const u8 enc_output007[] __initconst = { |
| + 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, |
| + 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, |
| + 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, |
| + 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, |
| + 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, |
| + 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, |
| + 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, |
| + 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, |
| + 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, |
| + 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, |
| + 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, |
| + 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, |
| + 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, |
| + 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, |
| + 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, |
| + 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, |
| + 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, |
| + 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, |
| + 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, |
| + 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, |
| + 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, |
| + 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, |
| + 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, |
| + 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e, |
| + 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, |
| + 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, |
| + 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, |
| + 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, |
| + 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, |
| + 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, |
| + 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, |
| + 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, |
| + 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, |
| + 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 |
| +}; |
| +static const u8 enc_assoc007[] __initconst = { }; |
| +static const u8 enc_nonce007[] __initconst = { |
| + 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 |
| +}; |
| +static const u8 enc_key007[] __initconst = { |
| + 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, |
| + 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, |
| + 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, |
| + 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 |
| +}; |
| + |
| +static const u8 enc_input008[] __initconst = { |
| + 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, |
| + 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, |
| + 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, |
| + 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, |
| + 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, |
| + 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, |
| + 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, |
| + 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, |
| + 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, |
| + 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, |
| + 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, |
| + 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, |
| + 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, |
| + 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, |
| + 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, |
| + 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, |
| + 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, |
| + 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, |
| + 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, |
| + 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10, |
| + 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, |
| + 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, |
| + 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, |
| + 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, |
| + 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, |
| + 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, |
| + 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, |
| + 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, |
| + 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, |
| + 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, |
| + 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, |
| + 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, |
| + 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, |
| + 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, |
| + 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, |
| + 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, |
| + 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, |
| + 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, |
| + 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, |
| + 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed, |
| + 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, |
| + 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, |
| + 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, |
| + 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, |
| + 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, |
| + 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, |
| + 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, |
| + 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, |
| + 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, |
| + 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, |
| + 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, |
| + 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, |
| + 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, |
| + 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, |
| + 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, |
| + 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, |
| + 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, |
| + 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, |
| + 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30, |
| + 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, |
| + 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, |
| + 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, |
| + 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, |
| + 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 |
| +}; |
| +static const u8 enc_output008[] __initconst = { |
| + 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, |
| + 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, |
| + 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, |
| + 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, |
| + 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, |
| + 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, |
| + 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, |
| + 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, |
| + 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, |
| + 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, |
| + 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, |
| + 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, |
| + 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, |
| + 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, |
| + 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, |
| + 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, |
| + 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, |
| + 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, |
| + 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, |
| + 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, |
| + 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, |
| + 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, |
| + 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, |
| + 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, |
| + 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, |
| + 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, |
| + 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, |
| + 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, |
| + 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, |
| + 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, |
| + 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, |
| + 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, |
| + 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82, |
| + 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, |
| + 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, |
| + 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, |
| + 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, |
| + 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, |
| + 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, |
| + 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, |
| + 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, |
| + 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, |
| + 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, |
| + 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, |
| + 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, |
| + 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, |
| + 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, |
| + 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, |
| + 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, |
| + 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, |
| + 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, |
| + 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, |
| + 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, |
| + 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, |
| + 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, |
| + 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, |
| + 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, |
| + 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, |
| + 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, |
| + 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, |
| + 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, |
| + 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, |
| + 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, |
| + 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, |
| + 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, |
| + 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 |
| +}; |
| +static const u8 enc_assoc008[] __initconst = { }; |
| +static const u8 enc_nonce008[] __initconst = { |
| + 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 |
| +}; |
| +static const u8 enc_key008[] __initconst = { |
| + 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, |
| + 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0, |
| + 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, |
| + 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba |
| +}; |
| + |
| +static const u8 enc_input009[] __initconst = { |
| + 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, |
| + 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, |
| + 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, |
| + 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, |
| + 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, |
| + 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, |
| + 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, |
| + 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, |
| + 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, |
| + 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, |
| + 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, |
| + 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, |
| + 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, |
| + 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, |
| + 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, |
| + 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, |
| + 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab, |
| + 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, |
| + 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, |
| + 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, |
| + 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, |
| + 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, |
| + 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, |
| + 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, |
| + 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, |
| + 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, |
| + 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, |
| + 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, |
| + 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, |
| + 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, |
| + 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, |
| + 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, |
| + 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, |
| + 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, |
| + 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, |
| + 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, |
| + 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, |
| + 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, |
| + 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, |
| + 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, |
| + 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, |
| + 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, |
| + 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, |
| + 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, |
| + 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, |
| + 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, |
| + 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, |
| + 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, |
| + 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, |
| + 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, |
| + 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, |
| + 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, |
| + 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, |
| + 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, |
| + 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, |
| + 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5, |
| + 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, |
| + 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, |
| + 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, |
| + 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, |
| + 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, |
| + 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, |
| + 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, |
| + 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, |
| + 0x65 |
| +}; |
| +static const u8 enc_output009[] __initconst = { |
| + 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, |
| + 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, |
| + 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, |
| + 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, |
| + 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, |
| + 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe, |
| + 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, |
| + 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, |
| + 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, |
| + 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09, |
| + 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, |
| + 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, |
| + 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, |
| + 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, |
| + 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, |
| + 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, |
| + 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, |
| + 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, |
| + 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, |
| + 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, |
| + 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, |
| + 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, |
| + 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, |
| + 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, |
| + 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, |
| + 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, |
| + 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, |
| + 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, |
| + 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, |
| + 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, |
| + 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, |
| + 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, |
| + 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, |
| + 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, |
| + 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, |
| + 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, |
| + 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, |
| + 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, |
| + 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, |
| + 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, |
| + 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, |
| + 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, |
| + 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, |
| + 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, |
| + 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e, |
| + 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, |
| + 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, |
| + 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, |
| + 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, |
| + 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, |
| + 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, |
| + 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, |
| + 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, |
| + 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, |
| + 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, |
| + 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, |
| + 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, |
| + 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, |
| + 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, |
| + 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, |
| + 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, |
| + 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, |
| + 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, |
| + 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, |
| + 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, |
| + 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, |
| + 0xae |
| +}; |
| +static const u8 enc_assoc009[] __initconst = { |
| + 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, |
| + 0xef |
| +}; |
| +static const u8 enc_nonce009[] __initconst = { |
| + 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 |
| +}; |
| +static const u8 enc_key009[] __initconst = { |
| + 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, |
| + 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, |
| + 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, |
| + 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b |
| +}; |
| + |
| +static const u8 enc_input010[] __initconst = { |
| + 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, |
| + 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, |
| + 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, |
| + 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, |
| + 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, |
| + 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, |
| + 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4, |
| + 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, |
| + 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, |
| + 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, |
| + 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, |
| + 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b, |
| + 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, |
| + 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, |
| + 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, |
| + 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, |
| + 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, |
| + 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, |
| + 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, |
| + 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, |
| + 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, |
| + 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, |
| + 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, |
| + 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, |
| + 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, |
| + 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, |
| + 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, |
| + 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, |
| + 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, |
| + 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, |
| + 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, |
| + 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, |
| + 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, |
| + 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, |
| + 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, |
| + 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, |
| + 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, |
| + 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, |
| + 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, |
| + 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, |
| + 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, |
| + 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, |
| + 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, |
| + 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, |
| + 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, |
| + 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa, |
| + 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, |
| + 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, |
| + 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, |
| + 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, |
| + 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e, |
| + 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, |
| + 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, |
| + 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, |
| + 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, |
| + 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, |
| + 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, |
| + 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, |
| + 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, |
| + 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, |
| + 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, |
| + 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, |
| + 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, |
| + 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, |
| + 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, |
| + 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, |
| + 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, |
| + 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, |
| + 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, |
| + 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, |
| + 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, |
| + 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, |
| + 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, |
| + 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, |
| + 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, |
| + 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, |
| + 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, |
| + 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, |
| + 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, |
| + 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, |
| + 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, |
| + 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, |
| + 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, |
| + 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, |
| + 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf, |
| + 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, |
| + 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, |
| + 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, |
| + 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, |
| + 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71, |
| + 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, |
| + 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, |
| + 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, |
| + 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, |
| + 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, |
| + 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, |
| + 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, |
| + 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, |
| + 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, |
| + 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, |
| + 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, |
| + 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, |
| + 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, |
| + 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, |
| + 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, |
| + 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, |
| + 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, |
| + 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, |
| + 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, |
| + 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, |
| + 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, |
| + 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, |
| + 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, |
| + 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, |
| + 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, |
| + 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, |
| + 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, |
| + 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, |
| + 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, |
| + 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, |
| + 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, |
| + 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, |
| + 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, |
| + 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d, |
| + 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, |
| + 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, |
| + 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, |
| + 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f |
| +}; |
| +static const u8 enc_output010[] __initconst = { |
| + 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, |
| + 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, |
| + 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, |
| + 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, |
| + 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, |
| + 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, |
| + 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, |
| + 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, |
| + 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, |
| + 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, |
| + 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, |
| + 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, |
| + 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, |
| + 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, |
| + 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, |
| + 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, |
| + 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, |
| + 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, |
| + 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, |
| + 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, |
| + 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, |
| + 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, |
| + 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, |
| + 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, |
| + 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, |
| + 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, |
| + 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, |
| + 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, |
| + 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, |
| + 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, |
| + 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, |
| + 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, |
| + 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, |
| + 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, |
| + 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, |
| + 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, |
| + 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, |
| + 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, |
| + 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, |
| + 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, |
| + 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, |
| + 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, |
| + 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, |
| + 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, |
| + 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, |
| + 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, |
| + 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, |
| + 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, |
| + 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, |
| + 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, |
| + 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, |
| + 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, |
| + 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, |
| + 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, |
| + 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, |
| + 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, |
| + 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, |
| + 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, |
| + 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, |
| + 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, |
| + 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, |
| + 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, |
| + 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, |
| + 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, |
| + 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, |
| + 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, |
| + 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, |
| + 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, |
| + 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, |
| + 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, |
| + 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, |
| + 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, |
| + 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, |
| + 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, |
| + 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, |
| + 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, |
| + 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, |
| + 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, |
| + 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, |
| + 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, |
| + 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, |
| + 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, |
| + 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, |
| + 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, |
| + 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, |
| + 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, |
| + 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, |
| + 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, |
| + 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, |
| + 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, |
| + 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, |
| + 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, |
| + 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, |
| + 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, |
| + 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, |
| + 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, |
| + 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, |
| + 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, |
| + 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56, |
| + 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, |
| + 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, |
| + 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, |
| + 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, |
| + 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, |
| + 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, |
| + 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, |
| + 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, |
| + 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, |
| + 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, |
| + 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, |
| + 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, |
| + 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, |
| + 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, |
| + 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, |
| + 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, |
| + 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, |
| + 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, |
| + 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd, |
| + 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, |
| + 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, |
| + 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, |
| + 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, |
| + 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, |
| + 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, |
| + 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, |
| + 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, |
| + 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, |
| + 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, |
| + 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, |
| + 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 |
| +}; |
| +static const u8 enc_assoc010[] __initconst = { |
| + 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, |
| + 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 |
| +}; |
| +static const u8 enc_nonce010[] __initconst = { |
| + 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 |
| +}; |
| +static const u8 enc_key010[] __initconst = { |
| + 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, |
| + 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, |
| + 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, |
| + 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 |
| +}; |
| + |
| +static const u8 enc_input011[] __initconst = { |
| + 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, |
| + 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, |
| + 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, |
| + 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, |
| + 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, |
| + 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, |
| + 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, |
| + 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, |
| + 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, |
| + 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, |
| + 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, |
| + 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, |
| + 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, |
| + 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, |
| + 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, |
| + 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7, |
| + 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, |
| + 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, |
| + 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, |
| + 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, |
| + 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, |
| + 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, |
| + 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, |
| + 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, |
| + 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, |
| + 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, |
| + 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, |
| + 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, |
| + 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, |
| + 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, |
| + 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, |
| + 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, |
| + 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, |
| + 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, |
| + 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, |
| + 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, |
| + 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, |
| + 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, |
| + 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, |
| + 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, |
| + 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, |
| + 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, |
| + 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, |
| + 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, |
| + 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, |
| + 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, |
| + 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, |
| + 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, |
| + 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, |
| + 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, |
| + 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, |
| + 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, |
| + 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, |
| + 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, |
| + 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20, |
| + 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, |
| + 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, |
| + 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, |
| + 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, |
| + 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, |
| + 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, |
| + 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, |
| + 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, |
| + 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, |
| + 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, |
| + 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, |
| + 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, |
| + 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, |
| + 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, |
| + 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, |
| + 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, |
| + 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, |
| + 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, |
| + 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, |
| + 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, |
| + 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, |
| + 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, |
| + 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, |
| + 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, |
| + 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, |
| + 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, |
| + 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, |
| + 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, |
| + 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, |
| + 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, |
| + 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, |
| + 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, |
| + 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, |
| + 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, |
| + 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, |
| + 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, |
| + 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, |
| + 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, |
| + 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e, |
| + 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, |
| + 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, |
| + 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, |
| + 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, |
| + 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, |
| + 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, |
| + 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, |
| + 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, |
| + 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, |
| + 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, |
| + 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, |
| + 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, |
| + 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, |
| + 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, |
| + 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, |
| + 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, |
| + 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, |
| + 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, |
| + 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, |
| + 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, |
| + 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, |
| + 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, |
| + 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, |
| + 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, |
| + 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, |
| + 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, |
| + 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, |
| + 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, |
| + 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, |
| + 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, |
| + 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, |
| + 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, |
| + 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, |
| + 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, |
| + 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, |
| + 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, |
| + 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, |
| + 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc, |
| + 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0, |
| + 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, |
| + 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, |
| + 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, |
| + 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, |
| + 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, |
| + 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, |
| + 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, |
| + 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, |
| + 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, |
| + 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, |
| + 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, |
| + 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, |
| + 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, |
| + 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, |
| + 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, |
| + 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, |
| + 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, |
| + 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, |
| + 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, |
| + 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, |
| + 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, |
| + 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, |
| + 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, |
| + 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, |
| + 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, |
| + 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, |
| + 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, |
| + 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, |
| + 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, |
| + 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, |
| + 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, |
| + 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, |
| + 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, |
| + 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, |
| + 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, |
| + 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, |
| + 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, |
| + 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76, |
| + 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5, |
| + 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, |
| + 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, |
| + 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, |
| + 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, |
| + 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, |
| + 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, |
| + 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, |
| + 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, |
| + 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, |
| + 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, |
| + 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, |
| + 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, |
| + 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, |
| + 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, |
| + 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, |
| + 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, |
| + 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, |
| + 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, |
| + 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, |
| + 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, |
| + 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, |
| + 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, |
| + 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, |
| + 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, |
| + 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, |
| + 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, |
| + 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, |
| + 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, |
| + 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, |
| + 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, |
| + 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, |
| + 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, |
| + 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, |
| + 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, |
| + 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, |
| + 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, |
| + 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, |
| + 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9, |
| + 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, |
| + 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, |
| + 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, |
| + 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, |
| + 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, |
| + 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, |
| + 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, |
| + 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, |
| + 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, |
| + 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, |
| + 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, |
| + 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, |
| + 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, |
| + 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, |
| + 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, |
| + 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, |
| + 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, |
| + 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, |
| + 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, |
| + 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, |
| + 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, |
| + 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, |
| + 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, |
| + 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, |
| + 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, |
| + 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, |
| + 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, |
| + 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, |
| + 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, |
| + 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, |
| + 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, |
| + 0x10, 0x1e, 0xbf, 0xec, 0xa8 |
| +}; |
| +static const u8 enc_output011[] __initconst = { |
| + 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, |
| + 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, |
| + 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, |
| + 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, |
| + 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, |
| + 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, |
| + 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, |
| + 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd, |
| + 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, |
| + 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, |
| + 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, |
| + 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, |
| + 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, |
| + 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, |
| + 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, |
| + 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, |
| + 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, |
| + 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, |
| + 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, |
| + 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, |
| + 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, |
| + 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, |
| + 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, |
| + 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91, |
| + 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, |
| + 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, |
| + 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, |
| + 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, |
| + 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, |
| + 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, |
| + 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, |
| + 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, |
| + 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, |
| + 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, |
| + 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, |
| + 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, |
| + 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, |
| + 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, |
| + 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, |
| + 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, |
| + 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, |
| + 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, |
| + 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, |
| + 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, |
| + 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, |
| + 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, |
| + 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1, |
| + 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, |
| + 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, |
| + 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, |
| + 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, |
| + 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, |
| + 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, |
| + 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, |
| + 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, |
| + 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, |
| + 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, |
| + 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, |
| + 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, |
| + 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, |
| + 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, |
| + 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, |
| + 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, |
| + 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, |
| + 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, |
| + 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, |
| + 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, |
| + 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, |
| + 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, |
| + 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, |
| + 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, |
| + 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, |
| + 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, |
| + 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, |
| + 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, |
| + 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, |
| + 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, |
| + 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, |
| + 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, |
| + 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, |
| + 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, |
| + 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, |
| + 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, |
| + 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, |
| + 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, |
| + 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b, |
| + 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, |
| + 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, |
| + 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, |
| + 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, |
| + 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, |
| + 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, |
| + 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, |
| + 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, |
| + 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, |
| + 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, |
| + 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, |
| + 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, |
| + 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, |
| + 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, |
| + 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, |
| + 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, |
| + 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, |
| + 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, |
| + 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, |
| + 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, |
| + 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, |
| + 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, |
| + 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, |
| + 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, |
| + 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, |
| + 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, |
| + 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, |
| + 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, |
| + 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, |
| + 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, |
| + 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, |
| + 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, |
| + 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, |
| + 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, |
| + 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, |
| + 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, |
| + 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, |
| + 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, |
| + 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35, |
| + 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, |
| + 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, |
| + 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, |
| + 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, |
| + 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, |
| + 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, |
| + 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, |
| + 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, |
| + 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, |
| + 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, |
| + 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, |
| + 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, |
| + 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, |
| + 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, |
| + 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, |
| + 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, |
| + 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, |
| + 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, |
| + 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, |
| + 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, |
| + 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, |
| + 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, |
| + 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, |
| + 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, |
| + 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, |
| + 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, |
| + 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, |
| + 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, |
| + 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, |
| + 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, |
| + 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, |
| + 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, |
| + 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, |
| + 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, |
| + 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, |
| + 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, |
| + 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, |
| + 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, |
| + 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, |
| + 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, |
| + 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, |
| + 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, |
| + 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, |
| + 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, |
| + 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, |
| + 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, |
| + 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, |
| + 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, |
| + 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, |
| + 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, |
| + 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, |
| + 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, |
| + 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, |
| + 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, |
| + 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, |
| + 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91, |
| + 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, |
| + 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, |
| + 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, |
| + 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, |
| + 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, |
| + 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, |
| + 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, |
| + 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, |
| + 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, |
| + 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, |
| + 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, |
| + 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, |
| + 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, |
| + 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, |
| + 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, |
| + 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, |
| + 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, |
| + 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, |
| + 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, |
| + 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, |
| + 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, |
| + 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, |
| + 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, |
| + 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, |
| + 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, |
| + 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, |
| + 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, |
| + 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, |
| + 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, |
| + 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, |
| + 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, |
| + 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, |
| + 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, |
| + 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, |
| + 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, |
| + 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, |
| + 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, |
| + 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, |
| + 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23, |
| + 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, |
| + 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, |
| + 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, |
| + 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, |
| + 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, |
| + 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, |
| + 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, |
| + 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, |
| + 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, |
| + 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, |
| + 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, |
| + 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, |
| + 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, |
| + 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, |
| + 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, |
| + 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, |
| + 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, |
| + 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, |
| + 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, |
| + 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, |
| + 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, |
| + 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, |
| + 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, |
| + 0x2b, 0xdf, 0xcd, 0xf9, 0x3c |
| +}; |
| +static const u8 enc_assoc011[] __initconst = { |
| + 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 |
| +}; |
| +static const u8 enc_nonce011[] __initconst = { |
| + 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa |
| +}; |
| +static const u8 enc_key011[] __initconst = { |
| + 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, |
| + 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, |
| + 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, |
| + 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 |
| +}; |
| + |
| +static const u8 enc_input012[] __initconst = { |
| + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, |
| + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, |
| + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, |
| + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, |
| + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, |
| + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, |
| + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, |
| + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, |
| + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, |
| + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, |
| + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, |
| + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, |
| + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, |
| + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, |
| + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, |
| + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, |
| + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, |
| + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, |
| + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, |
| + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, |
| + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, |
| + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, |
| + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, |
| + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, |
| + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, |
| + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, |
| + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, |
| + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, |
| + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, |
| + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, |
| + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, |
| + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, |
| + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, |
| + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, |
| + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, |
| + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, |
| + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, |
| + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, |
| + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, |
| + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, |
| + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, |
| + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, |
| + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, |
| + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, |
| + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, |
| + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, |
| + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, |
| + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, |
| + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, |
| + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, |
| + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, |
| + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, |
| + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, |
| + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, |
| + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, |
| + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, |
| + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, |
| + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, |
| + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, |
| + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, |
| + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, |
| + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, |
| + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, |
| + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, |
| + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, |
| + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, |
| + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, |
| + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, |
| + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, |
| + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, |
| + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, |
| + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, |
| + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, |
| + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, |
| + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, |
| + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, |
| + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, |
| + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, |
| + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, |
| + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, |
| + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, |
| + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, |
| + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, |
| + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, |
| + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, |
| + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, |
| + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, |
| + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, |
| + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, |
| + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, |
| + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, |
| + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, |
| + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, |
| + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, |
| + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, |
| + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, |
| + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, |
| + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, |
| + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, |
| + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, |
| + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, |
| + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, |
| + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, |
| + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, |
| + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, |
| + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, |
| + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, |
| + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, |
| + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, |
| + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, |
| + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, |
| + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, |
| + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, |
| + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, |
| + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, |
| + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, |
| + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, |
| + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, |
| + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, |
| + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, |
| + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, |
| + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, |
| + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, |
| + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, |
| + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, |
| + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, |
| + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, |
| + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, |
| + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, |
| + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, |
| + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, |
| + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, |
| + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, |
| + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, |
| + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, |
| + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, |
| + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, |
| + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, |
| + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, |
| + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, |
| + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, |
| + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, |
| + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, |
| + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, |
| + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, |
| + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, |
| + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, |
| + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, |
| + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, |
| + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, |
| + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, |
| + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, |
| + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, |
| + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, |
| + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, |
| + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, |
| + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, |
| + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, |
| + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, |
| + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, |
| + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, |
| + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, |
| + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, |
| + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, |
| + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, |
| + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, |
| + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, |
| + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, |
| + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, |
| + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, |
| + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, |
| + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, |
| + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, |
| + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, |
| + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, |
| + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, |
| + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, |
| + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, |
| + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, |
| + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, |
| + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, |
| + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, |
| + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, |
| + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, |
| + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, |
| + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, |
| + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, |
| + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, |
| + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, |
| + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, |
| + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, |
| + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, |
| + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, |
| + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, |
| + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, |
| + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, |
| + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, |
| + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, |
| + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, |
| + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, |
| + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, |
| + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, |
| + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, |
| + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, |
| + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, |
| + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, |
| + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, |
| + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, |
| + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, |
| + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, |
| + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, |
| + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, |
| + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, |
| + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, |
| + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, |
| + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, |
| + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, |
| + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, |
| + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, |
| + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, |
| + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, |
| + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, |
| + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, |
| + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, |
| + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, |
| + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, |
| + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, |
| + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, |
| + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, |
| + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, |
| + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, |
| + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, |
| + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, |
| + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, |
| + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, |
| + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, |
| + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, |
| + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, |
| + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, |
| + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, |
| + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, |
| + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, |
| + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, |
| + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, |
| + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, |
| + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, |
| + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, |
| + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, |
| + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, |
| + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, |
| + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, |
| + 0x78, 0xec, 0x00 |
| +}; |
| +static const u8 enc_output012[] __initconst = { |
| + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, |
| + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, |
| + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, |
| + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, |
| + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, |
| + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, |
| + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, |
| + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, |
| + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, |
| + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, |
| + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, |
| + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, |
| + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, |
| + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, |
| + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, |
| + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, |
| + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, |
| + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, |
| + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, |
| + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, |
| + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, |
| + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, |
| + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, |
| + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, |
| + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, |
| + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, |
| + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, |
| + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, |
| + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, |
| + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, |
| + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, |
| + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, |
| + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, |
| + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, |
| + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, |
| + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, |
| + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, |
| + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, |
| + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, |
| + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, |
| + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, |
| + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, |
| + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, |
| + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, |
| + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, |
| + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, |
| + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, |
| + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, |
| + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, |
| + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, |
| + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, |
| + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, |
| + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, |
| + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, |
| + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, |
| + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, |
| + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, |
| + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, |
| + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, |
| + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, |
| + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, |
| + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, |
| + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, |
| + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, |
| + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, |
| + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, |
| + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, |
| + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, |
| + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, |
| + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, |
| + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, |
| + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, |
| + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, |
| + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, |
| + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, |
| + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, |
| + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, |
| + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, |
| + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, |
| + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, |
| + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, |
| + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, |
| + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, |
| + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, |
| + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, |
| + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, |
| + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, |
| + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, |
| + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, |
| + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, |
| + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, |
| + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, |
| + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, |
| + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, |
| + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, |
| + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, |
| + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, |
| + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, |
| + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, |
| + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, |
| + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, |
| + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, |
| + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, |
| + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, |
| + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, |
| + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, |
| + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, |
| + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, |
| + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, |
| + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, |
| + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, |
| + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, |
| + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, |
| + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, |
| + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, |
| + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, |
| + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, |
| + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, |
| + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, |
| + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, |
| + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, |
| + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, |
| + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, |
| + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, |
| + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, |
| + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, |
| + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, |
| + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, |
| + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, |
| + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, |
| + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, |
| + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, |
| + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, |
| + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, |
| + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, |
| + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, |
| + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, |
| + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, |
| + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, |
| + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, |
| + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, |
| + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, |
| + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, |
| + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, |
| + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, |
| + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, |
| + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, |
| + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, |
| + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, |
| + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, |
| + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, |
| + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, |
| + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, |
| + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, |
| + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, |
| + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, |
| + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, |
| + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, |
| + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, |
| + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, |
| + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, |
| + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, |
| + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, |
| + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, |
| + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, |
| + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, |
| + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, |
| + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, |
| + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, |
| + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, |
| + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, |
| + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, |
| + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, |
| + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, |
| + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, |
| + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, |
| + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, |
| + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, |
| + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, |
| + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, |
| + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, |
| + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, |
| + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, |
| + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, |
| + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, |
| + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, |
| + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, |
| + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, |
| + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, |
| + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, |
| + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, |
| + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, |
| + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, |
| + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, |
| + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, |
| + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, |
| + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, |
| + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, |
| + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, |
| + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, |
| + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, |
| + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, |
| + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, |
| + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, |
| + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, |
| + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, |
| + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, |
| + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, |
| + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, |
| + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, |
| + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, |
| + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, |
| + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, |
| + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, |
| + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, |
| + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, |
| + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, |
| + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, |
| + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, |
| + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, |
| + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, |
| + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, |
| + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, |
| + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, |
| + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, |
| + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, |
| + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, |
| + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, |
| + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, |
| + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, |
| + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, |
| + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, |
| + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, |
| + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, |
| + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, |
| + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, |
| + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, |
| + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, |
| + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, |
| + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, |
| + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, |
| + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, |
| + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, |
| + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, |
| + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, |
| + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, |
| + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, |
| + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, |
| + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, |
| + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, |
| + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, |
| + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, |
| + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, |
| + 0x70, 0xcf, 0xd6 |
| +}; |
| +static const u8 enc_assoc012[] __initconst = { |
| + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, |
| + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, |
| + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, |
| + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, |
| + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, |
| + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, |
| + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, |
| + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 |
| +}; |
| +static const u8 enc_nonce012[] __initconst = { |
| + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 |
| +}; |
| +static const u8 enc_key012[] __initconst = { |
| + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, |
| + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, |
| + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, |
| + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input053[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, |
| + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, |
| + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe |
| +}; |
| +static const u8 enc_output053[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7, |
| + 0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07 |
| +}; |
| +static const u8 enc_assoc053[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce053[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key053[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input054[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, |
| + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, |
| + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, |
| + 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, |
| + 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, |
| + 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, |
| + 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd |
| +}; |
| +static const u8 enc_output054[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2, |
| + 0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce |
| +}; |
| +static const u8 enc_assoc054[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce054[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key054[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input055[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, |
| + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, |
| + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, |
| + 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, |
| + 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, |
| + 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, |
| + 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd, |
| + 0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa, |
| + 0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02, |
| + 0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80, |
| + 0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4, |
| + 0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7, |
| + 0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed, |
| + 0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38, |
| + 0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7 |
| +}; |
| +static const u8 enc_output055[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3, |
| + 0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e |
| +}; |
| +static const u8 enc_assoc055[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce055[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key055[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input056[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, |
| + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41 |
| +}; |
| +static const u8 enc_output056[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27, |
| + 0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6 |
| +}; |
| +static const u8 enc_assoc056[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce056[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key056[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input057[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, |
| + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, |
| + 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, |
| + 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, |
| + 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, |
| + 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42 |
| +}; |
| +static const u8 enc_output057[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b, |
| + 0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68 |
| +}; |
| +static const u8 enc_assoc057[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce057[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key057[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input058[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, |
| + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, |
| + 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, |
| + 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, |
| + 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, |
| + 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42, |
| + 0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05, |
| + 0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd, |
| + 0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f, |
| + 0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b, |
| + 0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38, |
| + 0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12, |
| + 0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7, |
| + 0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 0x38 |
| +}; |
| +static const u8 enc_output058[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91, |
| + 0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62 |
| +}; |
| +static const u8 enc_assoc058[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce058[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key058[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input059[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, |
| + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, |
| + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, |
| + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e |
| +}; |
| +static const u8 enc_output059[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75, |
| + 0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4 |
| +}; |
| +static const u8 enc_assoc059[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 |
| +}; |
| +static const u8 enc_nonce059[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key059[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input060[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, |
| + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, |
| + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, |
| + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, |
| + 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, |
| + 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, |
| + 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, |
| + 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d |
| +}; |
| +static const u8 enc_output060[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba, |
| + 0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a |
| +}; |
| +static const u8 enc_assoc060[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 |
| +}; |
| +static const u8 enc_nonce060[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key060[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input061[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, |
| + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, |
| + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, |
| + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, |
| + 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, |
| + 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, |
| + 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, |
| + 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d, |
| + 0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a, |
| + 0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82, |
| + 0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00, |
| + 0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44, |
| + 0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47, |
| + 0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d, |
| + 0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8, |
| + 0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47 |
| +}; |
| +static const u8 enc_output061[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0xfe, 0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f, |
| + 0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a |
| +}; |
| +static const u8 enc_assoc061[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 |
| +}; |
| +static const u8 enc_nonce061[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key061[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input062[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, |
| + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, |
| + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, |
| + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1 |
| +}; |
| +static const u8 enc_output062[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59, |
| + 0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19 |
| +}; |
| +static const u8 enc_assoc062[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f |
| +}; |
| +static const u8 enc_nonce062[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key062[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input063[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, |
| + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, |
| + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, |
| + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, |
| + 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, |
| + 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, |
| + 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, |
| + 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2 |
| +}; |
| +static const u8 enc_output063[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2, |
| + 0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c |
| +}; |
| +static const u8 enc_assoc063[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f |
| +}; |
| +static const u8 enc_nonce063[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key063[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input064[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, |
| + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, |
| + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, |
| + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, |
| + 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, |
| + 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, |
| + 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, |
| + 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2, |
| + 0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85, |
| + 0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d, |
| + 0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff, |
| + 0x67, 0xe5, 0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb, |
| + 0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8, |
| + 0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92, |
| + 0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47, |
| + 0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8 |
| +}; |
| +static const u8 enc_output064[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5, |
| + 0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06 |
| +}; |
| +static const u8 enc_assoc064[] __initconst = { |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f |
| +}; |
| +static const u8 enc_nonce064[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key064[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input065[] __initconst = { |
| + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, |
| + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, |
| + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, |
| + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41 |
| +}; |
| +static const u8 enc_output065[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf, |
| + 0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67 |
| +}; |
| +static const u8 enc_assoc065[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce065[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key065[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input066[] __initconst = { |
| + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, |
| + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, |
| + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, |
| + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, |
| + 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, |
| + 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, |
| + 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, |
| + 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42 |
| +}; |
| +static const u8 enc_output066[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89, |
| + 0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90 |
| +}; |
| +static const u8 enc_assoc066[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce066[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key066[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input067[] __initconst = { |
| + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, |
| + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, |
| + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, |
| + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, |
| + 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, |
| + 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, |
| + 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, |
| + 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42, |
| + 0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05, |
| + 0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd, |
| + 0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f, |
| + 0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b, |
| + 0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38, |
| + 0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12, |
| + 0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7, |
| + 0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38 |
| +}; |
| +static const u8 enc_output067[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94, |
| + 0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22 |
| +}; |
| +static const u8 enc_assoc067[] __initconst = { |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, |
| + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce067[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key067[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input068[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, |
| + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41 |
| +}; |
| +static const u8 enc_output068[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9, |
| + 0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d |
| +}; |
| +static const u8 enc_assoc068[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce068[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key068[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input069[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, |
| + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, |
| + 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, |
| + 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, |
| + 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a, |
| + 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42 |
| +}; |
| +static const u8 enc_output069[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde, |
| + 0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82 |
| +}; |
| +static const u8 enc_assoc069[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce069[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key069[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input070[] __initconst = { |
| + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, |
| + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, |
| + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, |
| + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, |
| + 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, |
| + 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, |
| + 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a, |
| + 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42, |
| + 0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05, |
| + 0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd, |
| + 0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f, |
| + 0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b, |
| + 0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38, |
| + 0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12, |
| + 0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7, |
| + 0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38 |
| +}; |
| +static const u8 enc_output070[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3, |
| + 0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71 |
| +}; |
| +static const u8 enc_assoc070[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce070[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key070[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input071[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, |
| + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe |
| +}; |
| +static const u8 enc_output071[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5, |
| + 0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20 |
| +}; |
| +static const u8 enc_assoc071[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce071[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key071[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input072[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, |
| + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, |
| + 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, |
| + 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, |
| + 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, |
| + 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd |
| +}; |
| +static const u8 enc_output072[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e, |
| + 0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4 |
| +}; |
| +static const u8 enc_assoc072[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce072[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key072[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input073[] __initconst = { |
| + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, |
| + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, |
| + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, |
| + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, |
| + 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, |
| + 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, |
| + 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, |
| + 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd, |
| + 0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa, |
| + 0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02, |
| + 0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80, |
| + 0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4, |
| + 0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7, |
| + 0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed, |
| + 0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38, |
| + 0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7 |
| +}; |
| +static const u8 enc_output073[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51, |
| + 0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f |
| +}; |
| +static const u8 enc_assoc073[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_nonce073[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 |
| +}; |
| +static const u8 enc_key073[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input076[] __initconst = { |
| + 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85, |
| + 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02, |
| + 0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09, |
| + 0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8, |
| + 0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd, |
| + 0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85, |
| + 0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39, |
| + 0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae, |
| + 0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed, |
| + 0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4, |
| + 0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c, |
| + 0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33, |
| + 0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19, |
| + 0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11, |
| + 0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9, |
| + 0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5 |
| +}; |
| +static const u8 enc_output076[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d, |
| + 0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63 |
| +}; |
| +static const u8 enc_assoc076[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce076[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0 |
| +}; |
| +static const u8 enc_key076[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input077[] __initconst = { |
| + 0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae, |
| + 0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16, |
| + 0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7, |
| + 0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61, |
| + 0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2, |
| + 0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf, |
| + 0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf, |
| + 0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48, |
| + 0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8, |
| + 0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa, |
| + 0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87, |
| + 0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32, |
| + 0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1, |
| + 0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b, |
| + 0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c, |
| + 0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f |
| +}; |
| +static const u8 enc_output077[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4, |
| + 0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa |
| +}; |
| +static const u8 enc_assoc077[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce077[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66 |
| +}; |
| +static const u8 enc_key077[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input078[] __initconst = { |
| + 0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef, |
| + 0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5, |
| + 0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c, |
| + 0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d, |
| + 0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf, |
| + 0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6, |
| + 0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe, |
| + 0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5, |
| + 0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62, |
| + 0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c, |
| + 0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f, |
| + 0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2, |
| + 0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33, |
| + 0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e, |
| + 0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9, |
| + 0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62 |
| +}; |
| +static const u8 enc_output078[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7, |
| + 0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d |
| +}; |
| +static const u8 enc_assoc078[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce078[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90 |
| +}; |
| +static const u8 enc_key078[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input079[] __initconst = { |
| + 0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9, |
| + 0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6, |
| + 0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c, |
| + 0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82, |
| + 0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21, |
| + 0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12, |
| + 0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c, |
| + 0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d, |
| + 0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32, |
| + 0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84, |
| + 0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3, |
| + 0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24, |
| + 0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94, |
| + 0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53, |
| + 0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71, |
| + 0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd |
| +}; |
| +static const u8 enc_output079[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2, |
| + 0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e |
| +}; |
| +static const u8 enc_assoc079[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce079[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a |
| +}; |
| +static const u8 enc_key079[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input080[] __initconst = { |
| + 0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5, |
| + 0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9, |
| + 0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b, |
| + 0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc, |
| + 0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38, |
| + 0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27, |
| + 0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83, |
| + 0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b, |
| + 0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda, |
| + 0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4, |
| + 0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc, |
| + 0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b, |
| + 0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4, |
| + 0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00, |
| + 0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d, |
| + 0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1 |
| +}; |
| +static const u8 enc_output080[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a, |
| + 0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02 |
| +}; |
| +static const u8 enc_assoc080[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce080[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40 |
| +}; |
| +static const u8 enc_key080[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input081[] __initconst = { |
| + 0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92, |
| + 0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c, |
| + 0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9, |
| + 0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06, |
| + 0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a, |
| + 0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa, |
| + 0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07, |
| + 0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6, |
| + 0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06, |
| + 0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8, |
| + 0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5, |
| + 0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5, |
| + 0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f, |
| + 0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86, |
| + 0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23, |
| + 0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1 |
| +}; |
| +static const u8 enc_output081[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba, |
| + 0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0 |
| +}; |
| +static const u8 enc_assoc081[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce081[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35 |
| +}; |
| +static const u8 enc_key081[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input082[] __initconst = { |
| + 0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb, |
| + 0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49, |
| + 0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16, |
| + 0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d, |
| + 0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9, |
| + 0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e, |
| + 0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d, |
| + 0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc, |
| + 0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6, |
| + 0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca, |
| + 0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83, |
| + 0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08, |
| + 0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95, |
| + 0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66, |
| + 0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83, |
| + 0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07 |
| +}; |
| +static const u8 enc_output082[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42, |
| + 0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae |
| +}; |
| +static const u8 enc_assoc082[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce082[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5 |
| +}; |
| +static const u8 enc_key082[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input083[] __initconst = { |
| + 0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58, |
| + 0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84, |
| + 0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5, |
| + 0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf, |
| + 0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39, |
| + 0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03, |
| + 0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f, |
| + 0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa, |
| + 0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08, |
| + 0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59, |
| + 0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56, |
| + 0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9, |
| + 0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43, |
| + 0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa, |
| + 0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba, |
| + 0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a |
| +}; |
| +static const u8 enc_output083[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b, |
| + 0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19 |
| +}; |
| +static const u8 enc_assoc083[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce083[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4 |
| +}; |
| +static const u8 enc_key083[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input084[] __initconst = { |
| + 0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18, |
| + 0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b, |
| + 0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99, |
| + 0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c, |
| + 0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde, |
| + 0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2, |
| + 0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1, |
| + 0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8, |
| + 0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b, |
| + 0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f, |
| + 0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77, |
| + 0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8, |
| + 0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c, |
| + 0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf, |
| + 0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97, |
| + 0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee |
| +}; |
| +static const u8 enc_output084[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab, |
| + 0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87 |
| +}; |
| +static const u8 enc_assoc084[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce084[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8 |
| +}; |
| +static const u8 enc_key084[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input085[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6, |
| + 0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed, |
| + 0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7, |
| + 0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37, |
| + 0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a, |
| + 0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b |
| +}; |
| +static const u8 enc_output085[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68, |
| + 0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43 |
| +}; |
| +static const u8 enc_assoc085[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce085[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key085[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input093[] __initconst = { |
| + 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d, |
| + 0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44, |
| + 0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3, |
| + 0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b, |
| + 0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb, |
| + 0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c |
| +}; |
| +static const u8 enc_output093[] __initconst = { |
| + 0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14, |
| + 0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7, |
| + 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, |
| + 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, |
| + 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, |
| + 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, |
| + 0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77, |
| + 0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a |
| +}; |
| +static const u8 enc_assoc093[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce093[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key093[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input094[] __initconst = { |
| + 0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90, |
| + 0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45, |
| + 0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef, |
| + 0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09, |
| + 0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20, |
| + 0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10 |
| +}; |
| +static const u8 enc_output094[] __initconst = { |
| + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66, |
| + 0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02, |
| + 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, |
| + 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, |
| + 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, |
| + 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, |
| + 0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6, |
| + 0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc |
| +}; |
| +static const u8 enc_assoc094[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce094[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key094[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input095[] __initconst = { |
| + 0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b, |
| + 0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46, |
| + 0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6, |
| + 0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a, |
| + 0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79, |
| + 0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13 |
| +}; |
| +static const u8 enc_output095[] __initconst = { |
| + 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad, |
| + 0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01, |
| + 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, |
| + 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, |
| + 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, |
| + 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, |
| + 0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02, |
| + 0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0 |
| +}; |
| +static const u8 enc_assoc095[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce095[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key095[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input096[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60, |
| + 0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c, |
| + 0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67, |
| + 0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94, |
| + 0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94, |
| + 0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04 |
| +}; |
| +static const u8 enc_output096[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96, |
| + 0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b, |
| + 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, |
| + 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, |
| + 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, |
| + 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, |
| + 0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73, |
| + 0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8 |
| +}; |
| +static const u8 enc_assoc096[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce096[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key096[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input097[] __initconst = { |
| + 0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6, |
| + 0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47, |
| + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01, |
| + 0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96, |
| + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce, |
| + 0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f |
| +}; |
| +static const u8 enc_output097[] __initconst = { |
| + 0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00, |
| + 0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, |
| + 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, |
| + 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, |
| + 0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b, |
| + 0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26 |
| +}; |
| +static const u8 enc_assoc097[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce097[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key097[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input098[] __initconst = { |
| + 0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62, |
| + 0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45, |
| + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0, |
| + 0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8, |
| + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 0x65, 0x6f, |
| + 0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1 |
| +}; |
| +static const u8 enc_output098[] __initconst = { |
| + 0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94, |
| + 0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, |
| + 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, |
| + 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, |
| + 0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e, |
| + 0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b |
| +}; |
| +static const u8 enc_assoc098[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce098[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key098[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input099[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12, |
| + 0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98, |
| + 0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95, |
| + 0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48, |
| + 0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66, |
| + 0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8 |
| +}; |
| +static const u8 enc_output099[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 0xe4, |
| + 0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf, |
| + 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, |
| + 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, |
| + 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, |
| + 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, |
| + 0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56, |
| + 0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1 |
| +}; |
| +static const u8 enc_assoc099[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce099[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key099[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input100[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70, |
| + 0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd, |
| + 0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77, |
| + 0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75, |
| + 0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84, |
| + 0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5 |
| +}; |
| +static const u8 enc_output100[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86, |
| + 0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa, |
| + 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, |
| + 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, |
| + 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, |
| + 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, |
| + 0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59, |
| + 0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6 |
| +}; |
| +static const u8 enc_assoc100[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce100[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key100[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input101[] __initconst = { |
| + 0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c, |
| + 0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde, |
| + 0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92, |
| + 0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f, |
| + 0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea, |
| + 0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88 |
| +}; |
| +static const u8 enc_output101[] __initconst = { |
| + 0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5, |
| + 0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d, |
| + 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, |
| + 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, |
| + 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, |
| + 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, |
| + 0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65, |
| + 0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05 |
| +}; |
| +static const u8 enc_assoc101[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce101[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key101[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input102[] __initconst = { |
| + 0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0, |
| + 0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d, |
| + 0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73, |
| + 0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b, |
| + 0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b, |
| + 0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c |
| +}; |
| +static const u8 enc_output102[] __initconst = { |
| + 0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39, |
| + 0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e, |
| + 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, |
| + 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, |
| + 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, |
| + 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, |
| + 0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56, |
| + 0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04 |
| +}; |
| +static const u8 enc_assoc102[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce102[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key102[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input103[] __initconst = { |
| + 0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d, |
| + 0x44, 0xa1, 0xaf, 0x1e, 0xab, 0xde, 0xb4, 0xce, |
| + 0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c, |
| + 0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a, |
| + 0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14, |
| + 0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d |
| +}; |
| +static const u8 enc_output103[] __initconst = { |
| + 0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94, |
| + 0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d, |
| + 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, |
| + 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, |
| + 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, |
| + 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, |
| + 0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a, |
| + 0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08 |
| +}; |
| +static const u8 enc_assoc103[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce103[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key103[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input104[] __initconst = { |
| + 0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac, |
| + 0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b, |
| + 0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9, |
| + 0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98, |
| + 0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1, |
| + 0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f |
| +}; |
| +static const u8 enc_output104[] __initconst = { |
| + 0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35, |
| + 0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88, |
| + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, |
| + 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, |
| + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, |
| + 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, |
| + 0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d, |
| + 0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d |
| +}; |
| +static const u8 enc_assoc104[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce104[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key104[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input105[] __initconst = { |
| + 0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5, |
| + 0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99, |
| + 0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e, |
| + 0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a, |
| + 0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46, |
| + 0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d |
| +}; |
| +static const u8 enc_output105[] __initconst = { |
| + 0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 0x3c, |
| + 0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a, |
| + 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, |
| + 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, |
| + 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, |
| + 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, |
| + 0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88, |
| + 0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd |
| +}; |
| +static const u8 enc_assoc105[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce105[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key105[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input106[] __initconst = { |
| + 0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06, |
| + 0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5, |
| + 0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68, |
| + 0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a, |
| + 0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10, |
| + 0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d |
| +}; |
| +static const u8 enc_output106[] __initconst = { |
| + 0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f, |
| + 0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76, |
| + 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, |
| + 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, |
| + 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, |
| + 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, |
| + 0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88, |
| + 0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a |
| +}; |
| +static const u8 enc_assoc106[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce106[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key106[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input107[] __initconst = { |
| + 0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71, |
| + 0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44, |
| + 0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c, |
| + 0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a, |
| + 0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3, |
| + 0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13 |
| +}; |
| +static const u8 enc_output107[] __initconst = { |
| + 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87, |
| + 0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03, |
| + 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, |
| + 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, |
| + 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, |
| + 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, |
| + 0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02, |
| + 0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88 |
| +}; |
| +static const u8 enc_assoc107[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce107[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key107[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input108[] __initconst = { |
| + 0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43, |
| + 0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6, |
| + 0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11, |
| + 0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99, |
| + 0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69, |
| + 0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e |
| +}; |
| +static const u8 enc_output108[] __initconst = { |
| + 0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda, |
| + 0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15, |
| + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, |
| + 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, |
| + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, |
| + 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, |
| + 0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96, |
| + 0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a |
| +}; |
| +static const u8 enc_assoc108[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce108[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key108[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input109[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a, |
| + 0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb, |
| + 0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96, |
| + 0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f, |
| + 0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59, |
| + 0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16 |
| +}; |
| +static const u8 enc_output109[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac, |
| + 0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c, |
| + 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, |
| + 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, |
| + 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, |
| + 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, |
| + 0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1, |
| + 0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb |
| +}; |
| +static const u8 enc_assoc109[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce109[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key109[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input110[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5, |
| + 0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9, |
| + 0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b, |
| + 0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b, |
| + 0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4, |
| + 0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12 |
| +}; |
| +static const u8 enc_output110[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43, |
| + 0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e, |
| + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, |
| + 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, |
| + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, |
| + 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, |
| + 0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6, |
| + 0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe |
| +}; |
| +static const u8 enc_assoc110[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce110[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key110[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input111[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda, |
| + 0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55, |
| + 0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5, |
| + 0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f, |
| + 0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a, |
| + 0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16 |
| +}; |
| +static const u8 enc_output111[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c, |
| + 0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12, |
| + 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, |
| + 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, |
| + 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, |
| + 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, |
| + 0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4, |
| + 0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9 |
| +}; |
| +static const u8 enc_assoc111[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce111[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key111[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input112[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38, |
| + 0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a, |
| + 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65, |
| + 0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a, |
| + 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa, |
| + 0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13 |
| +}; |
| +static const u8 enc_output112[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce, |
| + 0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d, |
| + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, |
| + 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, |
| + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, |
| + 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, |
| + 0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7, |
| + 0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce |
| +}; |
| +static const u8 enc_assoc112[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce112[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key112[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input113[] __initconst = { |
| + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86, |
| + 0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f, |
| + 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1, |
| + 0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f, |
| + 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e, |
| + 0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16 |
| +}; |
| +static const u8 enc_output113[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70, |
| + 0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48, |
| + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, |
| + 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, |
| + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, |
| + 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, |
| + 0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa, |
| + 0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3 |
| +}; |
| +static const u8 enc_assoc113[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce113[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key113[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input114[] __initconst = { |
| + 0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73, |
| + 0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc, |
| + 0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a, |
| + 0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f, |
| + 0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2, |
| + 0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88 |
| +}; |
| +static const u8 enc_output114[] __initconst = { |
| + 0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea, |
| + 0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f, |
| + 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, |
| + 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, |
| + 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, |
| + 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, |
| + 0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69, |
| + 0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1 |
| +}; |
| +static const u8 enc_assoc114[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce114[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key114[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input115[] __initconst = { |
| + 0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 0xda, 0xeb, |
| + 0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68, |
| + 0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, |
| + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, |
| + 0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33, |
| + 0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98, |
| + 0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, |
| + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, |
| + 0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b, |
| + 0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f |
| +}; |
| +static const u8 enc_output115[] __initconst = { |
| + 0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72, |
| + 0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb, |
| + 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, |
| + 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, |
| + 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, |
| + 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, |
| + 0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74, |
| + 0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05 |
| +}; |
| +static const u8 enc_assoc115[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce115[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key115[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input116[] __initconst = { |
| + 0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd, |
| + 0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44, |
| + 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea, |
| + 0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a, |
| + 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25, |
| + 0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13 |
| +}; |
| +static const u8 enc_output116[] __initconst = { |
| + 0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b, |
| + 0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03, |
| + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, |
| + 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, |
| + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, |
| + 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, |
| + 0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50, |
| + 0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2 |
| +}; |
| +static const u8 enc_assoc116[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce116[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key116[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input117[] __initconst = { |
| + 0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d, |
| + 0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47, |
| + 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06, |
| + 0x6a, 0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08, |
| + 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9, |
| + 0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11 |
| +}; |
| +static const u8 enc_output117[] __initconst = { |
| + 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb, |
| + 0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00, |
| + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, |
| + 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, |
| + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, |
| + 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, |
| + 0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53, |
| + 0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7 |
| +}; |
| +static const u8 enc_assoc117[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce117[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key117[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - edge case intermediate sums in poly1305 */ |
| +static const u8 enc_input118[] __initconst = { |
| + 0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, |
| + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, |
| + 0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03, |
| + 0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43, |
| + 0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, |
| + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, |
| + 0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a, |
| + 0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f, |
| + 0x4f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, |
| + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, |
| + 0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85, |
| + 0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16 |
| +}; |
| +static const u8 enc_output118[] __initconst = { |
| + 0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5, |
| + 0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04, |
| + 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, |
| + 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, |
| + 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, |
| + 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, |
| + 0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0, |
| + 0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1 |
| +}; |
| +static const u8 enc_assoc118[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce118[] __initconst = { |
| + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 |
| +}; |
| +static const u8 enc_key118[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +static const struct chacha20poly1305_testvec |
| +chacha20poly1305_enc_vectors[] __initconst = { |
| + { enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001, |
| + sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) }, |
| + { enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002, |
| + sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) }, |
| + { enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003, |
| + sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) }, |
| + { enc_input004, enc_output004, enc_assoc004, enc_nonce004, enc_key004, |
| + sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) }, |
| + { enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005, |
| + sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) }, |
| + { enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006, |
| + sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) }, |
| + { enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007, |
| + sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) }, |
| + { enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008, |
| + sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) }, |
| + { enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009, |
| + sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) }, |
| + { enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010, |
| + sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) }, |
| + { enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011, |
| + sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) }, |
| + { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012, |
| + sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) }, |
| + { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053, |
| + sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) }, |
| + { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054, |
| + sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) }, |
| + { enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055, |
| + sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) }, |
| + { enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056, |
| + sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) }, |
| + { enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057, |
| + sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) }, |
| + { enc_input058, enc_output058, enc_assoc058, enc_nonce058, enc_key058, |
| + sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) }, |
| + { enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059, |
| + sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) }, |
| + { enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060, |
| + sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) }, |
| + { enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061, |
| + sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) }, |
| + { enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062, |
| + sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) }, |
| + { enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063, |
| + sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) }, |
| + { enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064, |
| + sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) }, |
| + { enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065, |
| + sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) }, |
| + { enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066, |
| + sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) }, |
| + { enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067, |
| + sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) }, |
| + { enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068, |
| + sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) }, |
| + { enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069, |
| + sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) }, |
| + { enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070, |
| + sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) }, |
| + { enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071, |
| + sizeof(enc_input071), sizeof(enc_assoc071), sizeof(enc_nonce071) }, |
| + { enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072, |
| + sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) }, |
| + { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073, |
| + sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) }, |
| + { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076, |
| + sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) }, |
| + { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077, |
| + sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) }, |
| + { enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078, |
| + sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) }, |
| + { enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079, |
| + sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) }, |
| + { enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080, |
| + sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) }, |
| + { enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081, |
| + sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) }, |
| + { enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082, |
| + sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) }, |
| + { enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083, |
| + sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) }, |
| + { enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084, |
| + sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) }, |
| + { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085, |
| + sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) }, |
| + { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093, |
| + sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) }, |
| + { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094, |
| + sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) }, |
| + { enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095, |
| + sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) }, |
| + { enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096, |
| + sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) }, |
| + { enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097, |
| + sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) }, |
| + { enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098, |
| + sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) }, |
| + { enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099, |
| + sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) }, |
| + { enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100, |
| + sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) }, |
| + { enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101, |
| + sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) }, |
| + { enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102, |
| + sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) }, |
| + { enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103, |
| + sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) }, |
| + { enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104, |
| + sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) }, |
| + { enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105, |
| + sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) }, |
| + { enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106, |
| + sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) }, |
| + { enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107, |
| + sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) }, |
| + { enc_input108, enc_output108, enc_assoc108, enc_nonce108, enc_key108, |
| + sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) }, |
| + { enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109, |
| + sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) }, |
| + { enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110, |
| + sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) }, |
| + { enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111, |
| + sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) }, |
| + { enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112, |
| + sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) }, |
| + { enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113, |
| + sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) }, |
| + { enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114, |
| + sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) }, |
| + { enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115, |
| + sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) }, |
| + { enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116, |
| + sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) }, |
| + { enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117, |
| + sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) }, |
| + { enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118, |
| + sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) } |
| +}; |
| + |
| +static const u8 dec_input001[] __initconst = { |
| + 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, |
| + 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, |
| + 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, |
| + 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, |
| + 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, |
| + 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, |
| + 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, |
| + 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, |
| + 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, |
| + 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, |
| + 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, |
| + 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, |
| + 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, |
| + 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, |
| + 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, |
| + 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, |
| + 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, |
| + 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, |
| + 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, |
| + 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, |
| + 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, |
| + 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, |
| + 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, |
| + 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, |
| + 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, |
| + 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, |
| + 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, |
| + 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, |
| + 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, |
| + 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, |
| + 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, |
| + 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, |
| + 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, |
| + 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, |
| + 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, |
| + 0x38 |
| +}; |
| +static const u8 dec_output001[] __initconst = { |
| + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, |
| + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, |
| + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, |
| + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, |
| + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, |
| + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, |
| + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, |
| + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, |
| + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, |
| + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, |
| + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, |
| + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, |
| + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, |
| + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, |
| + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, |
| + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, |
| + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, |
| + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, |
| + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, |
| + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, |
| + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, |
| + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, |
| + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, |
| + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, |
| + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, |
| + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, |
| + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, |
| + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, |
| + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, |
| + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, |
| + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, |
| + 0x9d |
| +}; |
| +static const u8 dec_assoc001[] __initconst = { |
| + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x4e, 0x91 |
| +}; |
| +static const u8 dec_nonce001[] __initconst = { |
| + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 |
| +}; |
| +static const u8 dec_key001[] __initconst = { |
| + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, |
| + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, |
| + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, |
| + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 |
| +}; |
| + |
| +static const u8 dec_input002[] __initconst = { |
| + 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, |
| + 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 |
| +}; |
| +static const u8 dec_output002[] __initconst = { }; |
| +static const u8 dec_assoc002[] __initconst = { }; |
| +static const u8 dec_nonce002[] __initconst = { |
| + 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e |
| +}; |
| +static const u8 dec_key002[] __initconst = { |
| + 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, |
| + 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, |
| + 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, |
| + 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 |
| +}; |
| + |
| +static const u8 dec_input003[] __initconst = { |
| + 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, |
| + 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 |
| +}; |
| +static const u8 dec_output003[] __initconst = { }; |
| +static const u8 dec_assoc003[] __initconst = { |
| + 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b |
| +}; |
| +static const u8 dec_nonce003[] __initconst = { |
| + 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d |
| +}; |
| +static const u8 dec_key003[] __initconst = { |
| + 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, |
| + 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, |
| + 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, |
| + 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d |
| +}; |
| + |
| +static const u8 dec_input004[] __initconst = { |
| + 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, |
| + 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, |
| + 0x89 |
| +}; |
| +static const u8 dec_output004[] __initconst = { |
| + 0xa4 |
| +}; |
| +static const u8 dec_assoc004[] __initconst = { |
| + 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 |
| +}; |
| +static const u8 dec_nonce004[] __initconst = { |
| + 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 |
| +}; |
| +static const u8 dec_key004[] __initconst = { |
| + 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, |
| + 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, |
| + 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, |
| + 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e |
| +}; |
| + |
| +static const u8 dec_input005[] __initconst = { |
| + 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, |
| + 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, |
| + 0xac |
| +}; |
| +static const u8 dec_output005[] __initconst = { |
| + 0x2d |
| +}; |
| +static const u8 dec_assoc005[] __initconst = { }; |
| +static const u8 dec_nonce005[] __initconst = { |
| + 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 |
| +}; |
| +static const u8 dec_key005[] __initconst = { |
| + 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, |
| + 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, |
| + 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, |
| + 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 |
| +}; |
| + |
| +static const u8 dec_input006[] __initconst = { |
| + 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, |
| + 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, |
| + 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, |
| + 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, |
| + 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, |
| + 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, |
| + 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc, |
| + 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, |
| + 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, |
| + 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, |
| + 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, |
| + 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, |
| + 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, |
| + 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, |
| + 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, |
| + 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, |
| + 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, |
| + 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, |
| + 0xeb |
| +}; |
| +static const u8 dec_output006[] __initconst = { |
| + 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, |
| + 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, |
| + 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, |
| + 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, |
| + 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, |
| + 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, |
| + 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, |
| + 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, |
| + 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, |
| + 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, |
| + 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, |
| + 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, |
| + 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, |
| + 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, |
| + 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, |
| + 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, |
| + 0x8f |
| +}; |
| +static const u8 dec_assoc006[] __initconst = { |
| + 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b |
| +}; |
| +static const u8 dec_nonce006[] __initconst = { |
| + 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c |
| +}; |
| +static const u8 dec_key006[] __initconst = { |
| + 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, |
| + 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, |
| + 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, |
| + 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 |
| +}; |
| + |
| +static const u8 dec_input007[] __initconst = { |
| + 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, |
| + 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, |
| + 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, |
| + 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, |
| + 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, |
| + 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, |
| + 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, |
| + 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, |
| + 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, |
| + 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, |
| + 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, |
| + 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, |
| + 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, |
| + 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, |
| + 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, |
| + 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, |
| + 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, |
| + 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, |
| + 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, |
| + 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, |
| + 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, |
| + 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, |
| + 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, |
| + 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e, |
| + 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, |
| + 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, |
| + 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, |
| + 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, |
| + 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, |
| + 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, |
| + 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, |
| + 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, |
| + 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, |
| + 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 |
| +}; |
| +static const u8 dec_output007[] __initconst = { |
| + 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, |
| + 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, |
| + 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, |
| + 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, |
| + 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, |
| + 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, |
| + 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, |
| + 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, |
| + 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, |
| + 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, |
| + 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, |
| + 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, |
| + 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, |
| + 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, |
| + 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, |
| + 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, |
| + 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, |
| + 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80, |
| + 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, |
| + 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, |
| + 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, |
| + 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, |
| + 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, |
| + 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, |
| + 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, |
| + 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, |
| + 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, |
| + 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, |
| + 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, |
| + 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, |
| + 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, |
| + 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 |
| +}; |
| +static const u8 dec_assoc007[] __initconst = { }; |
| +static const u8 dec_nonce007[] __initconst = { |
| + 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 |
| +}; |
| +static const u8 dec_key007[] __initconst = { |
| + 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, |
| + 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, |
| + 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, |
| + 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 |
| +}; |
| + |
| +static const u8 dec_input008[] __initconst = { |
| + 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, |
| + 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, |
| + 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, |
| + 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, |
| + 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, |
| + 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, |
| + 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, |
| + 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, |
| + 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, |
| + 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, |
| + 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, |
| + 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, |
| + 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, |
| + 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, |
| + 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, |
| + 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, |
| + 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, |
| + 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, |
| + 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, |
| + 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, |
| + 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, |
| + 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, |
| + 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, |
| + 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, |
| + 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, |
| + 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, |
| + 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, |
| + 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, |
| + 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, |
| + 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, |
| + 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, |
| + 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, |
| + 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82, |
| + 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, |
| + 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, |
| + 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, |
| + 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, |
| + 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, |
| + 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, |
| + 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, |
| + 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, |
| + 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, |
| + 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, |
| + 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, |
| + 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, |
| + 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, |
| + 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, |
| + 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, |
| + 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, |
| + 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, |
| + 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, |
| + 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, |
| + 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, |
| + 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, |
| + 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, |
| + 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, |
| + 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, |
| + 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, |
| + 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, |
| + 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, |
| + 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, |
| + 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, |
| + 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, |
| + 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, |
| + 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, |
| + 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 |
| +}; |
| +static const u8 dec_output008[] __initconst = { |
| + 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, |
| + 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, |
| + 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, |
| + 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, |
| + 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, |
| + 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, |
| + 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, |
| + 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, |
| + 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, |
| + 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, |
| + 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, |
| + 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, |
| + 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, |
| + 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, |
| + 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, |
| + 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, |
| + 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, |
| + 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, |
| + 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, |
| + 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10, |
| + 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, |
| + 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, |
| + 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, |
| + 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, |
| + 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, |
| + 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, |
| + 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, |
| + 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, |
| + 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, |
| + 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, |
| + 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, |
| + 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, |
| + 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, |
| + 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, |
| + 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, |
| + 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, |
| + 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, |
| + 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, |
| + 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, |
| + 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed, |
| + 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, |
| + 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, |
| + 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, |
| + 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, |
| + 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, |
| + 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, |
| + 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, |
| + 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, |
| + 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, |
| + 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, |
| + 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, |
| + 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, |
| + 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, |
| + 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, |
| + 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, |
| + 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, |
| + 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, |
| + 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, |
| + 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30, |
| + 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, |
| + 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, |
| + 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, |
| + 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, |
| + 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 |
| +}; |
| +static const u8 dec_assoc008[] __initconst = { }; |
| +static const u8 dec_nonce008[] __initconst = { |
| + 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 |
| +}; |
| +static const u8 dec_key008[] __initconst = { |
| + 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, |
| + 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0, |
| + 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, |
| + 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba |
| +}; |
| + |
| +static const u8 dec_input009[] __initconst = { |
| + 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, |
| + 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, |
| + 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, |
| + 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, |
| + 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, |
| + 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe, |
| + 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, |
| + 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, |
| + 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, |
| + 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09, |
| + 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, |
| + 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, |
| + 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, |
| + 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, |
| + 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, |
| + 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, |
| + 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, |
| + 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, |
| + 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, |
| + 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, |
| + 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, |
| + 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, |
| + 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, |
| + 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, |
| + 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, |
| + 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, |
| + 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, |
| + 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, |
| + 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, |
| + 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, |
| + 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, |
| + 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, |
| + 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, |
| + 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, |
| + 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, |
| + 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, |
| + 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, |
| + 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, |
| + 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, |
| + 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, |
| + 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, |
| + 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, |
| + 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, |
| + 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, |
| + 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e, |
| + 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, |
| + 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, |
| + 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, |
| + 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, |
| + 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, |
| + 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, |
| + 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, |
| + 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, |
| + 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, |
| + 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, |
| + 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, |
| + 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, |
| + 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, |
| + 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, |
| + 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, |
| + 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, |
| + 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, |
| + 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, |
| + 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, |
| + 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, |
| + 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, |
| + 0xae |
| +}; |
| +static const u8 dec_output009[] __initconst = { |
| + 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, |
| + 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, |
| + 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, |
| + 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, |
| + 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, |
| + 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, |
| + 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, |
| + 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, |
| + 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, |
| + 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, |
| + 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, |
| + 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, |
| + 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, |
| + 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, |
| + 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, |
| + 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, |
| + 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab, |
| + 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, |
| + 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, |
| + 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, |
| + 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, |
| + 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, |
| + 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, |
| + 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, |
| + 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, |
| + 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, |
| + 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, |
| + 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, |
| + 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, |
| + 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, |
| + 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, |
| + 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, |
| + 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, |
| + 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, |
| + 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, |
| + 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, |
| + 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, |
| + 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, |
| + 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, |
| + 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, |
| + 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, |
| + 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, |
| + 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, |
| + 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, |
| + 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, |
| + 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, |
| + 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, |
| + 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, |
| + 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, |
| + 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, |
| + 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, |
| + 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, |
| + 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, |
| + 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, |
| + 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, |
| + 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5, |
| + 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, |
| + 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, |
| + 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, |
| + 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, |
| + 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, |
| + 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, |
| + 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, |
| + 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, |
| + 0x65 |
| +}; |
| +static const u8 dec_assoc009[] __initconst = { |
| + 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, |
| + 0xef |
| +}; |
| +static const u8 dec_nonce009[] __initconst = { |
| + 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 |
| +}; |
| +static const u8 dec_key009[] __initconst = { |
| + 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, |
| + 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, |
| + 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, |
| + 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b |
| +}; |
| + |
| +static const u8 dec_input010[] __initconst = { |
| + 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, |
| + 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, |
| + 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, |
| + 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, |
| + 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, |
| + 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, |
| + 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, |
| + 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, |
| + 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, |
| + 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, |
| + 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, |
| + 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, |
| + 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, |
| + 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, |
| + 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, |
| + 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, |
| + 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, |
| + 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, |
| + 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, |
| + 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, |
| + 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, |
| + 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, |
| + 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, |
| + 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, |
| + 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, |
| + 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, |
| + 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, |
| + 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, |
| + 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, |
| + 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, |
| + 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, |
| + 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, |
| + 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, |
| + 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, |
| + 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, |
| + 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, |
| + 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, |
| + 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, |
| + 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, |
| + 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, |
| + 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, |
| + 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, |
| + 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, |
| + 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, |
| + 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, |
| + 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, |
| + 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, |
| + 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, |
| + 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, |
| + 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, |
| + 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, |
| + 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, |
| + 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, |
| + 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, |
| + 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, |
| + 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, |
| + 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, |
| + 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, |
| + 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, |
| + 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, |
| + 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, |
| + 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, |
| + 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, |
| + 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, |
| + 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, |
| + 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, |
| + 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, |
| + 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, |
| + 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, |
| + 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, |
| + 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, |
| + 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, |
| + 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, |
| + 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, |
| + 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, |
| + 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, |
| + 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, |
| + 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, |
| + 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, |
| + 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, |
| + 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, |
| + 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, |
| + 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, |
| + 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, |
| + 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, |
| + 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, |
| + 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, |
| + 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, |
| + 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, |
| + 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, |
| + 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, |
| + 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, |
| + 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, |
| + 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, |
| + 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, |
| + 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, |
| + 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, |
| + 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, |
| + 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56, |
| + 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, |
| + 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, |
| + 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, |
| + 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, |
| + 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, |
| + 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, |
| + 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, |
| + 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, |
| + 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, |
| + 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, |
| + 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, |
| + 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, |
| + 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, |
| + 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, |
| + 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, |
| + 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, |
| + 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, |
| + 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, |
| + 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd, |
| + 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, |
| + 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, |
| + 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, |
| + 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, |
| + 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, |
| + 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, |
| + 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, |
| + 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, |
| + 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, |
| + 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, |
| + 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, |
| + 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 |
| +}; |
| +static const u8 dec_output010[] __initconst = { |
| + 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, |
| + 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, |
| + 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, |
| + 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, |
| + 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, |
| + 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, |
| + 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4, |
| + 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, |
| + 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, |
| + 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, |
| + 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, |
| + 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b, |
| + 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, |
| + 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, |
| + 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, |
| + 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, |
| + 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, |
| + 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, |
| + 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, |
| + 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, |
| + 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, |
| + 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, |
| + 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, |
| + 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, |
| + 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, |
| + 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, |
| + 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, |
| + 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, |
| + 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, |
| + 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, |
| + 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, |
| + 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, |
| + 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, |
| + 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, |
| + 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, |
| + 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, |
| + 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, |
| + 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, |
| + 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, |
| + 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, |
| + 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, |
| + 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, |
| + 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, |
| + 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, |
| + 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, |
| + 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa, |
| + 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, |
| + 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, |
| + 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, |
| + 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, |
| + 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e, |
| + 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, |
| + 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, |
| + 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, |
| + 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, |
| + 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, |
| + 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, |
| + 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, |
| + 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, |
| + 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, |
| + 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, |
| + 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, |
| + 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, |
| + 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, |
| + 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, |
| + 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, |
| + 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, |
| + 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, |
| + 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, |
| + 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, |
| + 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, |
| + 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, |
| + 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, |
| + 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, |
| + 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, |
| + 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, |
| + 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, |
| + 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, |
| + 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, |
| + 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, |
| + 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, |
| + 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, |
| + 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, |
| + 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, |
| + 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf, |
| + 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, |
| + 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, |
| + 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, |
| + 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, |
| + 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71, |
| + 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, |
| + 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, |
| + 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, |
| + 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, |
| + 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, |
| + 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, |
| + 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, |
| + 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, |
| + 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, |
| + 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, |
| + 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, |
| + 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, |
| + 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, |
| + 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, |
| + 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, |
| + 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, |
| + 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, |
| + 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, |
| + 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, |
| + 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, |
| + 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, |
| + 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, |
| + 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, |
| + 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, |
| + 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, |
| + 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, |
| + 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, |
| + 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, |
| + 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, |
| + 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, |
| + 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, |
| + 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, |
| + 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, |
| + 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d, |
| + 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, |
| + 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, |
| + 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, |
| + 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f |
| +}; |
| +static const u8 dec_assoc010[] __initconst = { |
| + 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, |
| + 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 |
| +}; |
| +static const u8 dec_nonce010[] __initconst = { |
| + 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 |
| +}; |
| +static const u8 dec_key010[] __initconst = { |
| + 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, |
| + 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, |
| + 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, |
| + 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 |
| +}; |
| + |
| +static const u8 dec_input011[] __initconst = { |
| + 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, |
| + 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, |
| + 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, |
| + 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, |
| + 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, |
| + 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, |
| + 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, |
| + 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd, |
| + 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, |
| + 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, |
| + 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, |
| + 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, |
| + 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, |
| + 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, |
| + 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, |
| + 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, |
| + 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, |
| + 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, |
| + 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, |
| + 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, |
| + 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, |
| + 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, |
| + 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, |
| + 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91, |
| + 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, |
| + 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, |
| + 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, |
| + 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, |
| + 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, |
| + 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, |
| + 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, |
| + 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, |
| + 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, |
| + 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, |
| + 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, |
| + 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, |
| + 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, |
| + 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, |
| + 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, |
| + 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, |
| + 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, |
| + 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, |
| + 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, |
| + 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, |
| + 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, |
| + 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, |
| + 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1, |
| + 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, |
| + 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, |
| + 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, |
| + 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, |
| + 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, |
| + 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, |
| + 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, |
| + 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, |
| + 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, |
| + 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, |
| + 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, |
| + 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, |
| + 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, |
| + 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, |
| + 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, |
| + 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, |
| + 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, |
| + 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, |
| + 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, |
| + 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, |
| + 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, |
| + 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, |
| + 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, |
| + 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, |
| + 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, |
| + 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, |
| + 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, |
| + 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, |
| + 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, |
| + 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, |
| + 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, |
| + 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, |
| + 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, |
| + 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, |
| + 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, |
| + 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, |
| + 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, |
| + 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, |
| + 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b, |
| + 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, |
| + 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, |
| + 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, |
| + 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, |
| + 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, |
| + 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, |
| + 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, |
| + 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, |
| + 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, |
| + 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, |
| + 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, |
| + 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, |
| + 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, |
| + 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, |
| + 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, |
| + 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, |
| + 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, |
| + 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, |
| + 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, |
| + 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, |
| + 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, |
| + 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, |
| + 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, |
| + 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, |
| + 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, |
| + 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, |
| + 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, |
| + 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, |
| + 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, |
| + 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, |
| + 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, |
| + 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, |
| + 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, |
| + 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, |
| + 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, |
| + 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, |
| + 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, |
| + 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, |
| + 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35, |
| + 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, |
| + 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, |
| + 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, |
| + 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, |
| + 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, |
| + 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, |
| + 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, |
| + 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, |
| + 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, |
| + 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, |
| + 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, |
| + 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, |
| + 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, |
| + 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, |
| + 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, |
| + 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, |
| + 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, |
| + 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, |
| + 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, |
| + 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, |
| + 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, |
| + 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, |
| + 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, |
| + 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, |
| + 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, |
| + 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, |
| + 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, |
| + 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, |
| + 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, |
| + 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, |
| + 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, |
| + 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, |
| + 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, |
| + 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, |
| + 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, |
| + 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, |
| + 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, |
| + 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, |
| + 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, |
| + 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, |
| + 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, |
| + 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, |
| + 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, |
| + 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, |
| + 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, |
| + 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, |
| + 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, |
| + 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, |
| + 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, |
| + 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, |
| + 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, |
| + 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, |
| + 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, |
| + 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, |
| + 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, |
| + 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91, |
| + 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, |
| + 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, |
| + 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, |
| + 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, |
| + 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, |
| + 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, |
| + 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, |
| + 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, |
| + 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, |
| + 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, |
| + 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, |
| + 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, |
| + 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, |
| + 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, |
| + 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, |
| + 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, |
| + 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, |
| + 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, |
| + 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, |
| + 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, |
| + 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, |
| + 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, |
| + 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, |
| + 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, |
| + 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, |
| + 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, |
| + 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, |
| + 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, |
| + 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, |
| + 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, |
| + 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, |
| + 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, |
| + 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, |
| + 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, |
| + 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, |
| + 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, |
| + 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, |
| + 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, |
| + 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23, |
| + 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, |
| + 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, |
| + 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, |
| + 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, |
| + 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, |
| + 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, |
| + 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, |
| + 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, |
| + 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, |
| + 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, |
| + 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, |
| + 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, |
| + 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, |
| + 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, |
| + 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, |
| + 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, |
| + 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, |
| + 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, |
| + 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, |
| + 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, |
| + 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, |
| + 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, |
| + 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, |
| + 0x2b, 0xdf, 0xcd, 0xf9, 0x3c |
| +}; |
| +static const u8 dec_output011[] __initconst = { |
| + 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, |
| + 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, |
| + 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, |
| + 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, |
| + 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, |
| + 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, |
| + 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, |
| + 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, |
| + 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, |
| + 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, |
| + 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, |
| + 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, |
| + 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, |
| + 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, |
| + 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, |
| + 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7, |
| + 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, |
| + 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, |
| + 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, |
| + 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, |
| + 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, |
| + 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, |
| + 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, |
| + 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, |
| + 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, |
| + 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, |
| + 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, |
| + 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, |
| + 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, |
| + 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, |
| + 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, |
| + 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, |
| + 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, |
| + 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, |
| + 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, |
| + 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, |
| + 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, |
| + 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, |
| + 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, |
| + 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, |
| + 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, |
| + 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, |
| + 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, |
| + 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, |
| + 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, |
| + 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, |
| + 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, |
| + 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, |
| + 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, |
| + 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, |
| + 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, |
| + 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, |
| + 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, |
| + 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, |
| + 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20, |
| + 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, |
| + 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, |
| + 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, |
| + 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, |
| + 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, |
| + 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, |
| + 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, |
| + 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, |
| + 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, |
| + 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, |
| + 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, |
| + 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, |
| + 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, |
| + 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, |
| + 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, |
| + 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, |
| + 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, |
| + 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, |
| + 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, |
| + 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, |
| + 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, |
| + 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, |
| + 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, |
| + 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, |
| + 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, |
| + 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, |
| + 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, |
| + 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, |
| + 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, |
| + 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, |
| + 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, |
| + 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, |
| + 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, |
| + 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, |
| + 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, |
| + 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, |
| + 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, |
| + 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, |
| + 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e, |
| + 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, |
| + 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, |
| + 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, |
| + 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, |
| + 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, |
| + 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, |
| + 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, |
| + 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, |
| + 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, |
| + 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, |
| + 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, |
| + 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, |
| + 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, |
| + 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, |
| + 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, |
| + 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, |
| + 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, |
| + 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, |
| + 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, |
| + 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, |
| + 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, |
| + 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, |
| + 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, |
| + 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, |
| + 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, |
| + 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, |
| + 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, |
| + 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, |
| + 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, |
| + 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, |
| + 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, |
| + 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, |
| + 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, |
| + 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, |
| + 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, |
| + 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, |
| + 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, |
| + 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc, |
| + 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0, |
| + 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, |
| + 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, |
| + 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, |
| + 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, |
| + 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, |
| + 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, |
| + 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, |
| + 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, |
| + 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, |
| + 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, |
| + 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, |
| + 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, |
| + 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, |
| + 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, |
| + 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, |
| + 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, |
| + 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, |
| + 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, |
| + 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, |
| + 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, |
| + 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, |
| + 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, |
| + 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, |
| + 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, |
| + 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, |
| + 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, |
| + 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, |
| + 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, |
| + 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, |
| + 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, |
| + 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, |
| + 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, |
| + 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, |
| + 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, |
| + 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, |
| + 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, |
| + 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, |
| + 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76, |
| + 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5, |
| + 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, |
| + 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, |
| + 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, |
| + 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, |
| + 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, |
| + 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, |
| + 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, |
| + 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, |
| + 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, |
| + 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, |
| + 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, |
| + 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, |
| + 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, |
| + 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, |
| + 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, |
| + 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, |
| + 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, |
| + 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, |
| + 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, |
| + 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, |
| + 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, |
| + 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, |
| + 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, |
| + 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, |
| + 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, |
| + 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, |
| + 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, |
| + 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, |
| + 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, |
| + 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, |
| + 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, |
| + 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, |
| + 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, |
| + 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, |
| + 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, |
| + 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, |
| + 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, |
| + 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9, |
| + 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, |
| + 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, |
| + 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, |
| + 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, |
| + 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, |
| + 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, |
| + 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, |
| + 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, |
| + 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, |
| + 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, |
| + 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, |
| + 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, |
| + 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, |
| + 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, |
| + 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, |
| + 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, |
| + 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, |
| + 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, |
| + 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, |
| + 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, |
| + 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, |
| + 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, |
| + 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, |
| + 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, |
| + 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, |
| + 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, |
| + 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, |
| + 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, |
| + 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, |
| + 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, |
| + 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, |
| + 0x10, 0x1e, 0xbf, 0xec, 0xa8 |
| +}; |
| +static const u8 dec_assoc011[] __initconst = { |
| + 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 |
| +}; |
| +static const u8 dec_nonce011[] __initconst = { |
| + 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa |
| +}; |
| +static const u8 dec_key011[] __initconst = { |
| + 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, |
| + 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, |
| + 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, |
| + 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 |
| +}; |
| + |
| +static const u8 dec_input012[] __initconst = { |
| + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, |
| + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, |
| + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, |
| + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, |
| + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, |
| + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, |
| + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, |
| + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, |
| + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, |
| + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, |
| + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, |
| + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, |
| + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, |
| + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, |
| + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, |
| + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, |
| + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, |
| + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, |
| + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, |
| + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, |
| + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, |
| + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, |
| + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, |
| + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, |
| + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, |
| + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, |
| + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, |
| + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, |
| + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, |
| + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, |
| + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, |
| + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, |
| + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, |
| + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, |
| + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, |
| + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, |
| + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, |
| + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, |
| + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, |
| + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, |
| + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, |
| + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, |
| + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, |
| + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, |
| + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, |
| + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, |
| + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, |
| + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, |
| + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, |
| + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, |
| + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, |
| + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, |
| + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, |
| + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, |
| + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, |
| + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, |
| + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, |
| + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, |
| + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, |
| + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, |
| + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, |
| + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, |
| + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, |
| + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, |
| + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, |
| + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, |
| + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, |
| + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, |
| + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, |
| + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, |
| + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, |
| + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, |
| + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, |
| + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, |
| + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, |
| + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, |
| + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, |
| + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, |
| + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, |
| + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, |
| + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, |
| + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, |
| + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, |
| + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, |
| + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, |
| + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, |
| + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, |
| + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, |
| + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, |
| + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, |
| + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, |
| + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, |
| + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, |
| + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, |
| + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, |
| + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, |
| + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, |
| + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, |
| + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, |
| + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, |
| + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, |
| + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, |
| + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, |
| + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, |
| + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, |
| + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, |
| + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, |
| + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, |
| + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, |
| + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, |
| + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, |
| + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, |
| + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, |
| + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, |
| + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, |
| + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, |
| + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, |
| + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, |
| + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, |
| + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, |
| + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, |
| + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, |
| + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, |
| + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, |
| + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, |
| + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, |
| + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, |
| + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, |
| + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, |
| + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, |
| + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, |
| + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, |
| + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, |
| + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, |
| + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, |
| + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, |
| + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, |
| + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, |
| + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, |
| + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, |
| + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, |
| + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, |
| + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, |
| + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, |
| + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, |
| + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, |
| + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, |
| + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, |
| + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, |
| + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, |
| + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, |
| + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, |
| + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, |
| + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, |
| + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, |
| + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, |
| + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, |
| + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, |
| + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, |
| + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, |
| + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, |
| + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, |
| + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, |
| + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, |
| + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, |
| + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, |
| + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, |
| + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, |
| + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, |
| + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, |
| + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, |
| + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, |
| + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, |
| + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, |
| + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, |
| + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, |
| + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, |
| + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, |
| + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, |
| + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, |
| + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, |
| + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, |
| + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, |
| + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, |
| + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, |
| + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, |
| + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, |
| + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, |
| + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, |
| + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, |
| + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, |
| + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, |
| + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, |
| + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, |
| + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, |
| + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, |
| + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, |
| + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, |
| + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, |
| + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, |
| + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, |
| + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, |
| + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, |
| + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, |
| + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, |
| + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, |
| + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, |
| + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, |
| + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, |
| + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, |
| + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, |
| + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, |
| + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, |
| + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, |
| + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, |
| + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, |
| + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, |
| + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, |
| + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, |
| + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, |
| + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, |
| + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, |
| + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, |
| + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, |
| + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, |
| + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, |
| + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, |
| + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, |
| + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, |
| + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, |
| + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, |
| + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, |
| + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, |
| + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, |
| + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, |
| + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, |
| + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, |
| + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, |
| + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, |
| + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, |
| + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, |
| + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, |
| + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, |
| + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, |
| + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, |
| + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, |
| + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, |
| + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, |
| + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, |
| + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, |
| + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, |
| + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, |
| + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, |
| + 0x70, 0xcf, 0xd6 |
| +}; |
| +static const u8 dec_output012[] __initconst = { |
| + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, |
| + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, |
| + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, |
| + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, |
| + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, |
| + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, |
| + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, |
| + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, |
| + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, |
| + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, |
| + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, |
| + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, |
| + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, |
| + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, |
| + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, |
| + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, |
| + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, |
| + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, |
| + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, |
| + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, |
| + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, |
| + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, |
| + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, |
| + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, |
| + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, |
| + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, |
| + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, |
| + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, |
| + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, |
| + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, |
| + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, |
| + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, |
| + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, |
| + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, |
| + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, |
| + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, |
| + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, |
| + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, |
| + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, |
| + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, |
| + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, |
| + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, |
| + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, |
| + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, |
| + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, |
| + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, |
| + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, |
| + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, |
| + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, |
| + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, |
| + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, |
| + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, |
| + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, |
| + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, |
| + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, |
| + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, |
| + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, |
| + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, |
| + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, |
| + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, |
| + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, |
| + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, |
| + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, |
| + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, |
| + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, |
| + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, |
| + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, |
| + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, |
| + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, |
| + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, |
| + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, |
| + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, |
| + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, |
| + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, |
| + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, |
| + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, |
| + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, |
| + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, |
| + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, |
| + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, |
| + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, |
| + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, |
| + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, |
| + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, |
| + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, |
| + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, |
| + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, |
| + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, |
| + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, |
| + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, |
| + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, |
| + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, |
| + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, |
| + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, |
| + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, |
| + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, |
| + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, |
| + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, |
| + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, |
| + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, |
| + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, |
| + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, |
| + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, |
| + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, |
| + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, |
| + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, |
| + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, |
| + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, |
| + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, |
| + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, |
| + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, |
| + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, |
| + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, |
| + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, |
| + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, |
| + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, |
| + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, |
| + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, |
| + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, |
| + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, |
| + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, |
| + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, |
| + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, |
| + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, |
| + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, |
| + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, |
| + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, |
| + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, |
| + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, |
| + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, |
| + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, |
| + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, |
| + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, |
| + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, |
| + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, |
| + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, |
| + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, |
| + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, |
| + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, |
| + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, |
| + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, |
| + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, |
| + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, |
| + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, |
| + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, |
| + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, |
| + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, |
| + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, |
| + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, |
| + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, |
| + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, |
| + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, |
| + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, |
| + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, |
| + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, |
| + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, |
| + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, |
| + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, |
| + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, |
| + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, |
| + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, |
| + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, |
| + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, |
| + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, |
| + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, |
| + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, |
| + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, |
| + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, |
| + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, |
| + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, |
| + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, |
| + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, |
| + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, |
| + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, |
| + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, |
| + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, |
| + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, |
| + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, |
| + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, |
| + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, |
| + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, |
| + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, |
| + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, |
| + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, |
| + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, |
| + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, |
| + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, |
| + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, |
| + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, |
| + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, |
| + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, |
| + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, |
| + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, |
| + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, |
| + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, |
| + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, |
| + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, |
| + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, |
| + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, |
| + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, |
| + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, |
| + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, |
| + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, |
| + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, |
| + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, |
| + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, |
| + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, |
| + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, |
| + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, |
| + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, |
| + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, |
| + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, |
| + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, |
| + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, |
| + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, |
| + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, |
| + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, |
| + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, |
| + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, |
| + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, |
| + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, |
| + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, |
| + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, |
| + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, |
| + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, |
| + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, |
| + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, |
| + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, |
| + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, |
| + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, |
| + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, |
| + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, |
| + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, |
| + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, |
| + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, |
| + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, |
| + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, |
| + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, |
| + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, |
| + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, |
| + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, |
| + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, |
| + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, |
| + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, |
| + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, |
| + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, |
| + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, |
| + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, |
| + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, |
| + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, |
| + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, |
| + 0x78, 0xec, 0x00 |
| +}; |
| +static const u8 dec_assoc012[] __initconst = { |
| + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, |
| + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, |
| + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, |
| + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, |
| + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, |
| + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, |
| + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, |
| + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 |
| +}; |
| +static const u8 dec_nonce012[] __initconst = { |
| + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 |
| +}; |
| +static const u8 dec_key012[] __initconst = { |
| + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, |
| + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, |
| + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, |
| + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 |
| +}; |
| + |
| +static const u8 dec_input013[] __initconst = { |
| + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, |
| + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, |
| + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, |
| + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, |
| + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, |
| + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, |
| + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, |
| + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, |
| + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, |
| + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, |
| + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, |
| + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, |
| + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, |
| + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, |
| + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, |
| + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, |
| + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, |
| + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, |
| + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, |
| + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, |
| + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, |
| + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, |
| + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, |
| + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, |
| + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, |
| + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, |
| + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, |
| + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, |
| + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, |
| + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, |
| + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, |
| + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, |
| + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, |
| + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, |
| + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, |
| + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, |
| + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, |
| + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, |
| + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, |
| + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, |
| + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, |
| + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, |
| + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, |
| + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, |
| + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, |
| + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, |
| + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, |
| + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, |
| + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, |
| + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, |
| + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, |
| + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, |
| + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, |
| + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, |
| + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, |
| + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, |
| + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, |
| + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, |
| + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, |
| + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, |
| + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, |
| + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, |
| + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, |
| + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, |
| + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, |
| + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, |
| + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, |
| + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, |
| + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, |
| + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, |
| + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, |
| + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, |
| + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, |
| + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, |
| + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, |
| + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, |
| + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, |
| + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, |
| + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, |
| + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, |
| + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, |
| + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, |
| + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, |
| + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, |
| + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, |
| + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, |
| + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, |
| + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, |
| + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, |
| + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, |
| + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, |
| + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, |
| + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, |
| + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, |
| + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, |
| + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, |
| + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, |
| + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, |
| + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, |
| + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, |
| + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, |
| + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, |
| + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, |
| + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, |
| + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, |
| + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, |
| + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, |
| + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, |
| + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, |
| + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, |
| + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, |
| + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, |
| + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, |
| + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, |
| + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, |
| + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, |
| + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, |
| + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, |
| + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, |
| + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, |
| + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, |
| + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, |
| + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, |
| + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, |
| + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, |
| + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, |
| + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, |
| + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, |
| + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, |
| + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, |
| + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, |
| + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, |
| + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, |
| + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, |
| + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, |
| + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, |
| + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, |
| + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, |
| + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, |
| + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, |
| + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, |
| + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, |
| + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, |
| + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, |
| + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, |
| + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, |
| + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, |
| + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, |
| + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, |
| + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, |
| + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, |
| + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, |
| + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, |
| + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, |
| + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, |
| + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, |
| + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, |
| + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, |
| + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, |
| + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, |
| + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, |
| + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, |
| + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, |
| + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, |
| + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, |
| + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, |
| + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, |
| + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, |
| + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, |
| + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, |
| + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, |
| + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, |
| + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, |
| + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, |
| + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, |
| + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, |
| + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, |
| + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, |
| + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, |
| + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, |
| + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, |
| + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, |
| + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, |
| + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, |
| + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, |
| + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, |
| + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, |
| + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, |
| + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, |
| + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, |
| + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, |
| + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, |
| + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, |
| + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, |
| + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, |
| + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, |
| + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, |
| + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, |
| + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, |
| + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, |
| + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, |
| + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, |
| + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, |
| + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, |
| + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, |
| + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, |
| + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, |
| + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, |
| + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, |
| + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, |
| + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, |
| + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, |
| + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, |
| + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, |
| + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, |
| + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, |
| + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, |
| + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, |
| + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, |
| + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, |
| + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, |
| + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, |
| + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, |
| + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, |
| + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, |
| + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, |
| + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, |
| + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, |
| + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, |
| + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, |
| + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, |
| + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, |
| + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, |
| + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, |
| + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, |
| + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, |
| + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, |
| + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, |
| + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, |
| + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, |
| + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, |
| + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, |
| + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, |
| + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, |
| + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, |
| + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, |
| + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, |
| + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, |
| + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, |
| + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, |
| + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, |
| + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, |
| + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, |
| + 0x70, 0xcf, 0xd7 |
| +}; |
| +static const u8 dec_output013[] __initconst = { |
| + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, |
| + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, |
| + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, |
| + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, |
| + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, |
| + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, |
| + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, |
| + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, |
| + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, |
| + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, |
| + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, |
| + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, |
| + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, |
| + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, |
| + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, |
| + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, |
| + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, |
| + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, |
| + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, |
| + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, |
| + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, |
| + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, |
| + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, |
| + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, |
| + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, |
| + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, |
| + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, |
| + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, |
| + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, |
| + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, |
| + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, |
| + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, |
| + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, |
| + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, |
| + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, |
| + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, |
| + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, |
| + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, |
| + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, |
| + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, |
| + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, |
| + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, |
| + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, |
| + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, |
| + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, |
| + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, |
| + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, |
| + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, |
| + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, |
| + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, |
| + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, |
| + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, |
| + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, |
| + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, |
| + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, |
| + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, |
| + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, |
| + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, |
| + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, |
| + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, |
| + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, |
| + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, |
| + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, |
| + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, |
| + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, |
| + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, |
| + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, |
| + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, |
| + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, |
| + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, |
| + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, |
| + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, |
| + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, |
| + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, |
| + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, |
| + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, |
| + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, |
| + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, |
| + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, |
| + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, |
| + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, |
| + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, |
| + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, |
| + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, |
| + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, |
| + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, |
| + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, |
| + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, |
| + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, |
| + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, |
| + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, |
| + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, |
| + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, |
| + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, |
| + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, |
| + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, |
| + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, |
| + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, |
| + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, |
| + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, |
| + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, |
| + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, |
| + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, |
| + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, |
| + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, |
| + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, |
| + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, |
| + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, |
| + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, |
| + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, |
| + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, |
| + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, |
| + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, |
| + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, |
| + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, |
| + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, |
| + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, |
| + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, |
| + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, |
| + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, |
| + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, |
| + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, |
| + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, |
| + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, |
| + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, |
| + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, |
| + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, |
| + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, |
| + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, |
| + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, |
| + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, |
| + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, |
| + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, |
| + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, |
| + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, |
| + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, |
| + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, |
| + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, |
| + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, |
| + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, |
| + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, |
| + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, |
| + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, |
| + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, |
| + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, |
| + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, |
| + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, |
| + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, |
| + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, |
| + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, |
| + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, |
| + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, |
| + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, |
| + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, |
| + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, |
| + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, |
| + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, |
| + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, |
| + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, |
| + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, |
| + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, |
| + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, |
| + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, |
| + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, |
| + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, |
| + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, |
| + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, |
| + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, |
| + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, |
| + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, |
| + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, |
| + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, |
| + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, |
| + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, |
| + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, |
| + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, |
| + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, |
| + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, |
| + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, |
| + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, |
| + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, |
| + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, |
| + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, |
| + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, |
| + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, |
| + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, |
| + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, |
| + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, |
| + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, |
| + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, |
| + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, |
| + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, |
| + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, |
| + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, |
| + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, |
| + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, |
| + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, |
| + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, |
| + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, |
| + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, |
| + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, |
| + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, |
| + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, |
| + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, |
| + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, |
| + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, |
| + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, |
| + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, |
| + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, |
| + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, |
| + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, |
| + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, |
| + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, |
| + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, |
| + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, |
| + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, |
| + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, |
| + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, |
| + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, |
| + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, |
| + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, |
| + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, |
| + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, |
| + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, |
| + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, |
| + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, |
| + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, |
| + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, |
| + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, |
| + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, |
| + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, |
| + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, |
| + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, |
| + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, |
| + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, |
| + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, |
| + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, |
| + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, |
| + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, |
| + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, |
| + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, |
| + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, |
| + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, |
| + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, |
| + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, |
| + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, |
| + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, |
| + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, |
| + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, |
| + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, |
| + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, |
| + 0x78, 0xec, 0x00 |
| +}; |
| +static const u8 dec_assoc013[] __initconst = { |
| + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, |
| + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, |
| + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, |
| + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, |
| + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, |
| + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, |
| + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, |
| + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 |
| +}; |
| +static const u8 dec_nonce013[] __initconst = { |
| + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 |
| +}; |
| +static const u8 dec_key013[] __initconst = { |
| + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, |
| + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, |
| + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, |
| + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 |
| +}; |
| + |
| +static const struct chacha20poly1305_testvec |
| +chacha20poly1305_dec_vectors[] __initconst = { |
| + { dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001, |
| + sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) }, |
| + { dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002, |
| + sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) }, |
| + { dec_input003, dec_output003, dec_assoc003, dec_nonce003, dec_key003, |
| + sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) }, |
| + { dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004, |
| + sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) }, |
| + { dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005, |
| + sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) }, |
| + { dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006, |
| + sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) }, |
| + { dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007, |
| + sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) }, |
| + { dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008, |
| + sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) }, |
| + { dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009, |
| + sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) }, |
| + { dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010, |
| + sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) }, |
| + { dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011, |
| + sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) }, |
| + { dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012, |
| + sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) }, |
| + { dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013, |
| + sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013), |
| + true } |
| +}; |
| + |
| +static const u8 xenc_input001[] __initconst = { |
| + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, |
| + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, |
| + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, |
| + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, |
| + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, |
| + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, |
| + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, |
| + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, |
| + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, |
| + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, |
| + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, |
| + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, |
| + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, |
| + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, |
| + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, |
| + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, |
| + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, |
| + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, |
| + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, |
| + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, |
| + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, |
| + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, |
| + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, |
| + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, |
| + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, |
| + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, |
| + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, |
| + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, |
| + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, |
| + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, |
| + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, |
| + 0x9d |
| +}; |
| +static const u8 xenc_output001[] __initconst = { |
| + 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, |
| + 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, |
| + 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, |
| + 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, |
| + 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, |
| + 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, |
| + 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, |
| + 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, |
| + 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, |
| + 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, |
| + 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, |
| + 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, |
| + 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f, |
| + 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, |
| + 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, |
| + 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, |
| + 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, |
| + 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, |
| + 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, |
| + 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, |
| + 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, |
| + 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, |
| + 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, |
| + 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, |
| + 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, |
| + 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, |
| + 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, |
| + 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, |
| + 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, |
| + 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, |
| + 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, |
| + 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, |
| + 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, |
| + 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, |
| + 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, |
| + 0x9c |
| +}; |
| +static const u8 xenc_assoc001[] __initconst = { |
| + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x4e, 0x91 |
| +}; |
| +static const u8 xenc_nonce001[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
| +}; |
| +static const u8 xenc_key001[] __initconst = { |
| + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, |
| + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, |
| + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, |
| + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 |
| +}; |
| + |
| +static const struct chacha20poly1305_testvec |
| +xchacha20poly1305_enc_vectors[] __initconst = { |
| + { xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001, |
| + sizeof(xenc_input001), sizeof(xenc_assoc001), sizeof(xenc_nonce001) } |
| +}; |
| + |
| +static const u8 xdec_input001[] __initconst = { |
| + 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, |
| + 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, |
| + 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, |
| + 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, |
| + 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, |
| + 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, |
| + 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, |
| + 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, |
| + 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, |
| + 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, |
| + 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, |
| + 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, |
| + 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f, |
| + 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, |
| + 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, |
| + 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, |
| + 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, |
| + 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, |
| + 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, |
| + 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, |
| + 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, |
| + 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, |
| + 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, |
| + 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, |
| + 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, |
| + 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, |
| + 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, |
| + 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, |
| + 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, |
| + 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, |
| + 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, |
| + 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, |
| + 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, |
| + 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, |
| + 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, |
| + 0x9c |
| +}; |
| +static const u8 xdec_output001[] __initconst = { /* Expected result for XChaCha20-Poly1305 decryption vector 1; the bytes are mostly ASCII text starting "Internet-Drafts are draft documents". */ |
| + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, |
| + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, |
| + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, |
| + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, |
| + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, |
| + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, |
| + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, |
| + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, |
| + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, |
| + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, |
| + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, |
| + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, |
| + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, |
| + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, |
| + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, |
| + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, |
| + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, |
| + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, |
| + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, |
| + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, |
| + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, |
| + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, |
| + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, |
| + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, |
| + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, |
| + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, |
| + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, |
| + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, |
| + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, |
| + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, /* 0xe2 0x80 0x9c is a multi-byte (non-ASCII) sequence inside the text */ |
| + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, |
| + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, |
| + 0x9d |
| +}; |
| +static const u8 xdec_assoc001[] __initconst = { /* 12 bytes of associated data for XChaCha20-Poly1305 decryption vector 1 */ |
| + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x4e, 0x91 |
| +}; |
| +static const u8 xdec_nonce001[] __initconst = { /* 24-byte nonce: the byte values 0x00..0x17 in order */ |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
| +}; |
| +static const u8 xdec_key001[] __initconst = { /* 32-byte key for XChaCha20-Poly1305 decryption vector 1 */ |
| + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, |
| + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, |
| + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, |
| + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 |
| +}; |
| + |
| +static const struct chacha20poly1305_testvec /* Table of XChaCha20-Poly1305 decryption vectors iterated by chacha20poly1305_selftest(). */ |
| +xchacha20poly1305_dec_vectors[] __initconst = { |
| + { xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001, /* positional initializer; presumably input, output, assoc, nonce, key (struct is declared outside this view - confirm) */ |
| + sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) } /* presumably ilen, alen, nlen; remaining members are zero-initialized */ |
| +}; |
| + |
| +static void __init /* Self-test helper: encrypt one test vector through chacha20poly1305_encrypt(); only 8-byte nonces are accepted. */ |
| +chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u8 *nonce, const size_t nonce_len, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + if (nonce_len == 8) |
| + chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, |
| + get_unaligned_le64(nonce), key); /* the 8 nonce bytes are read as one little-endian u64 */ |
| + else |
| + BUG(); /* any other nonce length is treated as a defect in the test vectors */ |
| +} |
| + |
| +static bool __init /* Decide whether one decryption test passed, given the decrypt return value, whether failure was expected, and the plaintext memcmp() result. */ |
| +decryption_success(bool func_ret, bool expect_failure, int memcmp_result) |
| +{ |
| + if (expect_failure) |
| + return !func_ret; /* expected-failure vector: passes only if decryption was rejected; memcmp_result is ignored */ |
| + return func_ret && !memcmp_result; /* normal vector: decryption must succeed and the output must match (memcmp() == 0) */ |
| +} |
| + |
| +bool __init chacha20poly1305_selftest(void) /* Run all ChaCha20-Poly1305 and XChaCha20-Poly1305 encryption/decryption vectors; returns true only if every vector passes. */ |
| +{ |
| + enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; /* 4096-byte scratch buffers; assumes every vector (plus tag) fits - confirm against the vector tables */ |
| + size_t i; |
| + u8 *computed_output = NULL, *heap_src = NULL; |
| + bool success = true, ret; |
| + |
| + heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); /* allocated and freed here, but not read or written by any loop in this function */ |
| + computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); |
| + if (!heap_src || !computed_output) { |
| + pr_err("chacha20poly1305 self-test malloc: FAIL\n"); |
| + success = false; |
| + goto out; /* both pointers start as NULL, so the shared cleanup path can free them unconditionally */ |
| + } |
| + |
| + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { /* pass 1: ChaCha20-Poly1305 encryption vectors */ |
| + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); /* clear stale output from the previous vector */ |
| + chacha20poly1305_selftest_encrypt(computed_output, |
| + chacha20poly1305_enc_vectors[i].input, |
| + chacha20poly1305_enc_vectors[i].ilen, |
| + chacha20poly1305_enc_vectors[i].assoc, |
| + chacha20poly1305_enc_vectors[i].alen, |
| + chacha20poly1305_enc_vectors[i].nonce, |
| + chacha20poly1305_enc_vectors[i].nlen, |
| + chacha20poly1305_enc_vectors[i].key); |
| + if (memcmp(computed_output, |
| + chacha20poly1305_enc_vectors[i].output, |
| + chacha20poly1305_enc_vectors[i].ilen + |
| + POLY1305_DIGEST_SIZE)) { /* compare ciphertext and the appended tag in one go */ |
| + pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n", |
| + i + 1); /* vectors are reported with a 1-based index */ |
| + success = false; /* record the failure but keep running the remaining vectors */ |
| + } |
| + } |
| + |
| + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { /* pass 2: ChaCha20-Poly1305 decryption vectors */ |
| + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| + ret = chacha20poly1305_decrypt(computed_output, |
| + chacha20poly1305_dec_vectors[i].input, |
| + chacha20poly1305_dec_vectors[i].ilen, |
| + chacha20poly1305_dec_vectors[i].assoc, |
| + chacha20poly1305_dec_vectors[i].alen, |
| + get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), /* no nlen check here; assumes every vector in this table has an 8-byte nonce - confirm */ |
| + chacha20poly1305_dec_vectors[i].key); |
| + if (!decryption_success(ret, |
| + chacha20poly1305_dec_vectors[i].failure, |
| + memcmp(computed_output, /* NOTE(review): memcmp() is evaluated for every vector, including expected failures whose result is then ignored */ |
| + chacha20poly1305_dec_vectors[i].output, |
| + chacha20poly1305_dec_vectors[i].ilen - |
| + POLY1305_DIGEST_SIZE))) { /* plaintext length = input length minus the tag; assumes ilen >= POLY1305_DIGEST_SIZE - confirm */ |
| + pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| + |
| + |
| + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { /* pass 3: XChaCha20-Poly1305 encryption vectors */ |
| + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| + xchacha20poly1305_encrypt(computed_output, |
| + xchacha20poly1305_enc_vectors[i].input, |
| + xchacha20poly1305_enc_vectors[i].ilen, |
| + xchacha20poly1305_enc_vectors[i].assoc, |
| + xchacha20poly1305_enc_vectors[i].alen, |
| + xchacha20poly1305_enc_vectors[i].nonce, /* passed as a byte pointer; the X variant takes the full nonce array */ |
| + xchacha20poly1305_enc_vectors[i].key); |
| + if (memcmp(computed_output, |
| + xchacha20poly1305_enc_vectors[i].output, |
| + xchacha20poly1305_enc_vectors[i].ilen + |
| + POLY1305_DIGEST_SIZE)) { |
| + pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) { /* pass 4: XChaCha20-Poly1305 decryption vectors */ |
| + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| + ret = xchacha20poly1305_decrypt(computed_output, |
| + xchacha20poly1305_dec_vectors[i].input, |
| + xchacha20poly1305_dec_vectors[i].ilen, |
| + xchacha20poly1305_dec_vectors[i].assoc, |
| + xchacha20poly1305_dec_vectors[i].alen, |
| + xchacha20poly1305_dec_vectors[i].nonce, |
| + xchacha20poly1305_dec_vectors[i].key); |
| + if (!decryption_success(ret, |
| + xchacha20poly1305_dec_vectors[i].failure, |
| + memcmp(computed_output, |
| + xchacha20poly1305_dec_vectors[i].output, |
| + xchacha20poly1305_dec_vectors[i].ilen - |
| + POLY1305_DIGEST_SIZE))) { |
| + pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| + |
| +out: |
| + kfree(heap_src); |
| + kfree(computed_output); |
| + return success; |
| +} |
| diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c |
| new file mode 100644 |
| index 000000000000..c12ddbe9eb92 |
| |
| |
| @@ -0,0 +1,219 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is an implementation of the ChaCha20Poly1305 AEAD construction. |
| + * |
| + * Information: https://tools.ietf.org/html/rfc8439 |
| + */ |
| + |
| +#include <crypto/algapi.h> |
| +#include <crypto/chacha20poly1305.h> |
| +#include <crypto/chacha.h> |
| +#include <crypto/poly1305.h> |
| + |
| +#include <asm/unaligned.h> |
| +#include <linux/kernel.h> |
| +#include <linux/init.h> |
| +#include <linux/mm.h> |
| +#include <linux/module.h> |
| + |
| +#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) |
| + |
| +bool __init chacha20poly1305_selftest(void); |
| + |
| +static void chacha_load_key(u32 *k, const u8 *in) /* Convert 32 key bytes at 'in' into eight u32 words k[0..7], reading each 4-byte group as little-endian. */ |
| +{ |
| + k[0] = get_unaligned_le32(in); /* unaligned-safe loads: 'in' need not be 4-byte aligned */ |
| + k[1] = get_unaligned_le32(in + 4); |
| + k[2] = get_unaligned_le32(in + 8); |
| + k[3] = get_unaligned_le32(in + 12); |
| + k[4] = get_unaligned_le32(in + 16); |
| + k[5] = get_unaligned_le32(in + 20); |
| + k[6] = get_unaligned_le32(in + 24); |
| + k[7] = get_unaligned_le32(in + 28); |
| +} |
| + |
| +static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce) /* Set up chacha_state for XChaCha20: derive a subkey from key + first 16 nonce bytes, then init with that subkey and the last 8 nonce bytes. */ |
| +{ |
| + u32 k[CHACHA_KEY_WORDS]; |
| + u8 iv[CHACHA_IV_SIZE]; |
| + |
| + memset(iv, 0, 8); /* first 8 IV bytes are zero */ |
| + memcpy(iv + 8, nonce + 16, 8); /* last 8 IV bytes are nonce bytes 16..23, so 'nonce' must be at least 24 bytes */ |
| + |
| + chacha_load_key(k, key); |
| + |
| + /* Compute the subkey given the original key and first 128 nonce bits */ |
| + chacha_init(chacha_state, k, nonce); |
| + hchacha_block(chacha_state, k, 20); /* k is reused: it is passed here and then used as the key for the second chacha_init(); 20 is presumably the round count */ |
| + |
| + chacha_init(chacha_state, k, iv); /* final state: subkey + the short IV built above */ |
| + |
| + memzero_explicit(k, sizeof(k)); /* wipe key material from the stack */ |
| + memzero_explicit(iv, sizeof(iv)); |
| +} |
| + |
| +static void /* Core AEAD encryption: encrypt src into dst, authenticate ad and the ciphertext, and append the tag at dst + src_len. Consumes and wipes chacha_state. */ |
| +__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, u32 *chacha_state) |
| +{ |
| + const u8 *pad0 = page_address(ZERO_PAGE(0)); /* zero page: used as all-zero input for key generation and as MAC padding */ |
| + struct poly1305_desc_ctx poly1305_state; |
| + union { |
| + u8 block0[POLY1305_KEY_SIZE]; |
| + __le64 lens[2]; |
| + } b; /* block0 and lens share storage; they are used one after the other, never together */ |
| + |
| + chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); /* ChaCha output over zeros = raw keystream; it becomes the one-time Poly1305 key */ |
| + poly1305_init(&poly1305_state, b.block0); |
| + |
| + poly1305_update(&poly1305_state, ad, ad_len); |
| + if (ad_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); /* zero-pad the associated data to a 16-byte boundary */ |
| + |
| + chacha_crypt(chacha_state, dst, src, src_len, 20); |
| + |
| + poly1305_update(&poly1305_state, dst, src_len); /* the MAC covers the ciphertext, not the plaintext */ |
| + if (src_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf)); /* zero-pad the ciphertext to a 16-byte boundary */ |
| + |
| + b.lens[0] = cpu_to_le64(ad_len); /* final MAC block: both lengths as little-endian 64-bit values */ |
| + b.lens[1] = cpu_to_le64(src_len); |
| + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); |
| + |
| + poly1305_final(&poly1305_state, dst + src_len); /* tag is written right after the ciphertext, so dst needs room beyond src_len */ |
| + |
| + memzero_explicit(chacha_state, CHACHA_STATE_WORDS * sizeof(u32)); /* wipes the caller's state array; the size is spelled out because chacha_state is only a pointer here */ |
| + memzero_explicit(&b, sizeof(b)); |
| +} |
| + |
| +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, /* ChaCha20-Poly1305 encryption with a 64-bit nonce; writes src_len ciphertext bytes followed by the tag to dst. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + u32 chacha_state[CHACHA_STATE_WORDS]; |
| + u32 k[CHACHA_KEY_WORDS]; |
| + __le64 iv[2]; |
| + |
| + chacha_load_key(k, key); |
| + |
| + iv[0] = 0; /* 16-byte IV: 8 zero bytes ... */ |
| + iv[1] = cpu_to_le64(nonce); /* ... followed by the nonce in little-endian byte order */ |
| + |
| + chacha_init(chacha_state, k, (u8 *)iv); |
| + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state); /* the helper wipes chacha_state before returning */ |
| + |
| + memzero_explicit(iv, sizeof(iv)); |
| + memzero_explicit(k, sizeof(k)); |
| +} |
| +EXPORT_SYMBOL(chacha20poly1305_encrypt); |
| + |
| +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, /* XChaCha20-Poly1305 encryption with a byte-array nonce of XCHACHA20POLY1305_NONCE_SIZE; writes ciphertext followed by the tag to dst. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + u32 chacha_state[CHACHA_STATE_WORDS]; |
| + |
| + xchacha_init(chacha_state, key, nonce); /* derives the subkey and wipes its own temporaries */ |
| + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state); /* the helper wipes chacha_state, so no extra cleanup is done here */ |
| +} |
| +EXPORT_SYMBOL(xchacha20poly1305_encrypt); |
| + |
| +static bool /* Core AEAD decryption: verify the tag at the end of src and, only if it matches, decrypt into dst. Returns true on success. Does not wipe chacha_state. */ |
| +__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, u32 *chacha_state) |
| +{ |
| + const u8 *pad0 = page_address(ZERO_PAGE(0)); /* zero page: all-zero input for key generation and MAC padding */ |
| + struct poly1305_desc_ctx poly1305_state; |
| + size_t dst_len; |
| + int ret; |
| + union { |
| + u8 block0[POLY1305_KEY_SIZE]; |
| + u8 mac[POLY1305_DIGEST_SIZE]; |
| + __le64 lens[2]; |
| + } b; /* members share storage and are used strictly one after another */ |
| + |
| + if (unlikely(src_len < POLY1305_DIGEST_SIZE)) |
| + return false; /* input too short to even contain a tag */ |
| + |
| + chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); /* keystream over zeros becomes the one-time Poly1305 key */ |
| + poly1305_init(&poly1305_state, b.block0); |
| + |
| + poly1305_update(&poly1305_state, ad, ad_len); |
| + if (ad_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); /* zero-pad the associated data to a 16-byte boundary */ |
| + |
| + dst_len = src_len - POLY1305_DIGEST_SIZE; /* ciphertext length without the trailing tag; safe because of the check above */ |
| + poly1305_update(&poly1305_state, src, dst_len); |
| + if (dst_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (dst_len & 0xf)); /* zero-pad the ciphertext to a 16-byte boundary */ |
| + |
| + b.lens[0] = cpu_to_le64(ad_len); |
| + b.lens[1] = cpu_to_le64(dst_len); |
| + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); |
| + |
| + poly1305_final(&poly1305_state, b.mac); |
| + |
| + ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE); /* non-zero when the computed tag differs from the received one */ |
| + if (likely(!ret)) |
| + chacha_crypt(chacha_state, dst, src, dst_len, 20); /* dst is written only after the tag has been verified */ |
| + |
| + memzero_explicit(&b, sizeof(b)); /* only b is wiped here; chacha_state is left for the caller to clear */ |
| + |
| + return !ret; |
| +} |
| + |
| +bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, /* ChaCha20-Poly1305 decryption with a 64-bit nonce; src_len includes the trailing tag. Returns true if the tag verified and dst was written. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + u32 chacha_state[CHACHA_STATE_WORDS]; |
| + u32 k[CHACHA_KEY_WORDS]; |
| + __le64 iv[2]; |
| + bool ret; |
| + |
| + chacha_load_key(k, key); |
| + |
| + iv[0] = 0; /* 16-byte IV: 8 zero bytes ... */ |
| + iv[1] = cpu_to_le64(nonce); /* ... followed by the nonce in little-endian byte order */ |
| + |
| + chacha_init(chacha_state, k, (u8 *)iv); |
| + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, |
| + chacha_state); |
| + |
| + memzero_explicit(chacha_state, sizeof(chacha_state)); /* the decrypt helper does not wipe the state, so it is done here */ |
| + memzero_explicit(iv, sizeof(iv)); |
| + memzero_explicit(k, sizeof(k)); |
| + return ret; |
| +} |
| +EXPORT_SYMBOL(chacha20poly1305_decrypt); |
| + |
| +bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, /* XChaCha20-Poly1305 decryption; src_len includes the trailing tag. Returns true if the tag verified and dst was written. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + u32 chacha_state[CHACHA_STATE_WORDS]; |
| + |
| + xchacha_init(chacha_state, key, nonce); |
| + return __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, /* NOTE(review): chacha_state is not wiped on this path (the helper only clears its own union), unlike chacha20poly1305_decrypt() - confirm whether that is intended */ |
| + chacha_state); |
| +} |
| +EXPORT_SYMBOL(xchacha20poly1305_decrypt); |
| + |
| +static int __init mod_init(void) /* Module init: run the self-test unless crypto tests are disabled; fail the load with -ENODEV if it does not pass. */ |
| +{ |
| + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && /* short-circuit: the self-test is not called at all when tests are disabled */ |
| + WARN_ON(!chacha20poly1305_selftest())) /* WARN_ON also emits a kernel warning when the self-test fails */ |
| + return -ENODEV; |
| + return 0; /* NOTE(review): only module_init() is registered for this file, no module_exit() - confirm that an exit handler is not needed */ |
| +} |
| + |
| +module_init(mod_init); |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| -- |
| 2.18.2 |
| |
| |
| From 265dc97191fdbe6f8bc4ea513b07ff27cc1885ad Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 8 Nov 2019 13:22:40 +0100 |
| Subject: [PATCH 033/100] crypto: lib/chacha20poly1305 - reimplement |
| crypt_from_sg() routine |
| |
| commit d95312a3ccc0cd544d374be2fc45aeaa803e5fd9 upstream. |
| |
| Reimplement the library routines to perform chacha20poly1305 en/decryption |
| on scatterlists, without [ab]using the [deprecated] blkcipher interface, |
| which is rather heavyweight and does things we don't really need. |
| |
| Instead, we use the sg_miter API in a novel and clever way, to iterate |
| over the scatterlist in-place (i.e., source == destination, which is the |
| only way this library is expected to be used). That way, we don't have to |
| iterate over two scatterlists in parallel. |
| |
| Another optimization is that, instead of relying on the blkcipher walker |
| to present the input in suitable chunks, we recognize that ChaCha is a |
| streamcipher, and so we can simply deal with partial blocks by keeping a |
| block of cipherstream on the stack and use crypto_xor() to mix it with |
| the in/output. |
| |
| Finally, we omit the scatterwalk_and_copy() call if the last element of |
| the scatterlist covers the MAC as well (which is the common case), |
| avoiding the need to walk the scatterlist and kmap() the page twice. |
| |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/crypto/chacha20poly1305.h | 11 ++ |
| lib/crypto/chacha20poly1305-selftest.c | 45 ++++++++ |
| lib/crypto/chacha20poly1305.c | 150 +++++++++++++++++++++++++ |
| 3 files changed, 206 insertions(+) |
| |
| diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h |
| index ad3b1de58df8..234ee28078ef 100644 |
| |
| |
| @@ -7,6 +7,7 @@ |
| #define __CHACHA20POLY1305_H |
| |
| #include <linux/types.h> |
| +#include <linux/scatterlist.h> |
| |
| enum chacha20poly1305_lengths { |
| XCHACHA20POLY1305_NONCE_SIZE = 24, |
| @@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decrypt( |
| const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], |
| const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| |
| +bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| +bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]); |
| + |
| #endif /* __CHACHA20POLY1305_H */ |
| diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c |
| index d1ed0f27cfdb..465de46dbdef 100644 |
| |
| |
| @@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(void) |
| enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; |
| size_t i; |
| u8 *computed_output = NULL, *heap_src = NULL; |
| + struct scatterlist sg_src; |
| bool success = true, ret; |
| |
| heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); |
| @@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(void) |
| } |
| } |
| |
| + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { |
| + if (chacha20poly1305_enc_vectors[i].nlen != 8) |
| + continue; |
| + memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, |
| + chacha20poly1305_enc_vectors[i].ilen); |
| + sg_init_one(&sg_src, heap_src, |
| + chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE); |
| + chacha20poly1305_encrypt_sg_inplace(&sg_src, |
| + chacha20poly1305_enc_vectors[i].ilen, |
| + chacha20poly1305_enc_vectors[i].assoc, |
| + chacha20poly1305_enc_vectors[i].alen, |
| + get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce), |
| + chacha20poly1305_enc_vectors[i].key); |
| + if (memcmp(heap_src, |
| + chacha20poly1305_enc_vectors[i].output, |
| + chacha20poly1305_enc_vectors[i].ilen + |
| + POLY1305_DIGEST_SIZE)) { |
| + pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| + |
| for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { |
| memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| ret = chacha20poly1305_decrypt(computed_output, |
| @@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(void) |
| } |
| } |
| |
| + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { |
| + memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, |
| + chacha20poly1305_dec_vectors[i].ilen); |
| + sg_init_one(&sg_src, heap_src, |
| + chacha20poly1305_dec_vectors[i].ilen); |
| + ret = chacha20poly1305_decrypt_sg_inplace(&sg_src, |
| + chacha20poly1305_dec_vectors[i].ilen, |
| + chacha20poly1305_dec_vectors[i].assoc, |
| + chacha20poly1305_dec_vectors[i].alen, |
| + get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), |
| + chacha20poly1305_dec_vectors[i].key); |
| + if (!decryption_success(ret, |
| + chacha20poly1305_dec_vectors[i].failure, |
| + memcmp(heap_src, chacha20poly1305_dec_vectors[i].output, |
| + chacha20poly1305_dec_vectors[i].ilen - |
| + POLY1305_DIGEST_SIZE))) { |
| + pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n", |
| + i + 1); |
| + success = false; |
| + } |
| + } |
| |
| for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { |
| memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c |
| index c12ddbe9eb92..821e5cc9b14e 100644 |
| |
| |
| @@ -11,6 +11,7 @@ |
| #include <crypto/chacha20poly1305.h> |
| #include <crypto/chacha.h> |
| #include <crypto/poly1305.h> |
| +#include <crypto/scatterwalk.h> |
| |
| #include <asm/unaligned.h> |
| #include <linux/kernel.h> |
| @@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, |
| } |
| EXPORT_SYMBOL(xchacha20poly1305_decrypt); |
| |
| +static /* Shared worker for in-place scatterlist encryption/decryption: src_len is the payload length WITHOUT the tag, and the tag lives at offset src_len in the scatterlist. */ |
| +bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, |
| + const size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE], |
| + int encrypt) /* non-zero = encrypt and write the tag; zero = decrypt and verify the tag */ |
| +{ |
| + const u8 *pad0 = page_address(ZERO_PAGE(0)); /* zero page: all-zero input for keystream generation and MAC padding */ |
| + struct poly1305_desc_ctx poly1305_state; |
| + u32 chacha_state[CHACHA_STATE_WORDS]; |
| + struct sg_mapping_iter miter; |
| + size_t partial = 0; /* offset into b.chacha_stream of keystream left over from the previous segment (0 = none) */ |
| + unsigned int flags; |
| + bool ret = true; |
| + int sl; /* payload bytes still to process; goes to zero or negative once the payload is covered */ |
| + union { |
| + struct { |
| + u32 k[CHACHA_KEY_WORDS]; |
| + __le64 iv[2]; |
| + }; |
| + u8 block0[POLY1305_KEY_SIZE]; |
| + u8 chacha_stream[CHACHA_BLOCK_SIZE]; |
| + struct { |
| + u8 mac[2][POLY1305_DIGEST_SIZE]; |
| + }; |
| + __le64 lens[2]; |
| + } b __aligned(16); /* one scratch area reused by each phase in turn: key/iv setup, Poly1305 key, keystream block, lengths, tags */ |
| + |
| + chacha_load_key(b.k, key); |
| + |
| + b.iv[0] = 0; /* 16-byte IV: 8 zero bytes followed by the little-endian nonce */ |
| + b.iv[1] = cpu_to_le64(nonce); |
| + |
| + chacha_init(chacha_state, b.k, (u8 *)b.iv); |
| + chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); /* overwrites b.k/b.iv (same storage), which chacha_init() has already consumed */ |
| + poly1305_init(&poly1305_state, b.block0); |
| + |
| + if (unlikely(ad_len)) { |
| + poly1305_update(&poly1305_state, ad, ad_len); |
| + if (ad_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); /* zero-pad the associated data to a 16-byte boundary */ |
| + } |
| + |
| + flags = SG_MITER_TO_SG; /* the mapped pages are written to */ |
| + if (!preemptible()) |
| + flags |= SG_MITER_ATOMIC; /* use atomic mappings when called from a non-preemptible context */ |
| + |
| + sg_miter_start(&miter, src, sg_nents(src), flags); |
| + |
| + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { /* NOTE(review): size_t src_len is narrowed to int here; a length above INT_MAX would not be handled correctly - confirm callers cannot pass that */ |
| + u8 *addr = miter.addr; |
| + size_t length = min_t(size_t, sl, miter.length); /* payload bytes in this segment; the segment may extend past the payload */ |
| + |
| + if (!encrypt) |
| + poly1305_update(&poly1305_state, addr, length); /* decrypt: MAC the ciphertext before it is overwritten in place */ |
| + |
| + if (unlikely(partial)) { /* first use up keystream left over from the previous segment */ |
| + size_t l = min(length, CHACHA_BLOCK_SIZE - partial); |
| + |
| + crypto_xor(addr, b.chacha_stream + partial, l); |
| + partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1); /* wraps to 0 once the saved block is fully consumed */ |
| + |
| + addr += l; |
| + length -= l; |
| + } |
| + |
| + if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) { /* bulk path: at least one whole block, or everything that remains of the payload */ |
| + size_t l = length; |
| + |
| + if (unlikely(length < sl)) |
| + l &= ~(CHACHA_BLOCK_SIZE - 1); /* more payload follows in later segments: process whole blocks only */ |
| + chacha_crypt(chacha_state, addr, addr, l, 20); |
| + addr += l; |
| + length -= l; |
| + } |
| + |
| + if (unlikely(length > 0)) { /* sub-block tail: generate one keystream block and keep the unused part for the next segment */ |
| + chacha_crypt(chacha_state, b.chacha_stream, pad0, |
| + CHACHA_BLOCK_SIZE, 20); |
| + crypto_xor(addr, b.chacha_stream, length); |
| + partial = length; |
| + } |
| + |
| + if (encrypt) |
| + poly1305_update(&poly1305_state, miter.addr, |
| + min_t(size_t, sl, miter.length)); /* encrypt: MAC the freshly produced ciphertext of this segment */ |
| + } |
| + |
| + if (src_len & 0xf) |
| + poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf)); /* zero-pad the ciphertext to a 16-byte boundary */ |
| + |
| + b.lens[0] = cpu_to_le64(ad_len); /* final MAC block: both lengths as little-endian 64-bit values */ |
| + b.lens[1] = cpu_to_le64(src_len); |
| + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); |
| + |
| + if (likely(sl <= -POLY1305_DIGEST_SIZE)) { /* the last mapped segment extends far enough past the payload to hold the whole tag */ |
| + if (encrypt) { |
| + poly1305_final(&poly1305_state, |
| + miter.addr + miter.length + sl); /* sl <= 0 here, so this points just past the payload inside the mapped segment */ |
| + ret = true; |
| + } else { |
| + poly1305_final(&poly1305_state, b.mac[0]); |
| + ret = !crypto_memneq(b.mac[0], |
| + miter.addr + miter.length + sl, |
| + POLY1305_DIGEST_SIZE); /* compare the computed tag with the stored one in place */ |
| + } |
| + } |
| + |
| + sg_miter_stop(&miter); |
| + |
| + if (unlikely(sl > -POLY1305_DIGEST_SIZE)) { /* slow path: the tag is not fully inside the last mapped segment */ |
| + poly1305_final(&poly1305_state, b.mac[1]); /* computed tag always lands in mac[1] */ |
| + scatterwalk_map_and_copy(b.mac[encrypt], src, src_len, |
| + sizeof(b.mac[1]), encrypt); /* encrypt: copy mac[1] out to offset src_len; decrypt: copy the stored tag into mac[0] */ |
| + ret = encrypt || |
| + !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE); |
| + } |
| + |
| + memzero_explicit(chacha_state, sizeof(chacha_state)); |
| + memzero_explicit(&b, sizeof(b)); |
| + |
| + return ret; /* NOTE(review): on decrypt the data has already been decrypted in place before the tag check, so callers must discard it when this returns false */ |
| +} |
| + |
| +bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len, /* Encrypt src_len payload bytes in place in the scatterlist; the tag is written at offset src_len, so the list must have room for it. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len, |
| + nonce, key, 1); /* 1 selects the encrypt direction */ |
| +} |
| +EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace); |
| + |
| +bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len, /* Decrypt in place in the scatterlist; src_len counts ciphertext plus the trailing tag. Returns true only if the tag verifies. */ |
| + const u8 *ad, const size_t ad_len, |
| + const u64 nonce, |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + if (unlikely(src_len < POLY1305_DIGEST_SIZE)) |
| + return false; /* too short to contain a tag; also protects the subtraction below from wrapping */ |
| + |
| + return chacha20poly1305_crypt_sg_inplace(src, |
| + src_len - POLY1305_DIGEST_SIZE, /* the shared worker expects the payload length without the tag */ |
| + ad, ad_len, nonce, key, 0); /* 0 selects the decrypt direction */ |
| +} |
| +EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace); |
| + |
| static int __init mod_init(void) |
| { |
| if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && |
| -- |
| 2.18.2 |
| |
| |
| From d43d24b8f78fd3653105b3c7fccaf48406459faf Mon Sep 17 00:00:00 2001 |
| From: Eric Biggers <ebiggers@google.com> |
| Date: Sun, 17 Nov 2019 23:21:29 -0800 |
| Subject: [PATCH 034/100] crypto: chacha_generic - remove unnecessary setkey() |
| functions |
| |
| commit 2043323a799a660bc84bbee404cf7a2617ec6157 upstream. |
| |
| Use chacha20_setkey() and chacha12_setkey() from |
| <crypto/internal/chacha.h> instead of defining them again in |
| chacha_generic.c. |
| |
| Signed-off-by: Eric Biggers <ebiggers@google.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/chacha_generic.c | 18 +++--------------- |
| 1 file changed, 3 insertions(+), 15 deletions(-) |
| |
| diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c |
| index c1b147318393..8beea79ab117 100644 |
| |
| |
| @@ -37,18 +37,6 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| return err; |
| } |
| |
| -static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize) |
| -{ |
| - return chacha_setkey(tfm, key, keysize, 20); |
| -} |
| - |
| -static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| - unsigned int keysize) |
| -{ |
| - return chacha_setkey(tfm, key, keysize, 12); |
| -} |
| - |
| static int crypto_chacha_crypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| @@ -91,7 +79,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = CHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = crypto_chacha_crypt, |
| .decrypt = crypto_chacha_crypt, |
| }, { |
| @@ -106,7 +94,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha20_setkey, |
| + .setkey = chacha20_setkey, |
| .encrypt = crypto_xchacha_crypt, |
| .decrypt = crypto_xchacha_crypt, |
| }, { |
| @@ -121,7 +109,7 @@ static struct skcipher_alg algs[] = { |
| .max_keysize = CHACHA_KEY_SIZE, |
| .ivsize = XCHACHA_IV_SIZE, |
| .chunksize = CHACHA_BLOCK_SIZE, |
| - .setkey = crypto_chacha12_setkey, |
| + .setkey = chacha12_setkey, |
| .encrypt = crypto_xchacha_crypt, |
| .decrypt = crypto_xchacha_crypt, |
| } |
| -- |
| 2.18.2 |
| |
| |
| From a34dfabc1996f2c4cbcd1357e42af15c7cb212ea Mon Sep 17 00:00:00 2001 |
| From: Eric Biggers <ebiggers@google.com> |
| Date: Sun, 17 Nov 2019 23:21:58 -0800 |
| Subject: [PATCH 035/100] crypto: x86/chacha - only unregister algorithms if |
| registered |
| |
| commit b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 upstream. |
| |
| It's not valid to call crypto_unregister_skciphers() without a prior |
| call to crypto_register_skciphers(). |
| |
| Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") |
| Signed-off-by: Eric Biggers <ebiggers@google.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/chacha_glue.c | 3 ++- |
| 1 file changed, 2 insertions(+), 1 deletion(-) |
| |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index b391e13a9e41..a94e30b6f941 100644 |
| |
| |
| @@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(void) |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| + if (boot_cpu_has(X86_FEATURE_SSSE3)) |
| + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| module_init(chacha_simd_mod_init); |
| -- |
| 2.18.2 |
| |
| |
| From cca80e2b0611bf97f9bfad1148c6cb301dde2aec Mon Sep 17 00:00:00 2001 |
| From: Eric Biggers <ebiggers@google.com> |
| Date: Sun, 17 Nov 2019 23:22:16 -0800 |
| Subject: [PATCH 036/100] crypto: lib/chacha20poly1305 - use chacha20_crypt() |
| |
| commit 413808b71e6204b0cc1eeaa77960f7c3cd381d33 upstream. |
| |
| Use chacha20_crypt() instead of chacha_crypt(), since it's not really |
| appropriate for users of the ChaCha library API to be passing the number |
| of rounds as an argument. |
| |
| Signed-off-by: Eric Biggers <ebiggers@google.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/chacha20poly1305.c | 16 ++++++++-------- |
| 1 file changed, 8 insertions(+), 8 deletions(-) |
| |
| diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c |
| index 821e5cc9b14e..6d83cafebc69 100644 |
| |
| |
| @@ -66,14 +66,14 @@ __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| __le64 lens[2]; |
| } b; |
| |
| - chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); |
| + chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); |
| poly1305_init(&poly1305_state, b.block0); |
| |
| poly1305_update(&poly1305_state, ad, ad_len); |
| if (ad_len & 0xf) |
| poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); |
| |
| - chacha_crypt(chacha_state, dst, src, src_len, 20); |
| + chacha20_crypt(chacha_state, dst, src, src_len); |
| |
| poly1305_update(&poly1305_state, dst, src_len); |
| if (src_len & 0xf) |
| @@ -140,7 +140,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, |
| if (unlikely(src_len < POLY1305_DIGEST_SIZE)) |
| return false; |
| |
| - chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); |
| + chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); |
| poly1305_init(&poly1305_state, b.block0); |
| |
| poly1305_update(&poly1305_state, ad, ad_len); |
| @@ -160,7 +160,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, |
| |
| ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE); |
| if (likely(!ret)) |
| - chacha_crypt(chacha_state, dst, src, dst_len, 20); |
| + chacha20_crypt(chacha_state, dst, src, dst_len); |
| |
| memzero_explicit(&b, sizeof(b)); |
| |
| @@ -241,7 +241,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, |
| b.iv[1] = cpu_to_le64(nonce); |
| |
| chacha_init(chacha_state, b.k, (u8 *)b.iv); |
| - chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); |
| + chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); |
| poly1305_init(&poly1305_state, b.block0); |
| |
| if (unlikely(ad_len)) { |
| @@ -278,14 +278,14 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, |
| |
| if (unlikely(length < sl)) |
| l &= ~(CHACHA_BLOCK_SIZE - 1); |
| - chacha_crypt(chacha_state, addr, addr, l, 20); |
| + chacha20_crypt(chacha_state, addr, addr, l); |
| addr += l; |
| length -= l; |
| } |
| |
| if (unlikely(length > 0)) { |
| - chacha_crypt(chacha_state, b.chacha_stream, pad0, |
| - CHACHA_BLOCK_SIZE, 20); |
| + chacha20_crypt(chacha_state, b.chacha_stream, pad0, |
| + CHACHA_BLOCK_SIZE); |
| crypto_xor(addr, b.chacha_stream, length); |
| partial = length; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From f2be36196660d017639e2f1ac064665e3aa9cda4 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 25 Nov 2019 11:31:12 +0100 |
| Subject: [PATCH 037/100] crypto: arch - conditionalize crypto api in arch glue |
| for lib code |
| |
| commit 8394bfec51e0e565556101bcc4e2fe7551104cd8 upstream. |
| |
| For glue code that's used by Zinc, the actual Crypto API functions might |
| not necessarily exist, and don't need to exist either. Before this |
| patch, there are valid build configurations that lead to an unbuildable |
| kernel. This fixes it to conditionalize those symbols on the existence |
| of the proper config entry. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/chacha-glue.c | 26 ++++++++++++++++---------- |
| arch/arm/crypto/curve25519-glue.c | 5 +++-- |
| arch/arm/crypto/poly1305-glue.c | 9 ++++++--- |
| arch/arm64/crypto/chacha-neon-glue.c | 5 +++-- |
| arch/arm64/crypto/poly1305-glue.c | 5 +++-- |
| arch/mips/crypto/chacha-glue.c | 6 ++++-- |
| arch/mips/crypto/poly1305-glue.c | 6 ++++-- |
| arch/x86/crypto/blake2s-glue.c | 6 ++++-- |
| arch/x86/crypto/chacha_glue.c | 5 +++-- |
| arch/x86/crypto/curve25519-x86_64.c | 7 ++++--- |
| arch/x86/crypto/poly1305_glue.c | 5 +++-- |
| 11 files changed, 53 insertions(+), 32 deletions(-) |
| |
| diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c |
| index 3f0c057aa050..7bdf8823066d 100644 |
| |
| |
| @@ -286,11 +286,13 @@ static struct skcipher_alg neon_algs[] = { |
| |
| static int __init chacha_simd_mod_init(void) |
| { |
| - int err; |
| + int err = 0; |
| |
| - err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| - if (err) |
| - return err; |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + if (err) |
| + return err; |
| + } |
| |
| if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { |
| int i; |
| @@ -310,18 +312,22 @@ static int __init chacha_simd_mod_init(void) |
| static_branch_enable(&use_neon); |
| } |
| |
| - err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| - if (err) |
| - crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| + if (err) |
| + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + } |
| } |
| return err; |
| } |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| - crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| + } |
| } |
| |
| module_init(chacha_simd_mod_init); |
| diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c |
| index 2e9e12d2f642..f3f42cf3b893 100644 |
| |
| |
| @@ -108,14 +108,15 @@ static int __init mod_init(void) |
| { |
| if (elf_hwcap & HWCAP_NEON) { |
| static_branch_enable(&have_neon); |
| - return crypto_register_kpp(&curve25519_alg); |
| + return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? |
| + crypto_register_kpp(&curve25519_alg) : 0; |
| } |
| return 0; |
| } |
| |
| static void __exit mod_exit(void) |
| { |
| - if (elf_hwcap & HWCAP_NEON) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) |
| crypto_unregister_kpp(&curve25519_alg); |
| } |
| |
| diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c |
| index 74a725ac89c9..abe3f2d587dc 100644 |
| |
| |
| @@ -249,16 +249,19 @@ static int __init arm_poly1305_mod_init(void) |
| if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && |
| (elf_hwcap & HWCAP_NEON)) |
| static_branch_enable(&have_neon); |
| - else |
| + else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
| /* register only the first entry */ |
| return crypto_register_shash(&arm_poly1305_algs[0]); |
| |
| - return crypto_register_shashes(arm_poly1305_algs, |
| - ARRAY_SIZE(arm_poly1305_algs)); |
| + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
| + crypto_register_shashes(arm_poly1305_algs, |
| + ARRAY_SIZE(arm_poly1305_algs)) : 0; |
| } |
| |
| static void __exit arm_poly1305_mod_exit(void) |
| { |
| + if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
| + return; |
| if (!static_branch_likely(&have_neon)) { |
| crypto_unregister_shash(&arm_poly1305_algs[0]); |
| return; |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index b08029d7bde6..71c11d2e9fcd 100644 |
| |
| |
| @@ -211,12 +211,13 @@ static int __init chacha_simd_mod_init(void) |
| |
| static_branch_enable(&have_neon); |
| |
| - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| + return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? |
| + crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; |
| } |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - if (cpu_have_named_feature(ASIMD)) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && cpu_have_named_feature(ASIMD)) |
| crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c |
| index dd843d0ee83a..83a2338a8826 100644 |
| |
| |
| @@ -220,12 +220,13 @@ static int __init neon_poly1305_mod_init(void) |
| |
| static_branch_enable(&have_neon); |
| |
| - return crypto_register_shash(&neon_poly1305_alg); |
| + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
| + crypto_register_shash(&neon_poly1305_alg) : 0; |
| } |
| |
| static void __exit neon_poly1305_mod_exit(void) |
| { |
| - if (cpu_have_named_feature(ASIMD)) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) |
| crypto_unregister_shash(&neon_poly1305_alg); |
| } |
| |
| diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c |
| index 779e399c9bef..90896029d0cd 100644 |
| |
| |
| @@ -128,12 +128,14 @@ static struct skcipher_alg algs[] = { |
| |
| static int __init chacha_simd_mod_init(void) |
| { |
| - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| + return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? |
| + crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; |
| } |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) |
| + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| module_init(chacha_simd_mod_init); |
| diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c |
| index b759b6ccc361..b37d29cf5d0a 100644 |
| |
| |
| @@ -187,12 +187,14 @@ static struct shash_alg mips_poly1305_alg = { |
| |
| static int __init mips_poly1305_mod_init(void) |
| { |
| - return crypto_register_shash(&mips_poly1305_alg); |
| + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
| + crypto_register_shash(&mips_poly1305_alg) : 0; |
| } |
| |
| static void __exit mips_poly1305_mod_exit(void) |
| { |
| - crypto_unregister_shash(&mips_poly1305_alg); |
| + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
| + crypto_unregister_shash(&mips_poly1305_alg); |
| } |
| |
| module_init(mips_poly1305_mod_init); |
| diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c |
| index 4a37ba7cdbe5..1d9ff8a45e1f 100644 |
| |
| |
| @@ -210,12 +210,14 @@ static int __init blake2s_mod_init(void) |
| XFEATURE_MASK_AVX512, NULL)) |
| static_branch_enable(&blake2s_use_avx512); |
| |
| - return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
| + crypto_register_shashes(blake2s_algs, |
| + ARRAY_SIZE(blake2s_algs)) : 0; |
| } |
| |
| static void __exit blake2s_mod_exit(void) |
| { |
| - if (boot_cpu_has(X86_FEATURE_SSSE3)) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) |
| crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); |
| } |
| |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index a94e30b6f941..1bebe11b9ec9 100644 |
| |
| |
| @@ -299,12 +299,13 @@ static int __init chacha_simd_mod_init(void) |
| boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ |
| static_branch_enable(&chacha_use_avx512vl); |
| } |
| - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| + return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? |
| + crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; |
| } |
| |
| static void __exit chacha_simd_mod_fini(void) |
| { |
| - if (boot_cpu_has(X86_FEATURE_SSSE3)) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3)) |
| crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| } |
| |
| diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c |
| index a52a3fb15727..eec7d2d24239 100644 |
| |
| |
| @@ -2457,13 +2457,14 @@ static int __init curve25519_mod_init(void) |
| static_branch_enable(&curve25519_use_adx); |
| else |
| return 0; |
| - return crypto_register_kpp(&curve25519_alg); |
| + return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? |
| + crypto_register_kpp(&curve25519_alg) : 0; |
| } |
| |
| static void __exit curve25519_mod_exit(void) |
| { |
| - if (boot_cpu_has(X86_FEATURE_BMI2) || |
| - boot_cpu_has(X86_FEATURE_ADX)) |
| + if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && |
| + (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX))) |
| crypto_unregister_kpp(&curve25519_alg); |
| } |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 370cd88068ec..0cc4537e6617 100644 |
| |
| |
| @@ -224,12 +224,13 @@ static int __init poly1305_simd_mod_init(void) |
| cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) |
| static_branch_enable(&poly1305_use_avx2); |
| |
| - return crypto_register_shash(&alg); |
| + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; |
| } |
| |
| static void __exit poly1305_simd_mod_exit(void) |
| { |
| - crypto_unregister_shash(&alg); |
| + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
| + crypto_unregister_shash(&alg); |
| } |
| |
| module_init(poly1305_simd_mod_init); |
| -- |
| 2.18.2 |
| |
| |
| From 59949ef5e1e03dbbe4cfc235b2f1ae4f7f84b90a Mon Sep 17 00:00:00 2001 |
| From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@vt.edu> |
| Date: Thu, 5 Dec 2019 20:58:36 -0500 |
| Subject: [PATCH 038/100] crypto: chacha - fix warning message in header file |
| |
| commit 579d705cd64e44f3fcda1a6cfd5f37468a5ddf63 upstream. |
| |
| Building with W=1 causes a warning: |
| |
| CC [M] arch/x86/crypto/chacha_glue.o |
| In file included from arch/x86/crypto/chacha_glue.c:10: |
| ./include/crypto/internal/chacha.h:37:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] |
| 37 | static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| | ^~~~~~ |
| |
| Straighten out the order to match the rest of the header file. |
| |
| Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/crypto/internal/chacha.h | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h |
| index aa5d4a16aac5..b085dc1ac151 100644 |
| |
| |
| @@ -34,7 +34,7 @@ static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| return chacha_setkey(tfm, key, keysize, 20); |
| } |
| |
| -static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| +static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, |
| unsigned int keysize) |
| { |
| return chacha_setkey(tfm, key, keysize, 12); |
| -- |
| 2.18.2 |
| |
| |
| From 543a947c79372b975b492c5e8706503a79a9831c Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 11 Dec 2019 10:26:39 +0100 |
| Subject: [PATCH 039/100] crypto: arm/curve25519 - add arch-specific key |
| generation function |
| |
| commit 84faa307249b341f6ad8de3e1869d77a65e26669 upstream. |
| |
| Somehow this was forgotten when Zinc was being split into oddly shaped |
| pieces, resulting in linker errors. The x86_64 glue has a specific key |
| generation implementation, but the Arm one does not. However, it can |
| still receive the NEON speedups by calling the ordinary DH function |
| using the base point. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/curve25519-glue.c | 7 +++++++ |
| 1 file changed, 7 insertions(+) |
| |
| diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c |
| index f3f42cf3b893..776ae07e0469 100644 |
| |
| |
| @@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], |
| } |
| EXPORT_SYMBOL(curve25519_arch); |
| |
| +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], |
| + const u8 secret[CURVE25519_KEY_SIZE]) |
| +{ |
| + return curve25519_arch(pub, secret, curve25519_base_point); |
| +} |
| +EXPORT_SYMBOL(curve25519_base_arch); |
| + |
| static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, |
| unsigned int len) |
| { |
| -- |
| 2.18.2 |
| |
| |
| From 7b55c4f9819e6791a6b881b2cf8880e947adfe8d Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 16 Dec 2019 19:53:26 +0100 |
| Subject: [PATCH 040/100] crypto: lib/curve25519 - re-add selftests |
| |
| commit aa127963f1cab2b93c74c9b128a84610203fb674 upstream. |
| |
| Somehow these were dropped when Zinc was being integrated, which is |
| problematic, because testing the library interface for Curve25519 is |
| important. This commit simply adds them back and wires them in in the |
| same way that the blake2s selftests are wired in. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/Makefile | 1 + |
| lib/crypto/curve25519-selftest.c | 1321 ++++++++++++++++++++++++++++++ |
| lib/crypto/curve25519.c | 17 + |
| 3 files changed, 1339 insertions(+) |
| create mode 100644 lib/crypto/curve25519-selftest.c |
| |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 34a701ab8b92..f97f9b941110 100644 |
| |
| |
| @@ -36,4 +36,5 @@ libsha256-y := sha256.o |
| ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) |
| libblake2s-y += blake2s-selftest.o |
| libchacha20poly1305-y += chacha20poly1305-selftest.o |
| +libcurve25519-y += curve25519-selftest.o |
| endif |
| diff --git a/lib/crypto/curve25519-selftest.c b/lib/crypto/curve25519-selftest.c |
| new file mode 100644 |
| index 000000000000..c85e85381e78 |
| |
| |
| @@ -0,0 +1,1321 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include <crypto/curve25519.h> |
| + |
| +struct curve25519_test_vector { |
| + u8 private[CURVE25519_KEY_SIZE]; |
| + u8 public[CURVE25519_KEY_SIZE]; |
| + u8 result[CURVE25519_KEY_SIZE]; |
| + bool valid; |
| +}; |
| +static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = { |
| + { |
| + .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, |
| + 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, |
| + 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, |
| + 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, |
| + .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, |
| + 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, |
| + 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, |
| + 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, |
| + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, |
| + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, |
| + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, |
| + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, |
| + 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, |
| + 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, |
| + 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, |
| + .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, |
| + 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, |
| + 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, |
| + 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, |
| + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, |
| + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, |
| + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, |
| + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 1 }, |
| + .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, |
| + 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, |
| + 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, |
| + 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 1 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, |
| + 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, |
| + 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, |
| + 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, |
| + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, |
| + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, |
| + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, |
| + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, |
| + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, |
| + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, |
| + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, |
| + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, |
| + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, |
| + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, |
| + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 1, 2, 3, 4 }, |
| + .public = { 0 }, |
| + .result = { 0 }, |
| + .valid = false |
| + }, |
| + { |
| + .private = { 2, 4, 6, 8 }, |
| + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, |
| + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, |
| + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, |
| + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, |
| + .result = { 0 }, |
| + .valid = false |
| + }, |
| + { |
| + .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, |
| + .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, |
| + 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, |
| + 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, |
| + 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, |
| + .valid = true |
| + }, |
| + { |
| + .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, |
| + .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, |
| + 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, |
| + 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, |
| + 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, |
| + .valid = true |
| + }, |
| + /* wycheproof - normal case */ |
| + { |
| + .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, |
| + 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, |
| + 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, |
| + 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, |
| + .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, |
| + 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, |
| + 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, |
| + 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, |
| + .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, |
| + 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, |
| + 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, |
| + 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key on twist */ |
| + { |
| + .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, |
| + 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, |
| + 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, |
| + 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, |
| + .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, |
| + 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, |
| + 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, |
| + 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, |
| + .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, |
| + 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, |
| + 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, |
| + 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key on twist */ |
| + { |
| + .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, |
| + 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, |
| + 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, |
| + 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, |
| + .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, |
| + 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, |
| + 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, |
| + 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, |
| + .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, |
| + 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, |
| + 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, |
| + 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key on twist */ |
| + { |
| + .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, |
| + 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, |
| + 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, |
| + 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, |
| + .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, |
| + 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, |
| + 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, |
| + 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, |
| + .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, |
| + 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, |
| + 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, |
| + 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key on twist */ |
| + { |
| + .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, |
| + 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, |
| + 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, |
| + 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, |
| + .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, |
| + 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, |
| + 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, |
| + 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, |
| + .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, |
| + 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, |
| + 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, |
| + 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key on twist */ |
| + { |
| + .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, |
| + 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, |
| + 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, |
| + 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, |
| + .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, |
| + 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, |
| + 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, |
| + 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, |
| + .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, |
| + 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, |
| + 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, |
| + 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key = 0 */ |
| + { |
| + .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, |
| + 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, |
| + 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, |
| + 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, |
| + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key = 1 */ |
| + { |
| + .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, |
| + 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, |
| + 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, |
| + 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, |
| + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, |
| + 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, |
| + 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, |
| + 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, |
| + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, |
| + 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, |
| + 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, |
| + 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, |
| + 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, |
| + 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, |
| + 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, |
| + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, |
| + 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, |
| + 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, |
| + 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, |
| + 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, |
| + 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, |
| + 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, |
| + .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, |
| + 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, |
| + 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, |
| + .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, |
| + 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, |
| + 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, |
| + 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, |
| + 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, |
| + 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, |
| + 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, |
| + .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, |
| + 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, |
| + 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, |
| + .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, |
| + 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, |
| + 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, |
| + 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, |
| + 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, |
| + 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, |
| + 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, |
| + .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, |
| + 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, |
| + 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, |
| + 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case on twist */ |
| + { |
| + .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, |
| + 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, |
| + 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, |
| + 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, |
| + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, |
| + 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, |
| + 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, |
| + 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, |
| + 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, |
| + 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, |
| + 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, |
| + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, |
| + 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, |
| + 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, |
| + 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, |
| + 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, |
| + 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, |
| + 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, |
| + .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, |
| + 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, |
| + 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, |
| + 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, |
| + 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, |
| + 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, |
| + 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, |
| + .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, |
| + 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, |
| + 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, |
| + 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, |
| + 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, |
| + 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, |
| + 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, |
| + .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, |
| + 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, |
| + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, |
| + 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, |
| + .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, |
| + 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, |
| + 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, |
| + 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, |
| + 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, |
| + 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, |
| + 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, |
| + 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, |
| + 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, |
| + 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, |
| + 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, |
| + 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, |
| + 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, |
| + .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, |
| + 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, |
| + 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, |
| + 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for public key */ |
| + { |
| + .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, |
| + 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, |
| + 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, |
| + 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, |
| + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, |
| + 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, |
| + 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, |
| + 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, |
| + 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, |
| + 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, |
| + 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, |
| + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, |
| + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, |
| + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, |
| + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, |
| + 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, |
| + 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, |
| + 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, |
| + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, |
| + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, |
| + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, |
| + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, |
| + 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, |
| + 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, |
| + 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, |
| + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, |
| + 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, |
| + 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, |
| + 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, |
| + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, |
| + 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, |
| + 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, |
| + 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, |
| + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, |
| + 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, |
| + 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, |
| + 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, |
| + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, |
| + 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, |
| + 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, |
| + 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, |
| + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, |
| + 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, |
| + 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, |
| + 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, |
| + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, |
| + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, |
| + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, |
| + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, |
| + 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, |
| + 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, |
| + 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, |
| + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, |
| + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, |
| + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, |
| + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, |
| + 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, |
| + 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, |
| + 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, |
| + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, |
| + 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, |
| + 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, |
| + 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, |
| + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key with low order */ |
| + { |
| + .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, |
| + 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, |
| + 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, |
| + 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, |
| + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = false |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, |
| + 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, |
| + 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, |
| + 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, |
| + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, |
| + 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, |
| + 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, |
| + 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, |
| + 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, |
| + 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, |
| + 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, |
| + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, |
| + 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, |
| + 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, |
| + 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, |
| + 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, |
| + 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, |
| + 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, |
| + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, |
| + 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, |
| + 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, |
| + 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, |
| + 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, |
| + 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, |
| + 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, |
| + 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, |
| + 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, |
| + 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, |
| + 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, |
| + 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, |
| + 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, |
| + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, |
| + 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, |
| + 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, |
| + 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, |
| + 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, |
| + 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, |
| + 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, |
| + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, |
| + 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, |
| + 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, |
| + 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, |
| + 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, |
| + 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, |
| + 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, |
| + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, |
| + .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, |
| + 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, |
| + 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, |
| + 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, |
| + 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, |
| + 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, |
| + 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, |
| + .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, |
| + 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, |
| + 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, |
| + 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, |
| + 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, |
| + 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, |
| + 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, |
| + .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, |
| + 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, |
| + 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, |
| + 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, |
| + 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, |
| + 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, |
| + 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, |
| + .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, |
| + 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, |
| + 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, |
| + 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, |
| + 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, |
| + 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, |
| + 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, |
| + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, |
| + 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, |
| + 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, |
| + 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, |
| + 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, |
| + 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, |
| + 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, |
| + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, |
| + 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, |
| + 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, |
| + 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, |
| + 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, |
| + 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, |
| + 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, |
| + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, |
| + 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, |
| + 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, |
| + 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, |
| + 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, |
| + 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, |
| + 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, |
| + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, |
| + 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, |
| + 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, |
| + 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, |
| + 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, |
| + 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, |
| + 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, |
| + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, |
| + 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, |
| + 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, |
| + 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - public key >= p */ |
| + { |
| + .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, |
| + 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, |
| + 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, |
| + 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, |
| + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, |
| + .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, |
| + 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, |
| + 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, |
| + 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - RFC 7748 */ |
| + { |
| + .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, |
| + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, |
| + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, |
| + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, |
| + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, |
| + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, |
| + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, |
| + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, |
| + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, |
| + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, |
| + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, |
| + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - RFC 7748 */ |
| + { |
| + .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, |
| + 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, |
| + 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, |
| + 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, |
| + .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, |
| + 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, |
| + 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, |
| + 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, |
| + .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, |
| + 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, |
| + 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, |
| + 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, |
| + 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, |
| + 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, |
| + 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, |
| + .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, |
| + 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, |
| + 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, |
| + 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, |
| + .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, |
| + 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, |
| + 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, |
| + 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, |
| + .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, |
| + 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, |
| + 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, |
| + 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, |
| + .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, |
| + 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, |
| + 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, |
| + 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, |
| + .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, |
| + 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, |
| + 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, |
| + 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, |
| + .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, |
| + 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, |
| + 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, |
| + 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, |
| + .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, |
| + 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, |
| + 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, |
| + 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, |
| + .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, |
| + 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, |
| + 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, |
| + 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, |
| + .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, |
| + 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, |
| + 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, |
| + 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, |
| + .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, |
| + 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, |
| + 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, |
| + 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, |
| + .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, |
| + 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, |
| + 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, |
| + 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, |
| + .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, |
| + 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, |
| + 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, |
| + 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - edge case for shared secret */ |
| + { |
| + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, |
| + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, |
| + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, |
| + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, |
| + .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, |
| + 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, |
| + 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, |
| + 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, |
| + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - checking for overflow */ |
| + { |
| + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, |
| + 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, |
| + 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, |
| + 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, |
| + .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, |
| + 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, |
| + 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, |
| + 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - checking for overflow */ |
| + { |
| + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, |
| + 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, |
| + 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, |
| + 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, |
| + .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, |
| + 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, |
| + 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, |
| + 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - checking for overflow */ |
| + { |
| + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, |
| + 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, |
| + 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, |
| + 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, |
| + .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, |
| + 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, |
| + 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, |
| + 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - checking for overflow */ |
| + { |
| + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, |
| + 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, |
| + 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, |
| + 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, |
| + .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, |
| + 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, |
| + 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, |
| + 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, |
| + .valid = true |
| + }, |
| + /* wycheproof - checking for overflow */ |
| + { |
| + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, |
| + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, |
| + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, |
| + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, |
| + .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, |
| + 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, |
| + 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, |
| + 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, |
| + .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, |
| + 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, |
| + 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, |
| + 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - private key == -1 (mod order) */ |
| + { |
| + .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, |
| + 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, |
| + .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, |
| + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, |
| + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, |
| + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, |
| + .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, |
| + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, |
| + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, |
| + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, |
| + .valid = true |
| + }, |
| + /* wycheproof - private key == 1 (mod order) on twist */ |
| + { |
| + .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, |
| + 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, |
| + .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, |
| + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, |
| + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, |
| + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, |
| + .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, |
| + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, |
| + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, |
| + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, |
| + .valid = true |
| + } |
| +}; |
| + |
| +bool __init curve25519_selftest(void) |
| +{ |
| + bool success = true, ret, ret2; |
| + size_t i = 0, j; |
| + u8 in[CURVE25519_KEY_SIZE]; |
| + u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE], |
| + out3[CURVE25519_KEY_SIZE]; |
| + |
| + for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { |
| + memset(out, 0, CURVE25519_KEY_SIZE); |
| + ret = curve25519(out, curve25519_test_vectors[i].private, |
| + curve25519_test_vectors[i].public); |
| + if (ret != curve25519_test_vectors[i].valid || |
| + memcmp(out, curve25519_test_vectors[i].result, |
| + CURVE25519_KEY_SIZE)) { |
| + pr_err("curve25519 self-test %zu: FAIL\n", i + 1); |
| + success = false; |
| + } |
| + } |
| + |
| + for (i = 0; i < 5; ++i) { |
| + get_random_bytes(in, sizeof(in)); |
| + ret = curve25519_generate_public(out, in); |
| + ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); |
| + curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); |
| + if (ret != ret2 || |
| + memcmp(out, out2, CURVE25519_KEY_SIZE) || |
| + memcmp(out, out3, CURVE25519_KEY_SIZE)) { |
| + pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x", |
| + i + 1); |
| + for (j = CURVE25519_KEY_SIZE; j-- > 0;) |
| + printk(KERN_CONT "%02x", in[j]); |
| + printk(KERN_CONT "\n"); |
| + success = false; |
| + } |
| + } |
| + |
| + return success; |
| +} |
| diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c |
| index 0106bebe6900..c03ccdb99434 100644 |
| |
| |
| @@ -13,6 +13,8 @@ |
| #include <linux/module.h> |
| #include <linux/init.h> |
| |
| +bool curve25519_selftest(void); |
| + |
| const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; |
| const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; |
| |
| @@ -20,6 +22,21 @@ EXPORT_SYMBOL(curve25519_null_point); |
| EXPORT_SYMBOL(curve25519_base_point); |
| EXPORT_SYMBOL(curve25519_generic); |
| |
| +static int __init mod_init(void) |
| +{ |
| + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && |
| + WARN_ON(!curve25519_selftest())) |
| + return -ENODEV; |
| + return 0; |
| +} |
| + |
| +static void __exit mod_exit(void) |
| +{ |
| +} |
| + |
| +module_init(mod_init); |
| +module_exit(mod_exit); |
| + |
| MODULE_LICENSE("GPL v2"); |
| MODULE_DESCRIPTION("Curve25519 scalar multiplication"); |
| MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| -- |
| 2.18.2 |
| |
| |
| From 773a55c545ad6cb457b4eac8d71d68e695ec0ac4 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 5 Jan 2020 22:40:46 -0500 |
| Subject: [PATCH 041/100] crypto: poly1305 - add new 32 and 64-bit generic |
| versions |
| |
| commit 1c08a104360f3e18f4ee6346c21cc3923efb952e upstream. |
| |
| These two C implementations from Zinc -- a 32x32 one and a 64x64 one, |
| depending on the platform -- come from Andrew Moon's public domain |
| poly1305-donna portable code, modified for usage in the kernel. The |
| precomputation in the 32-bit version and the use of 64x64 multiplies in |
| the 64-bit version make these perform better than the code they replace. |
| Moon's code is also very widespread and has received many eyeballs of |
| scrutiny. |
| |
| There's a bit of interference with the x86 implementation, which relies |
| on internal details of the old scalar implementation. In the next |
| commit, the x86 implementation will be replaced with a faster one that |
| doesn't rely on this, so none of this matters much. But for now, to keep |
| this passing the tests, we inline the bits of the old implementation |
| that the x86 implementation relied on. Also, since we now support a |
| slightly larger key space, via the union, some offsets had to be fixed |
| up. |
| |
| Nonce calculation was folded in with the emit function, to take |
| advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no |
| nonce handling in emit, so this path was conditionalized. We also |
| introduced a new struct, poly1305_core_key, to represent the precise |
| amount of space that particular implementation uses. |
| |
| Testing with kbench9000, depending on the CPU, the update function for |
| the 32x32 version has been improved by 4%-7%, and for the 64x64 by |
| 19%-30%. The 32x32 gains are small, but I think there's great value in |
| having a parallel implementation to the 64x64 one so that the two can be |
| compared side-by-side as nice stand-alone units. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305-avx2-x86_64.S | 20 +-- |
| arch/x86/crypto/poly1305_glue.c | 215 +++++++++++++++++++++++-- |
| crypto/adiantum.c | 4 +- |
| crypto/nhpoly1305.c | 2 +- |
| crypto/poly1305_generic.c | 25 ++- |
| include/crypto/internal/poly1305.h | 45 ++---- |
| include/crypto/nhpoly1305.h | 4 +- |
| include/crypto/poly1305.h | 26 ++- |
| lib/crypto/Makefile | 4 +- |
| lib/crypto/poly1305-donna32.c | 204 +++++++++++++++++++++++ |
| lib/crypto/poly1305-donna64.c | 185 +++++++++++++++++++++ |
| lib/crypto/poly1305.c | 169 +------------------ |
| 12 files changed, 675 insertions(+), 228 deletions(-) |
| create mode 100644 lib/crypto/poly1305-donna32.c |
| create mode 100644 lib/crypto/poly1305-donna64.c |
| |
| diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S |
| index 8b341bc29d41..1688fb551070 100644 |
| |
| |
| @@ -34,16 +34,16 @@ ORMASK: .octa 0x00000000010000000000000001000000 |
| #define u2 0x08(%r8) |
| #define u3 0x0c(%r8) |
| #define u4 0x10(%r8) |
| -#define w0 0x14(%r8) |
| -#define w1 0x18(%r8) |
| -#define w2 0x1c(%r8) |
| -#define w3 0x20(%r8) |
| -#define w4 0x24(%r8) |
| -#define y0 0x28(%r8) |
| -#define y1 0x2c(%r8) |
| -#define y2 0x30(%r8) |
| -#define y3 0x34(%r8) |
| -#define y4 0x38(%r8) |
| +#define w0 0x18(%r8) |
| +#define w1 0x1c(%r8) |
| +#define w2 0x20(%r8) |
| +#define w3 0x24(%r8) |
| +#define w4 0x28(%r8) |
| +#define y0 0x30(%r8) |
| +#define y1 0x34(%r8) |
| +#define y2 0x38(%r8) |
| +#define y3 0x3c(%r8) |
| +#define y4 0x40(%r8) |
| #define m %rsi |
| #define hc0 %ymm0 |
| #define hc1 %ymm1 |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 0cc4537e6617..edb7113e36f3 100644 |
| |
| |
| @@ -25,6 +25,21 @@ asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, |
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); |
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); |
| |
| +static inline u64 mlt(u64 a, u64 b) |
| +{ |
| + return a * b; |
| +} |
| + |
| +static inline u32 sr(u64 v, u_char n) |
| +{ |
| + return v >> n; |
| +} |
| + |
| +static inline u32 and(u32 v, u32 mask) |
| +{ |
| + return v & mask; |
| +} |
| + |
| static void poly1305_simd_mult(u32 *a, const u32 *b) |
| { |
| u8 m[POLY1305_BLOCK_SIZE]; |
| @@ -36,6 +51,168 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) |
| poly1305_block_sse2(a, m, b, 1); |
| } |
| |
| +static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key) |
| +{ |
| + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ |
| + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; |
| + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; |
| + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; |
| + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; |
| + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; |
| +} |
| + |
| +static void poly1305_integer_blocks(struct poly1305_state *state, |
| + const struct poly1305_key *key, |
| + const void *src, |
| + unsigned int nblocks, u32 hibit) |
| +{ |
| + u32 r0, r1, r2, r3, r4; |
| + u32 s1, s2, s3, s4; |
| + u32 h0, h1, h2, h3, h4; |
| + u64 d0, d1, d2, d3, d4; |
| + |
| + if (!nblocks) |
| + return; |
| + |
| + r0 = key->r[0]; |
| + r1 = key->r[1]; |
| + r2 = key->r[2]; |
| + r3 = key->r[3]; |
| + r4 = key->r[4]; |
| + |
| + s1 = r1 * 5; |
| + s2 = r2 * 5; |
| + s3 = r3 * 5; |
| + s4 = r4 * 5; |
| + |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + do { |
| + /* h += m[i] */ |
| + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; |
| + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; |
| + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; |
| + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; |
| + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); |
| + |
| + /* h *= r */ |
| + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + |
| + mlt(h3, s2) + mlt(h4, s1); |
| + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + |
| + mlt(h3, s3) + mlt(h4, s2); |
| + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + |
| + mlt(h3, s4) + mlt(h4, s3); |
| + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + |
| + mlt(h3, r0) + mlt(h4, s4); |
| + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + |
| + mlt(h3, r1) + mlt(h4, r0); |
| + |
| + /* (partial) h %= p */ |
| + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); |
| + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); |
| + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); |
| + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); |
| + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); |
| + h1 += h0 >> 26; h0 = h0 & 0x3ffffff; |
| + |
| + src += POLY1305_BLOCK_SIZE; |
| + } while (--nblocks); |
| + |
| + state->h[0] = h0; |
| + state->h[1] = h1; |
| + state->h[2] = h2; |
| + state->h[3] = h3; |
| + state->h[4] = h4; |
| +} |
| + |
| +static void poly1305_integer_emit(const struct poly1305_state *state, void *dst) |
| +{ |
| + u32 h0, h1, h2, h3, h4; |
| + u32 g0, g1, g2, g3, g4; |
| + u32 mask; |
| + |
| + /* fully carry h */ |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; |
| + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; |
| + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; |
| + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; |
| + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; |
| + |
| + /* compute h + -p */ |
| + g0 = h0 + 5; |
| + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; |
| + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; |
| + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; |
| + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; |
| + |
| + /* select h if h < p, or h + -p if h >= p */ |
| + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| + g0 &= mask; |
| + g1 &= mask; |
| + g2 &= mask; |
| + g3 &= mask; |
| + g4 &= mask; |
| + mask = ~mask; |
| + h0 = (h0 & mask) | g0; |
| + h1 = (h1 & mask) | g1; |
| + h2 = (h2 & mask) | g2; |
| + h3 = (h3 & mask) | g3; |
| + h4 = (h4 & mask) | g4; |
| + |
| + /* h = h % (2^128) */ |
| + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); |
| + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); |
| + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); |
| + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); |
| +} |
| + |
| +void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) |
| +{ |
| + poly1305_integer_setkey(desc->opaque_r, key); |
| + desc->s[0] = get_unaligned_le32(key + 16); |
| + desc->s[1] = get_unaligned_le32(key + 20); |
| + desc->s[2] = get_unaligned_le32(key + 24); |
| + desc->s[3] = get_unaligned_le32(key + 28); |
| + poly1305_core_init(&desc->h); |
| + desc->buflen = 0; |
| + desc->sset = true; |
| + desc->rset = 1; |
| +} |
| +EXPORT_SYMBOL_GPL(poly1305_init_arch); |
| + |
| +static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + if (!dctx->sset) { |
| + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| + poly1305_integer_setkey(dctx->r, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = 1; |
| + } |
| + if (srclen >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + } |
| + return srclen; |
| +} |
| + |
| static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, |
| const u8 *src, unsigned int srclen) |
| { |
| @@ -47,8 +224,8 @@ static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, |
| srclen = datalen; |
| } |
| if (srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_core_blocks(&dctx->h, dctx->r, src, |
| - srclen / POLY1305_BLOCK_SIZE, 1); |
| + poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src, |
| + srclen / POLY1305_BLOCK_SIZE, 1); |
| srclen %= POLY1305_BLOCK_SIZE; |
| } |
| return srclen; |
| @@ -105,12 +282,6 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| return srclen; |
| } |
| |
| -void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) |
| -{ |
| - poly1305_init_generic(desc, key); |
| -} |
| -EXPORT_SYMBOL(poly1305_init_arch); |
| - |
| void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int srclen) |
| { |
| @@ -158,9 +329,31 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| } |
| EXPORT_SYMBOL(poly1305_update_arch); |
| |
| -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) |
| +void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst) |
| { |
| - poly1305_final_generic(desc, digest); |
| + __le32 digest[4]; |
| + u64 f = 0; |
| + |
| + if (unlikely(desc->buflen)) { |
| + desc->buf[desc->buflen++] = 1; |
| + memset(desc->buf + desc->buflen, 0, |
| + POLY1305_BLOCK_SIZE - desc->buflen); |
| + poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0); |
| + } |
| + |
| + poly1305_integer_emit(&desc->h, digest); |
| + |
| + /* mac = (h + s) % (2^128) */ |
| + f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; |
| + put_unaligned_le32(f, dst + 0); |
| + f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; |
| + put_unaligned_le32(f, dst + 4); |
| + f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; |
| + put_unaligned_le32(f, dst + 8); |
| + f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; |
| + put_unaligned_le32(f, dst + 12); |
| + |
| + *desc = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL(poly1305_final_arch); |
| |
| @@ -183,7 +376,7 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| if (unlikely(!dctx->sset)) |
| return -ENOKEY; |
| |
| - poly1305_final_generic(dctx, dst); |
| + poly1305_final_arch(dctx, dst); |
| return 0; |
| } |
| |
| diff --git a/crypto/adiantum.c b/crypto/adiantum.c |
| index aded26092268..c846a887abe1 100644 |
| |
| |
| @@ -72,7 +72,7 @@ struct adiantum_tfm_ctx { |
| struct crypto_skcipher *streamcipher; |
| struct crypto_cipher *blockcipher; |
| struct crypto_shash *hash; |
| - struct poly1305_key header_hash_key; |
| + struct poly1305_core_key header_hash_key; |
| }; |
| |
| struct adiantum_request_ctx { |
| @@ -249,7 +249,7 @@ static void adiantum_hash_header(struct skcipher_request *req) |
| poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, |
| TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); |
| |
| - poly1305_core_emit(&state, &rctx->header_hash); |
| + poly1305_core_emit(&state, NULL, &rctx->header_hash); |
| } |
| |
| /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */ |
| diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c |
| index f6b6a52092b4..8a3006c3b51b 100644 |
| |
| |
| @@ -210,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn) |
| if (state->nh_remaining) |
| process_nh_hash_value(state, key); |
| |
| - poly1305_core_emit(&state->poly_state, dst); |
| + poly1305_core_emit(&state->poly_state, NULL, dst); |
| return 0; |
| } |
| EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); |
| diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c |
| index 21edbd8c99fb..94af47eb6fa6 100644 |
| |
| |
| @@ -31,6 +31,29 @@ static int crypto_poly1305_init(struct shash_desc *desc) |
| return 0; |
| } |
| |
| +static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| + const u8 *src, unsigned int srclen) |
| +{ |
| + if (!dctx->sset) { |
| + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| + poly1305_core_setkey(&dctx->core_r, src); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->rset = 2; |
| + } |
| + if (srclen >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(src + 0); |
| + dctx->s[1] = get_unaligned_le32(src + 4); |
| + dctx->s[2] = get_unaligned_le32(src + 8); |
| + dctx->s[3] = get_unaligned_le32(src + 12); |
| + src += POLY1305_BLOCK_SIZE; |
| + srclen -= POLY1305_BLOCK_SIZE; |
| + dctx->sset = true; |
| + } |
| + } |
| + return srclen; |
| +} |
| + |
| static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int srclen) |
| { |
| @@ -42,7 +65,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
| srclen = datalen; |
| } |
| |
| - poly1305_core_blocks(&dctx->h, dctx->r, src, |
| + poly1305_core_blocks(&dctx->h, &dctx->core_r, src, |
| srclen / POLY1305_BLOCK_SIZE, 1); |
| } |
| |
| diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h |
| index 479b0cab2a1a..064e52ca5248 100644 |
| |
| |
| @@ -11,48 +11,23 @@ |
| #include <crypto/poly1305.h> |
| |
| /* |
| - * Poly1305 core functions. These implement the ε-almost-∆-universal hash |
| - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce |
| - * ("s key") at the end. They also only support block-aligned inputs. |
| + * Poly1305 core functions. These only accept whole blocks; the caller must |
| + * handle any needed block buffering and padding. 'hibit' must be 1 for any |
| + * full blocks, or 0 for the final block if it had to be padded. If 'nonce' is |
| + * non-NULL, then it's added at the end to compute the Poly1305 MAC. Otherwise, |
| + * only the ε-almost-∆-universal hash function (not the full MAC) is computed. |
| */ |
| -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); |
| + |
| +void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); |
| static inline void poly1305_core_init(struct poly1305_state *state) |
| { |
| *state = (struct poly1305_state){}; |
| } |
| |
| void poly1305_core_blocks(struct poly1305_state *state, |
| - const struct poly1305_key *key, const void *src, |
| + const struct poly1305_core_key *key, const void *src, |
| unsigned int nblocks, u32 hibit); |
| -void poly1305_core_emit(const struct poly1305_state *state, void *dst); |
| - |
| -/* |
| - * Poly1305 requires a unique key for each tag, which implies that we can't set |
| - * it on the tfm that gets accessed by multiple users simultaneously. Instead we |
| - * expect the key as the first 32 bytes in the update() call. |
| - */ |
| -static inline |
| -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen) |
| -{ |
| - if (!dctx->sset) { |
| - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_core_setkey(dctx->r, src); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| - dctx->rset = 1; |
| - } |
| - if (srclen >= POLY1305_BLOCK_SIZE) { |
| - dctx->s[0] = get_unaligned_le32(src + 0); |
| - dctx->s[1] = get_unaligned_le32(src + 4); |
| - dctx->s[2] = get_unaligned_le32(src + 8); |
| - dctx->s[3] = get_unaligned_le32(src + 12); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| - dctx->sset = true; |
| - } |
| - } |
| - return srclen; |
| -} |
| +void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], |
| + void *dst); |
| |
| #endif |
| diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h |
| index 53c04423c582..306925fea190 100644 |
| |
| |
| @@ -7,7 +7,7 @@ |
| #define _NHPOLY1305_H |
| |
| #include <crypto/hash.h> |
| -#include <crypto/poly1305.h> |
| +#include <crypto/internal/poly1305.h> |
| |
| /* NH parameterization: */ |
| |
| @@ -33,7 +33,7 @@ |
| #define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) |
| |
| struct nhpoly1305_key { |
| - struct poly1305_key poly_key; |
| + struct poly1305_core_key poly_key; |
| u32 nh_key[NH_KEY_WORDS]; |
| }; |
| |
| diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h |
| index 74c6e1cd73ee..f1f67fc749cf 100644 |
| |
| |
| @@ -13,12 +13,29 @@ |
| #define POLY1305_KEY_SIZE 32 |
| #define POLY1305_DIGEST_SIZE 16 |
| |
| +/* The poly1305_key and poly1305_state types are mostly opaque and |
| + * implementation-defined. Limbs might be in base 2^64 or base 2^26, or |
| + * different yet. The union type provided keeps these 64-bit aligned for the |
| + * case in which this is implemented using 64x64 multiplies. |
| + */ |
| + |
| struct poly1305_key { |
| - u32 r[5]; /* key, base 2^26 */ |
| + union { |
| + u32 r[5]; |
| + u64 r64[3]; |
| + }; |
| +}; |
| + |
| +struct poly1305_core_key { |
| + struct poly1305_key key; |
| + struct poly1305_key precomputed_s; |
| }; |
| |
| struct poly1305_state { |
| - u32 h[5]; /* accumulator, base 2^26 */ |
| + union { |
| + u32 h[5]; |
| + u64 h64[3]; |
| + }; |
| }; |
| |
| struct poly1305_desc_ctx { |
| @@ -35,7 +52,10 @@ struct poly1305_desc_ctx { |
| /* accumulator */ |
| struct poly1305_state h; |
| /* key */ |
| - struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; |
| + union { |
| + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; |
| + struct poly1305_core_key core_r; |
| + }; |
| }; |
| |
| void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index f97f9b941110..6ecaf83a5a9a 100644 |
| |
| |
| @@ -28,7 +28,9 @@ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| libdes-y := des.o |
| |
| obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o |
| -libpoly1305-y := poly1305.o |
| +libpoly1305-y := poly1305-donna32.o |
| +libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o |
| +libpoly1305-y += poly1305.o |
| |
| obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o |
| libsha256-y := sha256.o |
| diff --git a/lib/crypto/poly1305-donna32.c b/lib/crypto/poly1305-donna32.c |
| new file mode 100644 |
| index 000000000000..3cc77d94390b |
| |
| |
| @@ -0,0 +1,204 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is based in part on Andrew Moon's poly1305-donna, which is in the |
| + * public domain. |
| + */ |
| + |
| +#include <linux/kernel.h> |
| +#include <asm/unaligned.h> |
| +#include <crypto/internal/poly1305.h> |
| + |
| +void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) |
| +{ |
| + /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */ |
| + key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; |
| + key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03; |
| + key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff; |
| + key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff; |
| + key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff; |
| + |
| + /* s = 5*r */ |
| + key->precomputed_s.r[0] = key->key.r[1] * 5; |
| + key->precomputed_s.r[1] = key->key.r[2] * 5; |
| + key->precomputed_s.r[2] = key->key.r[3] * 5; |
| + key->precomputed_s.r[3] = key->key.r[4] * 5; |
| +} |
| +EXPORT_SYMBOL(poly1305_core_setkey); |
| + |
| +void poly1305_core_blocks(struct poly1305_state *state, |
| + const struct poly1305_core_key *key, const void *src, |
| + unsigned int nblocks, u32 hibit) |
| +{ |
| + const u8 *input = src; |
| + u32 r0, r1, r2, r3, r4; |
| + u32 s1, s2, s3, s4; |
| + u32 h0, h1, h2, h3, h4; |
| + u64 d0, d1, d2, d3, d4; |
| + u32 c; |
| + |
| + if (!nblocks) |
| + return; |
| + |
| + hibit <<= 24; |
| + |
| + r0 = key->key.r[0]; |
| + r1 = key->key.r[1]; |
| + r2 = key->key.r[2]; |
| + r3 = key->key.r[3]; |
| + r4 = key->key.r[4]; |
| + |
| + s1 = key->precomputed_s.r[0]; |
| + s2 = key->precomputed_s.r[1]; |
| + s3 = key->precomputed_s.r[2]; |
| + s4 = key->precomputed_s.r[3]; |
| + |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + do { |
| + /* h += m[i] */ |
| + h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff; |
| + h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff; |
| + h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff; |
| + h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff; |
| + h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit; |
| + |
| + /* h *= r */ |
| + d0 = ((u64)h0 * r0) + ((u64)h1 * s4) + |
| + ((u64)h2 * s3) + ((u64)h3 * s2) + |
| + ((u64)h4 * s1); |
| + d1 = ((u64)h0 * r1) + ((u64)h1 * r0) + |
| + ((u64)h2 * s4) + ((u64)h3 * s3) + |
| + ((u64)h4 * s2); |
| + d2 = ((u64)h0 * r2) + ((u64)h1 * r1) + |
| + ((u64)h2 * r0) + ((u64)h3 * s4) + |
| + ((u64)h4 * s3); |
| + d3 = ((u64)h0 * r3) + ((u64)h1 * r2) + |
| + ((u64)h2 * r1) + ((u64)h3 * r0) + |
| + ((u64)h4 * s4); |
| + d4 = ((u64)h0 * r4) + ((u64)h1 * r3) + |
| + ((u64)h2 * r2) + ((u64)h3 * r1) + |
| + ((u64)h4 * r0); |
| + |
| + /* (partial) h %= p */ |
| + c = (u32)(d0 >> 26); |
| + h0 = (u32)d0 & 0x3ffffff; |
| + d1 += c; |
| + c = (u32)(d1 >> 26); |
| + h1 = (u32)d1 & 0x3ffffff; |
| + d2 += c; |
| + c = (u32)(d2 >> 26); |
| + h2 = (u32)d2 & 0x3ffffff; |
| + d3 += c; |
| + c = (u32)(d3 >> 26); |
| + h3 = (u32)d3 & 0x3ffffff; |
| + d4 += c; |
| + c = (u32)(d4 >> 26); |
| + h4 = (u32)d4 & 0x3ffffff; |
| + h0 += c * 5; |
| + c = (h0 >> 26); |
| + h0 = h0 & 0x3ffffff; |
| + h1 += c; |
| + |
| + input += POLY1305_BLOCK_SIZE; |
| + } while (--nblocks); |
| + |
| + state->h[0] = h0; |
| + state->h[1] = h1; |
| + state->h[2] = h2; |
| + state->h[3] = h3; |
| + state->h[4] = h4; |
| +} |
| +EXPORT_SYMBOL(poly1305_core_blocks); |
| + |
| +void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], |
| + void *dst) |
| +{ |
| + u8 *mac = dst; |
| + u32 h0, h1, h2, h3, h4, c; |
| + u32 g0, g1, g2, g3, g4; |
| + u64 f; |
| + u32 mask; |
| + |
| + /* fully carry h */ |
| + h0 = state->h[0]; |
| + h1 = state->h[1]; |
| + h2 = state->h[2]; |
| + h3 = state->h[3]; |
| + h4 = state->h[4]; |
| + |
| + c = h1 >> 26; |
| + h1 = h1 & 0x3ffffff; |
| + h2 += c; |
| + c = h2 >> 26; |
| + h2 = h2 & 0x3ffffff; |
| + h3 += c; |
| + c = h3 >> 26; |
| + h3 = h3 & 0x3ffffff; |
| + h4 += c; |
| + c = h4 >> 26; |
| + h4 = h4 & 0x3ffffff; |
| + h0 += c * 5; |
| + c = h0 >> 26; |
| + h0 = h0 & 0x3ffffff; |
| + h1 += c; |
| + |
| + /* compute h + -p */ |
| + g0 = h0 + 5; |
| + c = g0 >> 26; |
| + g0 &= 0x3ffffff; |
| + g1 = h1 + c; |
| + c = g1 >> 26; |
| + g1 &= 0x3ffffff; |
| + g2 = h2 + c; |
| + c = g2 >> 26; |
| + g2 &= 0x3ffffff; |
| + g3 = h3 + c; |
| + c = g3 >> 26; |
| + g3 &= 0x3ffffff; |
| + g4 = h4 + c - (1UL << 26); |
| + |
| + /* select h if h < p, or h + -p if h >= p */ |
| + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| + g0 &= mask; |
| + g1 &= mask; |
| + g2 &= mask; |
| + g3 &= mask; |
| + g4 &= mask; |
| + mask = ~mask; |
| + |
| + h0 = (h0 & mask) | g0; |
| + h1 = (h1 & mask) | g1; |
| + h2 = (h2 & mask) | g2; |
| + h3 = (h3 & mask) | g3; |
| + h4 = (h4 & mask) | g4; |
| + |
| + /* h = h % (2^128) */ |
| + h0 = ((h0) | (h1 << 26)) & 0xffffffff; |
| + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; |
| + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; |
| + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; |
| + |
| + if (likely(nonce)) { |
| + /* mac = (h + nonce) % (2^128) */ |
| + f = (u64)h0 + nonce[0]; |
| + h0 = (u32)f; |
| + f = (u64)h1 + nonce[1] + (f >> 32); |
| + h1 = (u32)f; |
| + f = (u64)h2 + nonce[2] + (f >> 32); |
| + h2 = (u32)f; |
| + f = (u64)h3 + nonce[3] + (f >> 32); |
| + h3 = (u32)f; |
| + } |
| + |
| + put_unaligned_le32(h0, &mac[0]); |
| + put_unaligned_le32(h1, &mac[4]); |
| + put_unaligned_le32(h2, &mac[8]); |
| + put_unaligned_le32(h3, &mac[12]); |
| +} |
| +EXPORT_SYMBOL(poly1305_core_emit); |
| diff --git a/lib/crypto/poly1305-donna64.c b/lib/crypto/poly1305-donna64.c |
| new file mode 100644 |
| index 000000000000..6ae181bb4345 |
| |
| |
| @@ -0,0 +1,185 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is based in part on Andrew Moon's poly1305-donna, which is in the |
| + * public domain. |
| + */ |
| + |
| +#include <linux/kernel.h> |
| +#include <asm/unaligned.h> |
| +#include <crypto/internal/poly1305.h> |
| + |
| +typedef __uint128_t u128; |
| + |
| +void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) |
| +{ |
| + u64 t0, t1; |
| + |
| + /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */ |
| + t0 = get_unaligned_le64(&raw_key[0]); |
| + t1 = get_unaligned_le64(&raw_key[8]); |
| + |
| + key->key.r64[0] = t0 & 0xffc0fffffffULL; |
| + key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL; |
| + key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL; |
| + |
| + /* s = 20*r, since 2^132 = 4 * 2^130 = 20 (mod 2^130 - 5) */ |
| + key->precomputed_s.r64[0] = key->key.r64[1] * 20; |
| + key->precomputed_s.r64[1] = key->key.r64[2] * 20; |
| +} |
| +EXPORT_SYMBOL(poly1305_core_setkey); |
| + |
| +void poly1305_core_blocks(struct poly1305_state *state, |
| + const struct poly1305_core_key *key, const void *src, |
| + unsigned int nblocks, u32 hibit) |
| +{ |
| + const u8 *input = src; |
| + u64 hibit64; |
| + u64 r0, r1, r2; |
| + u64 s1, s2; |
| + u64 h0, h1, h2; |
| + u64 c; |
| + u128 d0, d1, d2, d; |
| + |
| + if (!nblocks) |
| + return; |
| + |
| + hibit64 = ((u64)hibit) << 40; |
| + |
| + r0 = key->key.r64[0]; |
| + r1 = key->key.r64[1]; |
| + r2 = key->key.r64[2]; |
| + |
| + h0 = state->h64[0]; |
| + h1 = state->h64[1]; |
| + h2 = state->h64[2]; |
| + |
| + s1 = key->precomputed_s.r64[0]; |
| + s2 = key->precomputed_s.r64[1]; |
| + |
| + do { |
| + u64 t0, t1; |
| + |
| + /* h += m[i] */ |
| + t0 = get_unaligned_le64(&input[0]); |
| + t1 = get_unaligned_le64(&input[8]); |
| + |
| + h0 += t0 & 0xfffffffffffULL; |
| + h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL; |
| + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64; |
| + |
| + /* h *= r */ |
| + d0 = (u128)h0 * r0; |
| + d = (u128)h1 * s2; |
| + d0 += d; |
| + d = (u128)h2 * s1; |
| + d0 += d; |
| + d1 = (u128)h0 * r1; |
| + d = (u128)h1 * r0; |
| + d1 += d; |
| + d = (u128)h2 * s2; |
| + d1 += d; |
| + d2 = (u128)h0 * r2; |
| + d = (u128)h1 * r1; |
| + d2 += d; |
| + d = (u128)h2 * r0; |
| + d2 += d; |
| + |
| + /* (partial) h %= p */ |
| + c = (u64)(d0 >> 44); |
| + h0 = (u64)d0 & 0xfffffffffffULL; |
| + d1 += c; |
| + c = (u64)(d1 >> 44); |
| + h1 = (u64)d1 & 0xfffffffffffULL; |
| + d2 += c; |
| + c = (u64)(d2 >> 42); |
| + h2 = (u64)d2 & 0x3ffffffffffULL; |
| + h0 += c * 5; |
| + c = h0 >> 44; |
| + h0 = h0 & 0xfffffffffffULL; |
| + h1 += c; |
| + |
| + input += POLY1305_BLOCK_SIZE; |
| + } while (--nblocks); |
| + |
| + state->h64[0] = h0; |
| + state->h64[1] = h1; |
| + state->h64[2] = h2; |
| +} |
| +EXPORT_SYMBOL(poly1305_core_blocks); |
| + |
| +void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], |
| + void *dst) |
| +{ |
| + u8 *mac = dst; |
| + u64 h0, h1, h2, c; |
| + u64 g0, g1, g2; |
| + u64 t0, t1; |
| + |
| + /* fully carry h */ |
| + h0 = state->h64[0]; |
| + h1 = state->h64[1]; |
| + h2 = state->h64[2]; |
| + |
| + c = h1 >> 44; |
| + h1 &= 0xfffffffffffULL; |
| + h2 += c; |
| + c = h2 >> 42; |
| + h2 &= 0x3ffffffffffULL; |
| + h0 += c * 5; |
| + c = h0 >> 44; |
| + h0 &= 0xfffffffffffULL; |
| + h1 += c; |
| + c = h1 >> 44; |
| + h1 &= 0xfffffffffffULL; |
| + h2 += c; |
| + c = h2 >> 42; |
| + h2 &= 0x3ffffffffffULL; |
| + h0 += c * 5; |
| + c = h0 >> 44; |
| + h0 &= 0xfffffffffffULL; |
| + h1 += c; |
| + |
| + /* compute h + -p */ |
| + g0 = h0 + 5; |
| + c = g0 >> 44; |
| + g0 &= 0xfffffffffffULL; |
| + g1 = h1 + c; |
| + c = g1 >> 44; |
| + g1 &= 0xfffffffffffULL; |
| + g2 = h2 + c - (1ULL << 42); |
| + |
| + /* select h if h < p, or h + -p if h >= p */ |
| + c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; |
| + g0 &= c; |
| + g1 &= c; |
| + g2 &= c; |
| + c = ~c; |
| + h0 = (h0 & c) | g0; |
| + h1 = (h1 & c) | g1; |
| + h2 = (h2 & c) | g2; |
| + |
| + if (likely(nonce)) { |
| + /* h = (h + nonce) */ |
| + t0 = ((u64)nonce[1] << 32) | nonce[0]; |
| + t1 = ((u64)nonce[3] << 32) | nonce[2]; |
| + |
| + h0 += t0 & 0xfffffffffffULL; |
| + c = h0 >> 44; |
| + h0 &= 0xfffffffffffULL; |
| + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c; |
| + c = h1 >> 44; |
| + h1 &= 0xfffffffffffULL; |
| + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c; |
| + h2 &= 0x3ffffffffffULL; |
| + } |
| + |
| + /* mac = h % (2^128) */ |
| + h0 = h0 | (h1 << 44); |
| + h1 = (h1 >> 20) | (h2 << 24); |
| + |
| + put_unaligned_le64(h0, &mac[0]); |
| + put_unaligned_le64(h1, &mac[8]); |
| +} |
| +EXPORT_SYMBOL(poly1305_core_emit); |
| diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c |
| index 32ec293c65ae..9d2d14df0fee 100644 |
| |
| |
| @@ -12,151 +12,9 @@ |
| #include <linux/module.h> |
| #include <asm/unaligned.h> |
| |
| -static inline u64 mlt(u64 a, u64 b) |
| -{ |
| - return a * b; |
| -} |
| - |
| -static inline u32 sr(u64 v, u_char n) |
| -{ |
| - return v >> n; |
| -} |
| - |
| -static inline u32 and(u32 v, u32 mask) |
| -{ |
| - return v & mask; |
| -} |
| - |
| -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) |
| -{ |
| - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ |
| - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; |
| - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; |
| - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; |
| - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; |
| - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_setkey); |
| - |
| -void poly1305_core_blocks(struct poly1305_state *state, |
| - const struct poly1305_key *key, const void *src, |
| - unsigned int nblocks, u32 hibit) |
| -{ |
| - u32 r0, r1, r2, r3, r4; |
| - u32 s1, s2, s3, s4; |
| - u32 h0, h1, h2, h3, h4; |
| - u64 d0, d1, d2, d3, d4; |
| - |
| - if (!nblocks) |
| - return; |
| - |
| - r0 = key->r[0]; |
| - r1 = key->r[1]; |
| - r2 = key->r[2]; |
| - r3 = key->r[3]; |
| - r4 = key->r[4]; |
| - |
| - s1 = r1 * 5; |
| - s2 = r2 * 5; |
| - s3 = r3 * 5; |
| - s4 = r4 * 5; |
| - |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - do { |
| - /* h += m[i] */ |
| - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; |
| - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; |
| - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; |
| - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; |
| - h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); |
| - |
| - /* h *= r */ |
| - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + |
| - mlt(h3, s2) + mlt(h4, s1); |
| - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + |
| - mlt(h3, s3) + mlt(h4, s2); |
| - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + |
| - mlt(h3, s4) + mlt(h4, s3); |
| - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + |
| - mlt(h3, r0) + mlt(h4, s4); |
| - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + |
| - mlt(h3, r1) + mlt(h4, r0); |
| - |
| - /* (partial) h %= p */ |
| - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); |
| - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); |
| - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); |
| - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); |
| - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); |
| - h1 += h0 >> 26; h0 = h0 & 0x3ffffff; |
| - |
| - src += POLY1305_BLOCK_SIZE; |
| - } while (--nblocks); |
| - |
| - state->h[0] = h0; |
| - state->h[1] = h1; |
| - state->h[2] = h2; |
| - state->h[3] = h3; |
| - state->h[4] = h4; |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_blocks); |
| - |
| -void poly1305_core_emit(const struct poly1305_state *state, void *dst) |
| -{ |
| - u32 h0, h1, h2, h3, h4; |
| - u32 g0, g1, g2, g3, g4; |
| - u32 mask; |
| - |
| - /* fully carry h */ |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; |
| - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; |
| - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; |
| - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; |
| - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; |
| - |
| - /* compute h + -p */ |
| - g0 = h0 + 5; |
| - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; |
| - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; |
| - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; |
| - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; |
| - |
| - /* select h if h < p, or h + -p if h >= p */ |
| - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| - g0 &= mask; |
| - g1 &= mask; |
| - g2 &= mask; |
| - g3 &= mask; |
| - g4 &= mask; |
| - mask = ~mask; |
| - h0 = (h0 & mask) | g0; |
| - h1 = (h1 & mask) | g1; |
| - h2 = (h2 & mask) | g2; |
| - h3 = (h3 & mask) | g3; |
| - h4 = (h4 & mask) | g4; |
| - |
| - /* h = h % (2^128) */ |
| - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); |
| - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); |
| - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); |
| - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); |
| -} |
| -EXPORT_SYMBOL_GPL(poly1305_core_emit); |
| - |
| void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) |
| { |
| - poly1305_core_setkey(desc->r, key); |
| + poly1305_core_setkey(&desc->core_r, key); |
| desc->s[0] = get_unaligned_le32(key + 16); |
| desc->s[1] = get_unaligned_le32(key + 20); |
| desc->s[2] = get_unaligned_le32(key + 24); |
| @@ -164,7 +22,7 @@ void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) |
| poly1305_core_init(&desc->h); |
| desc->buflen = 0; |
| desc->sset = true; |
| - desc->rset = 1; |
| + desc->rset = 2; |
| } |
| EXPORT_SYMBOL_GPL(poly1305_init_generic); |
| |
| @@ -181,13 +39,14 @@ void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, |
| desc->buflen += bytes; |
| |
| if (desc->buflen == POLY1305_BLOCK_SIZE) { |
| - poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1); |
| + poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, |
| + 1, 1); |
| desc->buflen = 0; |
| } |
| } |
| |
| if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
| - poly1305_core_blocks(&desc->h, desc->r, src, |
| + poly1305_core_blocks(&desc->h, &desc->core_r, src, |
| nbytes / POLY1305_BLOCK_SIZE, 1); |
| src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); |
| nbytes %= POLY1305_BLOCK_SIZE; |
| @@ -202,28 +61,14 @@ EXPORT_SYMBOL_GPL(poly1305_update_generic); |
| |
| void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) |
| { |
| - __le32 digest[4]; |
| - u64 f = 0; |
| - |
| if (unlikely(desc->buflen)) { |
| desc->buf[desc->buflen++] = 1; |
| memset(desc->buf + desc->buflen, 0, |
| POLY1305_BLOCK_SIZE - desc->buflen); |
| - poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0); |
| + poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0); |
| } |
| |
| - poly1305_core_emit(&desc->h, digest); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; |
| - put_unaligned_le32(f, dst + 0); |
| - f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; |
| - put_unaligned_le32(f, dst + 12); |
| - |
| + poly1305_core_emit(&desc->h, desc->s, dst); |
| *desc = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL_GPL(poly1305_final_generic); |
| -- |
| 2.18.2 |
| |
| |
| From c416a98eb307de061d222f9db53651c9b0eb0964 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 5 Jan 2020 22:40:47 -0500 |
| Subject: [PATCH 042/100] crypto: x86/poly1305 - import unmodified cryptogams |
| implementation |
| |
| commit 0896ca2a0cb6127e8a129f1f2a680d49b6b0f65c upstream. |
| |
| These x86_64 vectorized implementations come from Andy Polyakov's |
| CRYPTOGAMS implementation, and are included here in raw form without |
| modification, so that subsequent commits that fix these up for the |
| kernel can see how it has changed. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 4159 +++++++++++++++++ |
| 1 file changed, 4159 insertions(+) |
| create mode 100644 arch/x86/crypto/poly1305-x86_64-cryptogams.pl |
| |
| diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl |
| new file mode 100644 |
| index 000000000000..342ad7f18aa7 |
| |
| |
| @@ -0,0 +1,4159 @@ |
| +#! /usr/bin/env perl |
| +# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. |
| +# |
| +# Licensed under the OpenSSL license (the "License"). You may not use |
| +# this file except in compliance with the License. You can obtain a copy |
| +# in the file LICENSE in the source distribution or at |
| +# https://www.openssl.org/source/license.html |
| + |
| +# |
| +# ==================================================================== |
| +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
| +# project. The module is, however, dual licensed under OpenSSL and |
| +# CRYPTOGAMS licenses depending on where you obtain it. For further |
| +# details see http://www.openssl.org/~appro/cryptogams/. |
| +# ==================================================================== |
| +# |
| +# This module implements Poly1305 hash for x86_64. |
| +# |
| +# March 2015 |
| +# |
| +# Initial release. |
| +# |
| +# December 2016 |
| +# |
| +# Add AVX512F+VL+BW code path. |
| +# |
| +# November 2017 |
| +# |
| +# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be |
| +# executed even on Knights Landing. Trigger for modification was |
| +# observation that AVX512 code paths can negatively affect overall |
| +# Skylake-X system performance. Since we are likely to suppress |
| +# AVX512F capability flag [at least on Skylake-X], conversion serves |
| +# as kind of "investment protection". Note that next *lake processor, |
| +# Cannolake, has AVX512IFMA code path to execute... |
| +# |
| +# Numbers are cycles per processed byte with poly1305_blocks alone, |
| +# measured with rdtsc at fixed clock frequency. |
| +# |
| +# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512 |
| +# P4 4.46/+120% - |
| +# Core 2 2.41/+90% - |
| +# Westmere 1.88/+120% - |
| +# Sandy Bridge 1.39/+140% 1.10 |
| +# Haswell 1.14/+175% 1.11 0.65 |
| +# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35] |
| +# Silvermont 2.83/+95% - |
| +# Knights L 3.60/? 1.65 1.10 0.41(***) |
| +# Goldmont 1.70/+180% - |
| +# VIA Nano 1.82/+150% - |
| +# Sledgehammer 1.38/+160% - |
| +# Bulldozer 2.30/+130% 0.97 |
| +# Ryzen 1.15/+200% 1.08 1.18 |
| +# |
| +# (*) improvement coefficients relative to clang are more modest and |
| +# are ~50% on most processors, in both cases we are comparing to |
| +# __int128 code; |
| +# (**) SSE2 implementation was attempted, but among non-AVX processors |
| +# it was faster than integer-only code only on older Intel P4 and |
| +# Core processors, 50-30%, less newer processor is, but slower on |
| +# contemporary ones, for example almost 2x slower on Atom, and as |
| +# former are naturally disappearing, SSE2 is deemed unnecessary; |
| +# (***) strangely enough performance seems to vary from core to core, |
| +# listed result is best case; |
| + |
| +$flavour = shift; |
| +$output = shift; |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } |
| + |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
| + |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| +die "can't locate x86_64-xlate.pl"; |
| + |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { |
| + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26); |
| +} |
| + |
| +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { |
| + $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12); |
| + $avx += 2 if ($1==2.11 && $2>=8); |
| +} |
| + |
| +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { |
| + $avx = ($1>=10) + ($1>=12); |
| +} |
| + |
| +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { |
| + $avx = ($2>=3.0) + ($2>3.0); |
| +} |
| + |
| +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; |
| +*STDOUT=*OUT; |
| + |
| +my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); |
| +my ($mac,$nonce)=($inp,$len); # *_emit arguments |
| +my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13)); |
| +my ($h0,$h1,$h2)=("%r14","%rbx","%rbp"); |
| + |
| +sub poly1305_iteration { |
| +# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 |
| +# output: $h0-$h2 *= $r0-$r1 |
| +$code.=<<___; |
| + mulq $h0 # h0*r1 |
| + mov %rax,$d2 |
| + mov $r0,%rax |
| + mov %rdx,$d3 |
| + |
| + mulq $h0 # h0*r0 |
| + mov %rax,$h0 # future $h0 |
| + mov $r0,%rax |
| + mov %rdx,$d1 |
| + |
| + mulq $h1 # h1*r0 |
| + add %rax,$d2 |
| + mov $s1,%rax |
| + adc %rdx,$d3 |
| + |
| + mulq $h1 # h1*s1 |
| + mov $h2,$h1 # borrow $h1 |
| + add %rax,$h0 |
| + adc %rdx,$d1 |
| + |
| + imulq $s1,$h1 # h2*s1 |
| + add $h1,$d2 |
| + mov $d1,$h1 |
| + adc \$0,$d3 |
| + |
| + imulq $r0,$h2 # h2*r0 |
| + add $d2,$h1 |
| + mov \$-4,%rax # mask value |
| + adc $h2,$d3 |
| + |
| + and $d3,%rax # last reduction step |
| + mov $d3,$h2 |
| + shr \$2,$d3 |
| + and \$3,$h2 |
| + add $d3,%rax |
| + add %rax,$h0 |
| + adc \$0,$h1 |
| + adc \$0,$h2 |
| +___ |
| +} |
| + |
| +######################################################################## |
| +# Layout of opaque area is following. |
| +# |
| +# unsigned __int64 h[3]; # current hash value base 2^64 |
| +# unsigned __int64 r[2]; # key value base 2^64 |
| + |
| +$code.=<<___; |
| +.text |
| + |
| +.extern OPENSSL_ia32cap_P |
| + |
| +.globl poly1305_init |
| +.hidden poly1305_init |
| +.globl poly1305_blocks |
| +.hidden poly1305_blocks |
| +.globl poly1305_emit |
| +.hidden poly1305_emit |
| + |
| +.type poly1305_init,\@function,3 |
| +.align 32 |
| +poly1305_init: |
| + xor %rax,%rax |
| + mov %rax,0($ctx) # initialize hash value |
| + mov %rax,8($ctx) |
| + mov %rax,16($ctx) |
| + |
| + cmp \$0,$inp |
| + je .Lno_key |
| + |
| + lea poly1305_blocks(%rip),%r10 |
| + lea poly1305_emit(%rip),%r11 |
| +___ |
| +$code.=<<___ if ($avx); |
| + mov OPENSSL_ia32cap_P+4(%rip),%r9 |
| + lea poly1305_blocks_avx(%rip),%rax |
| + lea poly1305_emit_avx(%rip),%rcx |
| + bt \$`60-32`,%r9 # AVX? |
| + cmovc %rax,%r10 |
| + cmovc %rcx,%r11 |
| +___ |
| +$code.=<<___ if ($avx>1); |
| + lea poly1305_blocks_avx2(%rip),%rax |
| + bt \$`5+32`,%r9 # AVX2? |
| + cmovc %rax,%r10 |
| +___ |
| +$code.=<<___ if ($avx>3); |
| + mov \$`(1<<31|1<<21|1<<16)`,%rax |
| + shr \$32,%r9 |
| + and %rax,%r9 |
| + cmp %rax,%r9 |
| + je .Linit_base2_44 |
| +___ |
| +$code.=<<___; |
| + mov \$0x0ffffffc0fffffff,%rax |
| + mov \$0x0ffffffc0ffffffc,%rcx |
| + and 0($inp),%rax |
| + and 8($inp),%rcx |
| + mov %rax,24($ctx) |
| + mov %rcx,32($ctx) |
| +___ |
| +$code.=<<___ if ($flavour !~ /elf32/); |
| + mov %r10,0(%rdx) |
| + mov %r11,8(%rdx) |
| +___ |
| +$code.=<<___ if ($flavour =~ /elf32/); |
| + mov %r10d,0(%rdx) |
| + mov %r11d,4(%rdx) |
| +___ |
| +$code.=<<___; |
| + mov \$1,%eax |
| +.Lno_key: |
| + ret |
| +.size poly1305_init,.-poly1305_init |
| + |
| +.type poly1305_blocks,\@function,4 |
| +.align 32 |
| +poly1305_blocks: |
| +.cfi_startproc |
| +.Lblocks: |
| + shr \$4,$len |
| + jz .Lno_data # too short |
| + |
| + push %rbx |
| +.cfi_push %rbx |
| + push %rbp |
| +.cfi_push %rbp |
| + push %r12 |
| +.cfi_push %r12 |
| + push %r13 |
| +.cfi_push %r13 |
| + push %r14 |
| +.cfi_push %r14 |
| + push %r15 |
| +.cfi_push %r15 |
| +.Lblocks_body: |
| + |
| + mov $len,%r15 # reassign $len |
| + |
| + mov 24($ctx),$r0 # load r |
| + mov 32($ctx),$s1 |
| + |
| + mov 0($ctx),$h0 # load hash value |
| + mov 8($ctx),$h1 |
| + mov 16($ctx),$h2 |
| + |
| + mov $s1,$r1 |
| + shr \$2,$s1 |
| + mov $r1,%rax |
| + add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| + jmp .Loop |
| + |
| +.align 32 |
| +.Loop: |
| + add 0($inp),$h0 # accumulate input |
| + adc 8($inp),$h1 |
| + lea 16($inp),$inp |
| + adc $padbit,$h2 |
| +___ |
| + &poly1305_iteration(); |
| +$code.=<<___; |
| + mov $r1,%rax |
| + dec %r15 # len-=16 |
| + jnz .Loop |
| + |
| + mov $h0,0($ctx) # store hash value |
| + mov $h1,8($ctx) |
| + mov $h2,16($ctx) |
| + |
| + mov 0(%rsp),%r15 |
| +.cfi_restore %r15 |
| + mov 8(%rsp),%r14 |
| +.cfi_restore %r14 |
| + mov 16(%rsp),%r13 |
| +.cfi_restore %r13 |
| + mov 24(%rsp),%r12 |
| +.cfi_restore %r12 |
| + mov 32(%rsp),%rbp |
| +.cfi_restore %rbp |
| + mov 40(%rsp),%rbx |
| +.cfi_restore %rbx |
| + lea 48(%rsp),%rsp |
| +.cfi_adjust_cfa_offset -48 |
| +.Lno_data: |
| +.Lblocks_epilogue: |
| + ret |
| +.cfi_endproc |
| +.size poly1305_blocks,.-poly1305_blocks |
| + |
| +.type poly1305_emit,\@function,3 |
| +.align 32 |
| +poly1305_emit: |
| +.Lemit: |
| + mov 0($ctx),%r8 # load hash value |
| + mov 8($ctx),%r9 |
| + mov 16($ctx),%r10 |
| + |
| + mov %r8,%rax |
| + add \$5,%r8 # compare to modulus |
| + mov %r9,%rcx |
| + adc \$0,%r9 |
| + adc \$0,%r10 |
| + shr \$2,%r10 # did 130-bit value overflow? |
| + cmovnz %r8,%rax |
| + cmovnz %r9,%rcx |
| + |
| + add 0($nonce),%rax # accumulate nonce |
| + adc 8($nonce),%rcx |
| + mov %rax,0($mac) # write result |
| + mov %rcx,8($mac) |
| + |
| + ret |
| +.size poly1305_emit,.-poly1305_emit |
| +___ |
| +if ($avx) { |
| + |
| +######################################################################## |
| +# Layout of opaque area is following. |
| +# |
| +# unsigned __int32 h[5]; # current hash value base 2^26 |
| +# unsigned __int32 is_base2_26; |
| +# unsigned __int64 r[2]; # key value base 2^64 |
| +# unsigned __int64 pad; |
| +# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9]; |
| +# |
| +# where r^n are base 2^26 digits of degrees of multiplier key. There are |
| +# 5 digits, but last four are interleaved with multiples of 5, totalling |
| +# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4. |
| + |
| +my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) = |
| + map("%xmm$_",(0..15)); |
| + |
| +$code.=<<___; |
| +.type __poly1305_block,\@abi-omnipotent |
| +.align 32 |
| +__poly1305_block: |
| +___ |
| + &poly1305_iteration(); |
| +$code.=<<___; |
| + ret |
| +.size __poly1305_block,.-__poly1305_block |
| + |
| +.type __poly1305_init_avx,\@abi-omnipotent |
| +.align 32 |
| +__poly1305_init_avx: |
| + mov $r0,$h0 |
| + mov $r1,$h1 |
| + xor $h2,$h2 |
| + |
| + lea 48+64($ctx),$ctx # bias $ctx so the table offsets used below stay within -64..+76 (size optimization) |
| + |
| + mov $r1,%rax |
| + call __poly1305_block # r^2 (h was seeded with r above) |
| + |
| + mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26 (r^2 at +0, r at +4 of each 16-byte row) |
| + mov \$0x3ffffff,%edx |
| + mov $h0,$d1 |
| + and $h0#d,%eax |
| + mov $r0,$d2 |
| + and $r0#d,%edx |
| + mov %eax,`16*0+0-64`($ctx) |
| + shr \$26,$d1 |
| + mov %edx,`16*0+4-64`($ctx) |
| + shr \$26,$d2 |
| + |
| + mov \$0x3ffffff,%eax |
| + mov \$0x3ffffff,%edx |
| + and $d1#d,%eax |
| + and $d2#d,%edx |
| + mov %eax,`16*1+0-64`($ctx) |
| + lea (%rax,%rax,4),%eax # *5 |
| + mov %edx,`16*1+4-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + mov %eax,`16*2+0-64`($ctx) |
| + shr \$26,$d1 |
| + mov %edx,`16*2+4-64`($ctx) |
| + shr \$26,$d2 |
| + |
| + mov $h1,%rax |
| + mov $r1,%rdx |
| + shl \$12,%rax |
| + shl \$12,%rdx |
| + or $d1,%rax |
| + or $d2,%rdx |
| + and \$0x3ffffff,%eax |
| + and \$0x3ffffff,%edx |
| + mov %eax,`16*3+0-64`($ctx) |
| + lea (%rax,%rax,4),%eax # *5 |
| + mov %edx,`16*3+4-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + mov %eax,`16*4+0-64`($ctx) |
| + mov $h1,$d1 |
| + mov %edx,`16*4+4-64`($ctx) |
| + mov $r1,$d2 |
| + |
| + mov \$0x3ffffff,%eax |
| + mov \$0x3ffffff,%edx |
| + shr \$14,$d1 |
| + shr \$14,$d2 |
| + and $d1#d,%eax |
| + and $d2#d,%edx |
| + mov %eax,`16*5+0-64`($ctx) |
| + lea (%rax,%rax,4),%eax # *5 |
| + mov %edx,`16*5+4-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + mov %eax,`16*6+0-64`($ctx) |
| + shr \$26,$d1 |
| + mov %edx,`16*6+4-64`($ctx) |
| + shr \$26,$d2 |
| + |
| + mov $h2,%rax |
| + shl \$24,%rax |
| + or %rax,$d1 |
| + mov $d1#d,`16*7+0-64`($ctx) |
| + lea ($d1,$d1,4),$d1 # *5 |
| + mov $d2#d,`16*7+4-64`($ctx) |
| + lea ($d2,$d2,4),$d2 # *5 |
| + mov $d1#d,`16*8+0-64`($ctx) |
| + mov $d2#d,`16*8+4-64`($ctx) |
| + |
| + mov $r1,%rax |
| + call __poly1305_block # r^3 |
| + |
| + mov \$0x3ffffff,%eax # save r^3 base 2^26 (column +12 of each 16-byte row) |
| + mov $h0,$d1 |
| + and $h0#d,%eax |
| + shr \$26,$d1 |
| + mov %eax,`16*0+12-64`($ctx) |
| + |
| + mov \$0x3ffffff,%edx |
| + and $d1#d,%edx |
| + mov %edx,`16*1+12-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + shr \$26,$d1 |
| + mov %edx,`16*2+12-64`($ctx) |
| + |
| + mov $h1,%rax |
| + shl \$12,%rax |
| + or $d1,%rax |
| + and \$0x3ffffff,%eax |
| + mov %eax,`16*3+12-64`($ctx) |
| + lea (%rax,%rax,4),%eax # *5 |
| + mov $h1,$d1 |
| + mov %eax,`16*4+12-64`($ctx) |
| + |
| + mov \$0x3ffffff,%edx |
| + shr \$14,$d1 |
| + and $d1#d,%edx |
| + mov %edx,`16*5+12-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + shr \$26,$d1 |
| + mov %edx,`16*6+12-64`($ctx) |
| + |
| + mov $h2,%rax |
| + shl \$24,%rax |
| + or %rax,$d1 |
| + mov $d1#d,`16*7+12-64`($ctx) |
| + lea ($d1,$d1,4),$d1 # *5 |
| + mov $d1#d,`16*8+12-64`($ctx) |
| + |
| + mov $r1,%rax |
| + call __poly1305_block # r^4 |
| + |
| + mov \$0x3ffffff,%eax # save r^4 base 2^26 (column +8 of each 16-byte row) |
| + mov $h0,$d1 |
| + and $h0#d,%eax |
| + shr \$26,$d1 |
| + mov %eax,`16*0+8-64`($ctx) |
| + |
| + mov \$0x3ffffff,%edx |
| + and $d1#d,%edx |
| + mov %edx,`16*1+8-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + shr \$26,$d1 |
| + mov %edx,`16*2+8-64`($ctx) |
| + |
| + mov $h1,%rax |
| + shl \$12,%rax |
| + or $d1,%rax |
| + and \$0x3ffffff,%eax |
| + mov %eax,`16*3+8-64`($ctx) |
| + lea (%rax,%rax,4),%eax # *5 |
| + mov $h1,$d1 |
| + mov %eax,`16*4+8-64`($ctx) |
| + |
| + mov \$0x3ffffff,%edx |
| + shr \$14,$d1 |
| + and $d1#d,%edx |
| + mov %edx,`16*5+8-64`($ctx) |
| + lea (%rdx,%rdx,4),%edx # *5 |
| + shr \$26,$d1 |
| + mov %edx,`16*6+8-64`($ctx) |
| + |
| + mov $h2,%rax |
| + shl \$24,%rax |
| + or %rax,$d1 |
| + mov $d1#d,`16*7+8-64`($ctx) |
| + lea ($d1,$d1,4),$d1 # *5 |
| + mov $d1#d,`16*8+8-64`($ctx) |
| + |
| + lea -48-64($ctx),$ctx # undo the 48+64 bias applied to $ctx on entry |
| + ret |
| +.size __poly1305_init_avx,.-__poly1305_init_avx |
| + |
| +.type poly1305_blocks_avx,\@function,4 |
| +.align 32 |
| +poly1305_blocks_avx: |
| +.cfi_startproc |
| + mov 20($ctx),%r8d # is_base2_26 flag (0 means hash is stored base 2^64) |
| + cmp \$128,$len |
| + jae .Lblocks_avx |
| + test %r8d,%r8d |
| + jz .Lblocks |
| + |
| +.Lblocks_avx: |
| + and \$-16,$len |
| + jz .Lno_data_avx |
| + |
| + vzeroupper |
| + |
| + test %r8d,%r8d |
| + jz .Lbase2_64_avx |
| + |
| + test \$31,$len |
| + jz .Leven_avx |
| + |
| + push %rbx |
| +.cfi_push %rbx |
| + push %rbp |
| +.cfi_push %rbp |
| + push %r12 |
| +.cfi_push %r12 |
| + push %r13 |
| +.cfi_push %r13 |
| + push %r14 |
| +.cfi_push %r14 |
| + push %r15 |
| +.cfi_push %r15 |
| +.Lblocks_avx_body: |
| + |
| + mov $len,%r15 # keep $len in %r15 (pushed above, restored on exit) |
| + |
| + mov 0($ctx),$d1 # load hash value: base 2^26 limbs, two per 64-bit load |
| + mov 8($ctx),$d2 |
| + mov 16($ctx),$h2#d |
| + |
| + mov 24($ctx),$r0 # load r |
| + mov 32($ctx),$s1 |
| + |
| + ################################# base 2^26 -> base 2^64 |
| + mov $d1#d,$h0#d |
| + and \$`-1*(1<<31)`,$d1 |
| + mov $d2,$r1 # borrow $r1 |
| + mov $d2#d,$h1#d |
| + and \$`-1*(1<<31)`,$d2 |
| + |
| + shr \$6,$d1 |
| + shl \$52,$r1 |
| + add $d1,$h0 |
| + shr \$12,$h1 |
| + shr \$18,$d2 |
| + add $r1,$h0 |
| + adc $d2,$h1 |
| + |
| + mov $h2,$d1 |
| + shl \$40,$d1 |
| + shr \$24,$h2 |
| + add $d1,$h1 |
| + adc \$0,$h2 # can be partially reduced... |
| + |
| + mov \$-4,$d2 # ... so reduce |
| + mov $h2,$d1 |
| + and $h2,$d2 |
| + shr \$2,$d1 |
| + and \$3,$h2 |
| + add $d2,$d1 # (h2 & -4) + (h2 >> 2) = 5*(h2 >> 2) |
| + add $d1,$h0 |
| + adc \$0,$h1 |
| + adc \$0,$h2 |
| + |
| + mov $s1,$r1 |
| + mov $s1,%rax |
| + shr \$2,$s1 |
| + add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| + |
| + add 0($inp),$h0 # accumulate one 16-byte input block |
| + adc 8($inp),$h1 |
| + lea 16($inp),$inp |
| + adc $padbit,$h2 |
| + |
| + call __poly1305_block |
| + |
| + test $padbit,$padbit # if $padbit is zero, |
| + jz .Lstore_base2_64_avx # store hash in base 2^64 format |
| + |
| + ################################# base 2^64 -> base 2^26 |
| + mov $h0,%rax |
| + mov $h0,%rdx |
| + shr \$52,$h0 |
| + mov $h1,$r0 |
| + mov $h1,$r1 |
| + shr \$26,%rdx |
| + and \$0x3ffffff,%rax # h[0] |
| + shl \$12,$r0 |
| + and \$0x3ffffff,%rdx # h[1] |
| + shr \$14,$h1 |
| + or $r0,$h0 |
| + shl \$24,$h2 |
| + and \$0x3ffffff,$h0 # h[2] |
| + shr \$40,$r1 |
| + and \$0x3ffffff,$h1 # h[3] |
| + or $r1,$h2 # h[4] |
| + |
| + sub \$16,%r15 |
| + jz .Lstore_base2_26_avx |
| + |
| + vmovd %rax#d,$H0 |
| + vmovd %rdx#d,$H1 |
| + vmovd $h0#d,$H2 |
| + vmovd $h1#d,$H3 |
| + vmovd $h2#d,$H4 |
| + jmp .Lproceed_avx |
| + |
| +.align 32 |
| +.Lstore_base2_64_avx: |
| + mov $h0,0($ctx) |
| + mov $h1,8($ctx) |
| + mov $h2,16($ctx) # full-width store also covers offset 20, so is_base2_26 is zeroed |
| + jmp .Ldone_avx |
| + |
| +.align 16 |
| +.Lstore_base2_26_avx: |
| + mov %rax#d,0($ctx) # store hash value base 2^26 |
| + mov %rdx#d,4($ctx) |
| + mov $h0#d,8($ctx) |
| + mov $h1#d,12($ctx) |
| + mov $h2#d,16($ctx) |
| +.align 16 |
| +.Ldone_avx: |
| + mov 0(%rsp),%r15 |
| +.cfi_restore %r15 |
| + mov 8(%rsp),%r14 |
| +.cfi_restore %r14 |
| + mov 16(%rsp),%r13 |
| +.cfi_restore %r13 |
| + mov 24(%rsp),%r12 |
| +.cfi_restore %r12 |
| + mov 32(%rsp),%rbp |
| +.cfi_restore %rbp |
| + mov 40(%rsp),%rbx |
| +.cfi_restore %rbx |
| + lea 48(%rsp),%rsp |
| +.cfi_adjust_cfa_offset -48 |
| +.Lno_data_avx: |
| +.Lblocks_avx_epilogue: |
| + ret |
| +.cfi_endproc |
| + |
| +.align 32 |
| +.Lbase2_64_avx: |
| +.cfi_startproc |
| + push %rbx |
| +.cfi_push %rbx |
| + push %rbp |
| +.cfi_push %rbp |
| + push %r12 |
| +.cfi_push %r12 |
| + push %r13 |
| +.cfi_push %r13 |
| + push %r14 |
| +.cfi_push %r14 |
| + push %r15 |
| +.cfi_push %r15 |
| +.Lbase2_64_avx_body: |
| + |
| + mov $len,%r15 # keep $len in %r15 (pushed above, restored at .Lproceed_avx) |
| + |
| + mov 24($ctx),$r0 # load r |
| + mov 32($ctx),$s1 |
| + |
| + mov 0($ctx),$h0 # load hash value (base 2^64 on this path) |
| + mov 8($ctx),$h1 |
| + mov 16($ctx),$h2#d |
| + |
| + mov $s1,$r1 |
| + mov $s1,%rax |
| + shr \$2,$s1 |
| + add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| + |
| + test \$31,$len |
| + jz .Linit_avx |
| + |
| + add 0($inp),$h0 # accumulate one 16-byte block so the remaining length is a multiple of 32 |
| + adc 8($inp),$h1 |
| + lea 16($inp),$inp |
| + adc $padbit,$h2 |
| + sub \$16,%r15 |
| + |
| + call __poly1305_block |
| + |
| +.Linit_avx: |
| + ################################# base 2^64 -> base 2^26 |
| + mov $h0,%rax |
| + mov $h0,%rdx |
| + shr \$52,$h0 |
| + mov $h1,$d1 |
| + mov $h1,$d2 |
| + shr \$26,%rdx |
| + and \$0x3ffffff,%rax # h[0] |
| + shl \$12,$d1 |
| + and \$0x3ffffff,%rdx # h[1] |
| + shr \$14,$h1 |
| + or $d1,$h0 |
| + shl \$24,$h2 |
| + and \$0x3ffffff,$h0 # h[2] |
| + shr \$40,$d2 |
| + and \$0x3ffffff,$h1 # h[3] |
| + or $d2,$h2 # h[4] |
| + |
| + vmovd %rax#d,$H0 |
| + vmovd %rdx#d,$H1 |
| + vmovd $h0#d,$H2 |
| + vmovd $h1#d,$H3 |
| + vmovd $h2#d,$H4 |
| + movl \$1,20($ctx) # set is_base2_26: hash is kept base 2^26 from here on |
| + |
| + call __poly1305_init_avx |
| + |
| +.Lproceed_avx: |
| + mov %r15,$len |
| + |
| + mov 0(%rsp),%r15 |
| +.cfi_restore %r15 |
| + mov 8(%rsp),%r14 |
| +.cfi_restore %r14 |
| + mov 16(%rsp),%r13 |
| +.cfi_restore %r13 |
| + mov 24(%rsp),%r12 |
| +.cfi_restore %r12 |
| + mov 32(%rsp),%rbp |
| +.cfi_restore %rbp |
| + mov 40(%rsp),%rbx |
| +.cfi_restore %rbx |
| + lea 48(%rsp),%rax |
| + lea 48(%rsp),%rsp |
| +.cfi_adjust_cfa_offset -48 |
| +.Lbase2_64_avx_epilogue: |
| + jmp .Ldo_avx |
| +.cfi_endproc |
| + |
| +.align 32 |
| +.Leven_avx: |
| +.cfi_startproc |
| + vmovd 4*0($ctx),$H0 # load hash value (five base 2^26 limbs) |
| + vmovd 4*1($ctx),$H1 |
| + vmovd 4*2($ctx),$H2 |
| + vmovd 4*3($ctx),$H3 |
| + vmovd 4*4($ctx),$H4 |
| + |
| +.Ldo_avx: |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea -0x58(%rsp),%r11 |
| +.cfi_def_cfa %r11,0x60 |
| + sub \$0x178,%rsp |
| +___ |
| +$code.=<<___ if ($win64); |
| + lea -0xf8(%rsp),%r11 |
| + sub \$0x218,%rsp |
| + vmovdqa %xmm6,0x50(%r11) |
| + vmovdqa %xmm7,0x60(%r11) |
| + vmovdqa %xmm8,0x70(%r11) |
| + vmovdqa %xmm9,0x80(%r11) |
| + vmovdqa %xmm10,0x90(%r11) |
| + vmovdqa %xmm11,0xa0(%r11) |
| + vmovdqa %xmm12,0xb0(%r11) |
| + vmovdqa %xmm13,0xc0(%r11) |
| + vmovdqa %xmm14,0xd0(%r11) |
| + vmovdqa %xmm15,0xe0(%r11) |
| +.Ldo_avx_body: |
| +___ |
| +$code.=<<___; |
| + sub \$64,$len |
| + lea -32($inp),%rax |
| + cmovc %rax,$inp |
| + |
| + vmovdqu `16*3`($ctx),$D4 # preload r0^2 |
| + lea `16*3+64`($ctx),$ctx # bias $ctx so table row k is addressed as 16*k-64 (size optimization) |
| + lea .Lconst(%rip),%rcx |
| + |
| + ################################################################ |
| + # load input |
| + vmovdqu 16*2($inp),$T0 |
| + vmovdqu 16*3($inp),$T1 |
| + vmovdqa 64(%rcx),$MASK # .Lmask26 |
| + |
| + vpsrldq \$6,$T0,$T2 # splat input |
| + vpsrldq \$6,$T1,$T3 |
| + vpunpckhqdq $T1,$T0,$T4 # 4 |
| + vpunpcklqdq $T1,$T0,$T0 # 0:1 |
| + vpunpcklqdq $T3,$T2,$T3 # 2:3 |
| + |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + vpsrlq \$26,$T0,$T1 |
| + vpand $MASK,$T0,$T0 # 0 |
| + vpsrlq \$4,$T3,$T2 |
| + vpand $MASK,$T1,$T1 # 1 |
| + vpsrlq \$30,$T3,$T3 |
| + vpand $MASK,$T2,$T2 # 2 |
| + vpand $MASK,$T3,$T3 # 3 |
| + vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| + |
| + jbe .Lskip_loop_avx |
| + |
| + # expand and copy pre-calculated table to stack: the 3434 halves go to |
| + vmovdqu `16*1-64`($ctx),$D1 |
| + vmovdqu `16*2-64`($ctx),$D2 |
| + vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434, stored at -0x90(%r11) and up |
| + vpshufd \$0x44,$D4,$D0 # xx12 -> 1212, stored at 0x00(%rsp) and up |
| + vmovdqa $D3,-0x90(%r11) |
| + vmovdqa $D0,0x00(%rsp) |
| + vpshufd \$0xEE,$D1,$D4 |
| + vmovdqu `16*3-64`($ctx),$D0 |
| + vpshufd \$0x44,$D1,$D1 |
| + vmovdqa $D4,-0x80(%r11) |
| + vmovdqa $D1,0x10(%rsp) |
| + vpshufd \$0xEE,$D2,$D3 |
| + vmovdqu `16*4-64`($ctx),$D1 |
| + vpshufd \$0x44,$D2,$D2 |
| + vmovdqa $D3,-0x70(%r11) |
| + vmovdqa $D2,0x20(%rsp) |
| + vpshufd \$0xEE,$D0,$D4 |
| + vmovdqu `16*5-64`($ctx),$D2 |
| + vpshufd \$0x44,$D0,$D0 |
| + vmovdqa $D4,-0x60(%r11) |
| + vmovdqa $D0,0x30(%rsp) |
| + vpshufd \$0xEE,$D1,$D3 |
| + vmovdqu `16*6-64`($ctx),$D0 |
| + vpshufd \$0x44,$D1,$D1 |
| + vmovdqa $D3,-0x50(%r11) |
| + vmovdqa $D1,0x40(%rsp) |
| + vpshufd \$0xEE,$D2,$D4 |
| + vmovdqu `16*7-64`($ctx),$D1 |
| + vpshufd \$0x44,$D2,$D2 |
| + vmovdqa $D4,-0x40(%r11) |
| + vmovdqa $D2,0x50(%rsp) |
| + vpshufd \$0xEE,$D0,$D3 |
| + vmovdqu `16*8-64`($ctx),$D2 |
| + vpshufd \$0x44,$D0,$D0 |
| + vmovdqa $D3,-0x30(%r11) |
| + vmovdqa $D0,0x60(%rsp) |
| + vpshufd \$0xEE,$D1,$D4 |
| + vpshufd \$0x44,$D1,$D1 |
| + vmovdqa $D4,-0x20(%r11) |
| + vmovdqa $D1,0x70(%rsp) |
| + vpshufd \$0xEE,$D2,$D3 |
| + vmovdqa 0x00(%rsp),$D4 # preload r0^2 |
| + vpshufd \$0x44,$D2,$D2 |
| + vmovdqa $D3,-0x10(%r11) |
| + vmovdqa $D2,0x80(%rsp) |
| + |
| + jmp .Loop_avx |
| + |
| +.align 32 |
| +.Loop_avx: |
| + ################################################################ |
| + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 |
| + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r |
| + # \___________________/ |
| + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 |
| + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r |
| + # \___________________/ \____________________/ |
| + # |
| + # Note that we start with inp[2:3]*r^2. This is because it |
| + # doesn't depend on reduction in previous iteration. |
| + ################################################################ |
| + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + # |
| + # though note that $Tx and $Hx are "reversed" in this section, |
| + # and $D4 is preloaded with r0^2... |
| + |
| + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 |
| + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 |
| + vmovdqa $H2,0x20(%r11) # offload hash |
| + vpmuludq $T2,$D4,$D2 # d2 = h2*r0 |
| + vmovdqa 0x10(%rsp),$H2 # r1^2 |
| + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 |
| + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 |
| + |
| + vmovdqa $H0,0x00(%r11) # offload hash (cont.) |
| + vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1 |
| + vmovdqa $H1,0x10(%r11) # offload hash (cont.) |
| + vpmuludq $T3,$H2,$H1 # h3*r1 |
| + vpaddq $H0,$D0,$D0 # d0 += h4*s1 |
| + vpaddq $H1,$D4,$D4 # d4 += h3*r1 |
| + vmovdqa $H3,0x30(%r11) # offload hash (cont.) |
| + vpmuludq $T2,$H2,$H0 # h2*r1 |
| + vpmuludq $T1,$H2,$H1 # h1*r1 |
| + vpaddq $H0,$D3,$D3 # d3 += h2*r1 |
| + vmovdqa 0x30(%rsp),$H3 # r2^2 |
| + vpaddq $H1,$D2,$D2 # d2 += h1*r1 |
| + vmovdqa $H4,0x40(%r11) # offload hash (cont.) |
| + vpmuludq $T0,$H2,$H2 # h0*r1 |
| + vpmuludq $T2,$H3,$H0 # h2*r2 |
| + vpaddq $H2,$D1,$D1 # d1 += h0*r1 |
| + |
| + vmovdqa 0x40(%rsp),$H4 # s2^2 |
| + vpaddq $H0,$D4,$D4 # d4 += h2*r2 |
| + vpmuludq $T1,$H3,$H1 # h1*r2 |
| + vpmuludq $T0,$H3,$H3 # h0*r2 |
| + vpaddq $H1,$D3,$D3 # d3 += h1*r2 |
| + vmovdqa 0x50(%rsp),$H2 # r3^2 |
| + vpaddq $H3,$D2,$D2 # d2 += h0*r2 |
| + vpmuludq $T4,$H4,$H0 # h4*s2 |
| + vpmuludq $T3,$H4,$H4 # h3*s2 |
| + vpaddq $H0,$D1,$D1 # d1 += h4*s2 |
| + vmovdqa 0x60(%rsp),$H3 # s3^2 |
| + vpaddq $H4,$D0,$D0 # d0 += h3*s2 |
| + |
| + vmovdqa 0x80(%rsp),$H4 # s4^2 |
| + vpmuludq $T1,$H2,$H1 # h1*r3 |
| + vpmuludq $T0,$H2,$H2 # h0*r3 |
| + vpaddq $H1,$D4,$D4 # d4 += h1*r3 |
| + vpaddq $H2,$D3,$D3 # d3 += h0*r3 |
| + vpmuludq $T4,$H3,$H0 # h4*s3 |
| + vpmuludq $T3,$H3,$H1 # h3*s3 |
| + vpaddq $H0,$D2,$D2 # d2 += h4*s3 |
| + vmovdqu 16*0($inp),$H0 # load input |
| + vpaddq $H1,$D1,$D1 # d1 += h3*s3 |
| + vpmuludq $T2,$H3,$H3 # h2*s3 |
| + vpmuludq $T2,$H4,$T2 # h2*s4 |
| + vpaddq $H3,$D0,$D0 # d0 += h2*s3 |
| + |
| + vmovdqu 16*1($inp),$H1 # load input (cont.) |
| + vpaddq $T2,$D1,$D1 # d1 += h2*s4 |
| + vpmuludq $T3,$H4,$T3 # h3*s4 |
| + vpmuludq $T4,$H4,$T4 # h4*s4 |
| + vpsrldq \$6,$H0,$H2 # splat input |
| + vpaddq $T3,$D2,$D2 # d2 += h3*s4 |
| + vpaddq $T4,$D3,$D3 # d3 += h4*s4 |
| + vpsrldq \$6,$H1,$H3 # splat input (cont.) |
| + vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4 |
| + vpmuludq $T1,$H4,$T0 # h1*s4 |
| + vpunpckhqdq $H1,$H0,$H4 # 4 |
| + vpaddq $T4,$D4,$D4 # d4 += h0*r4 |
| + vmovdqa -0x90(%r11),$T4 # r0^4 |
| + vpaddq $T0,$D0,$D0 # d0 += h1*s4 |
| + |
| + vpunpcklqdq $H1,$H0,$H0 # 0:1 |
| + vpunpcklqdq $H3,$H2,$H3 # 2:3 |
| + |
| + #vpsrlq \$40,$H4,$H4 # 4 |
| + vpsrldq \$`40/8`,$H4,$H4 # 4 |
| + vpsrlq \$26,$H0,$H1 |
| + vpand $MASK,$H0,$H0 # 0 |
| + vpsrlq \$4,$H3,$H2 |
| + vpand $MASK,$H1,$H1 # 1 |
| + vpand 0(%rcx),$H4,$H4 # .Lmask24 |
| + vpsrlq \$30,$H3,$H3 |
| + vpand $MASK,$H2,$H2 # 2 |
| + vpand $MASK,$H3,$H3 # 3 |
| + vpor 32(%rcx),$H4,$H4 # padbit, yes, always |
| + |
| + vpaddq 0x00(%r11),$H0,$H0 # add hash value |
| + vpaddq 0x10(%r11),$H1,$H1 |
| + vpaddq 0x20(%r11),$H2,$H2 |
| + vpaddq 0x30(%r11),$H3,$H3 |
| + vpaddq 0x40(%r11),$H4,$H4 |
| + |
| + lea 16*2($inp),%rax |
| + lea 16*4($inp),$inp |
| + sub \$64,$len |
| + cmovc %rax,$inp |
| + |
| + ################################################################ |
| + # Now we accumulate (inp[0:1]+hash)*r^4 |
| + ################################################################ |
| + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + |
| + vpmuludq $H0,$T4,$T0 # h0*r0 |
| + vpmuludq $H1,$T4,$T1 # h1*r0 |
| + vpaddq $T0,$D0,$D0 |
| + vpaddq $T1,$D1,$D1 |
| + vmovdqa -0x80(%r11),$T2 # r1^4 |
| + vpmuludq $H2,$T4,$T0 # h2*r0 |
| + vpmuludq $H3,$T4,$T1 # h3*r0 |
| + vpaddq $T0,$D2,$D2 |
| + vpaddq $T1,$D3,$D3 |
| + vpmuludq $H4,$T4,$T4 # h4*r0 |
| + vpmuludq -0x70(%r11),$H4,$T0 # h4*s1 |
| + vpaddq $T4,$D4,$D4 |
| + |
| + vpaddq $T0,$D0,$D0 # d0 += h4*s1 |
| + vpmuludq $H2,$T2,$T1 # h2*r1 |
| + vpmuludq $H3,$T2,$T0 # h3*r1 |
| + vpaddq $T1,$D3,$D3 # d3 += h2*r1 |
| + vmovdqa -0x60(%r11),$T3 # r2^4 |
| + vpaddq $T0,$D4,$D4 # d4 += h3*r1 |
| + vpmuludq $H1,$T2,$T1 # h1*r1 |
| + vpmuludq $H0,$T2,$T2 # h0*r1 |
| + vpaddq $T1,$D2,$D2 # d2 += h1*r1 |
| + vpaddq $T2,$D1,$D1 # d1 += h0*r1 |
| + |
| + vmovdqa -0x50(%r11),$T4 # s2^4 |
| + vpmuludq $H2,$T3,$T0 # h2*r2 |
| + vpmuludq $H1,$T3,$T1 # h1*r2 |
| + vpaddq $T0,$D4,$D4 # d4 += h2*r2 |
| + vpaddq $T1,$D3,$D3 # d3 += h1*r2 |
| + vmovdqa -0x40(%r11),$T2 # r3^4 |
| + vpmuludq $H0,$T3,$T3 # h0*r2 |
| + vpmuludq $H4,$T4,$T0 # h4*s2 |
| + vpaddq $T3,$D2,$D2 # d2 += h0*r2 |
| + vpaddq $T0,$D1,$D1 # d1 += h4*s2 |
| + vmovdqa -0x30(%r11),$T3 # s3^4 |
| + vpmuludq $H3,$T4,$T4 # h3*s2 |
| + vpmuludq $H1,$T2,$T1 # h1*r3 |
| + vpaddq $T4,$D0,$D0 # d0 += h3*s2 |
| + |
| + vmovdqa -0x10(%r11),$T4 # s4^4 |
| + vpaddq $T1,$D4,$D4 # d4 += h1*r3 |
| + vpmuludq $H0,$T2,$T2 # h0*r3 |
| + vpmuludq $H4,$T3,$T0 # h4*s3 |
| + vpaddq $T2,$D3,$D3 # d3 += h0*r3 |
| + vpaddq $T0,$D2,$D2 # d2 += h4*s3 |
| + vmovdqu 16*2($inp),$T0 # load input |
| + vpmuludq $H3,$T3,$T2 # h3*s3 |
| + vpmuludq $H2,$T3,$T3 # h2*s3 |
| + vpaddq $T2,$D1,$D1 # d1 += h3*s3 |
| + vmovdqu 16*3($inp),$T1 # load input (cont.) |
| + vpaddq $T3,$D0,$D0 # d0 += h2*s3 |
| + |
| + vpmuludq $H2,$T4,$H2 # h2*s4 |
| + vpmuludq $H3,$T4,$H3 # h3*s4 |
| + vpsrldq \$6,$T0,$T2 # splat input |
| + vpaddq $H2,$D1,$D1 # d1 += h2*s4 |
| + vpmuludq $H4,$T4,$H4 # h4*s4 |
| + vpsrldq \$6,$T1,$T3 # splat input (cont.) |
| + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 |
| + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 |
| + vpmuludq -0x20(%r11),$H0,$H4 # h0*r4 |
| + vpmuludq $H1,$T4,$H0 |
| + vpunpckhqdq $T1,$T0,$T4 # 4 |
| + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 |
| + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 |
| + |
| + vpunpcklqdq $T1,$T0,$T0 # 0:1 |
| + vpunpcklqdq $T3,$T2,$T3 # 2:3 |
| + |
| + #vpsrlq \$40,$T4,$T4 # 4 |
| + vpsrldq \$`40/8`,$T4,$T4 # 4 |
| + vpsrlq \$26,$T0,$T1 |
| + vmovdqa 0x00(%rsp),$D4 # preload r0^2 |
| + vpand $MASK,$T0,$T0 # 0 |
| + vpsrlq \$4,$T3,$T2 |
| + vpand $MASK,$T1,$T1 # 1 |
| + vpand 0(%rcx),$T4,$T4 # .Lmask24 |
| + vpsrlq \$30,$T3,$T3 |
| + vpand $MASK,$T2,$T2 # 2 |
| + vpand $MASK,$T3,$T3 # 3 |
| + vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| + |
| + ################################################################ |
| + # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
| + # and P. Schwabe |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$D1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H4,$D0 |
| + vpand $MASK,$H4,$H4 |
| + |
| + vpsrlq \$26,$H1,$D1 |
| + vpand $MASK,$H1,$H1 |
| + vpaddq $D1,$H2,$H2 # h1 -> h2 |
| + |
| + vpaddq $D0,$H0,$H0 |
| + vpsllq \$2,$D0,$D0 |
| + vpaddq $D0,$H0,$H0 # h4 -> h0, carry times 5 (carry + carry<<2) |
| + |
| + vpsrlq \$26,$H2,$D2 |
| + vpand $MASK,$H2,$H2 |
| + vpaddq $D2,$H3,$H3 # h2 -> h3 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + ja .Loop_avx |
| + |
| +.Lskip_loop_avx: |
| + ################################################################ |
| + # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 |
| + |
| + vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2 |
| + add \$32,$len |
| + jnz .Long_tail_avx |
| + |
| + vpaddq $H2,$T2,$T2 |
| + vpaddq $H0,$T0,$T0 |
| + vpaddq $H1,$T1,$T1 |
| + vpaddq $H3,$T3,$T3 |
| + vpaddq $H4,$T4,$T4 |
| + |
| +.Long_tail_avx: |
| + vmovdqa $H2,0x20(%r11) |
| + vmovdqa $H0,0x00(%r11) |
| + vmovdqa $H1,0x10(%r11) |
| + vmovdqa $H3,0x30(%r11) |
| + vmovdqa $H4,0x40(%r11) |
| + |
| + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + |
| + vpmuludq $T2,$D4,$D2 # d2 = h2*r0 |
| + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 |
| + vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n |
| + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 |
| + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 |
| + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 |
| + |
| + vpmuludq $T3,$H2,$H0 # h3*r1 |
| + vpaddq $H0,$D4,$D4 # d4 += h3*r1 |
| + vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n |
| + vpmuludq $T2,$H2,$H1 # h2*r1 |
| + vpaddq $H1,$D3,$D3 # d3 += h2*r1 |
| + vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n |
| + vpmuludq $T1,$H2,$H0 # h1*r1 |
| + vpaddq $H0,$D2,$D2 # d2 += h1*r1 |
| + vpmuludq $T0,$H2,$H2 # h0*r1 |
| + vpaddq $H2,$D1,$D1 # d1 += h0*r1 |
| + vpmuludq $T4,$H3,$H3 # h4*s1 |
| + vpaddq $H3,$D0,$D0 # d0 += h4*s1 |
| + |
| + vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n |
| + vpmuludq $T2,$H4,$H1 # h2*r2 |
| + vpaddq $H1,$D4,$D4 # d4 += h2*r2 |
| + vpmuludq $T1,$H4,$H0 # h1*r2 |
| + vpaddq $H0,$D3,$D3 # d3 += h1*r2 |
| + vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n |
| + vpmuludq $T0,$H4,$H4 # h0*r2 |
| + vpaddq $H4,$D2,$D2 # d2 += h0*r2 |
| + vpmuludq $T4,$H2,$H1 # h4*s2 |
| + vpaddq $H1,$D1,$D1 # d1 += h4*s2 |
| + vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n |
| + vpmuludq $T3,$H2,$H2 # h3*s2 |
| + vpaddq $H2,$D0,$D0 # d0 += h3*s2 |
| + |
| + vpmuludq $T1,$H3,$H0 # h1*r3 |
| + vpaddq $H0,$D4,$D4 # d4 += h1*r3 |
| + vpmuludq $T0,$H3,$H3 # h0*r3 |
| + vpaddq $H3,$D3,$D3 # d3 += h0*r3 |
| + vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n |
| + vpmuludq $T4,$H4,$H1 # h4*s3 |
| + vpaddq $H1,$D2,$D2 # d2 += h4*s3 |
| + vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n |
| + vpmuludq $T3,$H4,$H0 # h3*s3 |
| + vpaddq $H0,$D1,$D1 # d1 += h3*s3 |
| + vpmuludq $T2,$H4,$H4 # h2*s3 |
| + vpaddq $H4,$D0,$D0 # d0 += h2*s3 |
| + |
| + vpmuludq $T0,$H2,$H2 # h0*r4 |
| + vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4 |
| + vpmuludq $T4,$H3,$H1 # h4*s4 |
| + vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4 |
| + vpmuludq $T3,$H3,$H0 # h3*s4 |
| + vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4 |
| + vpmuludq $T2,$H3,$H1 # h2*s4 |
| + vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4 |
| + vpmuludq $T1,$H3,$H3 # h1*s4 |
| + vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4 |
| + |
| + jz .Lshort_tail_avx |
| + |
| + vmovdqu 16*0($inp),$H0 # load input |
| + vmovdqu 16*1($inp),$H1 |
| + |
| + vpsrldq \$6,$H0,$H2 # splat input |
| + vpsrldq \$6,$H1,$H3 |
| + vpunpckhqdq $H1,$H0,$H4 # 4 |
| + vpunpcklqdq $H1,$H0,$H0 # 0:1 |
| + vpunpcklqdq $H3,$H2,$H3 # 2:3 |
| + |
| + vpsrlq \$40,$H4,$H4 # 4 |
| + vpsrlq \$26,$H0,$H1 |
| + vpand $MASK,$H0,$H0 # 0 |
| + vpsrlq \$4,$H3,$H2 |
| + vpand $MASK,$H1,$H1 # 1 |
| + vpsrlq \$30,$H3,$H3 |
| + vpand $MASK,$H2,$H2 # 2 |
| + vpand $MASK,$H3,$H3 # 3 |
| + vpor 32(%rcx),$H4,$H4 # padbit, yes, always |
| + |
| + vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4 |
| + vpaddq 0x00(%r11),$H0,$H0 |
| + vpaddq 0x10(%r11),$H1,$H1 |
| + vpaddq 0x20(%r11),$H2,$H2 |
| + vpaddq 0x30(%r11),$H3,$H3 |
| + vpaddq 0x40(%r11),$H4,$H4 |
| + |
| + ################################################################ |
| + # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate |
| + |
| + vpmuludq $H0,$T4,$T0 # h0*r0 |
| + vpaddq $T0,$D0,$D0 # d0 += h0*r0 |
| + vpmuludq $H1,$T4,$T1 # h1*r0 |
| + vpaddq $T1,$D1,$D1 # d1 += h1*r0 |
| + vpmuludq $H2,$T4,$T0 # h2*r0 |
| + vpaddq $T0,$D2,$D2 # d2 += h2*r0 |
| + vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n |
| + vpmuludq $H3,$T4,$T1 # h3*r0 |
| + vpaddq $T1,$D3,$D3 # d3 += h3*r0 |
| + vpmuludq $H4,$T4,$T4 # h4*r0 |
| + vpaddq $T4,$D4,$D4 # d4 += h4*r0 |
| + |
| + vpmuludq $H3,$T2,$T0 # h3*r1 |
| + vpaddq $T0,$D4,$D4 # d4 += h3*r1 |
| + vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1^n |
| + vpmuludq $H2,$T2,$T1 # h2*r1 |
| + vpaddq $T1,$D3,$D3 # d3 += h2*r1 |
| + vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2^n |
| + vpmuludq $H1,$T2,$T0 # h1*r1 |
| + vpaddq $T0,$D2,$D2 # d2 += h1*r1 |
| + vpmuludq $H0,$T2,$T2 # h0*r1 |
| + vpaddq $T2,$D1,$D1 # d1 += h0*r1 |
| + vpmuludq $H4,$T3,$T3 # h4*s1 |
| + vpaddq $T3,$D0,$D0 # d0 += h4*s1 |
| + |
| + vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2^n |
| + vpmuludq $H2,$T4,$T1 # h2*r2 |
| + vpaddq $T1,$D4,$D4 # d4 += h2*r2 |
| + vpmuludq $H1,$T4,$T0 # h1*r2 |
| + vpaddq $T0,$D3,$D3 # d3 += h1*r2 |
| + vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3^n |
| + vpmuludq $H0,$T4,$T4 # h0*r2 |
| + vpaddq $T4,$D2,$D2 # d2 += h0*r2 |
| + vpmuludq $H4,$T2,$T1 # h4*s2 |
| + vpaddq $T1,$D1,$D1 # d1 += h4*s2 |
| + vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3^n |
| + vpmuludq $H3,$T2,$T2 # h3*s2 |
| + vpaddq $T2,$D0,$D0 # d0 += h3*s2 |
| + |
| + vpmuludq $H1,$T3,$T0 # h1*r3 |
| + vpaddq $T0,$D4,$D4 # d4 += h1*r3 |
| + vpmuludq $H0,$T3,$T3 # h0*r3 |
| + vpaddq $T3,$D3,$D3 # d3 += h0*r3 |
| + vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4^n |
| + vpmuludq $H4,$T4,$T1 # h4*s3 |
| + vpaddq $T1,$D2,$D2 # d2 += h4*s3 |
| + vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4^n |
| + vpmuludq $H3,$T4,$T0 # h3*s3 |
| + vpaddq $T0,$D1,$D1 # d1 += h3*s3 |
| + vpmuludq $H2,$T4,$T4 # h2*s3 |
| + vpaddq $T4,$D0,$D0 # d0 += h2*s3 |
| + |
| + vpmuludq $H0,$T2,$T2 # h0*r4 |
| + vpaddq $T2,$D4,$D4 # d4 += h0*r4 |
| + vpmuludq $H4,$T3,$T1 # h4*s4 |
| + vpaddq $T1,$D3,$D3 # d3 += h4*s4 |
| + vpmuludq $H3,$T3,$T0 # h3*s4 |
| + vpaddq $T0,$D2,$D2 # d2 += h3*s4 |
| + vpmuludq $H2,$T3,$T1 # h2*s4 |
| + vpaddq $T1,$D1,$D1 # d1 += h2*s4 |
| + vpmuludq $H1,$T3,$T3 # h1*s4 |
| + vpaddq $T3,$D0,$D0 # d0 += h1*s4 |
| + |
| +.Lshort_tail_avx: |
| + ################################################################ |
| + # horizontal addition: fold the upper 64-bit lane of each sum into the lower |
| + |
| + vpsrldq \$8,$D4,$T4 |
| + vpsrldq \$8,$D3,$T3 |
| + vpsrldq \$8,$D1,$T1 |
| + vpsrldq \$8,$D0,$T0 |
| + vpsrldq \$8,$D2,$T2 |
| + vpaddq $T3,$D3,$D3 |
| + vpaddq $T4,$D4,$D4 |
| + vpaddq $T0,$D0,$D0 |
| + vpaddq $T1,$D1,$D1 |
| + vpaddq $T2,$D2,$D2 |
| + |
| + ################################################################ |
| + # lazy reduction: propagate carries so each limb fits 26 bits again |
| + |
| + vpsrlq \$26,$D3,$H3 |
| + vpand $MASK,$D3,$D3 |
| + vpaddq $H3,$D4,$D4 # h3 -> h4 |
| + |
| + vpsrlq \$26,$D0,$H0 |
| + vpand $MASK,$D0,$D0 |
| + vpaddq $H0,$D1,$D1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$D4,$H4 |
| + vpand $MASK,$D4,$D4 |
| + |
| + vpsrlq \$26,$D1,$H1 |
| + vpand $MASK,$D1,$D1 |
| + vpaddq $H1,$D2,$D2 # h1 -> h2 |
| + |
| + vpaddq $H4,$D0,$D0 |
| + vpsllq \$2,$H4,$H4 |
| + vpaddq $H4,$D0,$D0 # h4 -> h0, carry times 5 (carry + carry<<2) |
| + |
| + vpsrlq \$26,$D2,$H2 |
| + vpand $MASK,$D2,$D2 |
| + vpaddq $H2,$D3,$D3 # h2 -> h3 |
| + |
| + vpsrlq \$26,$D0,$H0 |
| + vpand $MASK,$D0,$D0 |
| + vpaddq $H0,$D1,$D1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$D3,$H3 |
| + vpand $MASK,$D3,$D3 |
| + vpaddq $H3,$D4,$D4 # h3 -> h4 |
| + |
| + vmovd $D0,`4*0-48-64`($ctx) # save partially reduced hash base 2^26; -48-64 cancels the bias on $ctx |
| + vmovd $D1,`4*1-48-64`($ctx) |
| + vmovd $D2,`4*2-48-64`($ctx) |
| + vmovd $D3,`4*3-48-64`($ctx) |
| + vmovd $D4,`4*4-48-64`($ctx) |
| +___ |
| +$code.=<<___ if ($win64); |
| + vmovdqa 0x50(%r11),%xmm6 |
| + vmovdqa 0x60(%r11),%xmm7 |
| + vmovdqa 0x70(%r11),%xmm8 |
| + vmovdqa 0x80(%r11),%xmm9 |
| + vmovdqa 0x90(%r11),%xmm10 |
| + vmovdqa 0xa0(%r11),%xmm11 |
| + vmovdqa 0xb0(%r11),%xmm12 |
| + vmovdqa 0xc0(%r11),%xmm13 |
| + vmovdqa 0xd0(%r11),%xmm14 |
| + vmovdqa 0xe0(%r11),%xmm15 |
| + lea 0xf8(%r11),%rsp |
| +.Ldo_avx_epilogue: |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea 0x58(%r11),%rsp |
| +.cfi_def_cfa %rsp,8 |
| +___ |
| +$code.=<<___; |
| + vzeroupper |
| + ret |
| +.cfi_endproc |
| +.size poly1305_blocks_avx,.-poly1305_blocks_avx |
| + |
| +.type poly1305_emit_avx,\@function,3 |
| +.align 32 |
| +poly1305_emit_avx: |
| + cmpl \$0,20($ctx) # is_base2_26? if not, use the base 2^64 path at .Lemit |
| + je .Lemit |
| + |
| + mov 0($ctx),%eax # load hash value base 2^26 (five 32-bit limbs) |
| + mov 4($ctx),%ecx |
| + mov 8($ctx),%r8d |
| + mov 12($ctx),%r11d |
| + mov 16($ctx),%r10d |
| + |
| + shl \$26,%rcx # base 2^26 -> base 2^64 |
| + mov %r8,%r9 |
| + shl \$52,%r8 |
| + add %rcx,%rax |
| + shr \$12,%r9 |
| + add %rax,%r8 # h0 |
| + adc \$0,%r9 |
| + |
| + shl \$14,%r11 |
| + mov %r10,%rax |
| + shr \$24,%r10 |
| + add %r11,%r9 |
| + shl \$40,%rax |
| + add %rax,%r9 # h1 |
| + adc \$0,%r10 # h2 |
| + |
| + mov %r10,%rax # could be partially reduced, so fold 5*(h2>>2) back into h0 |
| + mov %r10,%rcx |
| + and \$3,%r10 |
| + shr \$2,%rax |
| + and \$-4,%rcx |
| + add %rcx,%rax |
| + add %rax,%r8 |
| + adc \$0,%r9 |
| + adc \$0,%r10 |
| + |
| + mov %r8,%rax |
| + add \$5,%r8 # compare to modulus by computing h+5 |
| + mov %r9,%rcx |
| + adc \$0,%r9 |
| + adc \$0,%r10 |
| + shr \$2,%r10 # did 130-bit value overflow? if so, h+5 is selected below |
| + cmovnz %r8,%rax |
| + cmovnz %r9,%rcx |
| + |
| + add 0($nonce),%rax # accumulate nonce |
| + adc 8($nonce),%rcx |
| + mov %rax,0($mac) # write result (low 128 bits) |
| + mov %rcx,8($mac) |
| + |
| + ret |
| +.size poly1305_emit_avx,.-poly1305_emit_avx |
| +___ |
| + |
| +if ($avx>1) { |
| +my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = |
| + map("%ymm$_",(0..15)); |
| +my $S4=$MASK; |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks_avx2,\@function,4 |
| +.align 32 |
| +poly1305_blocks_avx2: |
| +.cfi_startproc |
| + mov 20($ctx),%r8d # is_base2_26 |
| + cmp \$128,$len |
| + jae .Lblocks_avx2 |
| + test %r8d,%r8d |
| + jz .Lblocks |
| + |
| +.Lblocks_avx2: |
| + and \$-16,$len |
| + jz .Lno_data_avx2 |
| + |
| + vzeroupper |
| + |
| + test %r8d,%r8d |
| + jz .Lbase2_64_avx2 |
| + |
| + test \$63,$len |
| + jz .Leven_avx2 |
| + |
| + push %rbx |
| +.cfi_push %rbx |
| + push %rbp |
| +.cfi_push %rbp |
| + push %r12 |
| +.cfi_push %r12 |
| + push %r13 |
| +.cfi_push %r13 |
| + push %r14 |
| +.cfi_push %r14 |
| + push %r15 |
| +.cfi_push %r15 |
| +.Lblocks_avx2_body: |
| + |
| + mov $len,%r15 # reassign $len |
| + |
| + mov 0($ctx),$d1 # load hash value |
| + mov 8($ctx),$d2 |
| + mov 16($ctx),$h2#d |
| + |
| + mov 24($ctx),$r0 # load r |
| + mov 32($ctx),$s1 |
| + |
| + ################################# base 2^26 -> base 2^64 |
| + mov $d1#d,$h0#d |
| + and \$`-1*(1<<31)`,$d1 |
| + mov $d2,$r1 # borrow $r1 |
| + mov $d2#d,$h1#d |
| + and \$`-1*(1<<31)`,$d2 |
| + |
| + shr \$6,$d1 |
| + shl \$52,$r1 |
| + add $d1,$h0 |
| + shr \$12,$h1 |
| + shr \$18,$d2 |
| + add $r1,$h0 |
| + adc $d2,$h1 |
| + |
| + mov $h2,$d1 |
| + shl \$40,$d1 |
| + shr \$24,$h2 |
| + add $d1,$h1 |
| + adc \$0,$h2 # can be partially reduced... |
| + |
| + mov \$-4,$d2 # ... so reduce |
| + mov $h2,$d1 |
| + and $h2,$d2 |
| + shr \$2,$d1 |
| + and \$3,$h2 |
| + add $d2,$d1 # =*5 |
| + add $d1,$h0 |
| + adc \$0,$h1 |
| + adc \$0,$h2 |
| + |
| + mov $s1,$r1 |
| + mov $s1,%rax |
| + shr \$2,$s1 |
| + add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| + |
| +.Lbase2_26_pre_avx2: |
| + add 0($inp),$h0 # accumulate input |
| + adc 8($inp),$h1 |
| + lea 16($inp),$inp |
| + adc $padbit,$h2 |
| + sub \$16,%r15 |
| + |
| + call __poly1305_block |
| + mov $r1,%rax |
| + |
| + test \$63,%r15 |
| + jnz .Lbase2_26_pre_avx2 |
| + |
| + test $padbit,$padbit # if $padbit is zero, |
| + jz .Lstore_base2_64_avx2 # store hash in base 2^64 format |
| + |
| + ################################# base 2^64 -> base 2^26 |
| + mov $h0,%rax |
| + mov $h0,%rdx |
| + shr \$52,$h0 |
| + mov $h1,$r0 |
| + mov $h1,$r1 |
| + shr \$26,%rdx |
| + and \$0x3ffffff,%rax # h[0] |
| + shl \$12,$r0 |
| + and \$0x3ffffff,%rdx # h[1] |
| + shr \$14,$h1 |
| + or $r0,$h0 |
| + shl \$24,$h2 |
| + and \$0x3ffffff,$h0 # h[2] |
| + shr \$40,$r1 |
| + and \$0x3ffffff,$h1 # h[3] |
| + or $r1,$h2 # h[4] |
| + |
| + test %r15,%r15 |
| + jz .Lstore_base2_26_avx2 |
| + |
| + vmovd %rax#d,%x#$H0 |
| + vmovd %rdx#d,%x#$H1 |
| + vmovd $h0#d,%x#$H2 |
| + vmovd $h1#d,%x#$H3 |
| + vmovd $h2#d,%x#$H4 |
| + jmp .Lproceed_avx2 |
| + |
| +.align 32 |
| +.Lstore_base2_64_avx2: |
| + mov $h0,0($ctx) |
| + mov $h1,8($ctx) |
| + mov $h2,16($ctx) # note that is_base2_26 is zeroed |
| + jmp .Ldone_avx2 |
| + |
| +.align 16 |
| +.Lstore_base2_26_avx2: |
| + mov %rax#d,0($ctx) # store hash value base 2^26 |
| + mov %rdx#d,4($ctx) |
| + mov $h0#d,8($ctx) |
| + mov $h1#d,12($ctx) |
| + mov $h2#d,16($ctx) |
| +.align 16 |
| +.Ldone_avx2: |
| + mov 0(%rsp),%r15 |
| +.cfi_restore %r15 |
| + mov 8(%rsp),%r14 |
| +.cfi_restore %r14 |
| + mov 16(%rsp),%r13 |
| +.cfi_restore %r13 |
| + mov 24(%rsp),%r12 |
| +.cfi_restore %r12 |
| + mov 32(%rsp),%rbp |
| +.cfi_restore %rbp |
| + mov 40(%rsp),%rbx |
| +.cfi_restore %rbx |
| + lea 48(%rsp),%rsp |
| +.cfi_adjust_cfa_offset -48 |
| +.Lno_data_avx2: |
| +.Lblocks_avx2_epilogue: |
| + ret |
| +.cfi_endproc |
| + |
| +.align 32 |
| +.Lbase2_64_avx2: |
| +.cfi_startproc |
| + push %rbx |
| +.cfi_push %rbx |
| + push %rbp |
| +.cfi_push %rbp |
| + push %r12 |
| +.cfi_push %r12 |
| + push %r13 |
| +.cfi_push %r13 |
| + push %r14 |
| +.cfi_push %r14 |
| + push %r15 |
| +.cfi_push %r15 |
| +.Lbase2_64_avx2_body: |
| + |
| + mov $len,%r15 # reassign $len |
| + |
| + mov 24($ctx),$r0 # load r |
| + mov 32($ctx),$s1 |
| + |
| + mov 0($ctx),$h0 # load hash value |
| + mov 8($ctx),$h1 |
| + mov 16($ctx),$h2#d |
| + |
| + mov $s1,$r1 |
| + mov $s1,%rax |
| + shr \$2,$s1 |
| + add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| + |
| + test \$63,$len |
| + jz .Linit_avx2 |
| + |
| +.Lbase2_64_pre_avx2: |
| + add 0($inp),$h0 # accumulate input |
| + adc 8($inp),$h1 |
| + lea 16($inp),$inp |
| + adc $padbit,$h2 |
| + sub \$16,%r15 |
| + |
| + call __poly1305_block |
| + mov $r1,%rax |
| + |
| + test \$63,%r15 |
| + jnz .Lbase2_64_pre_avx2 |
| + |
| +.Linit_avx2: |
| + ################################# base 2^64 -> base 2^26 |
| + mov $h0,%rax |
| + mov $h0,%rdx |
| + shr \$52,$h0 |
| + mov $h1,$d1 |
| + mov $h1,$d2 |
| + shr \$26,%rdx |
| + and \$0x3ffffff,%rax # h[0] |
| + shl \$12,$d1 |
| + and \$0x3ffffff,%rdx # h[1] |
| + shr \$14,$h1 |
| + or $d1,$h0 |
| + shl \$24,$h2 |
| + and \$0x3ffffff,$h0 # h[2] |
| + shr \$40,$d2 |
| + and \$0x3ffffff,$h1 # h[3] |
| + or $d2,$h2 # h[4] |
| + |
| + vmovd %rax#d,%x#$H0 |
| + vmovd %rdx#d,%x#$H1 |
| + vmovd $h0#d,%x#$H2 |
| + vmovd $h1#d,%x#$H3 |
| + vmovd $h2#d,%x#$H4 |
| + movl \$1,20($ctx) # set is_base2_26 |
| + |
| + call __poly1305_init_avx |
| + |
| +.Lproceed_avx2: |
| + mov %r15,$len # restore $len |
| + mov OPENSSL_ia32cap_P+8(%rip),%r10d |
| + mov \$`(1<<31|1<<30|1<<16)`,%r11d |
| + |
| + mov 0(%rsp),%r15 |
| +.cfi_restore %r15 |
| + mov 8(%rsp),%r14 |
| +.cfi_restore %r14 |
| + mov 16(%rsp),%r13 |
| +.cfi_restore %r13 |
| + mov 24(%rsp),%r12 |
| +.cfi_restore %r12 |
| + mov 32(%rsp),%rbp |
| +.cfi_restore %rbp |
| + mov 40(%rsp),%rbx |
| +.cfi_restore %rbx |
| + lea 48(%rsp),%rax |
| + lea 48(%rsp),%rsp |
| +.cfi_adjust_cfa_offset -48 |
| +.Lbase2_64_avx2_epilogue: |
| + jmp .Ldo_avx2 |
| +.cfi_endproc |
| + |
| +.align 32 |
| +.Leven_avx2: |
| +.cfi_startproc |
| + mov OPENSSL_ia32cap_P+8(%rip),%r10d |
| + vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 |
| + vmovd 4*1($ctx),%x#$H1 |
| + vmovd 4*2($ctx),%x#$H2 |
| + vmovd 4*3($ctx),%x#$H3 |
| + vmovd 4*4($ctx),%x#$H4 |
| + |
| +.Ldo_avx2: |
| +___ |
| +$code.=<<___ if ($avx>2); |
| + cmp \$512,$len |
| + jb .Lskip_avx512 |
| + and %r11d,%r10d |
| + test \$`1<<16`,%r10d # check for AVX512F |
| + jnz .Lblocks_avx512 |
| +.Lskip_avx512: |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea -8(%rsp),%r11 |
| +.cfi_def_cfa %r11,16 |
| + sub \$0x128,%rsp |
| +___ |
| +$code.=<<___ if ($win64); |
| + lea -0xf8(%rsp),%r11 |
| + sub \$0x1c8,%rsp |
| + vmovdqa %xmm6,0x50(%r11) |
| + vmovdqa %xmm7,0x60(%r11) |
| + vmovdqa %xmm8,0x70(%r11) |
| + vmovdqa %xmm9,0x80(%r11) |
| + vmovdqa %xmm10,0x90(%r11) |
| + vmovdqa %xmm11,0xa0(%r11) |
| + vmovdqa %xmm12,0xb0(%r11) |
| + vmovdqa %xmm13,0xc0(%r11) |
| + vmovdqa %xmm14,0xd0(%r11) |
| + vmovdqa %xmm15,0xe0(%r11) |
| +.Ldo_avx2_body: |
| +___ |
| +$code.=<<___; |
| + lea .Lconst(%rip),%rcx |
| + lea 48+64($ctx),$ctx # size optimization |
| + vmovdqa 96(%rcx),$T0 # .Lpermd_avx2 |
| + |
| + # expand and copy pre-calculated table to stack |
| + vmovdqu `16*0-64`($ctx),%x#$T2 |
| + and \$-512,%rsp |
| + vmovdqu `16*1-64`($ctx),%x#$T3 |
| + vmovdqu `16*2-64`($ctx),%x#$T4 |
| + vmovdqu `16*3-64`($ctx),%x#$D0 |
| + vmovdqu `16*4-64`($ctx),%x#$D1 |
| + vmovdqu `16*5-64`($ctx),%x#$D2 |
| + lea 0x90(%rsp),%rax # size optimization |
| + vmovdqu `16*6-64`($ctx),%x#$D3 |
| + vpermd $T2,$T0,$T2 # 00003412 -> 14243444 |
| + vmovdqu `16*7-64`($ctx),%x#$D4 |
| + vpermd $T3,$T0,$T3 |
| + vmovdqu `16*8-64`($ctx),%x#$MASK |
| + vpermd $T4,$T0,$T4 |
| + vmovdqa $T2,0x00(%rsp) |
| + vpermd $D0,$T0,$D0 |
| + vmovdqa $T3,0x20-0x90(%rax) |
| + vpermd $D1,$T0,$D1 |
| + vmovdqa $T4,0x40-0x90(%rax) |
| + vpermd $D2,$T0,$D2 |
| + vmovdqa $D0,0x60-0x90(%rax) |
| + vpermd $D3,$T0,$D3 |
| + vmovdqa $D1,0x80-0x90(%rax) |
| + vpermd $D4,$T0,$D4 |
| + vmovdqa $D2,0xa0-0x90(%rax) |
| + vpermd $MASK,$T0,$MASK |
| + vmovdqa $D3,0xc0-0x90(%rax) |
| + vmovdqa $D4,0xe0-0x90(%rax) |
| + vmovdqa $MASK,0x100-0x90(%rax) |
| + vmovdqa 64(%rcx),$MASK # .Lmask26 |
| + |
| + ################################################################ |
| + # load input |
| + vmovdqu 16*0($inp),%x#$T0 |
| + vmovdqu 16*1($inp),%x#$T1 |
| + vinserti128 \$1,16*2($inp),$T0,$T0 |
| + vinserti128 \$1,16*3($inp),$T1,$T1 |
| + lea 16*4($inp),$inp |
| + |
| + vpsrldq \$6,$T0,$T2 # splat input |
| + vpsrldq \$6,$T1,$T3 |
| + vpunpckhqdq $T1,$T0,$T4 # 4 |
| + vpunpcklqdq $T3,$T2,$T2 # 2:3 |
| + vpunpcklqdq $T1,$T0,$T0 # 0:1 |
| + |
| + vpsrlq \$30,$T2,$T3 |
| + vpsrlq \$4,$T2,$T2 |
| + vpsrlq \$26,$T0,$T1 |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + vpand $MASK,$T2,$T2 # 2 |
| + vpand $MASK,$T0,$T0 # 0 |
| + vpand $MASK,$T1,$T1 # 1 |
| + vpand $MASK,$T3,$T3 # 3 |
| + vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| + |
| + vpaddq $H2,$T2,$H2 # accumulate input |
| + sub \$64,$len |
| + jz .Ltail_avx2 |
| + jmp .Loop_avx2 |
| + |
| +.align 32 |
| +.Loop_avx2: |
| + ################################################################ |
| + # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 |
| + # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 |
| + # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2 |
| + # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1 |
| + # \________/\__________/ |
| + ################################################################ |
| + #vpaddq $H2,$T2,$H2 # accumulate input |
| + vpaddq $H0,$T0,$H0 |
| + vmovdqa `32*0`(%rsp),$T0 # r0^4 |
| + vpaddq $H1,$T1,$H1 |
| + vmovdqa `32*1`(%rsp),$T1 # r1^4 |
| + vpaddq $H3,$T3,$H3 |
| + vmovdqa `32*3`(%rsp),$T2 # r2^4 |
| + vpaddq $H4,$T4,$H4 |
| + vmovdqa `32*6-0x90`(%rax),$T3 # s3^4 |
| + vmovdqa `32*8-0x90`(%rax),$S4 # s4^4 |
| + |
| + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + # |
| + # however, as h2 is "chronologically" first one available pull |
| + # corresponding operations up, so it's |
| + # |
| + # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4 |
| + # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 |
| + # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4 |
| + |
| + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 |
| + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 |
| + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 |
| + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 |
| + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 |
| + |
| + vpmuludq $H0,$T1,$T4 # h0*r1 |
| + vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp |
| + vpaddq $T4,$D1,$D1 # d1 += h0*r1 |
| + vpaddq $H2,$D2,$D2 # d2 += h1*r1 |
| + vpmuludq $H3,$T1,$T4 # h3*r1 |
| + vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1 |
| + vpaddq $T4,$D4,$D4 # d4 += h3*r1 |
| + vpaddq $H2,$D0,$D0 # d0 += h4*s1 |
| + vmovdqa `32*4-0x90`(%rax),$T1 # s2 |
| + |
| + vpmuludq $H0,$T0,$T4 # h0*r0 |
| + vpmuludq $H1,$T0,$H2 # h1*r0 |
| + vpaddq $T4,$D0,$D0 # d0 += h0*r0 |
| + vpaddq $H2,$D1,$D1 # d1 += h1*r0 |
| + vpmuludq $H3,$T0,$T4 # h3*r0 |
| + vpmuludq $H4,$T0,$H2 # h4*r0 |
| + vmovdqu 16*0($inp),%x#$T0 # load input |
| + vpaddq $T4,$D3,$D3 # d3 += h3*r0 |
| + vpaddq $H2,$D4,$D4 # d4 += h4*r0 |
| + vinserti128 \$1,16*2($inp),$T0,$T0 |
| + |
| + vpmuludq $H3,$T1,$T4 # h3*s2 |
| + vpmuludq $H4,$T1,$H2 # h4*s2 |
| + vmovdqu 16*1($inp),%x#$T1 |
| + vpaddq $T4,$D0,$D0 # d0 += h3*s2 |
| + vpaddq $H2,$D1,$D1 # d1 += h4*s2 |
| + vmovdqa `32*5-0x90`(%rax),$H2 # r3 |
| + vpmuludq $H1,$T2,$T4 # h1*r2 |
| + vpmuludq $H0,$T2,$T2 # h0*r2 |
| + vpaddq $T4,$D3,$D3 # d3 += h1*r2 |
| + vpaddq $T2,$D2,$D2 # d2 += h0*r2 |
| + vinserti128 \$1,16*3($inp),$T1,$T1 |
| + lea 16*4($inp),$inp |
| + |
| + vpmuludq $H1,$H2,$T4 # h1*r3 |
| + vpmuludq $H0,$H2,$H2 # h0*r3 |
| + vpsrldq \$6,$T0,$T2 # splat input |
| + vpaddq $T4,$D4,$D4 # d4 += h1*r3 |
| + vpaddq $H2,$D3,$D3 # d3 += h0*r3 |
| + vpmuludq $H3,$T3,$T4 # h3*s3 |
| + vpmuludq $H4,$T3,$H2 # h4*s3 |
| + vpsrldq \$6,$T1,$T3 |
| + vpaddq $T4,$D1,$D1 # d1 += h3*s3 |
| + vpaddq $H2,$D2,$D2 # d2 += h4*s3 |
| + vpunpckhqdq $T1,$T0,$T4 # 4 |
| + |
| + vpmuludq $H3,$S4,$H3 # h3*s4 |
| + vpmuludq $H4,$S4,$H4 # h4*s4 |
| + vpunpcklqdq $T1,$T0,$T0 # 0:1 |
| + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 |
| + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 |
| + vpunpcklqdq $T3,$T2,$T3 # 2:3 |
| + vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4 |
| + vpmuludq $H1,$S4,$H0 # h1*s4 |
| + vmovdqa 64(%rcx),$MASK # .Lmask26 |
| + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 |
| + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 |
| + |
| + ################################################################ |
| + # lazy reduction (interleaved with tail of input splat) |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$D1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H4,$D4 |
| + vpand $MASK,$H4,$H4 |
| + |
| + vpsrlq \$4,$T3,$T2 |
| + |
| + vpsrlq \$26,$H1,$D1 |
| + vpand $MASK,$H1,$H1 |
| + vpaddq $D1,$H2,$H2 # h1 -> h2 |
| + |
| + vpaddq $D4,$H0,$H0 |
| + vpsllq \$2,$D4,$D4 |
| + vpaddq $D4,$H0,$H0 # h4 -> h0 |
| + |
| + vpand $MASK,$T2,$T2 # 2 |
| + vpsrlq \$26,$T0,$T1 |
| + |
| + vpsrlq \$26,$H2,$D2 |
| + vpand $MASK,$H2,$H2 |
| + vpaddq $D2,$H3,$H3 # h2 -> h3 |
| + |
| + vpaddq $T2,$H2,$H2 # modulo-scheduled |
| + vpsrlq \$30,$T3,$T3 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpand $MASK,$T0,$T0 # 0 |
| + vpand $MASK,$T1,$T1 # 1 |
| + vpand $MASK,$T3,$T3 # 3 |
| + vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| + |
| + sub \$64,$len |
| + jnz .Loop_avx2 |
| + |
| + .byte 0x66,0x90 |
| +.Ltail_avx2: |
| + ################################################################ |
| + # while above multiplications were by r^4 in all lanes, in last |
| + # iteration we multiply least significant lane by r^4 and most |
| + # significant one by r, so copy of above except that references |
| + # to the precomputed table are displaced by 4... |
| + |
| + #vpaddq $H2,$T2,$H2 # accumulate input |
| + vpaddq $H0,$T0,$H0 |
| + vmovdqu `32*0+4`(%rsp),$T0 # r0^4 |
| + vpaddq $H1,$T1,$H1 |
| + vmovdqu `32*1+4`(%rsp),$T1 # r1^4 |
| + vpaddq $H3,$T3,$H3 |
| + vmovdqu `32*3+4`(%rsp),$T2 # r2^4 |
| + vpaddq $H4,$T4,$H4 |
| + vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4 |
| + vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4 |
| + |
| + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 |
| + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 |
| + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 |
| + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 |
| + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 |
| + |
| + vpmuludq $H0,$T1,$T4 # h0*r1 |
| + vpmuludq $H1,$T1,$H2 # h1*r1 |
| + vpaddq $T4,$D1,$D1 # d1 += h0*r1 |
| + vpaddq $H2,$D2,$D2 # d2 += h1*r1 |
| + vpmuludq $H3,$T1,$T4 # h3*r1 |
| + vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1 |
| + vpaddq $T4,$D4,$D4 # d4 += h3*r1 |
| + vpaddq $H2,$D0,$D0 # d0 += h4*s1 |
| + |
| + vpmuludq $H0,$T0,$T4 # h0*r0 |
| + vpmuludq $H1,$T0,$H2 # h1*r0 |
| + vpaddq $T4,$D0,$D0 # d0 += h0*r0 |
| + vmovdqu `32*4+4-0x90`(%rax),$T1 # s2 |
| + vpaddq $H2,$D1,$D1 # d1 += h1*r0 |
| + vpmuludq $H3,$T0,$T4 # h3*r0 |
| + vpmuludq $H4,$T0,$H2 # h4*r0 |
| + vpaddq $T4,$D3,$D3 # d3 += h3*r0 |
| + vpaddq $H2,$D4,$D4 # d4 += h4*r0 |
| + |
| + vpmuludq $H3,$T1,$T4 # h3*s2 |
| + vpmuludq $H4,$T1,$H2 # h4*s2 |
| + vpaddq $T4,$D0,$D0 # d0 += h3*s2 |
| + vpaddq $H2,$D1,$D1 # d1 += h4*s2 |
| + vmovdqu `32*5+4-0x90`(%rax),$H2 # r3 |
| + vpmuludq $H1,$T2,$T4 # h1*r2 |
| + vpmuludq $H0,$T2,$T2 # h0*r2 |
| + vpaddq $T4,$D3,$D3 # d3 += h1*r2 |
| + vpaddq $T2,$D2,$D2 # d2 += h0*r2 |
| + |
| + vpmuludq $H1,$H2,$T4 # h1*r3 |
| + vpmuludq $H0,$H2,$H2 # h0*r3 |
| + vpaddq $T4,$D4,$D4 # d4 += h1*r3 |
| + vpaddq $H2,$D3,$D3 # d3 += h0*r3 |
| + vpmuludq $H3,$T3,$T4 # h3*s3 |
| + vpmuludq $H4,$T3,$H2 # h4*s3 |
| + vpaddq $T4,$D1,$D1 # d1 += h3*s3 |
| + vpaddq $H2,$D2,$D2 # d2 += h4*s3 |
| + |
| + vpmuludq $H3,$S4,$H3 # h3*s4 |
| + vpmuludq $H4,$S4,$H4 # h4*s4 |
| + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 |
| + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 |
| + vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4 |
| + vpmuludq $H1,$S4,$H0 # h1*s4 |
| + vmovdqa 64(%rcx),$MASK # .Lmask26 |
| + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 |
| + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 |
| + |
| + ################################################################ |
| + # horizontal addition |
| + |
| + vpsrldq \$8,$D1,$T1 |
| + vpsrldq \$8,$H2,$T2 |
| + vpsrldq \$8,$H3,$T3 |
| + vpsrldq \$8,$H4,$T4 |
| + vpsrldq \$8,$H0,$T0 |
| + vpaddq $T1,$D1,$D1 |
| + vpaddq $T2,$H2,$H2 |
| + vpaddq $T3,$H3,$H3 |
| + vpaddq $T4,$H4,$H4 |
| + vpaddq $T0,$H0,$H0 |
| + |
| + vpermq \$0x2,$H3,$T3 |
| + vpermq \$0x2,$H4,$T4 |
| + vpermq \$0x2,$H0,$T0 |
| + vpermq \$0x2,$D1,$T1 |
| + vpermq \$0x2,$H2,$T2 |
| + vpaddq $T3,$H3,$H3 |
| + vpaddq $T4,$H4,$H4 |
| + vpaddq $T0,$H0,$H0 |
| + vpaddq $T1,$D1,$D1 |
| + vpaddq $T2,$H2,$H2 |
| + |
| + ################################################################ |
| + # lazy reduction |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$D1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H4,$D4 |
| + vpand $MASK,$H4,$H4 |
| + |
| + vpsrlq \$26,$H1,$D1 |
| + vpand $MASK,$H1,$H1 |
| + vpaddq $D1,$H2,$H2 # h1 -> h2 |
| + |
| + vpaddq $D4,$H0,$H0 |
| + vpsllq \$2,$D4,$D4 |
| + vpaddq $D4,$H0,$H0 # h4 -> h0 |
| + |
| + vpsrlq \$26,$H2,$D2 |
| + vpand $MASK,$H2,$H2 |
| + vpaddq $D2,$H3,$H3 # h2 -> h3 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced |
| + vmovd %x#$H1,`4*1-48-64`($ctx) |
| + vmovd %x#$H2,`4*2-48-64`($ctx) |
| + vmovd %x#$H3,`4*3-48-64`($ctx) |
| + vmovd %x#$H4,`4*4-48-64`($ctx) |
| +___ |
| +$code.=<<___ if ($win64); |
| + vmovdqa 0x50(%r11),%xmm6 |
| + vmovdqa 0x60(%r11),%xmm7 |
| + vmovdqa 0x70(%r11),%xmm8 |
| + vmovdqa 0x80(%r11),%xmm9 |
| + vmovdqa 0x90(%r11),%xmm10 |
| + vmovdqa 0xa0(%r11),%xmm11 |
| + vmovdqa 0xb0(%r11),%xmm12 |
| + vmovdqa 0xc0(%r11),%xmm13 |
| + vmovdqa 0xd0(%r11),%xmm14 |
| + vmovdqa 0xe0(%r11),%xmm15 |
| + lea 0xf8(%r11),%rsp |
| +.Ldo_avx2_epilogue: |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea 8(%r11),%rsp |
| +.cfi_def_cfa %rsp,8 |
| +___ |
| +$code.=<<___; |
| + vzeroupper |
| + ret |
| +.cfi_endproc |
| +.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 |
| +___ |
| +####################################################################### |
| +if ($avx>2) { |
| +# On entry we have input length divisible by 64. But since inner loop |
| +# processes 128 bytes per iteration, cases when length is not divisible |
| +# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this |
| +# reason stack layout is kept identical to poly1305_blocks_avx2. If not |
| +# for this tail, we wouldn't have to even allocate stack frame... |
| + |
| +my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); |
| +my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); |
| +my $PADBIT="%zmm30"; |
| + |
| +map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain |
| +map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4)); |
| +map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); |
| +map(s/%y/%z/,($MASK)); |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks_avx512,\@function,4 |
| +.align 32 |
| +poly1305_blocks_avx512: |
| +.cfi_startproc |
| +.Lblocks_avx512: |
| + mov \$15,%eax |
| + kmovw %eax,%k2 |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea -8(%rsp),%r11 |
| +.cfi_def_cfa %r11,16 |
| + sub \$0x128,%rsp |
| +___ |
| +$code.=<<___ if ($win64); |
| + lea -0xf8(%rsp),%r11 |
| + sub \$0x1c8,%rsp |
| + vmovdqa %xmm6,0x50(%r11) |
| + vmovdqa %xmm7,0x60(%r11) |
| + vmovdqa %xmm8,0x70(%r11) |
| + vmovdqa %xmm9,0x80(%r11) |
| + vmovdqa %xmm10,0x90(%r11) |
| + vmovdqa %xmm11,0xa0(%r11) |
| + vmovdqa %xmm12,0xb0(%r11) |
| + vmovdqa %xmm13,0xc0(%r11) |
| + vmovdqa %xmm14,0xd0(%r11) |
| + vmovdqa %xmm15,0xe0(%r11) |
| +.Ldo_avx512_body: |
| +___ |
| +$code.=<<___; |
| + lea .Lconst(%rip),%rcx |
| + lea 48+64($ctx),$ctx # size optimization |
| + vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2 |
| + |
| + # expand pre-calculated table |
| + vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0} |
| + and \$-512,%rsp |
| + vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1} |
| + mov \$0x20,%rax |
| + vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1} |
| + vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2} |
| + vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2} |
| + vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3} |
| + vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3} |
| + vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4} |
| + vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4} |
| + vpermd $D0,$T2,$R0 # 00003412 -> 14243444 |
| + vpbroadcastq 64(%rcx),$MASK # .Lmask26 |
| + vpermd $D1,$T2,$R1 |
| + vpermd $T0,$T2,$S1 |
| + vpermd $D2,$T2,$R2 |
| + vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0 |
| + vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304 |
| + vpermd $T1,$T2,$S2 |
| + vmovdqu64 $R1,0x00(%rsp,%rax){%k2} |
| + vpsrlq \$32,$R1,$T1 |
| + vpermd $D3,$T2,$R3 |
| + vmovdqa64 $S1,0x40(%rsp){%k2} |
| + vpermd $T3,$T2,$S3 |
| + vpermd $D4,$T2,$R4 |
| + vmovdqu64 $R2,0x40(%rsp,%rax){%k2} |
| + vpermd $T4,$T2,$S4 |
| + vmovdqa64 $S2,0x80(%rsp){%k2} |
| + vmovdqu64 $R3,0x80(%rsp,%rax){%k2} |
| + vmovdqa64 $S3,0xc0(%rsp){%k2} |
| + vmovdqu64 $R4,0xc0(%rsp,%rax){%k2} |
| + vmovdqa64 $S4,0x100(%rsp){%k2} |
| + |
| + ################################################################ |
| + # calculate 5th through 8th powers of the key |
| + # |
| + # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1 |
| + # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2 |
| + # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3 |
| + # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4 |
| + # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0 |
| + |
| + vpmuludq $T0,$R0,$D0 # d0 = r0'*r0 |
| + vpmuludq $T0,$R1,$D1 # d1 = r0'*r1 |
| + vpmuludq $T0,$R2,$D2 # d2 = r0'*r2 |
| + vpmuludq $T0,$R3,$D3 # d3 = r0'*r3 |
| + vpmuludq $T0,$R4,$D4 # d4 = r0'*r4 |
| + vpsrlq \$32,$R2,$T2 |
| + |
| + vpmuludq $T1,$S4,$M0 |
| + vpmuludq $T1,$R0,$M1 |
| + vpmuludq $T1,$R1,$M2 |
| + vpmuludq $T1,$R2,$M3 |
| + vpmuludq $T1,$R3,$M4 |
| + vpsrlq \$32,$R3,$T3 |
| + vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4 |
| + vpaddq $M1,$D1,$D1 # d1 += r1'*r0 |
| + vpaddq $M2,$D2,$D2 # d2 += r1'*r1 |
| + vpaddq $M3,$D3,$D3 # d3 += r1'*r2 |
| + vpaddq $M4,$D4,$D4 # d4 += r1'*r3 |
| + |
| + vpmuludq $T2,$S3,$M0 |
| + vpmuludq $T2,$S4,$M1 |
| + vpmuludq $T2,$R1,$M3 |
| + vpmuludq $T2,$R2,$M4 |
| + vpmuludq $T2,$R0,$M2 |
| + vpsrlq \$32,$R4,$T4 |
| + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3 |
| + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4 |
| + vpaddq $M3,$D3,$D3 # d3 += r2'*r1 |
| + vpaddq $M4,$D4,$D4 # d4 += r2'*r2 |
| + vpaddq $M2,$D2,$D2 # d2 += r2'*r0 |
| + |
| + vpmuludq $T3,$S2,$M0 |
| + vpmuludq $T3,$R0,$M3 |
| + vpmuludq $T3,$R1,$M4 |
| + vpmuludq $T3,$S3,$M1 |
| + vpmuludq $T3,$S4,$M2 |
| + vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2 |
| + vpaddq $M3,$D3,$D3 # d3 += r3'*r0 |
| + vpaddq $M4,$D4,$D4 # d4 += r3'*r1 |
| + vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3 |
| + vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4 |
| + |
| + vpmuludq $T4,$S4,$M3 |
| + vpmuludq $T4,$R0,$M4 |
| + vpmuludq $T4,$S1,$M0 |
| + vpmuludq $T4,$S2,$M1 |
| + vpmuludq $T4,$S3,$M2 |
| + vpaddq $M3,$D3,$D3 # d3 += r4'*5*r4 |
| + vpaddq $M4,$D4,$D4 # d4 += r4'*r0 |
| + vpaddq $M0,$D0,$D0 # d0 += r4'*5*r1 |
| + vpaddq $M1,$D1,$D1 # d1 += r4'*5*r2 |
| + vpaddq $M2,$D2,$D2 # d2 += r4'*5*r3 |
| + |
| + ################################################################ |
| + # load input |
| + vmovdqu64 16*0($inp),%z#$T3 |
| + vmovdqu64 16*4($inp),%z#$T4 |
| + lea 16*8($inp),$inp |
| + |
| + ################################################################ |
| + # lazy reduction |
| + |
| + vpsrlq \$26,$D3,$M3 |
| + vpandq $MASK,$D3,$D3 |
| + vpaddq $M3,$D4,$D4 # d3 -> d4 |
| + |
| + vpsrlq \$26,$D0,$M0 |
| + vpandq $MASK,$D0,$D0 |
| + vpaddq $M0,$D1,$D1 # d0 -> d1 |
| + |
| + vpsrlq \$26,$D4,$M4 |
| + vpandq $MASK,$D4,$D4 |
| + |
| + vpsrlq \$26,$D1,$M1 |
| + vpandq $MASK,$D1,$D1 |
| + vpaddq $M1,$D2,$D2 # d1 -> d2 |
| + |
| + vpaddq $M4,$D0,$D0 |
| + vpsllq \$2,$M4,$M4 |
| + vpaddq $M4,$D0,$D0 # d4 -> d0 |
| + |
| + vpsrlq \$26,$D2,$M2 |
| + vpandq $MASK,$D2,$D2 |
| + vpaddq $M2,$D3,$D3 # d2 -> d3 |
| + |
| + vpsrlq \$26,$D0,$M0 |
| + vpandq $MASK,$D0,$D0 |
| + vpaddq $M0,$D1,$D1 # d0 -> d1 |
| + |
| + vpsrlq \$26,$D3,$M3 |
| + vpandq $MASK,$D3,$D3 |
| + vpaddq $M3,$D4,$D4 # d3 -> d4 |
| + |
| + ################################################################ |
| + # at this point we have 14243444 in $R0-$S4 and 05060708 in |
| + # $D0-$D4, ... |
| + |
| + vpunpcklqdq $T4,$T3,$T0 # transpose input |
| + vpunpckhqdq $T4,$T3,$T4 |
| + |
| + # ... since input 64-bit lanes are ordered as 73625140, we could |
| + # "vperm" it to 76543210 (here and in each loop iteration), *or* |
| + # we could just flow along, hence the goal for $R0-$S4 is |
| + # 1858286838784888 ... |
| + |
| + vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512: |
| + mov \$0x7777,%eax |
| + kmovw %eax,%k1 |
| + |
| + vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4--- |
| + vpermd $R1,$M0,$R1 |
| + vpermd $R2,$M0,$R2 |
| + vpermd $R3,$M0,$R3 |
| + vpermd $R4,$M0,$R4 |
| + |
| + vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888 |
| + vpermd $D1,$M0,${R1}{%k1} |
| + vpermd $D2,$M0,${R2}{%k1} |
| + vpermd $D3,$M0,${R3}{%k1} |
| + vpermd $D4,$M0,${R4}{%k1} |
| + |
| + vpslld \$2,$R1,$S1 # *5 |
| + vpslld \$2,$R2,$S2 |
| + vpslld \$2,$R3,$S3 |
| + vpslld \$2,$R4,$S4 |
| + vpaddd $R1,$S1,$S1 |
| + vpaddd $R2,$S2,$S2 |
| + vpaddd $R3,$S3,$S3 |
| + vpaddd $R4,$S4,$S4 |
| + |
| + vpbroadcastq 32(%rcx),$PADBIT # .L129 |
| + |
| + vpsrlq \$52,$T0,$T2 # splat input |
| + vpsllq \$12,$T4,$T3 |
| + vporq $T3,$T2,$T2 |
| + vpsrlq \$26,$T0,$T1 |
| + vpsrlq \$14,$T4,$T3 |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + vpandq $MASK,$T2,$T2 # 2 |
| + vpandq $MASK,$T0,$T0 # 0 |
| + #vpandq $MASK,$T1,$T1 # 1 |
| + #vpandq $MASK,$T3,$T3 # 3 |
| + #vporq $PADBIT,$T4,$T4 # padbit, yes, always |
| + |
| + vpaddq $H2,$T2,$H2 # accumulate input |
| + sub \$192,$len |
| + jbe .Ltail_avx512 |
| + jmp .Loop_avx512 |
| + |
| +.align 32 |
| +.Loop_avx512: |
| + ################################################################ |
| + # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8 |
| + # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7 |
| + # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6 |
| + # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5 |
| + # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4 |
| + # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3 |
| + # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2 |
| + # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1 |
| + # \________/\___________/ |
| + ################################################################ |
| + #vpaddq $H2,$T2,$H2 # accumulate input |
| + |
| + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 |
| + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 |
| + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 |
| + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 |
| + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 |
| + # |
| + # however, as h2 is "chronologically" first one available pull |
| + # corresponding operations up, so it's |
| + # |
| + # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4 |
| + # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0 |
| + # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1 |
| + # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2 |
| + # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3 |
| + |
| + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 |
| + vpaddq $H0,$T0,$H0 |
| + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 |
| + vpandq $MASK,$T1,$T1 # 1 |
| + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 |
| + vpandq $MASK,$T3,$T3 # 3 |
| + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 |
| + vporq $PADBIT,$T4,$T4 # padbit, yes, always |
| + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 |
| + vpaddq $H1,$T1,$H1 # accumulate input |
| + vpaddq $H3,$T3,$H3 |
| + vpaddq $H4,$T4,$H4 |
| + |
| + vmovdqu64 16*0($inp),$T3 # load input |
| + vmovdqu64 16*4($inp),$T4 |
| + lea 16*8($inp),$inp |
| + vpmuludq $H0,$R3,$M3 |
| + vpmuludq $H0,$R4,$M4 |
| + vpmuludq $H0,$R0,$M0 |
| + vpmuludq $H0,$R1,$M1 |
| + vpaddq $M3,$D3,$D3 # d3 += h0*r3 |
| + vpaddq $M4,$D4,$D4 # d4 += h0*r4 |
| + vpaddq $M0,$D0,$D0 # d0 += h0*r0 |
| + vpaddq $M1,$D1,$D1 # d1 += h0*r1 |
| + |
| + vpmuludq $H1,$R2,$M3 |
| + vpmuludq $H1,$R3,$M4 |
| + vpmuludq $H1,$S4,$M0 |
| + vpmuludq $H0,$R2,$M2 |
| + vpaddq $M3,$D3,$D3 # d3 += h1*r2 |
| + vpaddq $M4,$D4,$D4 # d4 += h1*r3 |
| + vpaddq $M0,$D0,$D0 # d0 += h1*s4 |
| + vpaddq $M2,$D2,$D2 # d2 += h0*r2 |
| + |
| + vpunpcklqdq $T4,$T3,$T0 # transpose input |
| + vpunpckhqdq $T4,$T3,$T4 |
| + |
| + vpmuludq $H3,$R0,$M3 |
| + vpmuludq $H3,$R1,$M4 |
| + vpmuludq $H1,$R0,$M1 |
| + vpmuludq $H1,$R1,$M2 |
| + vpaddq $M3,$D3,$D3 # d3 += h3*r0 |
| + vpaddq $M4,$D4,$D4 # d4 += h3*r1 |
| + vpaddq $M1,$D1,$D1 # d1 += h1*r0 |
| + vpaddq $M2,$D2,$D2 # d2 += h1*r1 |
| + |
| + vpmuludq $H4,$S4,$M3 |
| + vpmuludq $H4,$R0,$M4 |
| + vpmuludq $H3,$S2,$M0 |
| + vpmuludq $H3,$S3,$M1 |
| + vpaddq $M3,$D3,$D3 # d3 += h4*s4 |
| + vpmuludq $H3,$S4,$M2 |
| + vpaddq $M4,$D4,$D4 # d4 += h4*r0 |
| + vpaddq $M0,$D0,$D0 # d0 += h3*s2 |
| + vpaddq $M1,$D1,$D1 # d1 += h3*s3 |
| + vpaddq $M2,$D2,$D2 # d2 += h3*s4 |
| + |
| + vpmuludq $H4,$S1,$M0 |
| + vpmuludq $H4,$S2,$M1 |
| + vpmuludq $H4,$S3,$M2 |
| + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 |
| + vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2 |
| + vpaddq $M2,$D2,$H2 # h2 = d2 + h4*s3 |
| + |
| + ################################################################ |
| + # lazy reduction (interleaved with input splat) |
| + |
| + vpsrlq \$52,$T0,$T2 # splat input |
| + vpsllq \$12,$T4,$T3 |
| + |
| + vpsrlq \$26,$D3,$H3 |
| + vpandq $MASK,$D3,$D3 |
| + vpaddq $H3,$D4,$H4 # h3 -> h4 |
| + |
| + vporq $T3,$T2,$T2 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpandq $MASK,$H0,$H0 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpandq $MASK,$T2,$T2 # 2 |
| + |
| + vpsrlq \$26,$H4,$D4 |
| + vpandq $MASK,$H4,$H4 |
| + |
| + vpsrlq \$26,$H1,$D1 |
| + vpandq $MASK,$H1,$H1 |
| + vpaddq $D1,$H2,$H2 # h1 -> h2 |
| + |
| + vpaddq $D4,$H0,$H0 |
| + vpsllq \$2,$D4,$D4 |
| + vpaddq $D4,$H0,$H0 # h4 -> h0 |
| + |
| + vpaddq $T2,$H2,$H2 # modulo-scheduled |
| + vpsrlq \$26,$T0,$T1 |
| + |
| + vpsrlq \$26,$H2,$D2 |
| + vpandq $MASK,$H2,$H2 |
| + vpaddq $D2,$D3,$H3 # h2 -> h3 |
| + |
| + vpsrlq \$14,$T4,$T3 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpandq $MASK,$H0,$H0 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpandq $MASK,$H3,$H3 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpandq $MASK,$T0,$T0 # 0 |
| + #vpandq $MASK,$T1,$T1 # 1 |
| + #vpandq $MASK,$T3,$T3 # 3 |
| + #vporq $PADBIT,$T4,$T4 # padbit, yes, always |
| + |
| + sub \$128,$len |
| + ja .Loop_avx512 |
| + |
| +.Ltail_avx512: |
| + ################################################################ |
| + # while above multiplications were by r^8 in all lanes, in last |
| + # iteration we multiply least significant lane by r^8 and most |
| + # significant one by r, that's why table gets shifted... |
| + |
| + vpsrlq \$32,$R0,$R0 # 0105020603070408 |
| + vpsrlq \$32,$R1,$R1 |
| + vpsrlq \$32,$R2,$R2 |
| + vpsrlq \$32,$S3,$S3 |
| + vpsrlq \$32,$S4,$S4 |
| + vpsrlq \$32,$R3,$R3 |
| + vpsrlq \$32,$R4,$R4 |
| + vpsrlq \$32,$S1,$S1 |
| + vpsrlq \$32,$S2,$S2 |
| + |
| + ################################################################ |
| + # load either next or last 64 bytes of input |
| + lea ($inp,$len),$inp |
| + |
| + #vpaddq $H2,$T2,$H2 # accumulate input |
| + vpaddq $H0,$T0,$H0 |
| + |
| + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 |
| + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 |
| + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 |
| + vpandq $MASK,$T1,$T1 # 1 |
| + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 |
| + vpandq $MASK,$T3,$T3 # 3 |
| + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 |
| + vporq $PADBIT,$T4,$T4 # padbit, yes, always |
| + vpaddq $H1,$T1,$H1 # accumulate input |
| + vpaddq $H3,$T3,$H3 |
| + vpaddq $H4,$T4,$H4 |
| + |
| + vmovdqu 16*0($inp),%x#$T0 |
| + vpmuludq $H0,$R3,$M3 |
| + vpmuludq $H0,$R4,$M4 |
| + vpmuludq $H0,$R0,$M0 |
| + vpmuludq $H0,$R1,$M1 |
| + vpaddq $M3,$D3,$D3 # d3 += h0*r3 |
| + vpaddq $M4,$D4,$D4 # d4 += h0*r4 |
| + vpaddq $M0,$D0,$D0 # d0 += h0*r0 |
| + vpaddq $M1,$D1,$D1 # d1 += h0*r1 |
| + |
| + vmovdqu 16*1($inp),%x#$T1 |
| + vpmuludq $H1,$R2,$M3 |
| + vpmuludq $H1,$R3,$M4 |
| + vpmuludq $H1,$S4,$M0 |
| + vpmuludq $H0,$R2,$M2 |
| + vpaddq $M3,$D3,$D3 # d3 += h1*r2 |
| + vpaddq $M4,$D4,$D4 # d4 += h1*r3 |
| + vpaddq $M0,$D0,$D0 # d0 += h1*s4 |
| + vpaddq $M2,$D2,$D2 # d2 += h0*r2 |
| + |
| + vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0 |
| + vpmuludq $H3,$R0,$M3 |
| + vpmuludq $H3,$R1,$M4 |
| + vpmuludq $H1,$R0,$M1 |
| + vpmuludq $H1,$R1,$M2 |
| + vpaddq $M3,$D3,$D3 # d3 += h3*r0 |
| + vpaddq $M4,$D4,$D4 # d4 += h3*r1 |
| + vpaddq $M1,$D1,$D1 # d1 += h1*r0 |
| + vpaddq $M2,$D2,$D2 # d2 += h1*r1 |
| + |
| + vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1 |
| + vpmuludq $H4,$S4,$M3 |
| + vpmuludq $H4,$R0,$M4 |
| + vpmuludq $H3,$S2,$M0 |
| + vpmuludq $H3,$S3,$M1 |
| + vpmuludq $H3,$S4,$M2 |
| + vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4 |
| + vpaddq $M4,$D4,$D4 # d4 += h4*r0 |
| + vpaddq $M0,$D0,$D0 # d0 += h3*s2 |
| + vpaddq $M1,$D1,$D1 # d1 += h3*s3 |
| + vpaddq $M2,$D2,$D2 # d2 += h3*s4 |
| + |
| + vpmuludq $H4,$S1,$M0 |
| + vpmuludq $H4,$S2,$M1 |
| + vpmuludq $H4,$S3,$M2 |
| + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 |
| + vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2 |
| + vpaddq $M2,$D2,$H2 # h2 = d2 + h4*s3 |
| + |
| + ################################################################ |
| + # horizontal addition |
| + |
| + mov \$1,%eax |
| + vpermq \$0xb1,$H3,$D3 |
| + vpermq \$0xb1,$D4,$H4 |
| + vpermq \$0xb1,$H0,$D0 |
| + vpermq \$0xb1,$H1,$D1 |
| + vpermq \$0xb1,$H2,$D2 |
| + vpaddq $D3,$H3,$H3 |
| + vpaddq $D4,$H4,$H4 |
| + vpaddq $D0,$H0,$H0 |
| + vpaddq $D1,$H1,$H1 |
| + vpaddq $D2,$H2,$H2 |
| + |
| + kmovw %eax,%k3 |
| + vpermq \$0x2,$H3,$D3 |
| + vpermq \$0x2,$H4,$D4 |
| + vpermq \$0x2,$H0,$D0 |
| + vpermq \$0x2,$H1,$D1 |
| + vpermq \$0x2,$H2,$D2 |
| + vpaddq $D3,$H3,$H3 |
| + vpaddq $D4,$H4,$H4 |
| + vpaddq $D0,$H0,$H0 |
| + vpaddq $D1,$H1,$H1 |
| + vpaddq $D2,$H2,$H2 |
| + |
| + vextracti64x4 \$0x1,$H3,%y#$D3 |
| + vextracti64x4 \$0x1,$H4,%y#$D4 |
| + vextracti64x4 \$0x1,$H0,%y#$D0 |
| + vextracti64x4 \$0x1,$H1,%y#$D1 |
| + vextracti64x4 \$0x1,$H2,%y#$D2 |
| + vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case |
| + vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2 |
| + vpaddq $D0,$H0,${H0}{%k3}{z} |
| + vpaddq $D1,$H1,${H1}{%k3}{z} |
| + vpaddq $D2,$H2,${H2}{%k3}{z} |
| +___ |
| +map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT)); |
| +map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK)); |
| +$code.=<<___; |
| + ################################################################ |
| + # lazy reduction (interleaved with input splat) |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpsrldq \$6,$T0,$T2 # splat input |
| + vpsrldq \$6,$T1,$T3 |
| + vpunpckhqdq $T1,$T0,$T4 # 4 |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpunpcklqdq $T3,$T2,$T2 # 2:3 |
| + vpunpcklqdq $T1,$T0,$T0 # 0:1 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H4,$D4 |
| + vpand $MASK,$H4,$H4 |
| + |
| + vpsrlq \$26,$H1,$D1 |
| + vpand $MASK,$H1,$H1 |
| + vpsrlq \$30,$T2,$T3 |
| + vpsrlq \$4,$T2,$T2 |
| + vpaddq $D1,$H2,$H2 # h1 -> h2 |
| + |
| + vpaddq $D4,$H0,$H0 |
| + vpsllq \$2,$D4,$D4 |
| + vpsrlq \$26,$T0,$T1 |
| + vpsrlq \$40,$T4,$T4 # 4 |
| + vpaddq $D4,$H0,$H0 # h4 -> h0 |
| + |
| + vpsrlq \$26,$H2,$D2 |
| + vpand $MASK,$H2,$H2 |
| + vpand $MASK,$T2,$T2 # 2 |
| + vpand $MASK,$T0,$T0 # 0 |
| + vpaddq $D2,$H3,$H3 # h2 -> h3 |
| + |
| + vpsrlq \$26,$H0,$D0 |
| + vpand $MASK,$H0,$H0 |
| + vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2 |
| + vpand $MASK,$T1,$T1 # 1 |
| + vpaddq $D0,$H1,$H1 # h0 -> h1 |
| + |
| + vpsrlq \$26,$H3,$D3 |
| + vpand $MASK,$H3,$H3 |
| + vpand $MASK,$T3,$T3 # 3 |
| + vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| + vpaddq $D3,$H4,$H4 # h3 -> h4 |
| + |
| + lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 |
| + add \$64,$len |
| + jnz .Ltail_avx2 |
| + |
| + vpsubq $T2,$H2,$H2 # undo input accumulation |
| + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced |
| + vmovd %x#$H1,`4*1-48-64`($ctx) |
| + vmovd %x#$H2,`4*2-48-64`($ctx) |
| + vmovd %x#$H3,`4*3-48-64`($ctx) |
| + vmovd %x#$H4,`4*4-48-64`($ctx) |
| + vzeroall |
| +___ |
| +$code.=<<___ if ($win64); |
| + movdqa 0x50(%r11),%xmm6 |
| + movdqa 0x60(%r11),%xmm7 |
| + movdqa 0x70(%r11),%xmm8 |
| + movdqa 0x80(%r11),%xmm9 |
| + movdqa 0x90(%r11),%xmm10 |
| + movdqa 0xa0(%r11),%xmm11 |
| + movdqa 0xb0(%r11),%xmm12 |
| + movdqa 0xc0(%r11),%xmm13 |
| + movdqa 0xd0(%r11),%xmm14 |
| + movdqa 0xe0(%r11),%xmm15 |
| + lea 0xf8(%r11),%rsp |
| +.Ldo_avx512_epilogue: |
| +___ |
| +$code.=<<___ if (!$win64); |
| + lea 8(%r11),%rsp |
| +.cfi_def_cfa %rsp,8 |
| +___ |
| +$code.=<<___; |
| + ret |
| +.cfi_endproc |
| +.size poly1305_blocks_avx512,.-poly1305_blocks_avx512 |
| +___ |
| +if ($avx>3) { |
| +######################################################################## |
| +# VPMADD52 version using 2^44 radix. |
| +# |
| +# One can argue that base 2^52 would be more natural. Well, even though |
| +# some operations would be more natural, one has to recognize a couple of |
| +# things. First, base 2^52 provides no advantage over base 2^44 in terms |
| +# of the number of multiply-and-accumulate operations. Secondly, it makes |
| +# it impossible to pre-compute multiples of 5 [referred to as s[]/sN in |
| +# reference implementations], which means that more such operations |
| +# would have to be performed in the inner loop, which in turn makes the |
| +# critical path longer. In other words, even though base 2^44 reduction |
| +# might look less elegant, the overall critical path is actually shorter... |
| + |
| +######################################################################## |
| +# The layout of the opaque area is as follows. |
| +# |
| +# unsigned __int64 h[3]; # current hash value base 2^44 |
| +# unsigned __int64 s[2]; # key value*20 base 2^44 |
| +# unsigned __int64 r[3]; # key value base 2^44 |
| +# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4]; |
| +# # r^n positions reflect |
| +# # placement in register, not |
| +# # memory, R[3] is R[1]*20 |
| + |
| +$code.=<<___; |
| +.type poly1305_init_base2_44,\@function,3 |
| +.align 32 |
| +poly1305_init_base2_44: # zero the hash, split the key into base 2^44 limbs |
| + xor %rax,%rax |
| + mov %rax,0($ctx) # initialize hash value |
| + mov %rax,8($ctx) |
| + mov %rax,16($ctx) |
| + |
| +.Linit_base2_44: |
| + lea poly1305_blocks_vpmadd52(%rip),%r10 # address of the blocks routine |
| + lea poly1305_emit_base2_44(%rip),%r11 # address of the emit routine |
| + |
| + mov \$0x0ffffffc0fffffff,%rax # mask for the low key qword |
| + mov \$0x0ffffffc0ffffffc,%rcx # mask for the high key qword |
| + and 0($inp),%rax |
| + mov \$0x00000fffffffffff,%r8 # 44-bit limb mask |
| + and 8($inp),%rcx |
| + mov \$0x00000fffffffffff,%r9 # 44-bit limb mask |
| + and %rax,%r8 # low 44 bits of the masked key |
| + shrd \$44,%rcx,%rax # key bits from bit 44 upwards |
| + mov %r8,40($ctx) # r0 |
| + and %r9,%rax |
| + shr \$24,%rcx # key bits from bit 88 upwards |
| + mov %rax,48($ctx) # r1 |
| + lea (%rax,%rax,4),%rax # *5 |
| + mov %rcx,56($ctx) # r2 |
| + shl \$2,%rax # *4, giving r1*20 |
| + lea (%rcx,%rcx,4),%rcx # *5 |
| + shl \$2,%rcx # *4, giving r2*20 |
| + mov %rax,24($ctx) # s1 |
| + mov %rcx,32($ctx) # s2 |
| + movq \$-1,64($ctx) # write impossible value (negative) in the key power slot |
| +___ |
| +$code.=<<___ if ($flavour !~ /elf32/); |
| + mov %r10,0(%rdx) # store both routine addresses as 64-bit values |
| + mov %r11,8(%rdx) |
| +___ |
| +$code.=<<___ if ($flavour =~ /elf32/); |
| + mov %r10d,0(%rdx) # store both routine addresses as 32-bit values |
| + mov %r11d,4(%rdx) |
| +___ |
| +$code.=<<___; |
| + mov \$1,%eax # return 1 |
| + ret |
| +.size poly1305_init_base2_44,.-poly1305_init_base2_44 |
| +___ |
| +{ |
| +my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17)); |
| +my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21)); |
| +my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..24)); # three names, three registers |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks_vpmadd52,\@function,4 |
| +.align 32 |
| +poly1305_blocks_vpmadd52: # one block per iteration, rest is handed to the 4x code |
| + shr \$4,$len # byte count / 16 |
| + jz .Lno_data_vpmadd52 # too short |
| + |
| + shl \$40,$padbit # move padbit to bit 40, it is or-ed into the top limb |
| + mov 64($ctx),%r8 # peek on power of the key |
| + |
| + # if powers of the key are not calculated yet, process up to 3 |
| + # blocks with this single-block subroutine, otherwise ensure that |
| + # length is divisible by 2 blocks and pass the rest down to next |
| + # subroutine... |
| + |
| + mov \$3,%rax |
| + mov \$1,%r10 |
| + cmp \$4,$len # is input long |
| + cmovae %r10,%rax |
| + test %r8,%r8 # is power value impossible? |
| + cmovns %r10,%rax |
| + |
| + and $len,%rax # is input of favourable length? |
| + jz .Lblocks_vpmadd52_4x |
| + |
| + sub %rax,$len # rax blocks are processed by the loop below |
| + mov \$7,%r10d |
| + mov \$1,%r11d |
| + kmovw %r10d,%k7 # mask selecting the three low qwords |
| + lea .L2_44_inp_permd(%rip),%r10 |
| + kmovw %r11d,%k1 # mask selecting the lowest qword |
| + |
| + vmovq $padbit,%x#$PAD |
| + vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd |
| + vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift |
| + vpermq \$0xcf,$PAD,$PAD # padbit goes to qword 2, other qwords are zero |
| + vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask |
| + |
| + vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value |
| + vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys |
| + vmovdqu64 32($ctx),${r1r0s2}{%k7}{z} |
| + vmovdqu64 24($ctx),${r0s2s1}{%k7}{z} |
| + |
| + vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt |
| + vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft |
| + |
| + jmp .Loop_vpmadd52 |
| + |
| +.align 32 |
| +.Loop_vpmadd52: |
| + vmovdqu32 0($inp),%x#$T0 # load input as ----3210 |
| + lea 16($inp),$inp |
| + |
| + vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110 |
| + vpsrlvq $inp_shift,$T0,$T0 # per-qword shifts from .L2_44_inp_shift |
| + vpandq $reduc_mask,$T0,$T0 |
| + vporq $PAD,$T0,$T0 |
| + |
| + vpaddq $T0,$Dlo,$Dlo # accumulate input |
| + |
| + vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value |
| + vpermq \$0b01010101,$Dlo,${H1}{%k7}{z} |
| + vpermq \$0b10101010,$Dlo,${H2}{%k7}{z} |
| + |
| + vpxord $Dlo,$Dlo,$Dlo |
| + vpxord $Dhi,$Dhi,$Dhi |
| + |
| + vpmadd52luq $r2r1r0,$H0,$Dlo |
| + vpmadd52huq $r2r1r0,$H0,$Dhi |
| + |
| + vpmadd52luq $r1r0s2,$H1,$Dlo |
| + vpmadd52huq $r1r0s2,$H1,$Dhi |
| + |
| + vpmadd52luq $r0s2s1,$H2,$Dlo |
| + vpmadd52huq $r0s2s1,$H2,$Dhi |
| + |
| + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword |
| + vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword |
| + vpandq $reduc_mask,$Dlo,$Dlo |
| + |
| + vpaddq $T0,$Dhi,$Dhi |
| + |
| + vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword |
| + |
| + vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-) |
| + |
| + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword |
| + vpandq $reduc_mask,$Dlo,$Dlo |
| + |
| + vpermq \$0b10010011,$T0,$T0 |
| + |
| + vpaddq $T0,$Dlo,$Dlo |
| + |
| + vpermq \$0b10010011,$Dlo,${T0}{%k1}{z} # topmost qword moved to lowest, rest zeroed |
| + |
| + vpaddq $T0,$Dlo,$Dlo |
| + vpsllq \$2,$T0,$T0 # *4, so the two adds contribute *5 in total |
| + |
| + vpaddq $T0,$Dlo,$Dlo |
| + |
| + dec %rax # len-=16 |
| + jnz .Loop_vpmadd52 |
| + |
| + vmovdqu64 $Dlo,0($ctx){%k7} # store hash value |
| + |
| + test $len,$len # anything left for the 4x code? |
| + jnz .Lblocks_vpmadd52_4x |
| + |
| +.Lno_data_vpmadd52: |
| + ret |
| +.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 |
| +___ |
| +} |
| +{ |
| +######################################################################## |
| +# As implied by its name, the 4x subroutine processes 4 blocks in parallel |
| +# (but also handles 4*n+2 block lengths). It uses up to the 4th key power |
| +# and operates on 256-bit %ymm registers. |
| + |
| +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); |
| +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); |
| +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks_vpmadd52_4x,\@function,4 |
| +.align 32 |
| +poly1305_blocks_vpmadd52_4x: |
| + shr \$4,$len # byte count / 16 |
| + jz .Lno_data_vpmadd52_4x # too short |
| + |
| + shl \$40,$padbit |
| + mov 64($ctx),%r8 # peek on power of the key |
| + |
| +.Lblocks_vpmadd52_4x: |
| + vpbroadcastq $padbit,$PAD |
| + |
| + vmovdqa64 .Lx_mask44(%rip),$mask44 |
| + mov \$5,%eax # mask 0b0101, qwords 0 and 2 |
| + vmovdqa64 .Lx_mask42(%rip),$mask42 |
| + kmovw %eax,%k1 # used in 2x path |
| + |
| + test %r8,%r8 # is power value impossible? |
| + js .Linit_vpmadd52 # if it is, then init R[4] |
| + |
| + vmovq 0($ctx),%x#$H0 # load current hash value |
| + vmovq 8($ctx),%x#$H1 |
| + vmovq 16($ctx),%x#$H2 |
| + |
| + test \$3,$len # is length 4*n+2? |
| + jnz .Lblocks_vpmadd52_2x_do |
| + |
| +.Lblocks_vpmadd52_4x_do: |
| + vpbroadcastq 64($ctx),$R0 # load 4th power of the key |
| + vpbroadcastq 96($ctx),$R1 |
| + vpbroadcastq 128($ctx),$R2 |
| + vpbroadcastq 160($ctx),$S1 |
| + |
| +.Lblocks_vpmadd52_4x_key_loaded: |
| + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 |
| + vpaddq $R2,$S2,$S2 |
| + vpsllq \$2,$S2,$S2 |
| + |
| + test \$7,$len # is len 8*n? |
| + jz .Lblocks_vpmadd52_8x |
| + |
| + vmovdqu64 16*0($inp),$T2 # load data |
| + vmovdqu64 16*2($inp),$T3 |
| + lea 16*4($inp),$inp |
| + |
| + vpunpcklqdq $T3,$T2,$T1 # transpose data |
| + vpunpckhqdq $T3,$T2,$T3 |
| + |
| + # at this point 64-bit lanes are ordered as 3-1-2-0 |
| + |
| + vpsrlq \$24,$T3,$T2 # splat the data |
| + vporq $PAD,$T2,$T2 |
| + vpaddq $T2,$H2,$H2 # accumulate input |
| + vpandq $mask44,$T1,$T0 |
| + vpsrlq \$44,$T1,$T1 |
| + vpsllq \$20,$T3,$T3 |
| + vporq $T3,$T1,$T1 |
| + vpandq $mask44,$T1,$T1 |
| + |
| + sub \$4,$len |
| + jz .Ltail_vpmadd52_4x |
| + jmp .Loop_vpmadd52_4x |
| + ud2 # not reached, follows an unconditional jmp |
| + |
| +.align 32 |
| +.Linit_vpmadd52: |
| + vmovq 24($ctx),%x#$S1 # load key |
| + vmovq 56($ctx),%x#$H2 |
| + vmovq 32($ctx),%x#$S2 |
| + vmovq 40($ctx),%x#$R0 |
| + vmovq 48($ctx),%x#$R1 |
| + |
| + vmovdqa $R0,$H0 |
| + vmovdqa $R1,$H1 |
| + vmovdqa $H2,$R2 |
| + |
| + mov \$2,%eax # two passes through the multiplication below |
| + |
| +.Lmul_init_vpmadd52: |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $H2,$S1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $H2,$S1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $H2,$S2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $H2,$S2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $H2,$R0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $H2,$R0,$D2hi |
| + |
| + vpmadd52luq $H0,$R0,$D0lo |
| + vpmadd52huq $H0,$R0,$D0hi |
| + vpmadd52luq $H0,$R1,$D1lo |
| + vpmadd52huq $H0,$R1,$D1hi |
| + vpmadd52luq $H0,$R2,$D2lo |
| + vpmadd52huq $H0,$R2,$D2hi |
| + |
| + vpmadd52luq $H1,$S2,$D0lo |
| + vpmadd52huq $H1,$S2,$D0hi |
| + vpmadd52luq $H1,$R0,$D1lo |
| + vpmadd52huq $H1,$R0,$D1hi |
| + vpmadd52luq $H1,$R1,$D2lo |
| + vpmadd52huq $H1,$R1,$D2hi |
| + |
| + ################################################################ |
| + # partial reduction |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$H0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$H1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$H2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + vpsllq \$2,$D2hi,$D2hi # *4, so the two adds contribute *5 in total |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + |
| + vpsrlq \$44,$H0,$tmp # additional step |
| + vpandq $mask44,$H0,$H0 |
| + |
| + vpaddq $tmp,$H1,$H1 |
| + |
| + dec %eax |
| + jz .Ldone_init_vpmadd52 |
| + |
| + vpunpcklqdq $R1,$H1,$R1 # 1,2 |
| + vpbroadcastq %x#$H1,%x#$H1 # 2,2 |
| + vpunpcklqdq $R2,$H2,$R2 |
| + vpbroadcastq %x#$H2,%x#$H2 |
| + vpunpcklqdq $R0,$H0,$R0 |
| + vpbroadcastq %x#$H0,%x#$H0 |
| + |
| + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 |
| + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 |
| + vpaddq $R1,$S1,$S1 |
| + vpaddq $R2,$S2,$S2 |
| + vpsllq \$2,$S1,$S1 |
| + vpsllq \$2,$S2,$S2 |
| + |
| + jmp .Lmul_init_vpmadd52 |
| + ud2 |
| + |
| +.align 32 |
| +.Ldone_init_vpmadd52: |
| + vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4 |
| + vinserti128 \$1,%x#$R2,$H2,$R2 |
| + vinserti128 \$1,%x#$R0,$H0,$R0 |
| + |
| + vpermq \$0b11011000,$R1,$R1 # 1,3,2,4 |
| + vpermq \$0b11011000,$R2,$R2 |
| + vpermq \$0b11011000,$R0,$R0 |
| + |
| + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 |
| + vpaddq $R1,$S1,$S1 |
| + vpsllq \$2,$S1,$S1 |
| + |
| + vmovq 0($ctx),%x#$H0 # load current hash value |
| + vmovq 8($ctx),%x#$H1 |
| + vmovq 16($ctx),%x#$H2 |
| + |
| + test \$3,$len # is length 4*n+2? |
| + jnz .Ldone_init_vpmadd52_2x |
| + |
| + vmovdqu64 $R0,64($ctx) # save key powers |
| + vpbroadcastq %x#$R0,$R0 # broadcast 4th power |
| + vmovdqu64 $R1,96($ctx) |
| + vpbroadcastq %x#$R1,$R1 |
| + vmovdqu64 $R2,128($ctx) |
| + vpbroadcastq %x#$R2,$R2 |
| + vmovdqu64 $S1,160($ctx) |
| + vpbroadcastq %x#$S1,$S1 |
| + |
| + jmp .Lblocks_vpmadd52_4x_key_loaded |
| + ud2 |
| + |
| +.align 32 |
| +.Ldone_init_vpmadd52_2x: |
| + vmovdqu64 $R0,64($ctx) # save key powers |
| + vpsrldq \$8,$R0,$R0 # 0-1-0-2 |
| + vmovdqu64 $R1,96($ctx) |
| + vpsrldq \$8,$R1,$R1 |
| + vmovdqu64 $R2,128($ctx) |
| + vpsrldq \$8,$R2,$R2 |
| + vmovdqu64 $S1,160($ctx) |
| + vpsrldq \$8,$S1,$S1 |
| + jmp .Lblocks_vpmadd52_2x_key_loaded |
| + ud2 |
| + |
| +.align 32 |
| +.Lblocks_vpmadd52_2x_do: |
| + vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers |
| + vmovdqu64 160+8($ctx),${S1}{%k1}{z} |
| + vmovdqu64 64+8($ctx),${R0}{%k1}{z} |
| + vmovdqu64 96+8($ctx),${R1}{%k1}{z} |
| + |
| +.Lblocks_vpmadd52_2x_key_loaded: |
| + vmovdqu64 16*0($inp),$T2 # load data |
| + vpxorq $T3,$T3,$T3 |
| + lea 16*2($inp),$inp |
| + |
| + vpunpcklqdq $T3,$T2,$T1 # transpose data |
| + vpunpckhqdq $T3,$T2,$T3 |
| + |
| + # at this point 64-bit lanes are ordered as x-1-x-0 |
| + |
| + vpsrlq \$24,$T3,$T2 # splat the data |
| + vporq $PAD,$T2,$T2 |
| + vpaddq $T2,$H2,$H2 # accumulate input |
| + vpandq $mask44,$T1,$T0 |
| + vpsrlq \$44,$T1,$T1 |
| + vpsllq \$20,$T3,$T3 |
| + vporq $T3,$T1,$T1 |
| + vpandq $mask44,$T1,$T1 |
| + |
| + jmp .Ltail_vpmadd52_2x |
| + ud2 |
| + |
| +.align 32 |
| +.Loop_vpmadd52_4x: |
| + #vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $T0,$H0,$H0 |
| + vpaddq $T1,$H1,$H1 |
| + |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $H2,$S1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $H2,$S1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $H2,$S2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $H2,$S2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $H2,$R0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $H2,$R0,$D2hi |
| + |
| + vmovdqu64 16*0($inp),$T2 # load data |
| + vmovdqu64 16*2($inp),$T3 |
| + lea 16*4($inp),$inp |
| + vpmadd52luq $H0,$R0,$D0lo |
| + vpmadd52huq $H0,$R0,$D0hi |
| + vpmadd52luq $H0,$R1,$D1lo |
| + vpmadd52huq $H0,$R1,$D1hi |
| + vpmadd52luq $H0,$R2,$D2lo |
| + vpmadd52huq $H0,$R2,$D2hi |
| + |
| + vpunpcklqdq $T3,$T2,$T1 # transpose data |
| + vpunpckhqdq $T3,$T2,$T3 |
| + vpmadd52luq $H1,$S2,$D0lo |
| + vpmadd52huq $H1,$S2,$D0hi |
| + vpmadd52luq $H1,$R0,$D1lo |
| + vpmadd52huq $H1,$R0,$D1hi |
| + vpmadd52luq $H1,$R1,$D2lo |
| + vpmadd52huq $H1,$R1,$D2hi |
| + |
| + ################################################################ |
| + # partial reduction (interleaved with data splat) |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$H0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpsrlq \$24,$T3,$T2 |
| + vporq $PAD,$T2,$T2 |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$H1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpandq $mask44,$T1,$T0 |
| + vpsrlq \$44,$T1,$T1 |
| + vpsllq \$20,$T3,$T3 |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$H2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $D2hi,$H0,$H0 |
| + vpsllq \$2,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + vporq $T3,$T1,$T1 |
| + vpandq $mask44,$T1,$T1 |
| + |
| + vpsrlq \$44,$H0,$tmp # additional step |
| + vpandq $mask44,$H0,$H0 |
| + |
| + vpaddq $tmp,$H1,$H1 |
| + |
| + sub \$4,$len # len-=64 |
| + jnz .Loop_vpmadd52_4x |
| + |
| +.Ltail_vpmadd52_4x: |
| + vmovdqu64 128($ctx),$R2 # load all key powers |
| + vmovdqu64 160($ctx),$S1 |
| + vmovdqu64 64($ctx),$R0 |
| + vmovdqu64 96($ctx),$R1 |
| + |
| +.Ltail_vpmadd52_2x: |
| + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 |
| + vpaddq $R2,$S2,$S2 |
| + vpsllq \$2,$S2,$S2 |
| + |
| + #vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $T0,$H0,$H0 |
| + vpaddq $T1,$H1,$H1 |
| + |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $H2,$S1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $H2,$S1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $H2,$S2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $H2,$S2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $H2,$R0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $H2,$R0,$D2hi |
| + |
| + vpmadd52luq $H0,$R0,$D0lo |
| + vpmadd52huq $H0,$R0,$D0hi |
| + vpmadd52luq $H0,$R1,$D1lo |
| + vpmadd52huq $H0,$R1,$D1hi |
| + vpmadd52luq $H0,$R2,$D2lo |
| + vpmadd52huq $H0,$R2,$D2hi |
| + |
| + vpmadd52luq $H1,$S2,$D0lo |
| + vpmadd52huq $H1,$S2,$D0hi |
| + vpmadd52luq $H1,$R0,$D1lo |
| + vpmadd52huq $H1,$R0,$D1hi |
| + vpmadd52luq $H1,$R1,$D2lo |
| + vpmadd52huq $H1,$R1,$D2hi |
| + |
| + ################################################################ |
| + # horizontal addition |
| + |
| + mov \$1,%eax |
| + kmovw %eax,%k1 # mask keeping only the lowest qword of the final sums |
| + vpsrldq \$8,$D0lo,$T0 |
| + vpsrldq \$8,$D0hi,$H0 |
| + vpsrldq \$8,$D1lo,$T1 |
| + vpsrldq \$8,$D1hi,$H1 |
| + vpaddq $T0,$D0lo,$D0lo |
| + vpaddq $H0,$D0hi,$D0hi |
| + vpsrldq \$8,$D2lo,$T2 |
| + vpsrldq \$8,$D2hi,$H2 |
| + vpaddq $T1,$D1lo,$D1lo |
| + vpaddq $H1,$D1hi,$D1hi |
| + vpermq \$0x2,$D0lo,$T0 |
| + vpermq \$0x2,$D0hi,$H0 |
| + vpaddq $T2,$D2lo,$D2lo |
| + vpaddq $H2,$D2hi,$D2hi |
| + |
| + vpermq \$0x2,$D1lo,$T1 |
| + vpermq \$0x2,$D1hi,$H1 |
| + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} |
| + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} |
| + vpermq \$0x2,$D2lo,$T2 |
| + vpermq \$0x2,$D2hi,$H2 |
| + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} |
| + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} |
| + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} |
| + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} |
| + |
| + ################################################################ |
| + # partial reduction |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$H0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$H1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$H2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + vpsllq \$2,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + |
| + vpsrlq \$44,$H0,$tmp # additional step |
| + vpandq $mask44,$H0,$H0 |
| + |
| + vpaddq $tmp,$H1,$H1 |
| + # at this point $len is |
| + # either 4*n+2 or 0... |
| + sub \$2,$len # len-=32 |
| + ja .Lblocks_vpmadd52_4x_do |
| + |
| + vmovq %x#$H0,0($ctx) # store hash value |
| + vmovq %x#$H1,8($ctx) |
| + vmovq %x#$H2,16($ctx) |
| + vzeroall |
| + |
| +.Lno_data_vpmadd52_4x: |
| + ret |
| +.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x |
| +___ |
| +} |
| +{ |
| +######################################################################## |
| +# As implied by its name, the 8x subroutine processes 8 blocks in parallel... |
| +# This is an intermediate version, as it's used only in cases when input |
| +# length is either 8*n, 8*n+1 or 8*n+2... |
| + |
| +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); |
| +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); |
| +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); |
| +my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10)); |
| + |
| +$code.=<<___; |
| +.type poly1305_blocks_vpmadd52_8x,\@function,4 |
| +.align 32 |
| +poly1305_blocks_vpmadd52_8x: |
| + shr \$4,$len # byte count / 16 |
| + jz .Lno_data_vpmadd52_8x # too short |
| + |
| + shl \$40,$padbit |
| + mov 64($ctx),%r8 # peek on power of the key |
| + |
| + vmovdqa64 .Lx_mask44(%rip),$mask44 |
| + vmovdqa64 .Lx_mask42(%rip),$mask42 |
| + |
| + test %r8,%r8 # is power value impossible? |
| + js .Linit_vpmadd52 # if it is, then init R[4] |
| + |
| + vmovq 0($ctx),%x#$H0 # load current hash value |
| + vmovq 8($ctx),%x#$H1 |
| + vmovq 16($ctx),%x#$H2 |
| + |
| +.Lblocks_vpmadd52_8x: |
| + ################################################################ |
| + # first we calculate more key powers |
| + |
| + vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers |
| + vmovdqu64 160($ctx),$S1 |
| + vmovdqu64 64($ctx),$R0 |
| + vmovdqu64 96($ctx),$R1 |
| + |
| + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 |
| + vpaddq $R2,$S2,$S2 |
| + vpsllq \$2,$S2,$S2 |
| + |
| + vpbroadcastq %x#$R2,$RR2 # broadcast 4th power |
| + vpbroadcastq %x#$R0,$RR0 |
| + vpbroadcastq %x#$R1,$RR1 |
| + |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $RR2,$S1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $RR2,$S1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $RR2,$S2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $RR2,$S2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $RR2,$R0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $RR2,$R0,$D2hi |
| + |
| + vpmadd52luq $RR0,$R0,$D0lo |
| + vpmadd52huq $RR0,$R0,$D0hi |
| + vpmadd52luq $RR0,$R1,$D1lo |
| + vpmadd52huq $RR0,$R1,$D1hi |
| + vpmadd52luq $RR0,$R2,$D2lo |
| + vpmadd52huq $RR0,$R2,$D2hi |
| + |
| + vpmadd52luq $RR1,$S2,$D0lo |
| + vpmadd52huq $RR1,$S2,$D0hi |
| + vpmadd52luq $RR1,$R0,$D1lo |
| + vpmadd52huq $RR1,$R0,$D1hi |
| + vpmadd52luq $RR1,$R1,$D2lo |
| + vpmadd52huq $RR1,$R1,$D2hi |
| + |
| + ################################################################ |
| + # partial reduction |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$RR0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$RR1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$RR2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$RR0,$RR0 |
| + vpsllq \$2,$D2hi,$D2hi # *4, so the two adds contribute *5 in total |
| + |
| + vpaddq $D2hi,$RR0,$RR0 |
| + |
| + vpsrlq \$44,$RR0,$tmp # additional step |
| + vpandq $mask44,$RR0,$RR0 |
| + |
| + vpaddq $tmp,$RR1,$RR1 |
| + |
| + ################################################################ |
| + # At this point Rx holds 1324 powers, RRx - 5768, and the goal |
| + # is 15263748, which reflects how data is loaded... |
| + |
| + vpunpcklqdq $R2,$RR2,$T2 # 3748 |
| + vpunpckhqdq $R2,$RR2,$R2 # 1526 |
| + vpunpcklqdq $R0,$RR0,$T0 |
| + vpunpckhqdq $R0,$RR0,$R0 |
| + vpunpcklqdq $R1,$RR1,$T1 |
| + vpunpckhqdq $R1,$RR1,$R1 |
| +___ |
| +######## switch to %zmm |
| +map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); |
| +map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); |
| +map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); |
| +map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2); |
| + |
| +$code.=<<___; |
| + vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748 |
| + vshufi64x2 \$0x44,$R0,$T0,$RR0 |
| + vshufi64x2 \$0x44,$R1,$T1,$RR1 |
| + |
| + vmovdqu64 16*0($inp),$T2 # load data |
| + vmovdqu64 16*4($inp),$T3 |
| + lea 16*8($inp),$inp |
| + |
| + vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4 |
| + vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4 |
| + vpaddq $RR2,$SS2,$SS2 |
| + vpaddq $RR1,$SS1,$SS1 |
| + vpsllq \$2,$SS2,$SS2 |
| + vpsllq \$2,$SS1,$SS1 |
| + |
| + vpbroadcastq $padbit,$PAD |
| + vpbroadcastq %x#$mask44,$mask44 # widen the masks to all eight qwords |
| + vpbroadcastq %x#$mask42,$mask42 |
| + |
| + vpbroadcastq %x#$SS1,$S1 # broadcast 8th power |
| + vpbroadcastq %x#$SS2,$S2 |
| + vpbroadcastq %x#$RR0,$R0 |
| + vpbroadcastq %x#$RR1,$R1 |
| + vpbroadcastq %x#$RR2,$R2 |
| + |
| + vpunpcklqdq $T3,$T2,$T1 # transpose data |
| + vpunpckhqdq $T3,$T2,$T3 |
| + |
| + # at this point 64-bit lanes are ordered as 73625140 |
| + |
| + vpsrlq \$24,$T3,$T2 # splat the data |
| + vporq $PAD,$T2,$T2 |
| + vpaddq $T2,$H2,$H2 # accumulate input |
| + vpandq $mask44,$T1,$T0 |
| + vpsrlq \$44,$T1,$T1 |
| + vpsllq \$20,$T3,$T3 |
| + vporq $T3,$T1,$T1 |
| + vpandq $mask44,$T1,$T1 |
| + |
| + sub \$8,$len |
| + jz .Ltail_vpmadd52_8x |
| + jmp .Loop_vpmadd52_8x |
| + |
| +.align 32 |
| +.Loop_vpmadd52_8x: |
| + #vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $T0,$H0,$H0 |
| + vpaddq $T1,$H1,$H1 |
| + |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $H2,$S1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $H2,$S1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $H2,$S2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $H2,$S2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $H2,$R0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $H2,$R0,$D2hi |
| + |
| + vmovdqu64 16*0($inp),$T2 # load data |
| + vmovdqu64 16*4($inp),$T3 |
| + lea 16*8($inp),$inp |
| + vpmadd52luq $H0,$R0,$D0lo |
| + vpmadd52huq $H0,$R0,$D0hi |
| + vpmadd52luq $H0,$R1,$D1lo |
| + vpmadd52huq $H0,$R1,$D1hi |
| + vpmadd52luq $H0,$R2,$D2lo |
| + vpmadd52huq $H0,$R2,$D2hi |
| + |
| + vpunpcklqdq $T3,$T2,$T1 # transpose data |
| + vpunpckhqdq $T3,$T2,$T3 |
| + vpmadd52luq $H1,$S2,$D0lo |
| + vpmadd52huq $H1,$S2,$D0hi |
| + vpmadd52luq $H1,$R0,$D1lo |
| + vpmadd52huq $H1,$R0,$D1hi |
| + vpmadd52luq $H1,$R1,$D2lo |
| + vpmadd52huq $H1,$R1,$D2hi |
| + |
| + ################################################################ |
| + # partial reduction (interleaved with data splat) |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$H0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpsrlq \$24,$T3,$T2 |
| + vporq $PAD,$T2,$T2 |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$H1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpandq $mask44,$T1,$T0 |
| + vpsrlq \$44,$T1,$T1 |
| + vpsllq \$20,$T3,$T3 |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$H2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $D2hi,$H0,$H0 |
| + vpsllq \$2,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + vporq $T3,$T1,$T1 |
| + vpandq $mask44,$T1,$T1 |
| + |
| + vpsrlq \$44,$H0,$tmp # additional step |
| + vpandq $mask44,$H0,$H0 |
| + |
| + vpaddq $tmp,$H1,$H1 |
| + |
| + sub \$8,$len # len-=128 |
| + jnz .Loop_vpmadd52_8x |
| + |
| +.Ltail_vpmadd52_8x: |
| + #vpaddq $T2,$H2,$H2 # accumulate input |
| + vpaddq $T0,$H0,$H0 |
| + vpaddq $T1,$H1,$H1 |
| + |
| + vpxorq $D0lo,$D0lo,$D0lo |
| + vpmadd52luq $H2,$SS1,$D0lo |
| + vpxorq $D0hi,$D0hi,$D0hi |
| + vpmadd52huq $H2,$SS1,$D0hi |
| + vpxorq $D1lo,$D1lo,$D1lo |
| + vpmadd52luq $H2,$SS2,$D1lo |
| + vpxorq $D1hi,$D1hi,$D1hi |
| + vpmadd52huq $H2,$SS2,$D1hi |
| + vpxorq $D2lo,$D2lo,$D2lo |
| + vpmadd52luq $H2,$RR0,$D2lo |
| + vpxorq $D2hi,$D2hi,$D2hi |
| + vpmadd52huq $H2,$RR0,$D2hi |
| + |
| + vpmadd52luq $H0,$RR0,$D0lo |
| + vpmadd52huq $H0,$RR0,$D0hi |
| + vpmadd52luq $H0,$RR1,$D1lo |
| + vpmadd52huq $H0,$RR1,$D1hi |
| + vpmadd52luq $H0,$RR2,$D2lo |
| + vpmadd52huq $H0,$RR2,$D2hi |
| + |
| + vpmadd52luq $H1,$SS2,$D0lo |
| + vpmadd52huq $H1,$SS2,$D0hi |
| + vpmadd52luq $H1,$RR0,$D1lo |
| + vpmadd52huq $H1,$RR0,$D1hi |
| + vpmadd52luq $H1,$RR1,$D2lo |
| + vpmadd52huq $H1,$RR1,$D2hi |
| + |
| + ################################################################ |
| + # horizontal addition |
| + |
| + mov \$1,%eax |
| + kmovw %eax,%k1 # mask keeping only the lowest qword of the final sums |
| + vpsrldq \$8,$D0lo,$T0 |
| + vpsrldq \$8,$D0hi,$H0 |
| + vpsrldq \$8,$D1lo,$T1 |
| + vpsrldq \$8,$D1hi,$H1 |
| + vpaddq $T0,$D0lo,$D0lo |
| + vpaddq $H0,$D0hi,$D0hi |
| + vpsrldq \$8,$D2lo,$T2 |
| + vpsrldq \$8,$D2hi,$H2 |
| + vpaddq $T1,$D1lo,$D1lo |
| + vpaddq $H1,$D1hi,$D1hi |
| + vpermq \$0x2,$D0lo,$T0 |
| + vpermq \$0x2,$D0hi,$H0 |
| + vpaddq $T2,$D2lo,$D2lo |
| + vpaddq $H2,$D2hi,$D2hi |
| + |
| + vpermq \$0x2,$D1lo,$T1 |
| + vpermq \$0x2,$D1hi,$H1 |
| + vpaddq $T0,$D0lo,$D0lo |
| + vpaddq $H0,$D0hi,$D0hi |
| + vpermq \$0x2,$D2lo,$T2 |
| + vpermq \$0x2,$D2hi,$H2 |
| + vpaddq $T1,$D1lo,$D1lo |
| + vpaddq $H1,$D1hi,$D1hi |
| + vextracti64x4 \$1,$D0lo,%y#$T0 |
| + vextracti64x4 \$1,$D0hi,%y#$H0 |
| + vpaddq $T2,$D2lo,$D2lo |
| + vpaddq $H2,$D2hi,$D2hi |
| + |
| + vextracti64x4 \$1,$D1lo,%y#$T1 |
| + vextracti64x4 \$1,$D1hi,%y#$H1 |
| + vextracti64x4 \$1,$D2lo,%y#$T2 |
| + vextracti64x4 \$1,$D2hi,%y#$H2 |
| +___ |
| +######## switch back to %ymm |
| +map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); |
| +map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); |
| +map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); |
| + |
| +$code.=<<___; |
| + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} |
| + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} |
| + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} |
| + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} |
| + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} |
| + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} |
| + |
| + ################################################################ |
| + # partial reduction |
| + vpsrlq \$44,$D0lo,$tmp |
| + vpsllq \$8,$D0hi,$D0hi |
| + vpandq $mask44,$D0lo,$H0 |
| + vpaddq $tmp,$D0hi,$D0hi |
| + |
| + vpaddq $D0hi,$D1lo,$D1lo |
| + |
| + vpsrlq \$44,$D1lo,$tmp |
| + vpsllq \$8,$D1hi,$D1hi |
| + vpandq $mask44,$D1lo,$H1 |
| + vpaddq $tmp,$D1hi,$D1hi |
| + |
| + vpaddq $D1hi,$D2lo,$D2lo |
| + |
| + vpsrlq \$42,$D2lo,$tmp |
| + vpsllq \$10,$D2hi,$D2hi |
| + vpandq $mask42,$D2lo,$H2 |
| + vpaddq $tmp,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + vpsllq \$2,$D2hi,$D2hi |
| + |
| + vpaddq $D2hi,$H0,$H0 |
| + |
| + vpsrlq \$44,$H0,$tmp # additional step |
| + vpandq $mask44,$H0,$H0 |
| + |
| + vpaddq $tmp,$H1,$H1 |
| + |
| + ################################################################ |
| + |
| + vmovq %x#$H0,0($ctx) # store hash value |
| + vmovq %x#$H1,8($ctx) |
| + vmovq %x#$H2,16($ctx) |
| + vzeroall |
| + |
| +.Lno_data_vpmadd52_8x: |
| + ret |
| +.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x |
| +___ |
| +} |
| +$code.=<<___; |
| +.type poly1305_emit_base2_44,\@function,3 |
| +.align 32 |
| +poly1305_emit_base2_44: # final reduction, add nonce, write 16 bytes |
| + mov 0($ctx),%r8 # load hash value |
| + mov 8($ctx),%r9 |
| + mov 16($ctx),%r10 |
| + |
| + mov %r9,%rax # repack the three limbs into 64-bit words |
| + shr \$20,%r9 |
| + shl \$44,%rax # low 20 bits of limb 1 go to the top of word 0 |
| + mov %r10,%rcx |
| + shr \$40,%r10 |
| + shl \$24,%rcx # low 40 bits of limb 2 go to the top of word 1 |
| + |
| + add %rax,%r8 |
| + adc %rcx,%r9 |
| + adc \$0,%r10 |
| + |
| + mov %r8,%rax # rax:rcx keep the unmodified low 128 bits |
| + add \$5,%r8 # compare to modulus |
| + mov %r9,%rcx |
| + adc \$0,%r9 |
| + adc \$0,%r10 |
| + shr \$2,%r10 # did 130-bit value overflow? |
| + cmovnz %r8,%rax # if so, take the value with 5 added |
| + cmovnz %r9,%rcx |
| + |
| + add 0($nonce),%rax # accumulate nonce |
| + adc 8($nonce),%rcx |
| + mov %rax,0($mac) # write result |
| + mov %rcx,8($mac) |
| + |
| + ret |
| +.size poly1305_emit_base2_44,.-poly1305_emit_base2_44 |
| +___ |
| +} } } |
| +$code.=<<___; |
| +.align 64 |
| +.Lconst: |
| +.Lmask24: |
| +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 |
| +.L129: |
| +.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 |
| +.Lmask26: |
| +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 |
| +.Lpermd_avx2: |
| +.long 2,2,2,3,2,0,2,1 |
| +.Lpermd_avx512: |
| +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 |
| + |
| +.L2_44_inp_permd: |
| +.long 0,1,1,2,2,3,7,7 |
| +.L2_44_inp_shift: |
| +.quad 0,12,24,64 |
| +.L2_44_mask: |
| +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff |
| +.L2_44_shift_rgt: |
| +.quad 44,44,42,64 |
| +.L2_44_shift_lft: |
| +.quad 8,8,10,64 |
| + |
| +.align 64 |
| +.Lx_mask44: |
| +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| +.Lx_mask42: |
| +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| +___ |
| +} |
| +$code.=<<___; |
| +.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" |
| +.align 16 |
| +___ |
| + |
| +{ # chacha20-poly1305 helpers |
| +my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order |
| + ("%rdi","%rsi","%rdx","%rcx"); # Unix order |
| +$code.=<<___; |
| +.globl xor128_encrypt_n_pad |
| +.type xor128_encrypt_n_pad,\@abi-omnipotent |
| +.align 16 |
| +xor128_encrypt_n_pad: |
| + sub $otp,$inp # inp becomes an offset relative to otp |
| + sub $otp,$out # out becomes an offset relative to otp |
| + mov $len,%r10 # put len aside |
| + shr \$4,$len # len / 16 |
| + jz .Ltail_enc # no full block (NOTE(review): len 0 also lands here, confirm callers never pass it) |
| + nop |
| +.Loop_enc_xmm: |
| + movdqu ($inp,$otp),%xmm0 # load 16 input bytes, unaligned |
| + pxor ($otp),%xmm0 # xor with the 16 bytes at otp |
| + movdqu %xmm0,($out,$otp) # write result to out |
| + movdqa %xmm0,($otp) # and back over otp, aligned store |
| + lea 16($otp),$otp # advance to the next block |
| + dec $len |
| + jnz .Loop_enc_xmm |
| + |
| + and \$15,%r10 # len % 16 |
| + jz .Ldone_enc |
| + |
| +.Ltail_enc: |
| + mov \$16,$len |
| + sub %r10,$len # 16 - len % 16 bytes of zero padding to write |
| + xor %eax,%eax |
| +.Loop_enc_byte: |
| + mov ($inp,$otp),%al # one input byte |
| + xor ($otp),%al # xor with the byte at otp |
| + mov %al,($out,$otp) # write result to out |
| + mov %al,($otp) # and back over otp |
| + lea 1($otp),$otp |
| + dec %r10 # counts down len % 16 |
| + jnz .Loop_enc_byte |
| + |
| + xor %eax,%eax # al = 0 for the padding bytes |
| +.Loop_enc_pad: |
| + mov %al,($otp) # zero-fill the rest of the 16-byte block |
| + lea 1($otp),$otp |
| + dec $len |
| + jnz .Loop_enc_pad |
| + |
| +.Ldone_enc: |
| + mov $otp,%rax # return the advanced otp pointer |
| + ret |
| +.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad |
| + |
| +.globl xor128_decrypt_n_pad |
| +.type xor128_decrypt_n_pad,\@abi-omnipotent |
| +.align 16 |
| +xor128_decrypt_n_pad: |
| + sub $otp,$inp # inp becomes an offset relative to otp |
| + sub $otp,$out # out becomes an offset relative to otp |
| + mov $len,%r10 # put len aside |
| + shr \$4,$len # len / 16 |
| + jz .Ltail_dec # no full block (NOTE(review): len 0 also lands here, confirm callers never pass it) |
| + nop |
| +.Loop_dec_xmm: |
| + movdqu ($inp,$otp),%xmm0 # load 16 input bytes, unaligned |
| + movdqa ($otp),%xmm1 # load the 16 bytes at otp, aligned |
| + pxor %xmm0,%xmm1 # xmm1 = input xor otp |
| + movdqu %xmm1,($out,$otp) # write result to out |
| + movdqa %xmm0,($otp) # input block (not the result) replaces otp |
| + lea 16($otp),$otp # advance to the next block |
| + dec $len |
| + jnz .Loop_dec_xmm |
| + |
| + pxor %xmm1,%xmm1 # clear the last output block from xmm1 |
| + and \$15,%r10 # len % 16 |
| + jz .Ldone_dec |
| + |
| +.Ltail_dec: |
| + mov \$16,$len |
| + sub %r10,$len # 16 - len % 16 bytes of zero padding to write |
| + xor %eax,%eax |
| + xor %r11,%r11 |
| +.Loop_dec_byte: |
| + mov ($inp,$otp),%r11b # one input byte |
| + mov ($otp),%al # the byte at otp |
| + xor %r11b,%al # al = input xor otp |
| + mov %al,($out,$otp) # write result to out |
| + mov %r11b,($otp) # input byte (not the result) replaces otp |
| + lea 1($otp),$otp |
| + dec %r10 # counts down len % 16 |
| + jnz .Loop_dec_byte |
| + |
| + xor %eax,%eax # al = 0 for the padding bytes |
| +.Loop_dec_pad: |
| + mov %al,($otp) # zero-fill the rest of the 16-byte block |
| + lea 1($otp),$otp |
| + dec $len |
| + jnz .Loop_dec_pad |
| + |
| +.Ldone_dec: |
| + mov $otp,%rax # return the advanced otp pointer |
| + ret |
| +.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad |
| +___ |
| +} |
| + |
| +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, |
| +# CONTEXT *context,DISPATCHER_CONTEXT *disp) |
| +if ($win64) { |
| +$rec="%rcx"; |
| +$frame="%rdx"; |
| +$context="%r8"; |
| +$disp="%r9"; |
| + |
| +$code.=<<___; |
| +.extern __imp_RtlVirtualUnwind |
| +.type se_handler,\@abi-omnipotent |
| +.align 16 |
| +se_handler: |
| + push %rsi |
| + push %rdi |
| + push %rbx |
| + push %rbp |
| + push %r12 |
| + push %r13 |
| + push %r14 |
| + push %r15 |
| + pushfq |
| + sub \$64,%rsp |
| + |
| + mov 120($context),%rax # pull context->Rax |
| + mov 248($context),%rbx # pull context->Rip |
| + |
| + mov 8($disp),%rsi # disp->ImageBase |
| + mov 56($disp),%r11 # disp->HandlerData |
| + |
| + mov 0(%r11),%r10d # HandlerData[0] |
| + lea (%rsi,%r10),%r10 # prologue label |
| + cmp %r10,%rbx # context->Rip<.Lprologue |
| + jb .Lcommon_seh_tail |
| + |
| + mov 152($context),%rax # pull context->Rsp |
| + |
| + mov 4(%r11),%r10d # HandlerData[1] |
| + lea (%rsi,%r10),%r10 # epilogue label |
| + cmp %r10,%rbx # context->Rip>=.Lepilogue |
| + jae .Lcommon_seh_tail |
| + |
| + lea 48(%rax),%rax # step over the six 8-byte slots read back below |
| + |
| + mov -8(%rax),%rbx |
| + mov -16(%rax),%rbp |
| + mov -24(%rax),%r12 |
| + mov -32(%rax),%r13 |
| + mov -40(%rax),%r14 |
| + mov -48(%rax),%r15 |
| + mov %rbx,144($context) # restore context->Rbx |
| + mov %rbp,160($context) # restore context->Rbp |
| + mov %r12,216($context) # restore context->R12 |
| + mov %r13,224($context) # restore context->R13 |
| + mov %r14,232($context) # restore context->R14 |
| + mov %r15,240($context) # restore context->R15 |
| + |
| + jmp .Lcommon_seh_tail |
| +.size se_handler,.-se_handler |
| + |
| +.type avx_handler,\@abi-omnipotent |
| +.align 16 |
| +avx_handler: |
| + push %rsi |
| + push %rdi |
| + push %rbx |
| + push %rbp |
| + push %r12 |
| + push %r13 |
| + push %r14 |
| + push %r15 |
| + pushfq |
| + sub \$64,%rsp |
| + |
| + mov 120($context),%rax # pull context->Rax |
| + mov 248($context),%rbx # pull context->Rip |
| + |
| + mov 8($disp),%rsi # disp->ImageBase |
| + mov 56($disp),%r11 # disp->HandlerData |
| + |
| + mov 0(%r11),%r10d # HandlerData[0] |
| + lea (%rsi,%r10),%r10 # prologue label |
| + cmp %r10,%rbx # context->Rip<prologue label |
| + jb .Lcommon_seh_tail |
| + |
| + mov 152($context),%rax # pull context->Rsp |
| + |
| + mov 4(%r11),%r10d # HandlerData[1] |
| + lea (%rsi,%r10),%r10 # epilogue label |
| + cmp %r10,%rbx # context->Rip>=epilogue label |
| + jae .Lcommon_seh_tail |
| + |
| + mov 208($context),%rax # pull context->R11 |
| + |
| + lea 0x50(%rax),%rsi |
| + lea 0xf8(%rax),%rax |
| + lea 512($context),%rdi # &context.Xmm6 |
| + mov \$20,%ecx |
| + .long 0xa548f3fc # cld; rep movsq |
| + |
| +.Lcommon_seh_tail: |
| + mov 8(%rax),%rdi |
| + mov 16(%rax),%rsi |
| + mov %rax,152($context) # restore context->Rsp |
| + mov %rsi,168($context) # restore context->Rsi |
| + mov %rdi,176($context) # restore context->Rdi |
| + |
| + mov 40($disp),%rdi # disp->ContextRecord |
| + mov $context,%rsi # context |
| + mov \$154,%ecx # sizeof(CONTEXT) |
| + .long 0xa548f3fc # cld; rep movsq |
| + |
| + mov $disp,%rsi |
| + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER |
| + mov 8(%rsi),%rdx # arg2, disp->ImageBase |
| + mov 0(%rsi),%r8 # arg3, disp->ControlPc |
| + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry |
| + mov 40(%rsi),%r10 # disp->ContextRecord |
| + lea 56(%rsi),%r11 # &disp->HandlerData |
| + lea 24(%rsi),%r12 # &disp->EstablisherFrame |
| + mov %r10,32(%rsp) # arg5 |
| + mov %r11,40(%rsp) # arg6 |
| + mov %r12,48(%rsp) # arg7 |
| + mov %rcx,56(%rsp) # arg8, (NULL) |
| + call *__imp_RtlVirtualUnwind(%rip) |
| + |
| + mov \$1,%eax # ExceptionContinueSearch |
| + add \$64,%rsp |
| + popfq |
| + pop %r15 |
| + pop %r14 |
| + pop %r13 |
| + pop %r12 |
| + pop %rbp |
| + pop %rbx |
| + pop %rdi |
| + pop %rsi |
| + ret |
| +.size avx_handler,.-avx_handler |
| + |
| +.section .pdata |
| +.align 4 |
| + .rva .LSEH_begin_poly1305_init |
| + .rva .LSEH_end_poly1305_init |
| + .rva .LSEH_info_poly1305_init |
| + |
| + .rva .LSEH_begin_poly1305_blocks |
| + .rva .LSEH_end_poly1305_blocks |
| + .rva .LSEH_info_poly1305_blocks |
| + |
| + .rva .LSEH_begin_poly1305_emit |
| + .rva .LSEH_end_poly1305_emit |
| + .rva .LSEH_info_poly1305_emit |
| +___ |
| +$code.=<<___ if ($avx); |
| + .rva .LSEH_begin_poly1305_blocks_avx |
| + .rva .Lbase2_64_avx |
| + .rva .LSEH_info_poly1305_blocks_avx_1 |
| + |
| + .rva .Lbase2_64_avx |
| + .rva .Leven_avx |
| + .rva .LSEH_info_poly1305_blocks_avx_2 |
| + |
| + .rva .Leven_avx |
| + .rva .LSEH_end_poly1305_blocks_avx |
| + .rva .LSEH_info_poly1305_blocks_avx_3 |
| + |
| + .rva .LSEH_begin_poly1305_emit_avx |
| + .rva .LSEH_end_poly1305_emit_avx |
| + .rva .LSEH_info_poly1305_emit_avx |
| +___ |
| +$code.=<<___ if ($avx>1); |
| + .rva .LSEH_begin_poly1305_blocks_avx2 |
| + .rva .Lbase2_64_avx2 |
| + .rva .LSEH_info_poly1305_blocks_avx2_1 |
| + |
| + .rva .Lbase2_64_avx2 |
| + .rva .Leven_avx2 |
| + .rva .LSEH_info_poly1305_blocks_avx2_2 |
| + |
| + .rva .Leven_avx2 |
| + .rva .LSEH_end_poly1305_blocks_avx2 |
| + .rva .LSEH_info_poly1305_blocks_avx2_3 |
| +___ |
| +$code.=<<___ if ($avx>2); |
| + .rva .LSEH_begin_poly1305_blocks_avx512 |
| + .rva .LSEH_end_poly1305_blocks_avx512 |
| + .rva .LSEH_info_poly1305_blocks_avx512 |
| +___ |
| +$code.=<<___; |
| +.section .xdata |
| +.align 8 |
| +.LSEH_info_poly1305_init: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init |
| + |
| +.LSEH_info_poly1305_blocks: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .Lblocks_body,.Lblocks_epilogue |
| + |
| +.LSEH_info_poly1305_emit: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit |
| +___ |
| +$code.=<<___ if ($avx); |
| +.LSEH_info_poly1305_blocks_avx_1: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[] |
| + |
| +.LSEH_info_poly1305_blocks_avx_2: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[] |
| + |
| +.LSEH_info_poly1305_blocks_avx_3: |
| + .byte 9,0,0,0 |
| + .rva avx_handler |
| + .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[] |
| + |
| +.LSEH_info_poly1305_emit_avx: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx |
| +___ |
| +$code.=<<___ if ($avx>1); |
| +.LSEH_info_poly1305_blocks_avx2_1: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[] |
| + |
| +.LSEH_info_poly1305_blocks_avx2_2: |
| + .byte 9,0,0,0 |
| + .rva se_handler |
| + .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[] |
| + |
| +.LSEH_info_poly1305_blocks_avx2_3: |
| + .byte 9,0,0,0 |
| + .rva avx_handler |
| + .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[] |
| +___ |
| +$code.=<<___ if ($avx>2); |
| +.LSEH_info_poly1305_blocks_avx512: |
| + .byte 9,0,0,0 |
| + .rva avx_handler |
| + .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[] |
| +___ |
| +} |
| + |
| +foreach (split('\n',$code)) { |
| + s/\`([^\`]*)\`/eval($1)/ge; |
| + s/%r([a-z]+)#d/%e$1/g; |
| + s/%r([0-9]+)#d/%r$1d/g; |
| + s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; |
| + |
| + print $_,"\n"; |
| +} |
| +close STDOUT; |
| -- |
| 2.18.2 |
| |
| |
| From cc9daa3df108633221ac63d297bb8cd45057d3eb Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 5 Jan 2020 22:40:48 -0500 |
| Subject: [PATCH 043/100] crypto: x86/poly1305 - wire up faster implementations |
| for kernel |
| |
| commit d7d7b853566254648df59f7ea27ea05952a6cfa8 upstream. |
| |
| These x86_64 vectorized implementations support AVX, AVX-2, and AVX512F. |
| The AVX-512F implementation is disabled on Skylake, due to throttling, |
| but it is quite fast on >= Cannonlake. |
| |
| On the left are cycle counts on a Core i7 6700HQ using the AVX-2 |
| codepath, comparing this implementation ("new") to the implementation in |
| the current crypto api ("old"). On the right are benchmarks on a Xeon |
| Gold 5120 using the AVX-512 codepath. The new implementation is faster |
| on all benchmarks. |
| |
| AVX-2 AVX-512 |
| --------- ----------- |
| |
| size old new size old new |
| ---- ---- ---- ---- ---- ---- |
| 0 70 68 0 74 70 |
| 16 92 90 16 96 92 |
| 32 134 104 32 136 106 |
| 48 172 120 48 184 124 |
| 64 218 136 64 218 138 |
| 80 254 158 80 260 160 |
| 96 298 174 96 300 176 |
| 112 342 192 112 342 194 |
| 128 388 212 128 384 212 |
| 144 428 228 144 420 226 |
| 160 466 246 160 464 248 |
| 176 510 264 176 504 264 |
| 192 550 282 192 544 282 |
| 208 594 302 208 582 300 |
| 224 628 316 224 624 318 |
| 240 676 334 240 662 338 |
| 256 716 354 256 708 358 |
| 272 764 374 272 748 372 |
| 288 802 352 288 788 358 |
| 304 420 366 304 422 370 |
| 320 428 360 320 432 364 |
| 336 484 378 336 486 380 |
| 352 426 384 352 434 390 |
| 368 478 400 368 480 408 |
| 384 488 394 384 490 398 |
| 400 542 408 400 542 412 |
| 416 486 416 416 492 426 |
| 432 534 430 432 538 436 |
| 448 544 422 448 546 432 |
| 464 600 438 464 600 448 |
| 480 540 448 480 548 456 |
| 496 594 464 496 594 476 |
| 512 602 456 512 606 470 |
| 528 656 476 528 656 480 |
| 544 600 480 544 606 498 |
| 560 650 494 560 652 512 |
| 576 664 490 576 662 508 |
| 592 714 508 592 716 522 |
| 608 656 514 608 664 538 |
| 624 708 532 624 710 552 |
| 640 716 524 640 720 516 |
| 656 770 536 656 772 526 |
| 672 716 548 672 722 544 |
| 688 770 562 688 768 556 |
| 704 774 552 704 778 556 |
| 720 826 568 720 832 568 |
| 736 768 574 736 780 584 |
| 752 822 592 752 826 600 |
| 768 830 584 768 836 560 |
| 784 884 602 784 888 572 |
| 800 828 610 800 838 588 |
| 816 884 628 816 884 604 |
| 832 888 618 832 894 598 |
| 848 942 632 848 946 612 |
| 864 884 644 864 896 628 |
| 880 936 660 880 942 644 |
| 896 948 652 896 952 608 |
| 912 1000 664 912 1004 616 |
| 928 942 676 928 954 634 |
| 944 994 690 944 1000 646 |
| 960 1002 680 960 1008 646 |
| 976 1054 694 976 1062 658 |
| 992 1002 706 992 1012 674 |
| 1008 1052 720 1008 1058 690 |
| |
| This commit wires in the prior implementation from Andy, and makes the |
| following changes to be suitable for kernel land. |
| |
| - Some cosmetic and structural changes, like renaming labels to |
| .Lname, constants, and other Linux conventions, as well as making |
| the code easy for us to maintain moving forward. |
| |
| - CPU feature checking is done in C by the glue code. |
| |
| - We avoid jumping into the middle of functions, to appease objtool, |
| and instead parameterize shared code. |
| |
| - We maintain frame pointers so that stack traces make sense. |
| |
| - We remove the dependency on the perl xlate code, which transforms |
| the output into things that assemblers we don't care about use. |
| |
| Importantly, none of our changes affect the arithmetic or core code, but |
| just involve the differing environment of kernel space. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Samuel Neves <sneves@dei.uc.pt> |
| Co-developed-by: Samuel Neves <sneves@dei.uc.pt> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/.gitignore | 1 + |
| arch/x86/crypto/Makefile | 11 +- |
| arch/x86/crypto/poly1305-avx2-x86_64.S | 390 ---------- |
| arch/x86/crypto/poly1305-sse2-x86_64.S | 590 --------------- |
| arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 682 ++++++++++-------- |
| arch/x86/crypto/poly1305_glue.c | 473 +++++------- |
| lib/crypto/Kconfig | 2 +- |
| 7 files changed, 572 insertions(+), 1577 deletions(-) |
| create mode 100644 arch/x86/crypto/.gitignore |
| delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S |
| delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S |
| |
| diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore |
| new file mode 100644 |
| index 000000000000..c406ea6571fa |
| |
| |
| @@ -0,0 +1 @@ |
| +poly1305-x86_64.S |
| diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile |
| index 958440eae27e..b69e00bf20b8 100644 |
| |
| |
| @@ -73,6 +73,10 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o |
| |
| nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o |
| blake2s-x86_64-y := blake2s-core.o blake2s-glue.o |
| +poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o |
| +ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),) |
| +targets += poly1305-x86_64-cryptogams.S |
| +endif |
| |
| ifeq ($(avx_supported),yes) |
| camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ |
| @@ -101,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o |
| aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o |
| ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
| sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
| -poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o |
| ifeq ($(avx2_supported),yes) |
| sha1-ssse3-y += sha1_avx2_x86_64_asm.o |
| -poly1305-x86_64-y += poly1305-avx2-x86_64.o |
| endif |
| ifeq ($(sha1_ni_supported),yes) |
| sha1-ssse3-y += sha1_ni_asm.o |
| @@ -118,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o |
| endif |
| sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o |
| crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o |
| + |
| +quiet_cmd_perlasm = PERLASM $@ |
| + cmd_perlasm = $(PERL) $< > $@ |
| +$(obj)/%.S: $(src)/%.pl FORCE |
| + $(call if_changed,perlasm) |
| diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S |
| deleted file mode 100644 |
| index 1688fb551070..000000000000 |
| |
| |
| @@ -1,390 +0,0 @@ |
| -/* SPDX-License-Identifier: GPL-2.0-or-later */ |
| -/* |
| - * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions |
| - * |
| - * Copyright (C) 2015 Martin Willi |
| - */ |
| - |
| -#include <linux/linkage.h> |
| - |
| -.section .rodata.cst32.ANMASK, "aM", @progbits, 32 |
| -.align 32 |
| -ANMASK: .octa 0x0000000003ffffff0000000003ffffff |
| - .octa 0x0000000003ffffff0000000003ffffff |
| - |
| -.section .rodata.cst32.ORMASK, "aM", @progbits, 32 |
| -.align 32 |
| -ORMASK: .octa 0x00000000010000000000000001000000 |
| - .octa 0x00000000010000000000000001000000 |
| - |
| -.text |
| - |
| -#define h0 0x00(%rdi) |
| -#define h1 0x04(%rdi) |
| -#define h2 0x08(%rdi) |
| -#define h3 0x0c(%rdi) |
| -#define h4 0x10(%rdi) |
| -#define r0 0x00(%rdx) |
| -#define r1 0x04(%rdx) |
| -#define r2 0x08(%rdx) |
| -#define r3 0x0c(%rdx) |
| -#define r4 0x10(%rdx) |
| -#define u0 0x00(%r8) |
| -#define u1 0x04(%r8) |
| -#define u2 0x08(%r8) |
| -#define u3 0x0c(%r8) |
| -#define u4 0x10(%r8) |
| -#define w0 0x18(%r8) |
| -#define w1 0x1c(%r8) |
| -#define w2 0x20(%r8) |
| -#define w3 0x24(%r8) |
| -#define w4 0x28(%r8) |
| -#define y0 0x30(%r8) |
| -#define y1 0x34(%r8) |
| -#define y2 0x38(%r8) |
| -#define y3 0x3c(%r8) |
| -#define y4 0x40(%r8) |
| -#define m %rsi |
| -#define hc0 %ymm0 |
| -#define hc1 %ymm1 |
| -#define hc2 %ymm2 |
| -#define hc3 %ymm3 |
| -#define hc4 %ymm4 |
| -#define hc0x %xmm0 |
| -#define hc1x %xmm1 |
| -#define hc2x %xmm2 |
| -#define hc3x %xmm3 |
| -#define hc4x %xmm4 |
| -#define t1 %ymm5 |
| -#define t2 %ymm6 |
| -#define t1x %xmm5 |
| -#define t2x %xmm6 |
| -#define ruwy0 %ymm7 |
| -#define ruwy1 %ymm8 |
| -#define ruwy2 %ymm9 |
| -#define ruwy3 %ymm10 |
| -#define ruwy4 %ymm11 |
| -#define ruwy0x %xmm7 |
| -#define ruwy1x %xmm8 |
| -#define ruwy2x %xmm9 |
| -#define ruwy3x %xmm10 |
| -#define ruwy4x %xmm11 |
| -#define svxz1 %ymm12 |
| -#define svxz2 %ymm13 |
| -#define svxz3 %ymm14 |
| -#define svxz4 %ymm15 |
| -#define d0 %r9 |
| -#define d1 %r10 |
| -#define d2 %r11 |
| -#define d3 %r12 |
| -#define d4 %r13 |
| - |
| -ENTRY(poly1305_4block_avx2) |
| - # %rdi: Accumulator h[5] |
| - # %rsi: 64 byte input block m |
| - # %rdx: Poly1305 key r[5] |
| - # %rcx: Quadblock count |
| - # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5], |
| - |
| - # This four-block variant uses loop unrolled block processing. It |
| - # requires 4 Poly1305 keys: r, r^2, r^3 and r^4: |
| - # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r |
| - |
| - vzeroupper |
| - push %rbx |
| - push %r12 |
| - push %r13 |
| - |
| - # combine r0,u0,w0,y0 |
| - vmovd y0,ruwy0x |
| - vmovd w0,t1x |
| - vpunpcklqdq t1,ruwy0,ruwy0 |
| - vmovd u0,t1x |
| - vmovd r0,t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,ruwy0,ruwy0 |
| - |
| - # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5 |
| - vmovd y1,ruwy1x |
| - vmovd w1,t1x |
| - vpunpcklqdq t1,ruwy1,ruwy1 |
| - vmovd u1,t1x |
| - vmovd r1,t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,ruwy1,ruwy1 |
| - vpslld $2,ruwy1,svxz1 |
| - vpaddd ruwy1,svxz1,svxz1 |
| - |
| - # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5 |
| - vmovd y2,ruwy2x |
| - vmovd w2,t1x |
| - vpunpcklqdq t1,ruwy2,ruwy2 |
| - vmovd u2,t1x |
| - vmovd r2,t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,ruwy2,ruwy2 |
| - vpslld $2,ruwy2,svxz2 |
| - vpaddd ruwy2,svxz2,svxz2 |
| - |
| - # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5 |
| - vmovd y3,ruwy3x |
| - vmovd w3,t1x |
| - vpunpcklqdq t1,ruwy3,ruwy3 |
| - vmovd u3,t1x |
| - vmovd r3,t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,ruwy3,ruwy3 |
| - vpslld $2,ruwy3,svxz3 |
| - vpaddd ruwy3,svxz3,svxz3 |
| - |
| - # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5 |
| - vmovd y4,ruwy4x |
| - vmovd w4,t1x |
| - vpunpcklqdq t1,ruwy4,ruwy4 |
| - vmovd u4,t1x |
| - vmovd r4,t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,ruwy4,ruwy4 |
| - vpslld $2,ruwy4,svxz4 |
| - vpaddd ruwy4,svxz4,svxz4 |
| - |
| -.Ldoblock4: |
| - # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff, |
| - # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0] |
| - vmovd 0x00(m),hc0x |
| - vmovd 0x10(m),t1x |
| - vpunpcklqdq t1,hc0,hc0 |
| - vmovd 0x20(m),t1x |
| - vmovd 0x30(m),t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,hc0,hc0 |
| - vpand ANMASK(%rip),hc0,hc0 |
| - vmovd h0,t1x |
| - vpaddd t1,hc0,hc0 |
| - # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff, |
| - # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1] |
| - vmovd 0x03(m),hc1x |
| - vmovd 0x13(m),t1x |
| - vpunpcklqdq t1,hc1,hc1 |
| - vmovd 0x23(m),t1x |
| - vmovd 0x33(m),t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,hc1,hc1 |
| - vpsrld $2,hc1,hc1 |
| - vpand ANMASK(%rip),hc1,hc1 |
| - vmovd h1,t1x |
| - vpaddd t1,hc1,hc1 |
| - # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff, |
| - # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2] |
| - vmovd 0x06(m),hc2x |
| - vmovd 0x16(m),t1x |
| - vpunpcklqdq t1,hc2,hc2 |
| - vmovd 0x26(m),t1x |
| - vmovd 0x36(m),t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,hc2,hc2 |
| - vpsrld $4,hc2,hc2 |
| - vpand ANMASK(%rip),hc2,hc2 |
| - vmovd h2,t1x |
| - vpaddd t1,hc2,hc2 |
| - # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff, |
| - # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3] |
| - vmovd 0x09(m),hc3x |
| - vmovd 0x19(m),t1x |
| - vpunpcklqdq t1,hc3,hc3 |
| - vmovd 0x29(m),t1x |
| - vmovd 0x39(m),t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,hc3,hc3 |
| - vpsrld $6,hc3,hc3 |
| - vpand ANMASK(%rip),hc3,hc3 |
| - vmovd h3,t1x |
| - vpaddd t1,hc3,hc3 |
| - # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24), |
| - # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4] |
| - vmovd 0x0c(m),hc4x |
| - vmovd 0x1c(m),t1x |
| - vpunpcklqdq t1,hc4,hc4 |
| - vmovd 0x2c(m),t1x |
| - vmovd 0x3c(m),t2x |
| - vpunpcklqdq t2,t1,t1 |
| - vperm2i128 $0x20,t1,hc4,hc4 |
| - vpsrld $8,hc4,hc4 |
| - vpor ORMASK(%rip),hc4,hc4 |
| - vmovd h4,t1x |
| - vpaddd t1,hc4,hc4 |
| - |
| - # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ] |
| - vpmuludq hc0,ruwy0,t1 |
| - # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ] |
| - vpmuludq hc1,svxz4,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ] |
| - vpmuludq hc2,svxz3,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ] |
| - vpmuludq hc3,svxz2,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ] |
| - vpmuludq hc4,svxz1,t2 |
| - vpaddq t2,t1,t1 |
| - # d0 = t1[0] + t1[1] + t[2] + t[3] |
| - vpermq $0xee,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vpsrldq $8,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vmovq t1x,d0 |
| - |
| - # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ] |
| - vpmuludq hc0,ruwy1,t1 |
| - # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ] |
| - vpmuludq hc1,ruwy0,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ] |
| - vpmuludq hc2,svxz4,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ] |
| - vpmuludq hc3,svxz3,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ] |
| - vpmuludq hc4,svxz2,t2 |
| - vpaddq t2,t1,t1 |
| - # d1 = t1[0] + t1[1] + t1[3] + t1[4] |
| - vpermq $0xee,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vpsrldq $8,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vmovq t1x,d1 |
| - |
| - # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ] |
| - vpmuludq hc0,ruwy2,t1 |
| - # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ] |
| - vpmuludq hc1,ruwy1,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ] |
| - vpmuludq hc2,ruwy0,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ] |
| - vpmuludq hc3,svxz4,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ] |
| - vpmuludq hc4,svxz3,t2 |
| - vpaddq t2,t1,t1 |
| - # d2 = t1[0] + t1[1] + t1[2] + t1[3] |
| - vpermq $0xee,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vpsrldq $8,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vmovq t1x,d2 |
| - |
| - # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ] |
| - vpmuludq hc0,ruwy3,t1 |
| - # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ] |
| - vpmuludq hc1,ruwy2,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ] |
| - vpmuludq hc2,ruwy1,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ] |
| - vpmuludq hc3,ruwy0,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ] |
| - vpmuludq hc4,svxz4,t2 |
| - vpaddq t2,t1,t1 |
| - # d3 = t1[0] + t1[1] + t1[2] + t1[3] |
| - vpermq $0xee,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vpsrldq $8,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vmovq t1x,d3 |
| - |
| - # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ] |
| - vpmuludq hc0,ruwy4,t1 |
| - # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ] |
| - vpmuludq hc1,ruwy3,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ] |
| - vpmuludq hc2,ruwy2,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ] |
| - vpmuludq hc3,ruwy1,t2 |
| - vpaddq t2,t1,t1 |
| - # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ] |
| - vpmuludq hc4,ruwy0,t2 |
| - vpaddq t2,t1,t1 |
| - # d4 = t1[0] + t1[1] + t1[2] + t1[3] |
| - vpermq $0xee,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vpsrldq $8,t1,t2 |
| - vpaddq t2,t1,t1 |
| - vmovq t1x,d4 |
| - |
| - # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> |
| - # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small |
| - # amount. Careful: we must not assume the carry bits 'd0 >> 26', |
| - # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit |
| - # integers. It's true in a single-block implementation, but not here. |
| - |
| - # d1 += d0 >> 26 |
| - mov d0,%rax |
| - shr $26,%rax |
| - add %rax,d1 |
| - # h0 = d0 & 0x3ffffff |
| - mov d0,%rbx |
| - and $0x3ffffff,%ebx |
| - |
| - # d2 += d1 >> 26 |
| - mov d1,%rax |
| - shr $26,%rax |
| - add %rax,d2 |
| - # h1 = d1 & 0x3ffffff |
| - mov d1,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h1 |
| - |
| - # d3 += d2 >> 26 |
| - mov d2,%rax |
| - shr $26,%rax |
| - add %rax,d3 |
| - # h2 = d2 & 0x3ffffff |
| - mov d2,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h2 |
| - |
| - # d4 += d3 >> 26 |
| - mov d3,%rax |
| - shr $26,%rax |
| - add %rax,d4 |
| - # h3 = d3 & 0x3ffffff |
| - mov d3,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h3 |
| - |
| - # h0 += (d4 >> 26) * 5 |
| - mov d4,%rax |
| - shr $26,%rax |
| - lea (%rax,%rax,4),%rax |
| - add %rax,%rbx |
| - # h4 = d4 & 0x3ffffff |
| - mov d4,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h4 |
| - |
| - # h1 += h0 >> 26 |
| - mov %rbx,%rax |
| - shr $26,%rax |
| - add %eax,h1 |
| - # h0 = h0 & 0x3ffffff |
| - andl $0x3ffffff,%ebx |
| - mov %ebx,h0 |
| - |
| - add $0x40,m |
| - dec %rcx |
| - jnz .Ldoblock4 |
| - |
| - vzeroupper |
| - pop %r13 |
| - pop %r12 |
| - pop %rbx |
| - ret |
| -ENDPROC(poly1305_4block_avx2) |
| diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S |
| deleted file mode 100644 |
| index 5578f846e622..000000000000 |
| |
| |
| @@ -1,590 +0,0 @@ |
| -/* SPDX-License-Identifier: GPL-2.0-or-later */ |
| -/* |
| - * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions |
| - * |
| - * Copyright (C) 2015 Martin Willi |
| - */ |
| - |
| -#include <linux/linkage.h> |
| - |
| -.section .rodata.cst16.ANMASK, "aM", @progbits, 16 |
| -.align 16 |
| -ANMASK: .octa 0x0000000003ffffff0000000003ffffff |
| - |
| -.section .rodata.cst16.ORMASK, "aM", @progbits, 16 |
| -.align 16 |
| -ORMASK: .octa 0x00000000010000000000000001000000 |
| - |
| -.text |
| - |
| -#define h0 0x00(%rdi) |
| -#define h1 0x04(%rdi) |
| -#define h2 0x08(%rdi) |
| -#define h3 0x0c(%rdi) |
| -#define h4 0x10(%rdi) |
| -#define r0 0x00(%rdx) |
| -#define r1 0x04(%rdx) |
| -#define r2 0x08(%rdx) |
| -#define r3 0x0c(%rdx) |
| -#define r4 0x10(%rdx) |
| -#define s1 0x00(%rsp) |
| -#define s2 0x04(%rsp) |
| -#define s3 0x08(%rsp) |
| -#define s4 0x0c(%rsp) |
| -#define m %rsi |
| -#define h01 %xmm0 |
| -#define h23 %xmm1 |
| -#define h44 %xmm2 |
| -#define t1 %xmm3 |
| -#define t2 %xmm4 |
| -#define t3 %xmm5 |
| -#define t4 %xmm6 |
| -#define mask %xmm7 |
| -#define d0 %r8 |
| -#define d1 %r9 |
| -#define d2 %r10 |
| -#define d3 %r11 |
| -#define d4 %r12 |
| - |
| -ENTRY(poly1305_block_sse2) |
| - # %rdi: Accumulator h[5] |
| - # %rsi: 16 byte input block m |
| - # %rdx: Poly1305 key r[5] |
| - # %rcx: Block count |
| - |
| - # This single block variant tries to improve performance by doing two |
| - # multiplications in parallel using SSE instructions. There is quite |
| - # some quardword packing involved, hence the speedup is marginal. |
| - |
| - push %rbx |
| - push %r12 |
| - sub $0x10,%rsp |
| - |
| - # s1..s4 = r1..r4 * 5 |
| - mov r1,%eax |
| - lea (%eax,%eax,4),%eax |
| - mov %eax,s1 |
| - mov r2,%eax |
| - lea (%eax,%eax,4),%eax |
| - mov %eax,s2 |
| - mov r3,%eax |
| - lea (%eax,%eax,4),%eax |
| - mov %eax,s3 |
| - mov r4,%eax |
| - lea (%eax,%eax,4),%eax |
| - mov %eax,s4 |
| - |
| - movdqa ANMASK(%rip),mask |
| - |
| -.Ldoblock: |
| - # h01 = [0, h1, 0, h0] |
| - # h23 = [0, h3, 0, h2] |
| - # h44 = [0, h4, 0, h4] |
| - movd h0,h01 |
| - movd h1,t1 |
| - movd h2,h23 |
| - movd h3,t2 |
| - movd h4,h44 |
| - punpcklqdq t1,h01 |
| - punpcklqdq t2,h23 |
| - punpcklqdq h44,h44 |
| - |
| - # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ] |
| - movd 0x00(m),t1 |
| - movd 0x03(m),t2 |
| - psrld $2,t2 |
| - punpcklqdq t2,t1 |
| - pand mask,t1 |
| - paddd t1,h01 |
| - # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ] |
| - movd 0x06(m),t1 |
| - movd 0x09(m),t2 |
| - psrld $4,t1 |
| - psrld $6,t2 |
| - punpcklqdq t2,t1 |
| - pand mask,t1 |
| - paddd t1,h23 |
| - # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ] |
| - mov 0x0c(m),%eax |
| - shr $8,%eax |
| - or $0x01000000,%eax |
| - movd %eax,t1 |
| - pshufd $0xc4,t1,t1 |
| - paddd t1,h44 |
| - |
| - # t1[0] = h0 * r0 + h2 * s3 |
| - # t1[1] = h1 * s4 + h3 * s2 |
| - movd r0,t1 |
| - movd s4,t2 |
| - punpcklqdq t2,t1 |
| - pmuludq h01,t1 |
| - movd s3,t2 |
| - movd s2,t3 |
| - punpcklqdq t3,t2 |
| - pmuludq h23,t2 |
| - paddq t2,t1 |
| - # t2[0] = h0 * r1 + h2 * s4 |
| - # t2[1] = h1 * r0 + h3 * s3 |
| - movd r1,t2 |
| - movd r0,t3 |
| - punpcklqdq t3,t2 |
| - pmuludq h01,t2 |
| - movd s4,t3 |
| - movd s3,t4 |
| - punpcklqdq t4,t3 |
| - pmuludq h23,t3 |
| - paddq t3,t2 |
| - # t3[0] = h4 * s1 |
| - # t3[1] = h4 * s2 |
| - movd s1,t3 |
| - movd s2,t4 |
| - punpcklqdq t4,t3 |
| - pmuludq h44,t3 |
| - # d0 = t1[0] + t1[1] + t3[0] |
| - # d1 = t2[0] + t2[1] + t3[1] |
| - movdqa t1,t4 |
| - punpcklqdq t2,t4 |
| - punpckhqdq t2,t1 |
| - paddq t4,t1 |
| - paddq t3,t1 |
| - movq t1,d0 |
| - psrldq $8,t1 |
| - movq t1,d1 |
| - |
| - # t1[0] = h0 * r2 + h2 * r0 |
| - # t1[1] = h1 * r1 + h3 * s4 |
| - movd r2,t1 |
| - movd r1,t2 |
| - punpcklqdq t2,t1 |
| - pmuludq h01,t1 |
| - movd r0,t2 |
| - movd s4,t3 |
| - punpcklqdq t3,t2 |
| - pmuludq h23,t2 |
| - paddq t2,t1 |
| - # t2[0] = h0 * r3 + h2 * r1 |
| - # t2[1] = h1 * r2 + h3 * r0 |
| - movd r3,t2 |
| - movd r2,t3 |
| - punpcklqdq t3,t2 |
| - pmuludq h01,t2 |
| - movd r1,t3 |
| - movd r0,t4 |
| - punpcklqdq t4,t3 |
| - pmuludq h23,t3 |
| - paddq t3,t2 |
| - # t3[0] = h4 * s3 |
| - # t3[1] = h4 * s4 |
| - movd s3,t3 |
| - movd s4,t4 |
| - punpcklqdq t4,t3 |
| - pmuludq h44,t3 |
| - # d2 = t1[0] + t1[1] + t3[0] |
| - # d3 = t2[0] + t2[1] + t3[1] |
| - movdqa t1,t4 |
| - punpcklqdq t2,t4 |
| - punpckhqdq t2,t1 |
| - paddq t4,t1 |
| - paddq t3,t1 |
| - movq t1,d2 |
| - psrldq $8,t1 |
| - movq t1,d3 |
| - |
| - # t1[0] = h0 * r4 + h2 * r2 |
| - # t1[1] = h1 * r3 + h3 * r1 |
| - movd r4,t1 |
| - movd r3,t2 |
| - punpcklqdq t2,t1 |
| - pmuludq h01,t1 |
| - movd r2,t2 |
| - movd r1,t3 |
| - punpcklqdq t3,t2 |
| - pmuludq h23,t2 |
| - paddq t2,t1 |
| - # t3[0] = h4 * r0 |
| - movd r0,t3 |
| - pmuludq h44,t3 |
| - # d4 = t1[0] + t1[1] + t3[0] |
| - movdqa t1,t4 |
| - psrldq $8,t4 |
| - paddq t4,t1 |
| - paddq t3,t1 |
| - movq t1,d4 |
| - |
| - # d1 += d0 >> 26 |
| - mov d0,%rax |
| - shr $26,%rax |
| - add %rax,d1 |
| - # h0 = d0 & 0x3ffffff |
| - mov d0,%rbx |
| - and $0x3ffffff,%ebx |
| - |
| - # d2 += d1 >> 26 |
| - mov d1,%rax |
| - shr $26,%rax |
| - add %rax,d2 |
| - # h1 = d1 & 0x3ffffff |
| - mov d1,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h1 |
| - |
| - # d3 += d2 >> 26 |
| - mov d2,%rax |
| - shr $26,%rax |
| - add %rax,d3 |
| - # h2 = d2 & 0x3ffffff |
| - mov d2,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h2 |
| - |
| - # d4 += d3 >> 26 |
| - mov d3,%rax |
| - shr $26,%rax |
| - add %rax,d4 |
| - # h3 = d3 & 0x3ffffff |
| - mov d3,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h3 |
| - |
| - # h0 += (d4 >> 26) * 5 |
| - mov d4,%rax |
| - shr $26,%rax |
| - lea (%rax,%rax,4),%rax |
| - add %rax,%rbx |
| - # h4 = d4 & 0x3ffffff |
| - mov d4,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h4 |
| - |
| - # h1 += h0 >> 26 |
| - mov %rbx,%rax |
| - shr $26,%rax |
| - add %eax,h1 |
| - # h0 = h0 & 0x3ffffff |
| - andl $0x3ffffff,%ebx |
| - mov %ebx,h0 |
| - |
| - add $0x10,m |
| - dec %rcx |
| - jnz .Ldoblock |
| - |
| - # Zeroing of key material |
| - mov %rcx,0x00(%rsp) |
| - mov %rcx,0x08(%rsp) |
| - |
| - add $0x10,%rsp |
| - pop %r12 |
| - pop %rbx |
| - ret |
| -ENDPROC(poly1305_block_sse2) |
| - |
| - |
| -#define u0 0x00(%r8) |
| -#define u1 0x04(%r8) |
| -#define u2 0x08(%r8) |
| -#define u3 0x0c(%r8) |
| -#define u4 0x10(%r8) |
| -#define hc0 %xmm0 |
| -#define hc1 %xmm1 |
| -#define hc2 %xmm2 |
| -#define hc3 %xmm5 |
| -#define hc4 %xmm6 |
| -#define ru0 %xmm7 |
| -#define ru1 %xmm8 |
| -#define ru2 %xmm9 |
| -#define ru3 %xmm10 |
| -#define ru4 %xmm11 |
| -#define sv1 %xmm12 |
| -#define sv2 %xmm13 |
| -#define sv3 %xmm14 |
| -#define sv4 %xmm15 |
| -#undef d0 |
| -#define d0 %r13 |
| - |
| -ENTRY(poly1305_2block_sse2) |
| - # %rdi: Accumulator h[5] |
| - # %rsi: 16 byte input block m |
| - # %rdx: Poly1305 key r[5] |
| - # %rcx: Doubleblock count |
| - # %r8: Poly1305 derived key r^2 u[5] |
| - |
| - # This two-block variant further improves performance by using loop |
| - # unrolled block processing. This is more straight forward and does |
| - # less byte shuffling, but requires a second Poly1305 key r^2: |
| - # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r |
| - |
| - push %rbx |
| - push %r12 |
| - push %r13 |
| - |
| - # combine r0,u0 |
| - movd u0,ru0 |
| - movd r0,t1 |
| - punpcklqdq t1,ru0 |
| - |
| - # combine r1,u1 and s1=r1*5,v1=u1*5 |
| - movd u1,ru1 |
| - movd r1,t1 |
| - punpcklqdq t1,ru1 |
| - movdqa ru1,sv1 |
| - pslld $2,sv1 |
| - paddd ru1,sv1 |
| - |
| - # combine r2,u2 and s2=r2*5,v2=u2*5 |
| - movd u2,ru2 |
| - movd r2,t1 |
| - punpcklqdq t1,ru2 |
| - movdqa ru2,sv2 |
| - pslld $2,sv2 |
| - paddd ru2,sv2 |
| - |
| - # combine r3,u3 and s3=r3*5,v3=u3*5 |
| - movd u3,ru3 |
| - movd r3,t1 |
| - punpcklqdq t1,ru3 |
| - movdqa ru3,sv3 |
| - pslld $2,sv3 |
| - paddd ru3,sv3 |
| - |
| - # combine r4,u4 and s4=r4*5,v4=u4*5 |
| - movd u4,ru4 |
| - movd r4,t1 |
| - punpcklqdq t1,ru4 |
| - movdqa ru4,sv4 |
| - pslld $2,sv4 |
| - paddd ru4,sv4 |
| - |
| -.Ldoblock2: |
| - # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ] |
| - movd 0x00(m),hc0 |
| - movd 0x10(m),t1 |
| - punpcklqdq t1,hc0 |
| - pand ANMASK(%rip),hc0 |
| - movd h0,t1 |
| - paddd t1,hc0 |
| - # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ] |
| - movd 0x03(m),hc1 |
| - movd 0x13(m),t1 |
| - punpcklqdq t1,hc1 |
| - psrld $2,hc1 |
| - pand ANMASK(%rip),hc1 |
| - movd h1,t1 |
| - paddd t1,hc1 |
| - # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ] |
| - movd 0x06(m),hc2 |
| - movd 0x16(m),t1 |
| - punpcklqdq t1,hc2 |
| - psrld $4,hc2 |
| - pand ANMASK(%rip),hc2 |
| - movd h2,t1 |
| - paddd t1,hc2 |
| - # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ] |
| - movd 0x09(m),hc3 |
| - movd 0x19(m),t1 |
| - punpcklqdq t1,hc3 |
| - psrld $6,hc3 |
| - pand ANMASK(%rip),hc3 |
| - movd h3,t1 |
| - paddd t1,hc3 |
| - # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ] |
| - movd 0x0c(m),hc4 |
| - movd 0x1c(m),t1 |
| - punpcklqdq t1,hc4 |
| - psrld $8,hc4 |
| - por ORMASK(%rip),hc4 |
| - movd h4,t1 |
| - paddd t1,hc4 |
| - |
| - # t1 = [ hc0[1] * r0, hc0[0] * u0 ] |
| - movdqa ru0,t1 |
| - pmuludq hc0,t1 |
| - # t1 += [ hc1[1] * s4, hc1[0] * v4 ] |
| - movdqa sv4,t2 |
| - pmuludq hc1,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc2[1] * s3, hc2[0] * v3 ] |
| - movdqa sv3,t2 |
| - pmuludq hc2,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc3[1] * s2, hc3[0] * v2 ] |
| - movdqa sv2,t2 |
| - pmuludq hc3,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc4[1] * s1, hc4[0] * v1 ] |
| - movdqa sv1,t2 |
| - pmuludq hc4,t2 |
| - paddq t2,t1 |
| - # d0 = t1[0] + t1[1] |
| - movdqa t1,t2 |
| - psrldq $8,t2 |
| - paddq t2,t1 |
| - movq t1,d0 |
| - |
| - # t1 = [ hc0[1] * r1, hc0[0] * u1 ] |
| - movdqa ru1,t1 |
| - pmuludq hc0,t1 |
| - # t1 += [ hc1[1] * r0, hc1[0] * u0 ] |
| - movdqa ru0,t2 |
| - pmuludq hc1,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc2[1] * s4, hc2[0] * v4 ] |
| - movdqa sv4,t2 |
| - pmuludq hc2,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc3[1] * s3, hc3[0] * v3 ] |
| - movdqa sv3,t2 |
| - pmuludq hc3,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc4[1] * s2, hc4[0] * v2 ] |
| - movdqa sv2,t2 |
| - pmuludq hc4,t2 |
| - paddq t2,t1 |
| - # d1 = t1[0] + t1[1] |
| - movdqa t1,t2 |
| - psrldq $8,t2 |
| - paddq t2,t1 |
| - movq t1,d1 |
| - |
| - # t1 = [ hc0[1] * r2, hc0[0] * u2 ] |
| - movdqa ru2,t1 |
| - pmuludq hc0,t1 |
| - # t1 += [ hc1[1] * r1, hc1[0] * u1 ] |
| - movdqa ru1,t2 |
| - pmuludq hc1,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc2[1] * r0, hc2[0] * u0 ] |
| - movdqa ru0,t2 |
| - pmuludq hc2,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc3[1] * s4, hc3[0] * v4 ] |
| - movdqa sv4,t2 |
| - pmuludq hc3,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc4[1] * s3, hc4[0] * v3 ] |
| - movdqa sv3,t2 |
| - pmuludq hc4,t2 |
| - paddq t2,t1 |
| - # d2 = t1[0] + t1[1] |
| - movdqa t1,t2 |
| - psrldq $8,t2 |
| - paddq t2,t1 |
| - movq t1,d2 |
| - |
| - # t1 = [ hc0[1] * r3, hc0[0] * u3 ] |
| - movdqa ru3,t1 |
| - pmuludq hc0,t1 |
| - # t1 += [ hc1[1] * r2, hc1[0] * u2 ] |
| - movdqa ru2,t2 |
| - pmuludq hc1,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc2[1] * r1, hc2[0] * u1 ] |
| - movdqa ru1,t2 |
| - pmuludq hc2,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc3[1] * r0, hc3[0] * u0 ] |
| - movdqa ru0,t2 |
| - pmuludq hc3,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc4[1] * s4, hc4[0] * v4 ] |
| - movdqa sv4,t2 |
| - pmuludq hc4,t2 |
| - paddq t2,t1 |
| - # d3 = t1[0] + t1[1] |
| - movdqa t1,t2 |
| - psrldq $8,t2 |
| - paddq t2,t1 |
| - movq t1,d3 |
| - |
| - # t1 = [ hc0[1] * r4, hc0[0] * u4 ] |
| - movdqa ru4,t1 |
| - pmuludq hc0,t1 |
| - # t1 += [ hc1[1] * r3, hc1[0] * u3 ] |
| - movdqa ru3,t2 |
| - pmuludq hc1,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc2[1] * r2, hc2[0] * u2 ] |
| - movdqa ru2,t2 |
| - pmuludq hc2,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc3[1] * r1, hc3[0] * u1 ] |
| - movdqa ru1,t2 |
| - pmuludq hc3,t2 |
| - paddq t2,t1 |
| - # t1 += [ hc4[1] * r0, hc4[0] * u0 ] |
| - movdqa ru0,t2 |
| - pmuludq hc4,t2 |
| - paddq t2,t1 |
| - # d4 = t1[0] + t1[1] |
| - movdqa t1,t2 |
| - psrldq $8,t2 |
| - paddq t2,t1 |
| - movq t1,d4 |
| - |
| - # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> |
| - # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small |
| - # amount. Careful: we must not assume the carry bits 'd0 >> 26', |
| - # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit |
| - # integers. It's true in a single-block implementation, but not here. |
| - |
| - # d1 += d0 >> 26 |
| - mov d0,%rax |
| - shr $26,%rax |
| - add %rax,d1 |
| - # h0 = d0 & 0x3ffffff |
| - mov d0,%rbx |
| - and $0x3ffffff,%ebx |
| - |
| - # d2 += d1 >> 26 |
| - mov d1,%rax |
| - shr $26,%rax |
| - add %rax,d2 |
| - # h1 = d1 & 0x3ffffff |
| - mov d1,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h1 |
| - |
| - # d3 += d2 >> 26 |
| - mov d2,%rax |
| - shr $26,%rax |
| - add %rax,d3 |
| - # h2 = d2 & 0x3ffffff |
| - mov d2,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h2 |
| - |
| - # d4 += d3 >> 26 |
| - mov d3,%rax |
| - shr $26,%rax |
| - add %rax,d4 |
| - # h3 = d3 & 0x3ffffff |
| - mov d3,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h3 |
| - |
| - # h0 += (d4 >> 26) * 5 |
| - mov d4,%rax |
| - shr $26,%rax |
| - lea (%rax,%rax,4),%rax |
| - add %rax,%rbx |
| - # h4 = d4 & 0x3ffffff |
| - mov d4,%rax |
| - and $0x3ffffff,%eax |
| - mov %eax,h4 |
| - |
| - # h1 += h0 >> 26 |
| - mov %rbx,%rax |
| - shr $26,%rax |
| - add %eax,h1 |
| - # h0 = h0 & 0x3ffffff |
| - andl $0x3ffffff,%ebx |
| - mov %ebx,h0 |
| - |
| - add $0x20,m |
| - dec %rcx |
| - jnz .Ldoblock2 |
| - |
| - pop %r13 |
| - pop %r12 |
| - pop %rbx |
| - ret |
| -ENDPROC(poly1305_2block_sse2) |
| diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl |
| index 342ad7f18aa7..80061bea6b16 100644 |
| |
| |
| @@ -1,11 +1,14 @@ |
| -#! /usr/bin/env perl |
| -# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. |
| +#!/usr/bin/env perl |
| +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
| # |
| -# Licensed under the OpenSSL license (the "License"). You may not use |
| -# this file except in compliance with the License. You can obtain a copy |
| -# in the file LICENSE in the source distribution or at |
| -# https://www.openssl.org/source/license.html |
| - |
| +# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. |
| +# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| +# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. |
| +# |
| +# This code is taken from the OpenSSL project but the author, Andy Polyakov, |
| +# has relicensed it under the licenses specified in the SPDX header above. |
| +# The original headers, including the original license headers, are |
| +# included below for completeness. |
| # |
| # |
| # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
| @@ -32,7 +35,7 @@ |
| # Skylake-X system performance. Since we are likely to suppress |
| # AVX512F capability flag [at least on Skylake-X], conversion serves |
| # as kind of "investment protection". Note that next *lake processor, |
| -# Cannolake, has AVX512IFMA code path to execute... |
| +# Cannonlake, has AVX512IFMA code path to execute... |
| # |
| # Numbers are cycles per processed byte with poly1305_blocks alone, |
| # measured with rdtsc at fixed clock frequency. |
| @@ -68,39 +71,114 @@ $output = shift; |
| if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } |
| |
| $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
| - |
| -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
| -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| -die "can't locate x86_64-xlate.pl"; |
| - |
| -if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` |
| - =~ /GNU assembler version ([2-9]\.[0-9]+)/) { |
| - $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26); |
| +$kernel=0; $kernel=1 if (!$flavour && !$output); |
| + |
| +if (!$kernel) { |
| + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| + ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
| + ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| + die "can't locate x86_64-xlate.pl"; |
| + |
| + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; |
| + *STDOUT=*OUT; |
| + |
| + if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { |
| + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); |
| + } |
| + |
| + if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { |
| + $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); |
| + $avx += 1 if ($1==2.11 && $2>=8); |
| + } |
| + |
| + if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { |
| + $avx = ($1>=10) + ($1>=11); |
| + } |
| + |
| + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { |
| + $avx = ($2>=3.0) + ($2>3.0); |
| + } |
| +} else { |
| + $avx = 4; # The kernel uses ifdefs for this. |
| } |
| |
| -if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && |
| - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { |
| - $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12); |
| - $avx += 2 if ($1==2.11 && $2>=8); |
| +sub declare_function() { |
| + my ($name, $align, $nargs) = @_; |
| + if($kernel) { |
| + $code .= ".align $align\n"; |
| + $code .= "ENTRY($name)\n"; |
| + $code .= ".L$name:\n"; |
| + } else { |
| + $code .= ".globl $name\n"; |
| + $code .= ".type $name,\@function,$nargs\n"; |
| + $code .= ".align $align\n"; |
| + $code .= "$name:\n"; |
| + } |
| } |
| |
| -if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && |
| - `ml64 2>&1` =~ /Version ([0-9]+)\./) { |
| - $avx = ($1>=10) + ($1>=12); |
| +sub end_function() { |
| + my ($name) = @_; |
| + if($kernel) { |
| + $code .= "ENDPROC($name)\n"; |
| + } else { |
| + $code .= ".size $name,.-$name\n"; |
| + } |
| } |
| |
| -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { |
| - $avx = ($2>=3.0) + ($2>3.0); |
| -} |
| +$code.=<<___ if $kernel; |
| +#include <linux/linkage.h> |
| +___ |
| + |
| +if ($avx) { |
| +$code.=<<___ if $kernel; |
| +.section .rodata |
| +___ |
| +$code.=<<___; |
| +.align 64 |
| +.Lconst: |
| +.Lmask24: |
| +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 |
| +.L129: |
| +.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 |
| +.Lmask26: |
| +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 |
| +.Lpermd_avx2: |
| +.long 2,2,2,3,2,0,2,1 |
| +.Lpermd_avx512: |
| +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 |
| + |
| +.L2_44_inp_permd: |
| +.long 0,1,1,2,2,3,7,7 |
| +.L2_44_inp_shift: |
| +.quad 0,12,24,64 |
| +.L2_44_mask: |
| +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff |
| +.L2_44_shift_rgt: |
| +.quad 44,44,42,64 |
| +.L2_44_shift_lft: |
| +.quad 8,8,10,64 |
| |
| -open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; |
| -*STDOUT=*OUT; |
| +.align 64 |
| +.Lx_mask44: |
| +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| +.Lx_mask42: |
| +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| +___ |
| +} |
| +$code.=<<___ if (!$kernel); |
| +.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" |
| +.align 16 |
| +___ |
| |
| my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); |
| my ($mac,$nonce)=($inp,$len); # *_emit arguments |
| -my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13)); |
| -my ($h0,$h1,$h2)=("%r14","%rbx","%rbp"); |
| +my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13"); |
| +my ($h0,$h1,$h2)=("%r14","%rbx","%r10"); |
| |
| sub poly1305_iteration { |
| # input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 |
| @@ -155,19 +233,19 @@ ___ |
| |
| $code.=<<___; |
| .text |
| - |
| +___ |
| +$code.=<<___ if (!$kernel); |
| .extern OPENSSL_ia32cap_P |
| |
| -.globl poly1305_init |
| -.hidden poly1305_init |
| -.globl poly1305_blocks |
| -.hidden poly1305_blocks |
| -.globl poly1305_emit |
| -.hidden poly1305_emit |
| - |
| -.type poly1305_init,\@function,3 |
| -.align 32 |
| -poly1305_init: |
| +.globl poly1305_init_x86_64 |
| +.hidden poly1305_init_x86_64 |
| +.globl poly1305_blocks_x86_64 |
| +.hidden poly1305_blocks_x86_64 |
| +.globl poly1305_emit_x86_64 |
| +.hidden poly1305_emit_x86_64 |
| +___ |
| +&declare_function("poly1305_init_x86_64", 32, 3); |
| +$code.=<<___; |
| xor %rax,%rax |
| mov %rax,0($ctx) # initialize hash value |
| mov %rax,8($ctx) |
| @@ -175,11 +253,12 @@ poly1305_init: |
| |
| cmp \$0,$inp |
| je .Lno_key |
| - |
| - lea poly1305_blocks(%rip),%r10 |
| - lea poly1305_emit(%rip),%r11 |
| ___ |
| -$code.=<<___ if ($avx); |
| +$code.=<<___ if (!$kernel); |
| + lea poly1305_blocks_x86_64(%rip),%r10 |
| + lea poly1305_emit_x86_64(%rip),%r11 |
| +___ |
| +$code.=<<___ if (!$kernel && $avx); |
| mov OPENSSL_ia32cap_P+4(%rip),%r9 |
| lea poly1305_blocks_avx(%rip),%rax |
| lea poly1305_emit_avx(%rip),%rcx |
| @@ -187,12 +266,12 @@ $code.=<<___ if ($avx); |
| cmovc %rax,%r10 |
| cmovc %rcx,%r11 |
| ___ |
| -$code.=<<___ if ($avx>1); |
| +$code.=<<___ if (!$kernel && $avx>1); |
| lea poly1305_blocks_avx2(%rip),%rax |
| bt \$`5+32`,%r9 # AVX2? |
| cmovc %rax,%r10 |
| ___ |
| -$code.=<<___ if ($avx>3); |
| +$code.=<<___ if (!$kernel && $avx>3); |
| mov \$`(1<<31|1<<21|1<<16)`,%rax |
| shr \$32,%r9 |
| and %rax,%r9 |
| @@ -207,11 +286,11 @@ $code.=<<___; |
| mov %rax,24($ctx) |
| mov %rcx,32($ctx) |
| ___ |
| -$code.=<<___ if ($flavour !~ /elf32/); |
| +$code.=<<___ if (!$kernel && $flavour !~ /elf32/); |
| mov %r10,0(%rdx) |
| mov %r11,8(%rdx) |
| ___ |
| -$code.=<<___ if ($flavour =~ /elf32/); |
| +$code.=<<___ if (!$kernel && $flavour =~ /elf32/); |
| mov %r10d,0(%rdx) |
| mov %r11d,4(%rdx) |
| ___ |
| @@ -219,11 +298,11 @@ $code.=<<___; |
| mov \$1,%eax |
| .Lno_key: |
| ret |
| -.size poly1305_init,.-poly1305_init |
| +___ |
| +&end_function("poly1305_init_x86_64"); |
| |
| -.type poly1305_blocks,\@function,4 |
| -.align 32 |
| -poly1305_blocks: |
| +&declare_function("poly1305_blocks_x86_64", 32, 4); |
| +$code.=<<___; |
| .cfi_startproc |
| .Lblocks: |
| shr \$4,$len |
| @@ -231,8 +310,6 @@ poly1305_blocks: |
| |
| push %rbx |
| .cfi_push %rbx |
| - push %rbp |
| -.cfi_push %rbp |
| push %r12 |
| .cfi_push %r12 |
| push %r13 |
| @@ -241,6 +318,8 @@ poly1305_blocks: |
| .cfi_push %r14 |
| push %r15 |
| .cfi_push %r15 |
| + push $ctx |
| +.cfi_push $ctx |
| .Lblocks_body: |
| |
| mov $len,%r15 # reassign $len |
| @@ -265,26 +344,29 @@ poly1305_blocks: |
| lea 16($inp),$inp |
| adc $padbit,$h2 |
| ___ |
| + |
| &poly1305_iteration(); |
| + |
| $code.=<<___; |
| mov $r1,%rax |
| dec %r15 # len-=16 |
| jnz .Loop |
| |
| + mov 0(%rsp),$ctx |
| +.cfi_restore $ctx |
| + |
| mov $h0,0($ctx) # store hash value |
| mov $h1,8($ctx) |
| mov $h2,16($ctx) |
| |
| - mov 0(%rsp),%r15 |
| + mov 8(%rsp),%r15 |
| .cfi_restore %r15 |
| - mov 8(%rsp),%r14 |
| + mov 16(%rsp),%r14 |
| .cfi_restore %r14 |
| - mov 16(%rsp),%r13 |
| + mov 24(%rsp),%r13 |
| .cfi_restore %r13 |
| - mov 24(%rsp),%r12 |
| + mov 32(%rsp),%r12 |
| .cfi_restore %r12 |
| - mov 32(%rsp),%rbp |
| -.cfi_restore %rbp |
| mov 40(%rsp),%rbx |
| .cfi_restore %rbx |
| lea 48(%rsp),%rsp |
| @@ -293,11 +375,11 @@ $code.=<<___; |
| .Lblocks_epilogue: |
| ret |
| .cfi_endproc |
| -.size poly1305_blocks,.-poly1305_blocks |
| +___ |
| +&end_function("poly1305_blocks_x86_64"); |
| |
| -.type poly1305_emit,\@function,3 |
| -.align 32 |
| -poly1305_emit: |
| +&declare_function("poly1305_emit_x86_64", 32, 3); |
| +$code.=<<___; |
| .Lemit: |
| mov 0($ctx),%r8 # load hash value |
| mov 8($ctx),%r9 |
| @@ -318,10 +400,14 @@ poly1305_emit: |
| mov %rcx,8($mac) |
| |
| ret |
| -.size poly1305_emit,.-poly1305_emit |
| ___ |
| +&end_function("poly1305_emit_x86_64"); |
| if ($avx) { |
| |
| +if($kernel) { |
| + $code .= "#ifdef CONFIG_AS_AVX\n"; |
| +} |
| + |
| ######################################################################## |
| # Layout of opaque area is following. |
| # |
| @@ -342,15 +428,19 @@ $code.=<<___; |
| .type __poly1305_block,\@abi-omnipotent |
| .align 32 |
| __poly1305_block: |
| + push $ctx |
| ___ |
| &poly1305_iteration(); |
| $code.=<<___; |
| + pop $ctx |
| ret |
| .size __poly1305_block,.-__poly1305_block |
| |
| .type __poly1305_init_avx,\@abi-omnipotent |
| .align 32 |
| __poly1305_init_avx: |
| + push %rbp |
| + mov %rsp,%rbp |
| mov $r0,$h0 |
| mov $r1,$h1 |
| xor $h2,$h2 |
| @@ -507,12 +597,13 @@ __poly1305_init_avx: |
| mov $d1#d,`16*8+8-64`($ctx) |
| |
| lea -48-64($ctx),$ctx # size [de-]optimization |
| + pop %rbp |
| ret |
| .size __poly1305_init_avx,.-__poly1305_init_avx |
| +___ |
| |
| -.type poly1305_blocks_avx,\@function,4 |
| -.align 32 |
| -poly1305_blocks_avx: |
| +&declare_function("poly1305_blocks_avx", 32, 4); |
| +$code.=<<___; |
| .cfi_startproc |
| mov 20($ctx),%r8d # is_base2_26 |
| cmp \$128,$len |
| @@ -532,10 +623,11 @@ poly1305_blocks_avx: |
| test \$31,$len |
| jz .Leven_avx |
| |
| - push %rbx |
| -.cfi_push %rbx |
| push %rbp |
| .cfi_push %rbp |
| + mov %rsp,%rbp |
| + push %rbx |
| +.cfi_push %rbx |
| push %r12 |
| .cfi_push %r12 |
| push %r13 |
| @@ -645,20 +737,18 @@ poly1305_blocks_avx: |
| mov $h2#d,16($ctx) |
| .align 16 |
| .Ldone_avx: |
| - mov 0(%rsp),%r15 |
| + pop %r15 |
| .cfi_restore %r15 |
| - mov 8(%rsp),%r14 |
| + pop %r14 |
| .cfi_restore %r14 |
| - mov 16(%rsp),%r13 |
| + pop %r13 |
| .cfi_restore %r13 |
| - mov 24(%rsp),%r12 |
| + pop %r12 |
| .cfi_restore %r12 |
| - mov 32(%rsp),%rbp |
| -.cfi_restore %rbp |
| - mov 40(%rsp),%rbx |
| + pop %rbx |
| .cfi_restore %rbx |
| - lea 48(%rsp),%rsp |
| -.cfi_adjust_cfa_offset -48 |
| + pop %rbp |
| +.cfi_restore %rbp |
| .Lno_data_avx: |
| .Lblocks_avx_epilogue: |
| ret |
| @@ -667,10 +757,11 @@ poly1305_blocks_avx: |
| .align 32 |
| .Lbase2_64_avx: |
| .cfi_startproc |
| - push %rbx |
| -.cfi_push %rbx |
| push %rbp |
| .cfi_push %rbp |
| + mov %rsp,%rbp |
| + push %rbx |
| +.cfi_push %rbx |
| push %r12 |
| .cfi_push %r12 |
| push %r13 |
| @@ -736,22 +827,18 @@ poly1305_blocks_avx: |
| |
| .Lproceed_avx: |
| mov %r15,$len |
| - |
| - mov 0(%rsp),%r15 |
| + pop %r15 |
| .cfi_restore %r15 |
| - mov 8(%rsp),%r14 |
| + pop %r14 |
| .cfi_restore %r14 |
| - mov 16(%rsp),%r13 |
| + pop %r13 |
| .cfi_restore %r13 |
| - mov 24(%rsp),%r12 |
| + pop %r12 |
| .cfi_restore %r12 |
| - mov 32(%rsp),%rbp |
| -.cfi_restore %rbp |
| - mov 40(%rsp),%rbx |
| + pop %rbx |
| .cfi_restore %rbx |
| - lea 48(%rsp),%rax |
| - lea 48(%rsp),%rsp |
| -.cfi_adjust_cfa_offset -48 |
| + pop %rbp |
| +.cfi_restore %rbp |
| .Lbase2_64_avx_epilogue: |
| jmp .Ldo_avx |
| .cfi_endproc |
| @@ -768,8 +855,11 @@ poly1305_blocks_avx: |
| .Ldo_avx: |
| ___ |
| $code.=<<___ if (!$win64); |
| + lea 8(%rsp),%r10 |
| +.cfi_def_cfa_register %r10 |
| + and \$-32,%rsp |
| + sub \$-8,%rsp |
| lea -0x58(%rsp),%r11 |
| -.cfi_def_cfa %r11,0x60 |
| sub \$0x178,%rsp |
| ___ |
| $code.=<<___ if ($win64); |
| @@ -1361,18 +1451,18 @@ $code.=<<___ if ($win64); |
| .Ldo_avx_epilogue: |
| ___ |
| $code.=<<___ if (!$win64); |
| - lea 0x58(%r11),%rsp |
| -.cfi_def_cfa %rsp,8 |
| + lea -8(%r10),%rsp |
| +.cfi_def_cfa_register %rsp |
| ___ |
| $code.=<<___; |
| vzeroupper |
| ret |
| .cfi_endproc |
| -.size poly1305_blocks_avx,.-poly1305_blocks_avx |
| +___ |
| +&end_function("poly1305_blocks_avx"); |
| |
| -.type poly1305_emit_avx,\@function,3 |
| -.align 32 |
| -poly1305_emit_avx: |
| +&declare_function("poly1305_emit_avx", 32, 3); |
| +$code.=<<___; |
| cmpl \$0,20($ctx) # is_base2_26? |
| je .Lemit |
| |
| @@ -1423,41 +1513,51 @@ poly1305_emit_avx: |
| mov %rcx,8($mac) |
| |
| ret |
| -.size poly1305_emit_avx,.-poly1305_emit_avx |
| ___ |
| +&end_function("poly1305_emit_avx"); |
| + |
| +if ($kernel) { |
| + $code .= "#endif\n"; |
| +} |
| |
| if ($avx>1) { |
| + |
| +if ($kernel) { |
| + $code .= "#ifdef CONFIG_AS_AVX2\n"; |
| +} |
| + |
| my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = |
| map("%ymm$_",(0..15)); |
| my $S4=$MASK; |
| |
| +sub poly1305_blocks_avxN { |
| + my ($avx512) = @_; |
| + my $suffix = $avx512 ? "_avx512" : ""; |
| $code.=<<___; |
| -.type poly1305_blocks_avx2,\@function,4 |
| -.align 32 |
| -poly1305_blocks_avx2: |
| .cfi_startproc |
| mov 20($ctx),%r8d # is_base2_26 |
| cmp \$128,$len |
| - jae .Lblocks_avx2 |
| + jae .Lblocks_avx2$suffix |
| test %r8d,%r8d |
| jz .Lblocks |
| |
| -.Lblocks_avx2: |
| +.Lblocks_avx2$suffix: |
| and \$-16,$len |
| - jz .Lno_data_avx2 |
| + jz .Lno_data_avx2$suffix |
| |
| vzeroupper |
| |
| test %r8d,%r8d |
| - jz .Lbase2_64_avx2 |
| + jz .Lbase2_64_avx2$suffix |
| |
| test \$63,$len |
| - jz .Leven_avx2 |
| + jz .Leven_avx2$suffix |
| |
| - push %rbx |
| -.cfi_push %rbx |
| push %rbp |
| .cfi_push %rbp |
| + mov %rsp,%rbp |
| + push %rbx |
| +.cfi_push %rbx |
| push %r12 |
| .cfi_push %r12 |
| push %r13 |
| @@ -1466,7 +1566,7 @@ poly1305_blocks_avx2: |
| .cfi_push %r14 |
| push %r15 |
| .cfi_push %r15 |
| -.Lblocks_avx2_body: |
| +.Lblocks_avx2_body$suffix: |
| |
| mov $len,%r15 # reassign $len |
| |
| @@ -1513,7 +1613,7 @@ poly1305_blocks_avx2: |
| shr \$2,$s1 |
| add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| |
| -.Lbase2_26_pre_avx2: |
| +.Lbase2_26_pre_avx2$suffix: |
| add 0($inp),$h0 # accumulate input |
| adc 8($inp),$h1 |
| lea 16($inp),$inp |
| @@ -1524,10 +1624,10 @@ poly1305_blocks_avx2: |
| mov $r1,%rax |
| |
| test \$63,%r15 |
| - jnz .Lbase2_26_pre_avx2 |
| + jnz .Lbase2_26_pre_avx2$suffix |
| |
| test $padbit,$padbit # if $padbit is zero, |
| - jz .Lstore_base2_64_avx2 # store hash in base 2^64 format |
| + jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format |
| |
| ################################# base 2^64 -> base 2^26 |
| mov $h0,%rax |
| @@ -1548,57 +1648,56 @@ poly1305_blocks_avx2: |
| or $r1,$h2 # h[4] |
| |
| test %r15,%r15 |
| - jz .Lstore_base2_26_avx2 |
| + jz .Lstore_base2_26_avx2$suffix |
| |
| vmovd %rax#d,%x#$H0 |
| vmovd %rdx#d,%x#$H1 |
| vmovd $h0#d,%x#$H2 |
| vmovd $h1#d,%x#$H3 |
| vmovd $h2#d,%x#$H4 |
| - jmp .Lproceed_avx2 |
| + jmp .Lproceed_avx2$suffix |
| |
| .align 32 |
| -.Lstore_base2_64_avx2: |
| +.Lstore_base2_64_avx2$suffix: |
| mov $h0,0($ctx) |
| mov $h1,8($ctx) |
| mov $h2,16($ctx) # note that is_base2_26 is zeroed |
| - jmp .Ldone_avx2 |
| + jmp .Ldone_avx2$suffix |
| |
| .align 16 |
| -.Lstore_base2_26_avx2: |
| +.Lstore_base2_26_avx2$suffix: |
| mov %rax#d,0($ctx) # store hash value base 2^26 |
| mov %rdx#d,4($ctx) |
| mov $h0#d,8($ctx) |
| mov $h1#d,12($ctx) |
| mov $h2#d,16($ctx) |
| .align 16 |
| -.Ldone_avx2: |
| - mov 0(%rsp),%r15 |
| +.Ldone_avx2$suffix: |
| + pop %r15 |
| .cfi_restore %r15 |
| - mov 8(%rsp),%r14 |
| + pop %r14 |
| .cfi_restore %r14 |
| - mov 16(%rsp),%r13 |
| + pop %r13 |
| .cfi_restore %r13 |
| - mov 24(%rsp),%r12 |
| + pop %r12 |
| .cfi_restore %r12 |
| - mov 32(%rsp),%rbp |
| -.cfi_restore %rbp |
| - mov 40(%rsp),%rbx |
| + pop %rbx |
| .cfi_restore %rbx |
| - lea 48(%rsp),%rsp |
| -.cfi_adjust_cfa_offset -48 |
| -.Lno_data_avx2: |
| -.Lblocks_avx2_epilogue: |
| + pop %rbp |
| +.cfi_restore %rbp |
| +.Lno_data_avx2$suffix: |
| +.Lblocks_avx2_epilogue$suffix: |
| ret |
| .cfi_endproc |
| |
| .align 32 |
| -.Lbase2_64_avx2: |
| +.Lbase2_64_avx2$suffix: |
| .cfi_startproc |
| - push %rbx |
| -.cfi_push %rbx |
| push %rbp |
| .cfi_push %rbp |
| + mov %rsp,%rbp |
| + push %rbx |
| +.cfi_push %rbx |
| push %r12 |
| .cfi_push %r12 |
| push %r13 |
| @@ -1607,7 +1706,7 @@ poly1305_blocks_avx2: |
| .cfi_push %r14 |
| push %r15 |
| .cfi_push %r15 |
| -.Lbase2_64_avx2_body: |
| +.Lbase2_64_avx2_body$suffix: |
| |
| mov $len,%r15 # reassign $len |
| |
| @@ -1624,9 +1723,9 @@ poly1305_blocks_avx2: |
| add $r1,$s1 # s1 = r1 + (r1 >> 2) |
| |
| test \$63,$len |
| - jz .Linit_avx2 |
| + jz .Linit_avx2$suffix |
| |
| -.Lbase2_64_pre_avx2: |
| +.Lbase2_64_pre_avx2$suffix: |
| add 0($inp),$h0 # accumulate input |
| adc 8($inp),$h1 |
| lea 16($inp),$inp |
| @@ -1637,9 +1736,9 @@ poly1305_blocks_avx2: |
| mov $r1,%rax |
| |
| test \$63,%r15 |
| - jnz .Lbase2_64_pre_avx2 |
| + jnz .Lbase2_64_pre_avx2$suffix |
| |
| -.Linit_avx2: |
| +.Linit_avx2$suffix: |
| ################################# base 2^64 -> base 2^26 |
| mov $h0,%rax |
| mov $h0,%rdx |
| @@ -1667,69 +1766,77 @@ poly1305_blocks_avx2: |
| |
| call __poly1305_init_avx |
| |
| -.Lproceed_avx2: |
| +.Lproceed_avx2$suffix: |
| mov %r15,$len # restore $len |
| - mov OPENSSL_ia32cap_P+8(%rip),%r10d |
| +___ |
| +$code.=<<___ if (!$kernel); |
| + mov OPENSSL_ia32cap_P+8(%rip),%r9d |
| mov \$`(1<<31|1<<30|1<<16)`,%r11d |
| - |
| - mov 0(%rsp),%r15 |
| +___ |
| +$code.=<<___; |
| + pop %r15 |
| .cfi_restore %r15 |
| - mov 8(%rsp),%r14 |
| + pop %r14 |
| .cfi_restore %r14 |
| - mov 16(%rsp),%r13 |
| + pop %r13 |
| .cfi_restore %r13 |
| - mov 24(%rsp),%r12 |
| + pop %r12 |
| .cfi_restore %r12 |
| - mov 32(%rsp),%rbp |
| -.cfi_restore %rbp |
| - mov 40(%rsp),%rbx |
| + pop %rbx |
| .cfi_restore %rbx |
| - lea 48(%rsp),%rax |
| - lea 48(%rsp),%rsp |
| -.cfi_adjust_cfa_offset -48 |
| -.Lbase2_64_avx2_epilogue: |
| - jmp .Ldo_avx2 |
| + pop %rbp |
| +.cfi_restore %rbp |
| +.Lbase2_64_avx2_epilogue$suffix: |
| + jmp .Ldo_avx2$suffix |
| .cfi_endproc |
| |
| .align 32 |
| -.Leven_avx2: |
| +.Leven_avx2$suffix: |
| .cfi_startproc |
| - mov OPENSSL_ia32cap_P+8(%rip),%r10d |
| +___ |
| +$code.=<<___ if (!$kernel); |
| + mov OPENSSL_ia32cap_P+8(%rip),%r9d |
| +___ |
| +$code.=<<___; |
| vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 |
| vmovd 4*1($ctx),%x#$H1 |
| vmovd 4*2($ctx),%x#$H2 |
| vmovd 4*3($ctx),%x#$H3 |
| vmovd 4*4($ctx),%x#$H4 |
| |
| -.Ldo_avx2: |
| +.Ldo_avx2$suffix: |
| ___ |
| -$code.=<<___ if ($avx>2); |
| +$code.=<<___ if (!$kernel && $avx>2); |
| cmp \$512,$len |
| jb .Lskip_avx512 |
| - and %r11d,%r10d |
| - test \$`1<<16`,%r10d # check for AVX512F |
| + and %r11d,%r9d |
| + test \$`1<<16`,%r9d # check for AVX512F |
| jnz .Lblocks_avx512 |
| -.Lskip_avx512: |
| +.Lskip_avx512$suffix: |
| +___ |
| +$code.=<<___ if ($avx > 2 && $avx512 && $kernel); |
| + cmp \$512,$len |
| + jae .Lblocks_avx512 |
| ___ |
| $code.=<<___ if (!$win64); |
| - lea -8(%rsp),%r11 |
| -.cfi_def_cfa %r11,16 |
| + lea 8(%rsp),%r10 |
| +.cfi_def_cfa_register %r10 |
| sub \$0x128,%rsp |
| ___ |
| $code.=<<___ if ($win64); |
| - lea -0xf8(%rsp),%r11 |
| + lea 8(%rsp),%r10 |
| sub \$0x1c8,%rsp |
| - vmovdqa %xmm6,0x50(%r11) |
| - vmovdqa %xmm7,0x60(%r11) |
| - vmovdqa %xmm8,0x70(%r11) |
| - vmovdqa %xmm9,0x80(%r11) |
| - vmovdqa %xmm10,0x90(%r11) |
| - vmovdqa %xmm11,0xa0(%r11) |
| - vmovdqa %xmm12,0xb0(%r11) |
| - vmovdqa %xmm13,0xc0(%r11) |
| - vmovdqa %xmm14,0xd0(%r11) |
| - vmovdqa %xmm15,0xe0(%r11) |
| -.Ldo_avx2_body: |
| + vmovdqa %xmm6,-0xb0(%r10) |
| + vmovdqa %xmm7,-0xa0(%r10) |
| + vmovdqa %xmm8,-0x90(%r10) |
| + vmovdqa %xmm9,-0x80(%r10) |
| + vmovdqa %xmm10,-0x70(%r10) |
| + vmovdqa %xmm11,-0x60(%r10) |
| + vmovdqa %xmm12,-0x50(%r10) |
| + vmovdqa %xmm13,-0x40(%r10) |
| + vmovdqa %xmm14,-0x30(%r10) |
| + vmovdqa %xmm15,-0x20(%r10) |
| +.Ldo_avx2_body$suffix: |
| ___ |
| $code.=<<___; |
| lea .Lconst(%rip),%rcx |
| @@ -1794,11 +1901,11 @@ $code.=<<___; |
| |
| vpaddq $H2,$T2,$H2 # accumulate input |
| sub \$64,$len |
| - jz .Ltail_avx2 |
| - jmp .Loop_avx2 |
| + jz .Ltail_avx2$suffix |
| + jmp .Loop_avx2$suffix |
| |
| .align 32 |
| -.Loop_avx2: |
| +.Loop_avx2$suffix: |
| ################################################################ |
| # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 |
| # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 |
| @@ -1946,10 +2053,10 @@ $code.=<<___; |
| vpor 32(%rcx),$T4,$T4 # padbit, yes, always |
| |
| sub \$64,$len |
| - jnz .Loop_avx2 |
| + jnz .Loop_avx2$suffix |
| |
| .byte 0x66,0x90 |
| -.Ltail_avx2: |
| +.Ltail_avx2$suffix: |
| ################################################################ |
| # while above multiplications were by r^4 in all lanes, in last |
| # iteration we multiply least significant lane by r^4 and most |
| @@ -2087,37 +2194,29 @@ $code.=<<___; |
| vmovd %x#$H4,`4*4-48-64`($ctx) |
| ___ |
| $code.=<<___ if ($win64); |
| - vmovdqa 0x50(%r11),%xmm6 |
| - vmovdqa 0x60(%r11),%xmm7 |
| - vmovdqa 0x70(%r11),%xmm8 |
| - vmovdqa 0x80(%r11),%xmm9 |
| - vmovdqa 0x90(%r11),%xmm10 |
| - vmovdqa 0xa0(%r11),%xmm11 |
| - vmovdqa 0xb0(%r11),%xmm12 |
| - vmovdqa 0xc0(%r11),%xmm13 |
| - vmovdqa 0xd0(%r11),%xmm14 |
| - vmovdqa 0xe0(%r11),%xmm15 |
| - lea 0xf8(%r11),%rsp |
| -.Ldo_avx2_epilogue: |
| + vmovdqa -0xb0(%r10),%xmm6 |
| + vmovdqa -0xa0(%r10),%xmm7 |
| + vmovdqa -0x90(%r10),%xmm8 |
| + vmovdqa -0x80(%r10),%xmm9 |
| + vmovdqa -0x70(%r10),%xmm10 |
| + vmovdqa -0x60(%r10),%xmm11 |
| + vmovdqa -0x50(%r10),%xmm12 |
| + vmovdqa -0x40(%r10),%xmm13 |
| + vmovdqa -0x30(%r10),%xmm14 |
| + vmovdqa -0x20(%r10),%xmm15 |
| + lea -8(%r10),%rsp |
| +.Ldo_avx2_epilogue$suffix: |
| ___ |
| $code.=<<___ if (!$win64); |
| - lea 8(%r11),%rsp |
| -.cfi_def_cfa %rsp,8 |
| + lea -8(%r10),%rsp |
| +.cfi_def_cfa_register %rsp |
| ___ |
| $code.=<<___; |
| vzeroupper |
| ret |
| .cfi_endproc |
| -.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 |
| ___ |
| -####################################################################### |
| -if ($avx>2) { |
| -# On entry we have input length divisible by 64. But since inner loop |
| -# processes 128 bytes per iteration, cases when length is not divisible |
| -# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this |
| -# reason stack layout is kept identical to poly1305_blocks_avx2. If not |
| -# for this tail, we wouldn't have to even allocate stack frame... |
| - |
| +if($avx > 2 && $avx512) { |
| my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); |
| my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); |
| my $PADBIT="%zmm30"; |
| @@ -2128,32 +2227,29 @@ map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); |
| map(s/%y/%z/,($MASK)); |
| |
| $code.=<<___; |
| -.type poly1305_blocks_avx512,\@function,4 |
| -.align 32 |
| -poly1305_blocks_avx512: |
| .cfi_startproc |
| .Lblocks_avx512: |
| mov \$15,%eax |
| kmovw %eax,%k2 |
| ___ |
| $code.=<<___ if (!$win64); |
| - lea -8(%rsp),%r11 |
| -.cfi_def_cfa %r11,16 |
| + lea 8(%rsp),%r10 |
| +.cfi_def_cfa_register %r10 |
| sub \$0x128,%rsp |
| ___ |
| $code.=<<___ if ($win64); |
| - lea -0xf8(%rsp),%r11 |
| + lea 8(%rsp),%r10 |
| sub \$0x1c8,%rsp |
| - vmovdqa %xmm6,0x50(%r11) |
| - vmovdqa %xmm7,0x60(%r11) |
| - vmovdqa %xmm8,0x70(%r11) |
| - vmovdqa %xmm9,0x80(%r11) |
| - vmovdqa %xmm10,0x90(%r11) |
| - vmovdqa %xmm11,0xa0(%r11) |
| - vmovdqa %xmm12,0xb0(%r11) |
| - vmovdqa %xmm13,0xc0(%r11) |
| - vmovdqa %xmm14,0xd0(%r11) |
| - vmovdqa %xmm15,0xe0(%r11) |
| + vmovdqa %xmm6,-0xb0(%r10) |
| + vmovdqa %xmm7,-0xa0(%r10) |
| + vmovdqa %xmm8,-0x90(%r10) |
| + vmovdqa %xmm9,-0x80(%r10) |
| + vmovdqa %xmm10,-0x70(%r10) |
| + vmovdqa %xmm11,-0x60(%r10) |
| + vmovdqa %xmm12,-0x50(%r10) |
| + vmovdqa %xmm13,-0x40(%r10) |
| + vmovdqa %xmm14,-0x30(%r10) |
| + vmovdqa %xmm15,-0x20(%r10) |
| .Ldo_avx512_body: |
| ___ |
| $code.=<<___; |
| @@ -2679,7 +2775,7 @@ $code.=<<___; |
| |
| lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 |
| add \$64,$len |
| - jnz .Ltail_avx2 |
| + jnz .Ltail_avx2$suffix |
| |
| vpsubq $T2,$H2,$H2 # undo input accumulation |
| vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced |
| @@ -2690,29 +2786,61 @@ $code.=<<___; |
| vzeroall |
| ___ |
| $code.=<<___ if ($win64); |
| - movdqa 0x50(%r11),%xmm6 |
| - movdqa 0x60(%r11),%xmm7 |
| - movdqa 0x70(%r11),%xmm8 |
| - movdqa 0x80(%r11),%xmm9 |
| - movdqa 0x90(%r11),%xmm10 |
| - movdqa 0xa0(%r11),%xmm11 |
| - movdqa 0xb0(%r11),%xmm12 |
| - movdqa 0xc0(%r11),%xmm13 |
| - movdqa 0xd0(%r11),%xmm14 |
| - movdqa 0xe0(%r11),%xmm15 |
| - lea 0xf8(%r11),%rsp |
| + movdqa -0xb0(%r10),%xmm6 |
| + movdqa -0xa0(%r10),%xmm7 |
| + movdqa -0x90(%r10),%xmm8 |
| + movdqa -0x80(%r10),%xmm9 |
| + movdqa -0x70(%r10),%xmm10 |
| + movdqa -0x60(%r10),%xmm11 |
| + movdqa -0x50(%r10),%xmm12 |
| + movdqa -0x40(%r10),%xmm13 |
| + movdqa -0x30(%r10),%xmm14 |
| + movdqa -0x20(%r10),%xmm15 |
| + lea -8(%r10),%rsp |
| .Ldo_avx512_epilogue: |
| ___ |
| $code.=<<___ if (!$win64); |
| - lea 8(%r11),%rsp |
| -.cfi_def_cfa %rsp,8 |
| + lea -8(%r10),%rsp |
| +.cfi_def_cfa_register %rsp |
| ___ |
| $code.=<<___; |
| ret |
| .cfi_endproc |
| -.size poly1305_blocks_avx512,.-poly1305_blocks_avx512 |
| ___ |
| -if ($avx>3) { |
| + |
| +} |
| + |
| +} |
| + |
| +&declare_function("poly1305_blocks_avx2", 32, 4); |
| +poly1305_blocks_avxN(0); |
| +&end_function("poly1305_blocks_avx2"); |
| + |
| +if($kernel) { |
| + $code .= "#endif\n"; |
| +} |
| + |
| +####################################################################### |
| +if ($avx>2) { |
| +# On entry we have input length divisible by 64. But since inner loop |
| +# processes 128 bytes per iteration, cases when length is not divisible |
| +# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this |
| +# reason stack layout is kept identical to poly1305_blocks_avx2. If not |
| +# for this tail, we wouldn't have to even allocate stack frame... |
| + |
| +if($kernel) { |
| + $code .= "#ifdef CONFIG_AS_AVX512\n"; |
| +} |
| + |
| +&declare_function("poly1305_blocks_avx512", 32, 4); |
| +poly1305_blocks_avxN(1); |
| +&end_function("poly1305_blocks_avx512"); |
| + |
| +if ($kernel) { |
| + $code .= "#endif\n"; |
| +} |
| + |
| +if (!$kernel && $avx>3) { |
| ######################################################################## |
| # VPMADD52 version using 2^44 radix. |
| # |
| @@ -3753,45 +3881,9 @@ poly1305_emit_base2_44: |
| .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 |
| ___ |
| } } } |
| -$code.=<<___; |
| -.align 64 |
| -.Lconst: |
| -.Lmask24: |
| -.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 |
| -.L129: |
| -.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 |
| -.Lmask26: |
| -.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 |
| -.Lpermd_avx2: |
| -.long 2,2,2,3,2,0,2,1 |
| -.Lpermd_avx512: |
| -.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 |
| - |
| -.L2_44_inp_permd: |
| -.long 0,1,1,2,2,3,7,7 |
| -.L2_44_inp_shift: |
| -.quad 0,12,24,64 |
| -.L2_44_mask: |
| -.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff |
| -.L2_44_shift_rgt: |
| -.quad 44,44,42,64 |
| -.L2_44_shift_lft: |
| -.quad 8,8,10,64 |
| - |
| -.align 64 |
| -.Lx_mask44: |
| -.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| -.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff |
| -.Lx_mask42: |
| -.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| -.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff |
| -___ |
| } |
| -$code.=<<___; |
| -.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" |
| -.align 16 |
| -___ |
| |
| +if (!$kernel) |
| { # chacha20-poly1305 helpers |
| my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order |
| ("%rdi","%rsi","%rdx","%rcx"); # Unix order |
| @@ -4038,17 +4130,17 @@ avx_handler: |
| |
| .section .pdata |
| .align 4 |
| - .rva .LSEH_begin_poly1305_init |
| - .rva .LSEH_end_poly1305_init |
| - .rva .LSEH_info_poly1305_init |
| + .rva .LSEH_begin_poly1305_init_x86_64 |
| + .rva .LSEH_end_poly1305_init_x86_64 |
| + .rva .LSEH_info_poly1305_init_x86_64 |
| |
| - .rva .LSEH_begin_poly1305_blocks |
| - .rva .LSEH_end_poly1305_blocks |
| - .rva .LSEH_info_poly1305_blocks |
| + .rva .LSEH_begin_poly1305_blocks_x86_64 |
| + .rva .LSEH_end_poly1305_blocks_x86_64 |
| + .rva .LSEH_info_poly1305_blocks_x86_64 |
| |
| - .rva .LSEH_begin_poly1305_emit |
| - .rva .LSEH_end_poly1305_emit |
| - .rva .LSEH_info_poly1305_emit |
| + .rva .LSEH_begin_poly1305_emit_x86_64 |
| + .rva .LSEH_end_poly1305_emit_x86_64 |
| + .rva .LSEH_info_poly1305_emit_x86_64 |
| ___ |
| $code.=<<___ if ($avx); |
| .rva .LSEH_begin_poly1305_blocks_avx |
| @@ -4088,20 +4180,20 @@ ___ |
| $code.=<<___; |
| .section .xdata |
| .align 8 |
| -.LSEH_info_poly1305_init: |
| +.LSEH_info_poly1305_init_x86_64: |
| .byte 9,0,0,0 |
| .rva se_handler |
| - .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init |
| + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 |
| |
| -.LSEH_info_poly1305_blocks: |
| +.LSEH_info_poly1305_blocks_x86_64: |
| .byte 9,0,0,0 |
| .rva se_handler |
| .rva .Lblocks_body,.Lblocks_epilogue |
| |
| -.LSEH_info_poly1305_emit: |
| +.LSEH_info_poly1305_emit_x86_64: |
| .byte 9,0,0,0 |
| .rva se_handler |
| - .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit |
| + .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64 |
| ___ |
| $code.=<<___ if ($avx); |
| .LSEH_info_poly1305_blocks_avx_1: |
| @@ -4148,12 +4240,26 @@ $code.=<<___ if ($avx>2); |
| ___ |
| } |
| |
| +open SELF,$0; |
| +while(<SELF>) { |
| + next if (/^#!/); |
| + last if (!s/^#/\/\// and !/^$/); |
| + print; |
| +} |
| +close SELF; |
| + |
| foreach (split('\n',$code)) { |
| s/\`([^\`]*)\`/eval($1)/ge; |
| s/%r([a-z]+)#d/%e$1/g; |
| s/%r([0-9]+)#d/%r$1d/g; |
| s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; |
| |
| + if ($kernel) { |
| + s/(^\.type.*),[0-9]+$/\1/; |
| + s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/; |
| + next if /^\.cfi.*/; |
| + } |
| + |
| print $_,"\n"; |
| } |
| close STDOUT; |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index edb7113e36f3..657363588e0c 100644 |
| |
| |
| @@ -1,8 +1,6 @@ |
| -// SPDX-License-Identifier: GPL-2.0-or-later |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| /* |
| - * Poly1305 authenticator algorithm, RFC7539, SIMD glue code |
| - * |
| - * Copyright (C) 2015 Martin Willi |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| */ |
| |
| #include <crypto/algapi.h> |
| @@ -13,279 +11,170 @@ |
| #include <linux/jump_label.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| +#include <asm/intel-family.h> |
| #include <asm/simd.h> |
| |
| -asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, |
| - const u32 *r, unsigned int blocks); |
| -asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, |
| - unsigned int blocks, const u32 *u); |
| -asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, |
| - unsigned int blocks, const u32 *u); |
| - |
| -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); |
| +asmlinkage void poly1305_init_x86_64(void *ctx, |
| + const u8 key[POLY1305_KEY_SIZE]); |
| +asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, |
| + const size_t len, const u32 padbit); |
| +asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |
| + const u32 nonce[4]); |
| +asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |
| + const u32 nonce[4]); |
| +asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, |
| + const u32 padbit); |
| +asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, |
| + const u32 padbit); |
| +asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, |
| + const size_t len, const u32 padbit); |
| + |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); |
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); |
| + |
| +struct poly1305_arch_internal { |
| + union { |
| + struct { |
| + u32 h[5]; |
| + u32 is_base2_26; |
| + }; |
| + u64 hs[3]; |
| + }; |
| + u64 r[2]; |
| + u64 pad; |
| + struct { u32 r2, r1, r4, r3; } rn[9]; |
| +}; |
| |
| -static inline u64 mlt(u64 a, u64 b) |
| +/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit |
| + * the unfortunate situation of using AVX and then having to go back to scalar |
| + * -- because the user is silly and has called the update function from two |
| + * separate contexts -- then we need to convert back to the original base before |
| + * proceeding. It is possible to reason that the initial reduction below is |
| + * sufficient given the implementation invariants. However, for the avoidance of |
| + * doubt and because this is not performance critical, we do the full reduction |
| + * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py |
| + */ |
| +static void convert_to_base2_64(void *ctx) |
| { |
| - return a * b; |
| -} |
| + struct poly1305_arch_internal *state = ctx; |
| + u32 cy; |
| |
| -static inline u32 sr(u64 v, u_char n) |
| -{ |
| - return v >> n; |
| -} |
| + if (!state->is_base2_26) |
| + return; |
| |
| -static inline u32 and(u32 v, u32 mask) |
| -{ |
| - return v & mask; |
| + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; |
| + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; |
| + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; |
| + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; |
| + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; |
| + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); |
| + state->hs[2] = state->h[4] >> 24; |
| +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) |
| + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); |
| + state->hs[2] &= 3; |
| + state->hs[0] += cy; |
| + state->hs[1] += (cy = ULT(state->hs[0], cy)); |
| + state->hs[2] += ULT(state->hs[1], cy); |
| +#undef ULT |
| + state->is_base2_26 = 0; |
| } |
| |
| -static void poly1305_simd_mult(u32 *a, const u32 *b) |
| +static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) |
| { |
| - u8 m[POLY1305_BLOCK_SIZE]; |
| - |
| - memset(m, 0, sizeof(m)); |
| - /* The poly1305 block function adds a hi-bit to the accumulator which |
| - * we don't need for key multiplication; compensate for it. */ |
| - a[4] -= 1 << 24; |
| - poly1305_block_sse2(a, m, b, 1); |
| + poly1305_init_x86_64(ctx, key); |
| } |
| |
| -static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key) |
| +static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, |
| + const u32 padbit) |
| { |
| - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ |
| - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; |
| - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; |
| - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; |
| - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; |
| - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; |
| -} |
| + struct poly1305_arch_internal *state = ctx; |
| |
| -static void poly1305_integer_blocks(struct poly1305_state *state, |
| - const struct poly1305_key *key, |
| - const void *src, |
| - unsigned int nblocks, u32 hibit) |
| -{ |
| - u32 r0, r1, r2, r3, r4; |
| - u32 s1, s2, s3, s4; |
| - u32 h0, h1, h2, h3, h4; |
| - u64 d0, d1, d2, d3, d4; |
| + /* SIMD disables preemption, so relax after processing each page. */ |
| + BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || |
| + PAGE_SIZE % POLY1305_BLOCK_SIZE); |
| |
| - if (!nblocks) |
| + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || |
| + (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || |
| + !crypto_simd_usable()) { |
| + convert_to_base2_64(ctx); |
| + poly1305_blocks_x86_64(ctx, inp, len, padbit); |
| return; |
| + } |
| |
| - r0 = key->r[0]; |
| - r1 = key->r[1]; |
| - r2 = key->r[2]; |
| - r3 = key->r[3]; |
| - r4 = key->r[4]; |
| - |
| - s1 = r1 * 5; |
| - s2 = r2 * 5; |
| - s3 = r3 * 5; |
| - s4 = r4 * 5; |
| - |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - do { |
| - /* h += m[i] */ |
| - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; |
| - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; |
| - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; |
| - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; |
| - h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); |
| - |
| - /* h *= r */ |
| - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + |
| - mlt(h3, s2) + mlt(h4, s1); |
| - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + |
| - mlt(h3, s3) + mlt(h4, s2); |
| - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + |
| - mlt(h3, s4) + mlt(h4, s3); |
| - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + |
| - mlt(h3, r0) + mlt(h4, s4); |
| - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + |
| - mlt(h3, r1) + mlt(h4, r0); |
| - |
| - /* (partial) h %= p */ |
| - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); |
| - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); |
| - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); |
| - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); |
| - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); |
| - h1 += h0 >> 26; h0 = h0 & 0x3ffffff; |
| - |
| - src += POLY1305_BLOCK_SIZE; |
| - } while (--nblocks); |
| - |
| - state->h[0] = h0; |
| - state->h[1] = h1; |
| - state->h[2] = h2; |
| - state->h[3] = h3; |
| - state->h[4] = h4; |
| + for (;;) { |
| + const size_t bytes = min_t(size_t, len, PAGE_SIZE); |
| + |
| + kernel_fpu_begin(); |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) |
| + poly1305_blocks_avx512(ctx, inp, bytes, padbit); |
| + else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2)) |
| + poly1305_blocks_avx2(ctx, inp, bytes, padbit); |
| + else |
| + poly1305_blocks_avx(ctx, inp, bytes, padbit); |
| + kernel_fpu_end(); |
| + len -= bytes; |
| + if (!len) |
| + break; |
| + inp += bytes; |
| + } |
| } |
| |
| -static void poly1305_integer_emit(const struct poly1305_state *state, void *dst) |
| +static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |
| + const u32 nonce[4]) |
| { |
| - u32 h0, h1, h2, h3, h4; |
| - u32 g0, g1, g2, g3, g4; |
| - u32 mask; |
| - |
| - /* fully carry h */ |
| - h0 = state->h[0]; |
| - h1 = state->h[1]; |
| - h2 = state->h[2]; |
| - h3 = state->h[3]; |
| - h4 = state->h[4]; |
| - |
| - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; |
| - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; |
| - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; |
| - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; |
| - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; |
| - |
| - /* compute h + -p */ |
| - g0 = h0 + 5; |
| - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; |
| - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; |
| - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; |
| - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; |
| - |
| - /* select h if h < p, or h + -p if h >= p */ |
| - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; |
| - g0 &= mask; |
| - g1 &= mask; |
| - g2 &= mask; |
| - g3 &= mask; |
| - g4 &= mask; |
| - mask = ~mask; |
| - h0 = (h0 & mask) | g0; |
| - h1 = (h1 & mask) | g1; |
| - h2 = (h2 & mask) | g2; |
| - h3 = (h3 & mask) | g3; |
| - h4 = (h4 & mask) | g4; |
| - |
| - /* h = h % (2^128) */ |
| - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); |
| - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); |
| - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); |
| - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); |
| + struct poly1305_arch_internal *state = ctx; |
| + |
| + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || |
| + !state->is_base2_26 || !crypto_simd_usable()) { |
| + convert_to_base2_64(ctx); |
| + poly1305_emit_x86_64(ctx, mac, nonce); |
| + } else |
| + poly1305_emit_avx(ctx, mac, nonce); |
| } |
| |
| -void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) |
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) |
| { |
| - poly1305_integer_setkey(desc->opaque_r, key); |
| - desc->s[0] = get_unaligned_le32(key + 16); |
| - desc->s[1] = get_unaligned_le32(key + 20); |
| - desc->s[2] = get_unaligned_le32(key + 24); |
| - desc->s[3] = get_unaligned_le32(key + 28); |
| - poly1305_core_init(&desc->h); |
| - desc->buflen = 0; |
| - desc->sset = true; |
| - desc->rset = 1; |
| + poly1305_simd_init(&dctx->h, key); |
| + dctx->s[0] = get_unaligned_le32(&key[16]); |
| + dctx->s[1] = get_unaligned_le32(&key[20]); |
| + dctx->s[2] = get_unaligned_le32(&key[24]); |
| + dctx->s[3] = get_unaligned_le32(&key[28]); |
| + dctx->buflen = 0; |
| + dctx->sset = true; |
| } |
| -EXPORT_SYMBOL_GPL(poly1305_init_arch); |
| +EXPORT_SYMBOL(poly1305_init_arch); |
| |
| -static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen) |
| +static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, |
| + const u8 *inp, unsigned int len) |
| { |
| - if (!dctx->sset) { |
| - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_integer_setkey(dctx->r, src); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| + unsigned int acc = 0; |
| + if (unlikely(!dctx->sset)) { |
| + if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) { |
| + poly1305_simd_init(&dctx->h, inp); |
| + inp += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + acc += POLY1305_BLOCK_SIZE; |
| dctx->rset = 1; |
| } |
| - if (srclen >= POLY1305_BLOCK_SIZE) { |
| - dctx->s[0] = get_unaligned_le32(src + 0); |
| - dctx->s[1] = get_unaligned_le32(src + 4); |
| - dctx->s[2] = get_unaligned_le32(src + 8); |
| - dctx->s[3] = get_unaligned_le32(src + 12); |
| - src += POLY1305_BLOCK_SIZE; |
| - srclen -= POLY1305_BLOCK_SIZE; |
| + if (len >= POLY1305_BLOCK_SIZE) { |
| + dctx->s[0] = get_unaligned_le32(&inp[0]); |
| + dctx->s[1] = get_unaligned_le32(&inp[4]); |
| + dctx->s[2] = get_unaligned_le32(&inp[8]); |
| + dctx->s[3] = get_unaligned_le32(&inp[12]); |
| + inp += POLY1305_BLOCK_SIZE; |
| + len -= POLY1305_BLOCK_SIZE; |
| + acc += POLY1305_BLOCK_SIZE; |
| dctx->sset = true; |
| } |
| } |
| - return srclen; |
| -} |
| - |
| -static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen) |
| -{ |
| - unsigned int datalen; |
| - |
| - if (unlikely(!dctx->sset)) { |
| - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); |
| - src += srclen - datalen; |
| - srclen = datalen; |
| - } |
| - if (srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src, |
| - srclen / POLY1305_BLOCK_SIZE, 1); |
| - srclen %= POLY1305_BLOCK_SIZE; |
| - } |
| - return srclen; |
| -} |
| - |
| -static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| - const u8 *src, unsigned int srclen) |
| -{ |
| - unsigned int blocks, datalen; |
| - |
| - if (unlikely(!dctx->sset)) { |
| - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); |
| - src += srclen - datalen; |
| - srclen = datalen; |
| - } |
| - |
| - if (IS_ENABLED(CONFIG_AS_AVX2) && |
| - static_branch_likely(&poly1305_use_avx2) && |
| - srclen >= POLY1305_BLOCK_SIZE * 4) { |
| - if (unlikely(dctx->rset < 4)) { |
| - if (dctx->rset < 2) { |
| - dctx->r[1] = dctx->r[0]; |
| - poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| - } |
| - dctx->r[2] = dctx->r[1]; |
| - poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); |
| - dctx->r[3] = dctx->r[2]; |
| - poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); |
| - dctx->rset = 4; |
| - } |
| - blocks = srclen / (POLY1305_BLOCK_SIZE * 4); |
| - poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, |
| - dctx->r[1].r); |
| - src += POLY1305_BLOCK_SIZE * 4 * blocks; |
| - srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; |
| - } |
| - |
| - if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { |
| - if (unlikely(dctx->rset < 2)) { |
| - dctx->r[1] = dctx->r[0]; |
| - poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| - dctx->rset = 2; |
| - } |
| - blocks = srclen / (POLY1305_BLOCK_SIZE * 2); |
| - poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, |
| - blocks, dctx->r[1].r); |
| - src += POLY1305_BLOCK_SIZE * 2 * blocks; |
| - srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; |
| - } |
| - if (srclen >= POLY1305_BLOCK_SIZE) { |
| - poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); |
| - srclen -= POLY1305_BLOCK_SIZE; |
| - } |
| - return srclen; |
| + return acc; |
| } |
| |
| void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int srclen) |
| { |
| - unsigned int bytes; |
| + unsigned int bytes, used; |
| |
| if (unlikely(dctx->buflen)) { |
| bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); |
| @@ -295,31 +184,19 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| dctx->buflen += bytes; |
| |
| if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
| - if (static_branch_likely(&poly1305_use_simd) && |
| - likely(crypto_simd_usable())) { |
| - kernel_fpu_begin(); |
| - poly1305_simd_blocks(dctx, dctx->buf, |
| - POLY1305_BLOCK_SIZE); |
| - kernel_fpu_end(); |
| - } else { |
| - poly1305_scalar_blocks(dctx, dctx->buf, |
| - POLY1305_BLOCK_SIZE); |
| - } |
| + if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE))) |
| + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); |
| dctx->buflen = 0; |
| } |
| } |
| |
| if (likely(srclen >= POLY1305_BLOCK_SIZE)) { |
| - if (static_branch_likely(&poly1305_use_simd) && |
| - likely(crypto_simd_usable())) { |
| - kernel_fpu_begin(); |
| - bytes = poly1305_simd_blocks(dctx, src, srclen); |
| - kernel_fpu_end(); |
| - } else { |
| - bytes = poly1305_scalar_blocks(dctx, src, srclen); |
| - } |
| - src += srclen - bytes; |
| - srclen = bytes; |
| + bytes = round_down(srclen, POLY1305_BLOCK_SIZE); |
| + srclen -= bytes; |
| + used = crypto_poly1305_setdctxkey(dctx, src, bytes); |
| + if (likely(bytes - used)) |
| + poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1); |
| + src += bytes; |
| } |
| |
| if (unlikely(srclen)) { |
| @@ -329,31 +206,17 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| } |
| EXPORT_SYMBOL(poly1305_update_arch); |
| |
| -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst) |
| +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| { |
| - __le32 digest[4]; |
| - u64 f = 0; |
| - |
| - if (unlikely(desc->buflen)) { |
| - desc->buf[desc->buflen++] = 1; |
| - memset(desc->buf + desc->buflen, 0, |
| - POLY1305_BLOCK_SIZE - desc->buflen); |
| - poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0); |
| + if (unlikely(dctx->buflen)) { |
| + dctx->buf[dctx->buflen++] = 1; |
| + memset(dctx->buf + dctx->buflen, 0, |
| + POLY1305_BLOCK_SIZE - dctx->buflen); |
| + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| } |
| |
| - poly1305_integer_emit(&desc->h, digest); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; |
| - put_unaligned_le32(f, dst + 0); |
| - f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; |
| - put_unaligned_le32(f, dst + 12); |
| - |
| - *desc = (struct poly1305_desc_ctx){}; |
| + poly1305_simd_emit(&dctx->h, dst, dctx->s); |
| + *dctx = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL(poly1305_final_arch); |
| |
| @@ -361,38 +224,34 @@ static int crypto_poly1305_init(struct shash_desc *desc) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| |
| - poly1305_core_init(&dctx->h); |
| - dctx->buflen = 0; |
| - dctx->rset = 0; |
| - dctx->sset = false; |
| - |
| + *dctx = (struct poly1305_desc_ctx){}; |
| return 0; |
| } |
| |
| -static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| +static int crypto_poly1305_update(struct shash_desc *desc, |
| + const u8 *src, unsigned int srclen) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| |
| - if (unlikely(!dctx->sset)) |
| - return -ENOKEY; |
| - |
| - poly1305_final_arch(dctx, dst); |
| + poly1305_update_arch(dctx, src, srclen); |
| return 0; |
| } |
| |
| -static int poly1305_simd_update(struct shash_desc *desc, |
| - const u8 *src, unsigned int srclen) |
| +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) |
| { |
| struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
| |
| - poly1305_update_arch(dctx, src, srclen); |
| + if (unlikely(!dctx->sset)) |
| + return -ENOKEY; |
| + |
| + poly1305_final_arch(dctx, dst); |
| return 0; |
| } |
| |
| static struct shash_alg alg = { |
| .digestsize = POLY1305_DIGEST_SIZE, |
| .init = crypto_poly1305_init, |
| - .update = poly1305_simd_update, |
| + .update = crypto_poly1305_update, |
| .final = crypto_poly1305_final, |
| .descsize = sizeof(struct poly1305_desc_ctx), |
| .base = { |
| @@ -406,17 +265,19 @@ static struct shash_alg alg = { |
| |
| static int __init poly1305_simd_mod_init(void) |
| { |
| - if (!boot_cpu_has(X86_FEATURE_XMM2)) |
| - return 0; |
| - |
| - static_branch_enable(&poly1305_use_simd); |
| - |
| - if (IS_ENABLED(CONFIG_AS_AVX2) && |
| - boot_cpu_has(X86_FEATURE_AVX) && |
| + if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) && |
| + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) |
| + static_branch_enable(&poly1305_use_avx); |
| + if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && |
| boot_cpu_has(X86_FEATURE_AVX2) && |
| cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) |
| static_branch_enable(&poly1305_use_avx2); |
| - |
| + if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && |
| + boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && |
| + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && |
| + /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ |
| + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X) |
| + static_branch_enable(&poly1305_use_avx512); |
| return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; |
| } |
| |
| @@ -430,7 +291,7 @@ module_init(poly1305_simd_mod_init); |
| module_exit(poly1305_simd_mod_exit); |
| |
| MODULE_LICENSE("GPL"); |
| -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| MODULE_DESCRIPTION("Poly1305 authenticator"); |
| MODULE_ALIAS_CRYPTO("poly1305"); |
| MODULE_ALIAS_CRYPTO("poly1305-simd"); |
| diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig |
| index 0b2c4fce26d9..14c032de276e 100644 |
| |
| |
| @@ -90,7 +90,7 @@ config CRYPTO_LIB_DES |
| config CRYPTO_LIB_POLY1305_RSIZE |
| int |
| default 2 if MIPS |
| - default 4 if X86_64 |
| + default 11 if X86_64 |
| default 9 if ARM || ARM64 |
| default 1 |
| |
| -- |
| 2.18.2 |
| |
| |
| From 35dd657ae5c12cfd9ff4322040b3a8f2ae079f96 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 5 Jan 2020 22:40:49 -0500 |
| Subject: [PATCH 044/100] crypto: {arm,arm64,mips}/poly1305 - remove redundant |
| non-reduction from emit |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit 31899908a0d248b030b4464425b86c717e0007d4 upstream. |
| |
| This appears to be some kind of copy and paste error, and is actually |
| dead code. |
| |
| Pre: f = 0 ⇒ (f >> 32) = 0 |
| f = (f >> 32) + le32_to_cpu(digest[0]); |
| Post: 0 ≤ f < 2³² |
| put_unaligned_le32(f, dst); |
| |
| Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 |
| f = (f >> 32) + le32_to_cpu(digest[1]); |
| Post: 0 ≤ f < 2³² |
| put_unaligned_le32(f, dst + 4); |
| |
| Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 |
| f = (f >> 32) + le32_to_cpu(digest[2]); |
| Post: 0 ≤ f < 2³² |
| put_unaligned_le32(f, dst + 8); |
| |
| Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 |
| f = (f >> 32) + le32_to_cpu(digest[3]); |
| Post: 0 ≤ f < 2³² |
| put_unaligned_le32(f, dst + 12); |
| |
| Therefore this sequence is redundant. And Andy's code appears to handle |
| misalignment acceptably. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Tested-by: Ard Biesheuvel <ardb@kernel.org> |
| Reviewed-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/poly1305-glue.c | 18 ++---------------- |
| arch/arm64/crypto/poly1305-glue.c | 18 ++---------------- |
| arch/mips/crypto/poly1305-glue.c | 18 ++---------------- |
| 3 files changed, 6 insertions(+), 48 deletions(-) |
| |
| diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c |
| index abe3f2d587dc..ceec04ec2f40 100644 |
| |
| |
| @@ -20,7 +20,7 @@ |
| |
| void poly1305_init_arm(void *state, const u8 *key); |
| void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); |
| -void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); |
| +void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); |
| |
| void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) |
| { |
| @@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch); |
| |
| void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| { |
| - __le32 digest[4]; |
| - u64 f = 0; |
| - |
| if (unlikely(dctx->buflen)) { |
| dctx->buf[dctx->buflen++] = 1; |
| memset(dctx->buf + dctx->buflen, 0, |
| @@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| } |
| |
| - poly1305_emit_arm(&dctx->h, digest, dctx->s); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]); |
| - put_unaligned_le32(f, dst); |
| - f = (f >> 32) + le32_to_cpu(digest[1]); |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]); |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]); |
| - put_unaligned_le32(f, dst + 12); |
| - |
| + poly1305_emit_arm(&dctx->h, dst, dctx->s); |
| *dctx = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL(poly1305_final_arch); |
| diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c |
| index 83a2338a8826..e97b092f56b8 100644 |
| |
| |
| @@ -21,7 +21,7 @@ |
| asmlinkage void poly1305_init_arm64(void *state, const u8 *key); |
| asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); |
| asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); |
| -asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); |
| +asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); |
| |
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
| |
| @@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch); |
| |
| void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| { |
| - __le32 digest[4]; |
| - u64 f = 0; |
| - |
| if (unlikely(dctx->buflen)) { |
| dctx->buf[dctx->buflen++] = 1; |
| memset(dctx->buf + dctx->buflen, 0, |
| @@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| } |
| |
| - poly1305_emit(&dctx->h, digest, dctx->s); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]); |
| - put_unaligned_le32(f, dst); |
| - f = (f >> 32) + le32_to_cpu(digest[1]); |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]); |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]); |
| - put_unaligned_le32(f, dst + 12); |
| - |
| + poly1305_emit(&dctx->h, dst, dctx->s); |
| *dctx = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL(poly1305_final_arch); |
| diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c |
| index b37d29cf5d0a..fc881b46d911 100644 |
| |
| |
| @@ -15,7 +15,7 @@ |
| |
| asmlinkage void poly1305_init_mips(void *state, const u8 *key); |
| asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); |
| -asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); |
| +asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); |
| |
| void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) |
| { |
| @@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch); |
| |
| void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| { |
| - __le32 digest[4]; |
| - u64 f = 0; |
| - |
| if (unlikely(dctx->buflen)) { |
| dctx->buf[dctx->buflen++] = 1; |
| memset(dctx->buf + dctx->buflen, 0, |
| @@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
| poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); |
| } |
| |
| - poly1305_emit_mips(&dctx->h, digest, dctx->s); |
| - |
| - /* mac = (h + s) % (2^128) */ |
| - f = (f >> 32) + le32_to_cpu(digest[0]); |
| - put_unaligned_le32(f, dst); |
| - f = (f >> 32) + le32_to_cpu(digest[1]); |
| - put_unaligned_le32(f, dst + 4); |
| - f = (f >> 32) + le32_to_cpu(digest[2]); |
| - put_unaligned_le32(f, dst + 8); |
| - f = (f >> 32) + le32_to_cpu(digest[3]); |
| - put_unaligned_le32(f, dst + 12); |
| - |
| + poly1305_emit_mips(&dctx->h, dst, dctx->s); |
| *dctx = (struct poly1305_desc_ctx){}; |
| } |
| EXPORT_SYMBOL(poly1305_final_arch); |
| -- |
| 2.18.2 |
| |
| |
| From 10debe0a7c7c0cfbd09cd459ffed05c68f21a31b Mon Sep 17 00:00:00 2001 |
| From: Herbert Xu <herbert@gondor.apana.org.au> |
| Date: Wed, 8 Jan 2020 12:37:35 +0800 |
| Subject: [PATCH 045/100] crypto: curve25519 - Fix selftest build error |
| |
| commit a8bdf2c42ee4d1ee42af1f3601f85de94e70a421 upstream. |
| |
| If CRYPTO_CURVE25519 is y, CRYPTO_LIB_CURVE25519_GENERIC will be |
| y, but CRYPTO_LIB_CURVE25519 may be set to m, this causes build |
| errors: |
| |
| lib/crypto/curve25519-selftest.o: In function `curve25519': |
| curve25519-selftest.c:(.text.unlikely+0xc): undefined reference to `curve25519_arch' |
| lib/crypto/curve25519-selftest.o: In function `curve25519_selftest': |
| curve25519-selftest.c:(.init.text+0x17e): undefined reference to `curve25519_base_arch' |
| |
| This is because the curve25519 self-test code is being controlled |
| by the GENERIC option rather than the overall CURVE25519 option, |
| as is the case with blake2s. To recap, the GENERIC and ARCH options |
| for CURVE25519 are internal only and selected by users such as |
| the Crypto API, or the externally visible CURVE25519 option which |
| in turn is selected by wireguard. The self-test is specific to the |
| the external CURVE25519 option and should not be enabled by the |
| Crypto API. |
| |
| This patch fixes this by splitting the GENERIC module from the |
| CURVE25519 module with the latter now containing just the self-test. |
| |
| Reported-by: Hulk Robot <hulkci@huawei.com> |
| Fixes: aa127963f1ca ("crypto: lib/curve25519 - re-add selftests") |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/Makefile | 9 ++++++--- |
| lib/crypto/curve25519-generic.c | 24 ++++++++++++++++++++++++ |
| lib/crypto/curve25519.c | 7 ------- |
| 3 files changed, 30 insertions(+), 10 deletions(-) |
| create mode 100644 lib/crypto/curve25519-generic.c |
| |
| diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile |
| index 6ecaf83a5a9a..3a435629d9ce 100644 |
| |
| |
| @@ -19,9 +19,12 @@ libblake2s-y += blake2s.o |
| obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o |
| libchacha20poly1305-y += chacha20poly1305.o |
| |
| -obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o |
| -libcurve25519-y := curve25519-fiat32.o |
| -libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o |
| +obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o |
| +libcurve25519-generic-y := curve25519-fiat32.o |
| +libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o |
| +libcurve25519-generic-y += curve25519-generic.o |
| + |
| +obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o |
| libcurve25519-y += curve25519.o |
| |
| obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o |
| diff --git a/lib/crypto/curve25519-generic.c b/lib/crypto/curve25519-generic.c |
| new file mode 100644 |
| index 000000000000..de7c99172fa2 |
| |
| |
| @@ -0,0 +1,24 @@ |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This is an implementation of the Curve25519 ECDH algorithm, using either |
| + * a 32-bit implementation or a 64-bit implementation with 128-bit integers, |
| + * depending on what is supported by the target compiler. |
| + * |
| + * Information: https://cr.yp.to/ecdh.html |
| + */ |
| + |
| +#include <crypto/curve25519.h> |
| +#include <linux/module.h> |
| + |
| +const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; |
| +const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; |
| + |
| +EXPORT_SYMBOL(curve25519_null_point); |
| +EXPORT_SYMBOL(curve25519_base_point); |
| +EXPORT_SYMBOL(curve25519_generic); |
| + |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("Curve25519 scalar multiplication"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c |
| index c03ccdb99434..288a62cd29b2 100644 |
| |
| |
| @@ -15,13 +15,6 @@ |
| |
| bool curve25519_selftest(void); |
| |
| -const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; |
| -const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; |
| - |
| -EXPORT_SYMBOL(curve25519_null_point); |
| -EXPORT_SYMBOL(curve25519_base_point); |
| -EXPORT_SYMBOL(curve25519_generic); |
| - |
| static int __init mod_init(void) |
| { |
| if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && |
| -- |
| 2.18.2 |
| |
| |
| From e82348f61b5731bae5b1d97cb6f3aefd4a6d6b95 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 16 Jan 2020 18:23:55 +0100 |
| Subject: [PATCH 046/100] crypto: x86/poly1305 - fix .gitignore typo |
| |
| commit 1f6868995326cc82102049e349d8dbd116bdb656 upstream. |
| |
| Amidst the kbuild robot induced changes, the .gitignore file for the |
| generated file wasn't updated with the non-clashing filename. This |
| commit adjusts that. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/.gitignore | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore |
| index c406ea6571fa..30be0400a439 100644 |
| |
| |
| @@ -1 +1 @@ |
| -poly1305-x86_64.S |
| +poly1305-x86_64-cryptogams.S |
| -- |
| 2.18.2 |
| |
| |
| From 50ab09a1e41c36049b0b4d194ac3998b6e98bf09 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 16 Jan 2020 21:26:34 +0100 |
| Subject: [PATCH 047/100] crypto: chacha20poly1305 - add back missing test |
| vectors and test chunking |
| |
| commit 72c7943792c9e7788ddd182337bcf8f650cf56f5 upstream. |
| |
| When this was originally ported, the 12-byte nonce vectors were left out |
| to keep things simple. I agree that we don't need nor want a library |
| interface for 12-byte nonces. But these test vectors were specially |
| crafted to look at issues in the underlying primitives and related |
| interactions. Therefore, we actually want to keep around all of the |
| test vectors, and simply have a helper function to test them with. |
| |
| Secondly, the sglist-based chunking code in the library interface is |
| rather complicated, so this adds a developer-only test for ensuring that |
| all the bookkeeping is correct, across a wide array of possibilities. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/chacha20poly1305-selftest.c | 1712 +++++++++++++++++++++++- |
| 1 file changed, 1698 insertions(+), 14 deletions(-) |
| |
| diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c |
| index 465de46dbdef..c391a91364e9 100644 |
| |
| |
| @@ -4,6 +4,7 @@ |
| */ |
| |
| #include <crypto/chacha20poly1305.h> |
| +#include <crypto/chacha.h> |
| #include <crypto/poly1305.h> |
| |
| #include <asm/unaligned.h> |
| @@ -1926,6 +1927,1104 @@ static const u8 enc_key012[] __initconst = { |
| 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 |
| }; |
| |
| +/* wycheproof - rfc7539 */ |
| +static const u8 enc_input013[] __initconst = { |
| + 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, |
| + 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c, |
| + 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, |
| + 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73, |
| + 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, |
| + 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63, |
| + 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, |
| + 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f, |
| + 0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, |
| + 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20, |
| + 0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, |
| + 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73, |
| + 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, |
| + 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69, |
| + 0x74, 0x2e |
| +}; |
| +static const u8 enc_output013[] __initconst = { |
| + 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, |
| + 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2, |
| + 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe, |
| + 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6, |
| + 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, |
| + 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b, |
| + 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29, |
| + 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36, |
| + 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c, |
| + 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58, |
| + 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94, |
| + 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc, |
| + 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d, |
| + 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b, |
| + 0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, |
| + 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, |
| + 0x06, 0x91 |
| +}; |
| +static const u8 enc_assoc013[] __initconst = { |
| + 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, |
| + 0xc4, 0xc5, 0xc6, 0xc7 |
| +}; |
| +static const u8 enc_nonce013[] __initconst = { |
| + 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43, |
| + 0x44, 0x45, 0x46, 0x47 |
| +}; |
| +static const u8 enc_key013[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input014[] __initconst = { }; |
| +static const u8 enc_output014[] __initconst = { |
| + 0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5, |
| + 0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d |
| +}; |
| +static const u8 enc_assoc014[] __initconst = { }; |
| +static const u8 enc_nonce014[] __initconst = { |
| + 0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1, |
| + 0xea, 0x12, 0x37, 0x9d |
| +}; |
| +static const u8 enc_key014[] __initconst = { |
| + 0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96, |
| + 0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0, |
| + 0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08, |
| + 0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input015[] __initconst = { }; |
| +static const u8 enc_output015[] __initconst = { |
| + 0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b, |
| + 0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09 |
| +}; |
| +static const u8 enc_assoc015[] __initconst = { |
| + 0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10 |
| +}; |
| +static const u8 enc_nonce015[] __initconst = { |
| + 0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16, |
| + 0xa3, 0xc6, 0xf6, 0x89 |
| +}; |
| +static const u8 enc_key015[] __initconst = { |
| + 0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb, |
| + 0x20, 0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22, |
| + 0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63, |
| + 0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input016[] __initconst = { |
| + 0x2a |
| +}; |
| +static const u8 enc_output016[] __initconst = { |
| + 0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80, |
| + 0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75, |
| + 0x22 |
| +}; |
| +static const u8 enc_assoc016[] __initconst = { }; |
| +static const u8 enc_nonce016[] __initconst = { |
| + 0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd, |
| + 0xee, 0xab, 0x60, 0xf1 |
| +}; |
| +static const u8 enc_key016[] __initconst = { |
| + 0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50, |
| + 0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa, |
| + 0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a, |
| + 0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input017[] __initconst = { |
| + 0x51 |
| +}; |
| +static const u8 enc_output017[] __initconst = { |
| + 0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7, |
| + 0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72, |
| + 0xb9 |
| +}; |
| +static const u8 enc_assoc017[] __initconst = { |
| + 0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53 |
| +}; |
| +static const u8 enc_nonce017[] __initconst = { |
| + 0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2, |
| + 0x78, 0x2f, 0x44, 0x03 |
| +}; |
| +static const u8 enc_key017[] __initconst = { |
| + 0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5, |
| + 0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f, |
| + 0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5, |
| + 0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input018[] __initconst = { |
| + 0x5c, 0x60 |
| +}; |
| +static const u8 enc_output018[] __initconst = { |
| + 0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39, |
| + 0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22, |
| + 0xe5, 0xe2 |
| +}; |
| +static const u8 enc_assoc018[] __initconst = { }; |
| +static const u8 enc_nonce018[] __initconst = { |
| + 0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 0xe0, 0x34, |
| + 0x7e, 0x03, 0xf2, 0xdb |
| +}; |
| +static const u8 enc_key018[] __initconst = { |
| + 0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89, |
| + 0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e, |
| + 0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f, |
| + 0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input019[] __initconst = { |
| + 0xdd, 0xf2 |
| +}; |
| +static const u8 enc_output019[] __initconst = { |
| + 0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec, |
| + 0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24, |
| + 0x37, 0xa2 |
| +}; |
| +static const u8 enc_assoc019[] __initconst = { |
| + 0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf |
| +}; |
| +static const u8 enc_nonce019[] __initconst = { |
| + 0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90, |
| + 0xb6, 0x04, 0x0a, 0xc6 |
| +}; |
| +static const u8 enc_key019[] __initconst = { |
| + 0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48, |
| + 0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff, |
| + 0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44, |
| + 0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input020[] __initconst = { |
| + 0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31 |
| +}; |
| +static const u8 enc_output020[] __initconst = { |
| + 0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed, |
| + 0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28, |
| + 0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42 |
| +}; |
| +static const u8 enc_assoc020[] __initconst = { }; |
| +static const u8 enc_nonce020[] __initconst = { |
| + 0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59, |
| + 0x6d, 0xc5, 0x5b, 0xb7 |
| +}; |
| +static const u8 enc_key020[] __initconst = { |
| + 0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f, |
| + 0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f, |
| + 0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3, |
| + 0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input021[] __initconst = { |
| + 0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90 |
| +}; |
| +static const u8 enc_output021[] __initconst = { |
| + 0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18, |
| + 0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5, |
| + 0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba |
| +}; |
| +static const u8 enc_assoc021[] __initconst = { |
| + 0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53 |
| +}; |
| +static const u8 enc_nonce021[] __initconst = { |
| + 0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98, |
| + 0xde, 0x94, 0x83, 0x96 |
| +}; |
| +static const u8 enc_key021[] __initconst = { |
| + 0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5, |
| + 0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4, |
| + 0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda, |
| + 0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input022[] __initconst = { |
| + 0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed |
| +}; |
| +static const u8 enc_output022[] __initconst = { |
| + 0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17, |
| + 0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93, |
| + 0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21 |
| +}; |
| +static const u8 enc_assoc022[] __initconst = { }; |
| +static const u8 enc_nonce022[] __initconst = { |
| + 0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2, |
| + 0x36, 0x18, 0x23, 0xd3 |
| +}; |
| +static const u8 enc_key022[] __initconst = { |
| + 0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf, |
| + 0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d, |
| + 0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7, |
| + 0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input023[] __initconst = { |
| + 0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf |
| +}; |
| +static const u8 enc_output023[] __initconst = { |
| + 0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0, |
| + 0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0, |
| + 0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18 |
| +}; |
| +static const u8 enc_assoc023[] __initconst = { |
| + 0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d |
| +}; |
| +static const u8 enc_nonce023[] __initconst = { |
| + 0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33, |
| + 0x66, 0x48, 0x4a, 0x78 |
| +}; |
| +static const u8 enc_key023[] __initconst = { |
| + 0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54, |
| + 0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a, |
| + 0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe, |
| + 0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input024[] __initconst = { |
| + 0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c, |
| + 0x36, 0x8d, 0x14, 0xe0 |
| +}; |
| +static const u8 enc_output024[] __initconst = { |
| + 0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a, |
| + 0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27, |
| + 0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10, |
| + 0x6d, 0xcb, 0x29, 0xb4 |
| +}; |
| +static const u8 enc_assoc024[] __initconst = { }; |
| +static const u8 enc_nonce024[] __initconst = { |
| + 0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e, |
| + 0x07, 0x53, 0x86, 0x56 |
| +}; |
| +static const u8 enc_key024[] __initconst = { |
| + 0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0, |
| + 0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39, |
| + 0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5, |
| + 0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input025[] __initconst = { |
| + 0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7, |
| + 0x59, 0xb1, 0xa0, 0xda |
| +}; |
| +static const u8 enc_output025[] __initconst = { |
| + 0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38, |
| + 0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c, |
| + 0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7, |
| + 0xcf, 0x05, 0x07, 0x2f |
| +}; |
| +static const u8 enc_assoc025[] __initconst = { |
| + 0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9 |
| +}; |
| +static const u8 enc_nonce025[] __initconst = { |
| + 0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d, |
| + 0xd9, 0x06, 0xe9, 0xce |
| +}; |
| +static const u8 enc_key025[] __initconst = { |
| + 0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40, |
| + 0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70, |
| + 0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e, |
| + 0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input026[] __initconst = { |
| + 0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac, |
| + 0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07 |
| +}; |
| +static const u8 enc_output026[] __initconst = { |
| + 0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf, |
| + 0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a, |
| + 0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79, |
| + 0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2 |
| +}; |
| +static const u8 enc_assoc026[] __initconst = { }; |
| +static const u8 enc_nonce026[] __initconst = { |
| + 0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda, |
| + 0xd4, 0x61, 0xd2, 0x3c |
| +}; |
| +static const u8 enc_key026[] __initconst = { |
| + 0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda, |
| + 0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77, |
| + 0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0, |
| + 0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input027[] __initconst = { |
| + 0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f, |
| + 0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73 |
| +}; |
| +static const u8 enc_output027[] __initconst = { |
| + 0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81, |
| + 0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe, |
| + 0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9, |
| + 0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17 |
| +}; |
| +static const u8 enc_assoc027[] __initconst = { |
| + 0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12 |
| +}; |
| +static const u8 enc_nonce027[] __initconst = { |
| + 0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14, |
| + 0xc5, 0x03, 0x5b, 0x6a |
| +}; |
| +static const u8 enc_key027[] __initconst = { |
| + 0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f, |
| + 0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38, |
| + 0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b, |
| + 0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input028[] __initconst = { |
| + 0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0, |
| + 0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88 |
| +}; |
| +static const u8 enc_output028[] __initconst = { |
| + 0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4, |
| + 0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13, |
| + 0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a, |
| + 0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c |
| +}; |
| +static const u8 enc_assoc028[] __initconst = { }; |
| +static const u8 enc_nonce028[] __initconst = { |
| + 0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8, |
| + 0x47, 0x40, 0xad, 0x9b |
| +}; |
| +static const u8 enc_key028[] __initconst = { |
| + 0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7, |
| + 0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7, |
| + 0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63, |
| + 0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input029[] __initconst = { |
| + 0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09, |
| + 0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6 |
| +}; |
| +static const u8 enc_output029[] __initconst = { |
| + 0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3, |
| + 0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c, |
| + 0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc, |
| + 0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d |
| +}; |
| +static const u8 enc_assoc029[] __initconst = { |
| + 0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff |
| +}; |
| +static const u8 enc_nonce029[] __initconst = { |
| + 0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80, |
| + 0x07, 0x1f, 0x52, 0x66 |
| +}; |
| +static const u8 enc_key029[] __initconst = { |
| + 0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8, |
| + 0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14, |
| + 0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d, |
| + 0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input030[] __initconst = { |
| + 0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15, |
| + 0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e, |
| + 0x1f |
| +}; |
| +static const u8 enc_output030[] __initconst = { |
| + 0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd, |
| + 0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74, |
| + 0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80, |
| + 0x0f, 0xe9, 0x58, 0xc2, 0x8e, 0xaa, 0x29, 0x08, |
| + 0x13 |
| +}; |
| +static const u8 enc_assoc030[] __initconst = { }; |
| +static const u8 enc_nonce030[] __initconst = { |
| + 0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42, |
| + 0xdc, 0x03, 0x44, 0x5d |
| +}; |
| +static const u8 enc_key030[] __initconst = { |
| + 0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21, |
| + 0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e, |
| + 0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b, |
| + 0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input031[] __initconst = { |
| + 0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88, |
| + 0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c, |
| + 0x9c |
| +}; |
| +static const u8 enc_output031[] __initconst = { |
| + 0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b, |
| + 0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e, |
| + 0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38, |
| + 0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c, |
| + 0xeb |
| +}; |
| +static const u8 enc_assoc031[] __initconst = { |
| + 0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79 |
| +}; |
| +static const u8 enc_nonce031[] __initconst = { |
| + 0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10, |
| + 0x63, 0x3d, 0x99, 0x3d |
| +}; |
| +static const u8 enc_key031[] __initconst = { |
| + 0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7, |
| + 0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f, |
| + 0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82, |
| + 0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input032[] __initconst = { |
| + 0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66, |
| + 0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7, |
| + 0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11 |
| +}; |
| +static const u8 enc_output032[] __initconst = { |
| + 0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d, |
| + 0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0, |
| + 0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64, |
| + 0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d, |
| + 0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a |
| +}; |
| +static const u8 enc_assoc032[] __initconst = { }; |
| +static const u8 enc_nonce032[] __initconst = { |
| + 0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76, |
| + 0x2c, 0x65, 0xf3, 0x1b |
| +}; |
| +static const u8 enc_key032[] __initconst = { |
| + 0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87, |
| + 0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f, |
| + 0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2, |
| + 0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input033[] __initconst = { |
| + 0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d, |
| + 0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c, |
| + 0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93 |
| +}; |
| +static const u8 enc_output033[] __initconst = { |
| + 0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69, |
| + 0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4, |
| + 0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00, |
| + 0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63, |
| + 0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65 |
| +}; |
| +static const u8 enc_assoc033[] __initconst = { |
| + 0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08 |
| +}; |
| +static const u8 enc_nonce033[] __initconst = { |
| + 0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd, |
| + 0x78, 0x34, 0xed, 0x55 |
| +}; |
| +static const u8 enc_key033[] __initconst = { |
| + 0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed, |
| + 0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda, |
| + 0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d, |
| + 0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input034[] __initconst = { |
| + 0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f, |
| + 0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c, |
| + 0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26, |
| + 0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29 |
| +}; |
| +static const u8 enc_output034[] __initconst = { |
| + 0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab, |
| + 0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb, |
| + 0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1, |
| + 0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56, |
| + 0x6d, 0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f, |
| + 0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93 |
| +}; |
| +static const u8 enc_assoc034[] __initconst = { }; |
| +static const u8 enc_nonce034[] __initconst = { |
| + 0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31, |
| + 0x37, 0x1a, 0x6f, 0xd2 |
| +}; |
| +static const u8 enc_key034[] __initconst = { |
| + 0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e, |
| + 0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2, |
| + 0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15, |
| + 0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input035[] __initconst = { |
| + 0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2, |
| + 0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f, |
| + 0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56, |
| + 0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb |
| +}; |
| +static const u8 enc_output035[] __initconst = { |
| + 0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20, |
| + 0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5, |
| + 0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e, |
| + 0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53, |
| + 0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d, |
| + 0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f |
| +}; |
| +static const u8 enc_assoc035[] __initconst = { |
| + 0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48 |
| +}; |
| +static const u8 enc_nonce035[] __initconst = { |
| + 0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8, |
| + 0xb8, 0x1a, 0x1f, 0x8b |
| +}; |
| +static const u8 enc_key035[] __initconst = { |
| + 0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f, |
| + 0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40, |
| + 0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4, |
| + 0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input036[] __initconst = { |
| + 0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a, |
| + 0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb, |
| + 0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce, |
| + 0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78 |
| +}; |
| +static const u8 enc_output036[] __initconst = { |
| + 0xea, 0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76, |
| + 0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e, |
| + 0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50, |
| + 0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21, |
| + 0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33, |
| + 0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea |
| +}; |
| +static const u8 enc_assoc036[] __initconst = { }; |
| +static const u8 enc_nonce036[] __initconst = { |
| + 0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2, |
| + 0x2b, 0x7e, 0x6e, 0x6a |
| +}; |
| +static const u8 enc_key036[] __initconst = { |
| + 0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c, |
| + 0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb, |
| + 0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9, |
| + 0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input037[] __initconst = { |
| + 0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14, |
| + 0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3, |
| + 0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79, |
| + 0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e |
| +}; |
| +static const u8 enc_output037[] __initconst = { |
| + 0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b, |
| + 0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2, |
| + 0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29, |
| + 0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4, |
| + 0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d, |
| + 0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32 |
| +}; |
| +static const u8 enc_assoc037[] __initconst = { |
| + 0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59 |
| +}; |
| +static const u8 enc_nonce037[] __initconst = { |
| + 0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5, |
| + 0x34, 0x0d, 0xd1, 0xb8 |
| +}; |
| +static const u8 enc_key037[] __initconst = { |
| + 0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4, |
| + 0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f, |
| + 0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd, |
| + 0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input038[] __initconst = { |
| + 0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 0xe4, |
| + 0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e, |
| + 0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11, |
| + 0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04, |
| + 0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46, |
| + 0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a |
| +}; |
| +static const u8 enc_output038[] __initconst = { |
| + 0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6, |
| + 0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00, |
| + 0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6, |
| + 0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27, |
| + 0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24, |
| + 0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f, |
| + 0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08, |
| + 0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9 |
| +}; |
| +static const u8 enc_assoc038[] __initconst = { }; |
| +static const u8 enc_nonce038[] __initconst = { |
| + 0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9, |
| + 0x0b, 0xef, 0x55, 0xd2 |
| +}; |
| +static const u8 enc_key038[] __initconst = { |
| + 0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51, |
| + 0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63, |
| + 0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3, |
| + 0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input039[] __initconst = { |
| + 0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74, |
| + 0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3, |
| + 0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d, |
| + 0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59, |
| + 0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c, |
| + 0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9 |
| +}; |
| +static const u8 enc_output039[] __initconst = { |
| + 0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec, |
| + 0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04, |
| + 0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e, |
| + 0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d, |
| + 0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3, |
| + 0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4, |
| + 0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42, |
| + 0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3 |
| +}; |
| +static const u8 enc_assoc039[] __initconst = { |
| + 0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84 |
| +}; |
| +static const u8 enc_nonce039[] __initconst = { |
| + 0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b, |
| + 0x31, 0xcd, 0x4d, 0x95 |
| +}; |
| +static const u8 enc_key039[] __initconst = { |
| + 0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c, |
| + 0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25, |
| + 0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4, |
| + 0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input040[] __initconst = { |
| + 0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e, |
| + 0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd, |
| + 0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f, |
| + 0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f, |
| + 0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88, |
| + 0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76, |
| + 0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d, |
| + 0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82 |
| +}; |
| +static const u8 enc_output040[] __initconst = { |
| + 0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5, |
| + 0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8, |
| + 0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c, |
| + 0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8, |
| + 0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc, |
| + 0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33, |
| + 0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd, |
| + 0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50, |
| + 0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56, |
| + 0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13 |
| +}; |
| +static const u8 enc_assoc040[] __initconst = { }; |
| +static const u8 enc_nonce040[] __initconst = { |
| + 0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28, |
| + 0x23, 0xcc, 0x06, 0x5b |
| +}; |
| +static const u8 enc_key040[] __initconst = { |
| + 0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0, |
| + 0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8, |
| + 0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30, |
| + 0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input041[] __initconst = { |
| + 0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88, |
| + 0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d, |
| + 0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19, |
| + 0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44, |
| + 0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec, |
| + 0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d, |
| + 0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c, |
| + 0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17 |
| +}; |
| +static const u8 enc_output041[] __initconst = { |
| + 0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16, |
| + 0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1, |
| + 0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8, |
| + 0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97, |
| + 0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84, |
| + 0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3, |
| + 0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20, |
| + 0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e, |
| + 0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14, |
| + 0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec |
| +}; |
| +static const u8 enc_assoc041[] __initconst = { |
| + 0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2 |
| +}; |
| +static const u8 enc_nonce041[] __initconst = { |
| + 0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d, |
| + 0xe4, 0xeb, 0xb1, 0x9c |
| +}; |
| +static const u8 enc_key041[] __initconst = { |
| + 0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9, |
| + 0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2, |
| + 0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5, |
| + 0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input042[] __initconst = { |
| + 0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba, |
| + 0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58, |
| + 0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06, |
| + 0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64, |
| + 0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5, |
| + 0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74, |
| + 0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61, |
| + 0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 0x9e, |
| + 0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6, |
| + 0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd, |
| + 0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4, |
| + 0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5, |
| + 0x92 |
| +}; |
| +static const u8 enc_output042[] __initconst = { |
| + 0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97, |
| + 0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf, |
| + 0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98, |
| + 0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5, |
| + 0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45, |
| + 0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10, |
| + 0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28, |
| + 0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe, |
| + 0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c, |
| + 0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a, |
| + 0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2, |
| + 0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3, |
| + 0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b, |
| + 0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b, |
| + 0xb0 |
| +}; |
| +static const u8 enc_assoc042[] __initconst = { }; |
| +static const u8 enc_nonce042[] __initconst = { |
| + 0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03, |
| + 0xac, 0xde, 0x27, 0x99 |
| +}; |
| +static const u8 enc_key042[] __initconst = { |
| + 0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28, |
| + 0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d, |
| + 0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a, |
| + 0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input043[] __initconst = { |
| + 0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff, |
| + 0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc, |
| + 0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc, |
| + 0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5, |
| + 0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd, |
| + 0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55, |
| + 0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85, |
| + 0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b, |
| + 0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3, |
| + 0x3f, 0xc5, 0xfc, 0x24, 0xa9, 0xa2, 0x05, 0xad, |
| + 0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92, |
| + 0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31, |
| + 0x76 |
| +}; |
| +static const u8 enc_output043[] __initconst = { |
| + 0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5, |
| + 0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06, |
| + 0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e, |
| + 0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae, |
| + 0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37, |
| + 0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8, |
| + 0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3, |
| + 0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d, |
| + 0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70, |
| + 0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07, |
| + 0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6, |
| + 0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54, |
| + 0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5, |
| + 0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac, |
| + 0xf6 |
| +}; |
| +static const u8 enc_assoc043[] __initconst = { |
| + 0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30 |
| +}; |
| +static const u8 enc_nonce043[] __initconst = { |
| + 0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63, |
| + 0xe5, 0x22, 0x94, 0x60 |
| +}; |
| +static const u8 enc_key043[] __initconst = { |
| + 0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c, |
| + 0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4, |
| + 0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b, |
| + 0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input044[] __initconst = { |
| + 0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68, |
| + 0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2 |
| +}; |
| +static const u8 enc_output044[] __initconst = { |
| + 0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6, |
| + 0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b, |
| + 0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02, |
| + 0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4 |
| +}; |
| +static const u8 enc_assoc044[] __initconst = { |
| + 0x02 |
| +}; |
| +static const u8 enc_nonce044[] __initconst = { |
| + 0x87, 0x34, 0x5f, 0x10, 0x55, 0xfd, 0x9e, 0x21, |
| + 0x02, 0xd5, 0x06, 0x56 |
| +}; |
| +static const u8 enc_key044[] __initconst = { |
| + 0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32, |
| + 0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50, |
| + 0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0, |
| + 0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input045[] __initconst = { |
| + 0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44, |
| + 0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8 |
| +}; |
| +static const u8 enc_output045[] __initconst = { |
| + 0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1, |
| + 0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60, |
| + 0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1, |
| + 0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58 |
| +}; |
| +static const u8 enc_assoc045[] __initconst = { |
| + 0xb6, 0x48 |
| +}; |
| +static const u8 enc_nonce045[] __initconst = { |
| + 0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9, |
| + 0x5b, 0x3a, 0xa7, 0x13 |
| +}; |
| +static const u8 enc_key045[] __initconst = { |
| + 0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41, |
| + 0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0, |
| + 0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44, |
| + 0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input046[] __initconst = { |
| + 0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23, |
| + 0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47 |
| +}; |
| +static const u8 enc_output046[] __initconst = { |
| + 0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda, |
| + 0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1, |
| + 0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b, |
| + 0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8 |
| +}; |
| +static const u8 enc_assoc046[] __initconst = { |
| + 0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd, |
| + 0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0 |
| +}; |
| +static const u8 enc_nonce046[] __initconst = { |
| + 0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b, |
| + 0xa4, 0x65, 0x96, 0xdf |
| +}; |
| +static const u8 enc_key046[] __initconst = { |
| + 0x8e, 0x34, 0xcf, 0x73, 0xd2, 0x45, 0xa1, 0x08, |
| + 0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96, |
| + 0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89, |
| + 0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input047[] __initconst = { |
| + 0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf, |
| + 0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2 |
| +}; |
| +static const u8 enc_output047[] __initconst = { |
| + 0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb, |
| + 0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95, |
| + 0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34, |
| + 0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f |
| +}; |
| +static const u8 enc_assoc047[] __initconst = { |
| + 0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07, |
| + 0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b |
| +}; |
| +static const u8 enc_nonce047[] __initconst = { |
| + 0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6, |
| + 0x80, 0x92, 0x66, 0xd9 |
| +}; |
| +static const u8 enc_key047[] __initconst = { |
| + 0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91, |
| + 0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23, |
| + 0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6, |
| + 0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input048[] __initconst = { |
| + 0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32, |
| + 0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3 |
| +}; |
| +static const u8 enc_output048[] __initconst = { |
| + 0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b, |
| + 0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68, |
| + 0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84, |
| + 0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3 |
| +}; |
| +static const u8 enc_assoc048[] __initconst = { |
| + 0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab, |
| + 0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c, |
| + 0x0e |
| +}; |
| +static const u8 enc_nonce048[] __initconst = { |
| + 0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40, |
| + 0xfc, 0x10, 0x68, 0xc3 |
| +}; |
| +static const u8 enc_key048[] __initconst = { |
| + 0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b, |
| + 0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97, |
| + 0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b, |
| + 0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input049[] __initconst = { |
| + 0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2, |
| + 0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6 |
| +}; |
| +static const u8 enc_output049[] __initconst = { |
| + 0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87, |
| + 0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11, |
| + 0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e, |
| + 0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6 |
| +}; |
| +static const u8 enc_assoc049[] __initconst = { |
| + 0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8, |
| + 0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94, |
| + 0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5, |
| + 0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda |
| +}; |
| +static const u8 enc_nonce049[] __initconst = { |
| + 0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf, |
| + 0xad, 0x14, 0xd5, 0x3e |
| +}; |
| +static const u8 enc_key049[] __initconst = { |
| + 0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d, |
| + 0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc, |
| + 0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47, |
| + 0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input050[] __initconst = { |
| + 0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18, |
| + 0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c |
| +}; |
| +static const u8 enc_output050[] __initconst = { |
| + 0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6, |
| + 0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca, |
| + 0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b, |
| + 0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04 |
| +}; |
| +static const u8 enc_assoc050[] __initconst = { |
| + 0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22, |
| + 0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07, |
| + 0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90, |
| + 0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37 |
| +}; |
| +static const u8 enc_nonce050[] __initconst = { |
| + 0x8d, 0xf4, 0xb1, 0x5a, 0x88, 0x8c, 0x33, 0x28, |
| + 0x6a, 0x7b, 0x76, 0x51 |
| +}; |
| +static const u8 enc_key050[] __initconst = { |
| + 0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1, |
| + 0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c, |
| + 0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a, |
| + 0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input051[] __initconst = { |
| + 0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59, |
| + 0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6 |
| +}; |
| +static const u8 enc_output051[] __initconst = { |
| + 0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70, |
| + 0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd, |
| + 0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4, |
| + 0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43 |
| +}; |
| +static const u8 enc_assoc051[] __initconst = { |
| + 0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92, |
| + 0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57, |
| + 0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75, |
| + 0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88, |
| + 0x73 |
| +}; |
| +static const u8 enc_nonce051[] __initconst = { |
| + 0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0, |
| + 0xa8, 0xfa, 0x89, 0x49 |
| +}; |
| +static const u8 enc_key051[] __initconst = { |
| + 0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27, |
| + 0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74, |
| + 0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c, |
| + 0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99 |
| +}; |
| + |
| +/* wycheproof - misc */ |
| +static const u8 enc_input052[] __initconst = { |
| + 0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3, |
| + 0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed |
| +}; |
| +static const u8 enc_output052[] __initconst = { |
| + 0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc, |
| + 0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b, |
| + 0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed, |
| + 0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32 |
| +}; |
| +static const u8 enc_assoc052[] __initconst = { |
| + 0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a, |
| + 0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 0x14, |
| + 0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae, |
| + 0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c, |
| + 0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92, |
| + 0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61 |
| +}; |
| +static const u8 enc_nonce052[] __initconst = { |
| + 0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73, |
| + 0x0e, 0xc3, 0x5d, 0x12 |
| +}; |
| +static const u8 enc_key052[] __initconst = { |
| + 0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5, |
| + 0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf, |
| + 0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c, |
| + 0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4 |
| +}; |
| + |
| /* wycheproof - misc */ |
| static const u8 enc_input053[] __initconst = { |
| 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, |
| @@ -2759,6 +3858,126 @@ static const u8 enc_key073[] __initconst = { |
| 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| }; |
| |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input074[] __initconst = { |
| + 0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51, |
| + 0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69, |
| + 0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f, |
| + 0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90, |
| + 0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0, |
| + 0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac, |
| + 0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2, |
| + 0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5, |
| + 0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b, |
| + 0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49, |
| + 0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16, |
| + 0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58, |
| + 0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73, |
| + 0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3, |
| + 0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c, |
| + 0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a |
| +}; |
| +static const u8 enc_output074[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85, |
| + 0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca |
| +}; |
| +static const u8 enc_assoc074[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce074[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x00, 0x02, 0x50, 0x6e |
| +}; |
| +static const u8 enc_key074[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| +/* wycheproof - checking for int overflows */ |
| +static const u8 enc_input075[] __initconst = { |
| + 0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75, |
| + 0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56, |
| + 0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2, |
| + 0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed, |
| + 0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f, |
| + 0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f, |
| + 0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0, |
| + 0xb4, 0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8, |
| + 0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32, |
| + 0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8, |
| + 0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b, |
| + 0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b, |
| + 0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd, |
| + 0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f, |
| + 0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1, |
| + 0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94 |
| +}; |
| +static const u8 enc_output075[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7, |
| + 0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5 |
| +}; |
| +static const u8 enc_assoc075[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_nonce075[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x00, 0x03, 0x18, 0xa5 |
| +}; |
| +static const u8 enc_key075[] __initconst = { |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, |
| + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 |
| +}; |
| + |
| /* wycheproof - checking for int overflows */ |
| static const u8 enc_input076[] __initconst = { |
| 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85, |
| @@ -3349,6 +4568,286 @@ static const u8 enc_key085[] __initconst = { |
| 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| }; |
| |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input086[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output086[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f |
| +}; |
| +static const u8 enc_assoc086[] __initconst = { |
| + 0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1, |
| + 0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00 |
| +}; |
| +static const u8 enc_nonce086[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key086[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input087[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output087[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_assoc087[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f, |
| + 0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58 |
| +}; |
| +static const u8 enc_nonce087[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key087[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input088[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output088[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff |
| +}; |
| +static const u8 enc_assoc088[] __initconst = { |
| + 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f, |
| + 0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00 |
| +}; |
| +static const u8 enc_nonce088[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key088[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input089[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output089[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, |
| + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 |
| +}; |
| +static const u8 enc_assoc089[] __initconst = { |
| + 0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae, |
| + 0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02 |
| +}; |
| +static const u8 enc_nonce089[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key089[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input090[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output090[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, |
| + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f |
| +}; |
| +static const u8 enc_assoc090[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe, |
| + 0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3 |
| +}; |
| +static const u8 enc_nonce090[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key090[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input091[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output091[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, |
| + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_assoc091[] __initconst = { |
| + 0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30, |
| + 0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01 |
| +}; |
| +static const u8 enc_nonce091[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key091[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| +/* wycheproof - special case tag */ |
| +static const u8 enc_input092[] __initconst = { |
| + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, |
| + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, |
| + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, |
| + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, |
| + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, |
| + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, |
| + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, |
| + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d |
| +}; |
| +static const u8 enc_output092[] __initconst = { |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| +}; |
| +static const u8 enc_assoc092[] __initconst = { |
| + 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
| + 0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11, |
| + 0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01 |
| +}; |
| +static const u8 enc_nonce092[] __initconst = { |
| + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| + 0x08, 0x09, 0x0a, 0x0b |
| +}; |
| +static const u8 enc_key092[] __initconst = { |
| + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f |
| +}; |
| + |
| /* wycheproof - edge case intermediate sums in poly1305 */ |
| static const u8 enc_input093[] __initconst = { |
| 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d, |
| @@ -4455,6 +5954,86 @@ chacha20poly1305_enc_vectors[] __initconst = { |
| sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) }, |
| { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012, |
| sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) }, |
| + { enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013, |
| + sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) }, |
| + { enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014, |
| + sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) }, |
| + { enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015, |
| + sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) }, |
| + { enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016, |
| + sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) }, |
| + { enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017, |
| + sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) }, |
| + { enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018, |
| + sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) }, |
| + { enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019, |
| + sizeof(enc_input019), sizeof(enc_assoc019), sizeof(enc_nonce019) }, |
| + { enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020, |
| + sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) }, |
| + { enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021, |
| + sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) }, |
| + { enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022, |
| + sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) }, |
| + { enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023, |
| + sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) }, |
| + { enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024, |
| + sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) }, |
| + { enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025, |
| + sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) }, |
| + { enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026, |
| + sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) }, |
| + { enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027, |
| + sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) }, |
| + { enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028, |
| + sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) }, |
| + { enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029, |
| + sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) }, |
| + { enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030, |
| + sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) }, |
| + { enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031, |
| + sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) }, |
| + { enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032, |
| + sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) }, |
| + { enc_input033, enc_output033, enc_assoc033, enc_nonce033, enc_key033, |
| + sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) }, |
| + { enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034, |
| + sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) }, |
| + { enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035, |
| + sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) }, |
| + { enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036, |
| + sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) }, |
| + { enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037, |
| + sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) }, |
| + { enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038, |
| + sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) }, |
| + { enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039, |
| + sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) }, |
| + { enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040, |
| + sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) }, |
| + { enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041, |
| + sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) }, |
| + { enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042, |
| + sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) }, |
| + { enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043, |
| + sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) }, |
| + { enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044, |
| + sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) }, |
| + { enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045, |
| + sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) }, |
| + { enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046, |
| + sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) }, |
| + { enc_input047, enc_output047, enc_assoc047, enc_nonce047, enc_key047, |
| + sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) }, |
| + { enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048, |
| + sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) }, |
| + { enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049, |
| + sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) }, |
| + { enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050, |
| + sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) }, |
| + { enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051, |
| + sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) }, |
| + { enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052, |
| + sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) }, |
| { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053, |
| sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) }, |
| { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054, |
| @@ -4497,6 +6076,10 @@ chacha20poly1305_enc_vectors[] __initconst = { |
| sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) }, |
| { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073, |
| sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) }, |
| + { enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074, |
| + sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) }, |
| + { enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075, |
| + sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) }, |
| { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076, |
| sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) }, |
| { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077, |
| @@ -4517,6 +6100,20 @@ chacha20poly1305_enc_vectors[] __initconst = { |
| sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) }, |
| { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085, |
| sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) }, |
| + { enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086, |
| + sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) }, |
| + { enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087, |
| + sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) }, |
| + { enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088, |
| + sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) }, |
| + { enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089, |
| + sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) }, |
| + { enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090, |
| + sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) }, |
| + { enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091, |
| + sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) }, |
| + { enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092, |
| + sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) }, |
| { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093, |
| sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) }, |
| { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094, |
| @@ -7224,6 +8821,43 @@ xchacha20poly1305_dec_vectors[] __initconst = { |
| sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) } |
| }; |
| |
| +/* This is for the selftests-only, since it is only useful for the purpose of |
| + * testing the underlying primitives and interactions. |
| + */ |
| +static void __init |
| +chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, |
| + const u8 *ad, const size_t ad_len, |
| + const u8 nonce[12], |
| + const u8 key[CHACHA20POLY1305_KEY_SIZE]) |
| +{ |
| + const u8 *pad0 = page_address(ZERO_PAGE(0)); |
| + struct poly1305_desc_ctx poly1305_state; |
| + u32 chacha20_state[CHACHA_STATE_WORDS]; |
| + union { |
| + u8 block0[POLY1305_KEY_SIZE]; |
| + __le64 lens[2]; |
| + } b = {{ 0 }}; |
| + u8 bottom_row[16] = { 0 }; |
| + u32 le_key[8]; |
| + int i; |
| + |
| + memcpy(&bottom_row[4], nonce, 12); |
| + for (i = 0; i < 8; ++i) |
| + le_key[i] = get_unaligned_le32(key + sizeof(le_key[i]) * i); |
| + chacha_init(chacha20_state, le_key, bottom_row); |
| + chacha20_crypt(chacha20_state, b.block0, b.block0, sizeof(b.block0)); |
| + poly1305_init(&poly1305_state, b.block0); |
| + poly1305_update(&poly1305_state, ad, ad_len); |
| + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf); |
| + chacha20_crypt(chacha20_state, dst, src, src_len); |
| + poly1305_update(&poly1305_state, dst, src_len); |
| + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf); |
| + b.lens[0] = cpu_to_le64(ad_len); |
| + b.lens[1] = cpu_to_le64(src_len); |
| + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); |
| + poly1305_final(&poly1305_state, dst + src_len); |
| +} |
| + |
| static void __init |
| chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| const u8 *ad, const size_t ad_len, |
| @@ -7233,6 +8867,9 @@ chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, |
| if (nonce_len == 8) |
| chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, |
| get_unaligned_le64(nonce), key); |
| + else if (nonce_len == 12) |
| + chacha20poly1305_encrypt_bignonce(dst, src, src_len, ad, |
| + ad_len, nonce, key); |
| else |
| BUG(); |
| } |
| @@ -7248,14 +8885,14 @@ decryption_success(bool func_ret, bool expect_failure, int memcmp_result) |
| bool __init chacha20poly1305_selftest(void) |
| { |
| enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; |
| - size_t i; |
| - u8 *computed_output = NULL, *heap_src = NULL; |
| - struct scatterlist sg_src; |
| + size_t i, j, k, total_len; |
| + u8 *computed_output = NULL, *input = NULL; |
| bool success = true, ret; |
| + struct scatterlist sg_src[3]; |
| |
| - heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); |
| computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); |
| - if (!heap_src || !computed_output) { |
| + input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); |
| + if (!computed_output || !input) { |
| pr_err("chacha20poly1305 self-test malloc: FAIL\n"); |
| success = false; |
| goto out; |
| @@ -7284,17 +8921,17 @@ bool __init chacha20poly1305_selftest(void) |
| for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { |
| if (chacha20poly1305_enc_vectors[i].nlen != 8) |
| continue; |
| - memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, |
| + memcpy(computed_output, chacha20poly1305_enc_vectors[i].input, |
| chacha20poly1305_enc_vectors[i].ilen); |
| - sg_init_one(&sg_src, heap_src, |
| + sg_init_one(sg_src, computed_output, |
| chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE); |
| - chacha20poly1305_encrypt_sg_inplace(&sg_src, |
| + ret = chacha20poly1305_encrypt_sg_inplace(sg_src, |
| chacha20poly1305_enc_vectors[i].ilen, |
| chacha20poly1305_enc_vectors[i].assoc, |
| chacha20poly1305_enc_vectors[i].alen, |
| get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce), |
| chacha20poly1305_enc_vectors[i].key); |
| - if (memcmp(heap_src, |
| + if (!ret || memcmp(computed_output, |
| chacha20poly1305_enc_vectors[i].output, |
| chacha20poly1305_enc_vectors[i].ilen + |
| POLY1305_DIGEST_SIZE)) { |
| @@ -7326,11 +8963,11 @@ bool __init chacha20poly1305_selftest(void) |
| } |
| |
| for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { |
| - memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, |
| + memcpy(computed_output, chacha20poly1305_dec_vectors[i].input, |
| chacha20poly1305_dec_vectors[i].ilen); |
| - sg_init_one(&sg_src, heap_src, |
| + sg_init_one(sg_src, computed_output, |
| chacha20poly1305_dec_vectors[i].ilen); |
| - ret = chacha20poly1305_decrypt_sg_inplace(&sg_src, |
| + ret = chacha20poly1305_decrypt_sg_inplace(sg_src, |
| chacha20poly1305_dec_vectors[i].ilen, |
| chacha20poly1305_dec_vectors[i].assoc, |
| chacha20poly1305_dec_vectors[i].alen, |
| @@ -7338,7 +8975,7 @@ bool __init chacha20poly1305_selftest(void) |
| chacha20poly1305_dec_vectors[i].key); |
| if (!decryption_success(ret, |
| chacha20poly1305_dec_vectors[i].failure, |
| - memcmp(heap_src, chacha20poly1305_dec_vectors[i].output, |
| + memcmp(computed_output, chacha20poly1305_dec_vectors[i].output, |
| chacha20poly1305_dec_vectors[i].ilen - |
| POLY1305_DIGEST_SIZE))) { |
| pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n", |
| @@ -7365,6 +9002,7 @@ bool __init chacha20poly1305_selftest(void) |
| success = false; |
| } |
| } |
| + |
| for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) { |
| memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); |
| ret = xchacha20poly1305_decrypt(computed_output, |
| @@ -7386,8 +9024,54 @@ bool __init chacha20poly1305_selftest(void) |
| } |
| } |
| |
| + for (total_len = POLY1305_DIGEST_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST) |
| + && total_len <= 1 << 10; ++total_len) { |
| + for (i = 0; i <= total_len; ++i) { |
| + for (j = i; j <= total_len; ++j) { |
| + sg_init_table(sg_src, 3); |
| + sg_set_buf(&sg_src[0], input, i); |
| + sg_set_buf(&sg_src[1], input + i, j - i); |
| + sg_set_buf(&sg_src[2], input + j, total_len - j); |
| + memset(computed_output, 0, total_len); |
| + memset(input, 0, total_len); |
| + |
| + if (!chacha20poly1305_encrypt_sg_inplace(sg_src, |
| + total_len - POLY1305_DIGEST_SIZE, NULL, 0, |
| + 0, enc_key001)) |
| + goto chunkfail; |
| + chacha20poly1305_encrypt(computed_output, |
| + computed_output, |
| + total_len - POLY1305_DIGEST_SIZE, NULL, 0, 0, |
| + enc_key001); |
| + if (memcmp(computed_output, input, total_len)) |
| + goto chunkfail; |
| + if (!chacha20poly1305_decrypt(computed_output, |
| + input, total_len, NULL, 0, 0, enc_key001)) |
| + goto chunkfail; |
| + for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) { |
| + if (computed_output[k]) |
| + goto chunkfail; |
| + } |
| + if (!chacha20poly1305_decrypt_sg_inplace(sg_src, |
| + total_len, NULL, 0, 0, enc_key001)) |
| + goto chunkfail; |
| + for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) { |
| + if (input[k]) |
| + goto chunkfail; |
| + } |
| + continue; |
| + |
| + chunkfail: |
| + pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n", |
| + total_len, i, j); |
| + success = false; |
| + } |
| + |
| + } |
| + } |
| + |
| out: |
| - kfree(heap_src); |
| kfree(computed_output); |
| + kfree(input); |
| return success; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 1cb521f8391f1fa9eaaf55c56d9ecdc97030f5d9 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 17 Jan 2020 11:42:22 +0100 |
| Subject: [PATCH 048/100] crypto: x86/poly1305 - emit does base conversion |
| itself |
| |
| commit f9e7fe32a792726186301423ff63a465d63386e1 upstream. |
| |
| The emit code does optional base conversion itself in assembly, so we |
| don't need to do that here. Also, neither one of these functions uses |
| simd instructions, so checking for that doesn't make sense either. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/poly1305_glue.c | 8 ++------ |
| 1 file changed, 2 insertions(+), 6 deletions(-) |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 657363588e0c..79bb58737d52 100644 |
| |
| |
| @@ -123,13 +123,9 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, |
| static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |
| const u32 nonce[4]) |
| { |
| - struct poly1305_arch_internal *state = ctx; |
| - |
| - if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || |
| - !state->is_base2_26 || !crypto_simd_usable()) { |
| - convert_to_base2_64(ctx); |
| + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx)) |
| poly1305_emit_x86_64(ctx, mac, nonce); |
| - } else |
| + else |
| poly1305_emit_avx(ctx, mac, nonce); |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From 286fdbb5f1330fe73266f14df8398eac1c6db2d3 Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ardb@kernel.org> |
| Date: Fri, 17 Jan 2020 17:43:18 +0100 |
| Subject: [PATCH 049/100] crypto: arm/chacha - fix build failured when kernel |
| mode NEON is disabled |
| |
| commit 0bc81767c5bd9d005fae1099fb39eb3688370cb1 upstream. |
| |
| When the ARM accelerated ChaCha driver is built as part of a configuration |
| that has kernel mode NEON disabled, we expect the compiler to propagate |
| the build time constant expression IS_ENABLED(CONFIG_KERNEL_MODE_NEON) in |
| a way that eliminates all the cross-object references to the actual NEON |
| routines, which allows the chacha-neon-core.o object to be omitted from |
| the build entirely. |
| |
| Unfortunately, this fails to work as expected in some cases, and we may |
| end up with a build error such as |
| |
| chacha-glue.c:(.text+0xc0): undefined reference to `chacha_4block_xor_neon' |
| |
| caused by the fact that chacha_doneon() has not been eliminated from the |
| object code, even though it will never be called in practice. |
| |
| Let's fix this by adding some IS_ENABLED(CONFIG_KERNEL_MODE_NEON) tests |
| that are not strictly needed from a logical point of view, but should |
| help the compiler infer that the NEON code paths are unreachable in |
| those cases. |
| |
| Fixes: b36d8c09e710c71f ("crypto: arm/chacha - remove dependency on generic ...") |
| Reported-by: Russell King <linux@armlinux.org.uk> |
| Cc: Arnd Bergmann <arnd@arndb.de> |
| Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/chacha-glue.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c |
| index 7bdf8823066d..893692ed12b7 100644 |
| |
| |
| @@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skcipher_request *req, |
| if (nbytes < walk.total) |
| nbytes = round_down(nbytes, walk.stride); |
| |
| - if (!neon) { |
| + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
| nbytes, state, ctx->nrounds); |
| state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); |
| @@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_request *req, bool neon) |
| |
| chacha_init_generic(state, ctx->key, req->iv); |
| |
| - if (!neon) { |
| + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| hchacha_block_arm(state, subctx.key, ctx->nrounds); |
| } else { |
| kernel_neon_begin(); |
| -- |
| 2.18.2 |
| |
| |
| From 0754cc145e417709e9be36785aeac0c4c4d5d5c1 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 17 Jan 2020 12:01:36 +0100 |
| Subject: [PATCH 050/100] crypto: Kconfig - allow tests to be disabled when |
| manager is disabled |
| |
| commit 2343d1529aff8b552589f622c23932035ed7a05d upstream. |
| |
| The library code uses CRYPTO_MANAGER_DISABLE_TESTS to conditionalize its |
| tests, but the library code can also exist without CRYPTO_MANAGER. That |
| means on minimal configs, the test code winds up being built with no way |
| to disable it. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| crypto/Kconfig | 4 ---- |
| 1 file changed, 4 deletions(-) |
| |
| diff --git a/crypto/Kconfig b/crypto/Kconfig |
| index b8b738bcc312..8fcf630471dc 100644 |
| |
| |
| @@ -136,8 +136,6 @@ config CRYPTO_USER |
| Userspace configuration for cryptographic instantiations such as |
| cbc(aes). |
| |
| -if CRYPTO_MANAGER2 |
| - |
| config CRYPTO_MANAGER_DISABLE_TESTS |
| bool "Disable run-time self tests" |
| default y |
| @@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS |
| This is intended for developer use only, as these tests take much |
| longer to run than the normal self tests. |
| |
| -endif # if CRYPTO_MANAGER2 |
| - |
| config CRYPTO_GF128MUL |
| tristate |
| |
| -- |
| 2.18.2 |
| |
| |
| From 6de0d949776ff25dda2069b744db5e011e33056c Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 6 Feb 2020 12:42:01 +0100 |
| Subject: [PATCH 051/100] crypto: chacha20poly1305 - prevent integer overflow |
| on large input |
| |
| commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream. |
| |
| This code assigns src_len (size_t) to sl (int), which causes problems |
| when src_len is very large. Probably nobody in the kernel should be |
| passing this much data to chacha20poly1305 all in one go anyway, so I |
| don't think we need to change the algorithm or introduce larger types |
| or anything. But we should at least error out early in this case and |
| print a warning so that we get reports if this does happen and can look |
| into why anybody is possibly passing it that much data or if they're |
| accidentally passing -1 or similar. |
| |
| Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine") |
| Cc: Ard Biesheuvel <ardb@kernel.org> |
| Cc: stable@vger.kernel.org # 5.5+ |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Acked-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| lib/crypto/chacha20poly1305.c | 3 +++ |
| 1 file changed, 3 insertions(+) |
| |
| diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c |
| index 6d83cafebc69..ad0699ce702f 100644 |
| |
| |
| @@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, |
| __le64 lens[2]; |
| } b __aligned(16); |
| |
| + if (WARN_ON(src_len > INT_MAX)) |
| + return false; |
| + |
| chacha_load_key(b.k, key); |
| |
| b.iv[0] = 0; |
| -- |
| 2.18.2 |
| |
| |
| From 214f482df388cc9b22d443813acfb8eeebb9b7c6 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 1 Mar 2020 22:52:35 +0800 |
| Subject: [PATCH 052/100] crypto: x86/curve25519 - support assemblers with no |
| adx support |
| |
| commit 1579f1bc3b753d17a44de3457d5c6f4a5b14c752 upstream. |
| |
| Some older versions of GAS do not support the ADX instructions, similarly |
| to how they also don't support AVX and such. This commit adds the same |
| build-time detection mechanisms we use for AVX and others for ADX, and |
| then makes sure that the curve25519 library dispatcher calls the right |
| functions. |
| |
| Reported-by: Willy Tarreau <w@1wt.eu> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/Makefile | 5 +++-- |
| arch/x86/crypto/Makefile | 7 ++++++- |
| include/crypto/curve25519.h | 6 ++++-- |
| 3 files changed, 13 insertions(+), 5 deletions(-) |
| |
| diff --git a/arch/x86/Makefile b/arch/x86/Makefile |
| index 94df0868804b..513a55562d75 100644 |
| |
| |
| @@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) |
| avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1) |
| sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) |
| sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) |
| +adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1) |
| |
| -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) |
| -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) |
| +KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) |
| +KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) |
| |
| KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) |
| |
| diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile |
| index b69e00bf20b8..8c2e9eadee8a 100644 |
| |
| |
| @@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ |
| avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no) |
| sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) |
| sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) |
| +adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no) |
| |
| obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o |
| |
| @@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o |
| |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o |
| obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o |
| -obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o |
| + |
| +# These modules require the assembler to support ADX. |
| +ifeq ($(adx_supported),yes) |
| + obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o |
| +endif |
| |
| # These modules require assembler to support AVX. |
| ifeq ($(avx_supported),yes) |
| diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h |
| index 4e6dc840b159..9ecb3c1f0f15 100644 |
| |
| |
| @@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], |
| const u8 secret[CURVE25519_KEY_SIZE], |
| const u8 basepoint[CURVE25519_KEY_SIZE]) |
| { |
| - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && |
| + (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) |
| curve25519_arch(mypublic, secret, basepoint); |
| else |
| curve25519_generic(mypublic, secret, basepoint); |
| @@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], |
| CURVE25519_KEY_SIZE))) |
| return false; |
| |
| - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) |
| + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && |
| + (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) |
| curve25519_base_arch(pub, secret); |
| else |
| curve25519_generic(pub, secret, curve25519_base_point); |
| -- |
| 2.18.2 |
| |
| |
| From 232561dc8a8eb5139d8124cf5011e6db8fbb9e3f Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 18 Mar 2020 20:27:32 -0600 |
| Subject: [PATCH 053/100] crypto: arm64/chacha - correctly walk through blocks |
| |
| commit c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 upstream. |
| |
| Prior, passing in chunks of 2, 3, or 4, followed by any additional |
| chunks would result in the chacha state counter getting out of sync, |
| resulting in incorrect encryption/decryption, which is a pretty nasty |
| crypto vuln: "why do images look weird on webpages?" WireGuard users |
| never experienced this prior, because we have always, out of tree, used |
| a different crypto library, until the recent Frankenzinc addition. This |
| commit fixes the issue by advancing the pointers and state counter by |
| the actual size processed. It also fixes up a bug in the (optional, |
| costly) stride test that prevented it from running on arm64. |
| |
| Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") |
| Reported-and-tested-by: Emil Renner Berthing <kernel@esmil.dk> |
| Cc: Ard Biesheuvel <ardb@kernel.org> |
| Cc: stable@vger.kernel.org # v5.5+ |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Reviewed-by: Eric Biggers <ebiggers@google.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm64/crypto/chacha-neon-glue.c | 8 ++++---- |
| lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++--- |
| 2 files changed, 12 insertions(+), 7 deletions(-) |
| |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index 71c11d2e9fcd..218943612261 100644 |
| |
| |
| @@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| break; |
| } |
| chacha_4block_xor_neon(state, dst, src, nrounds, l); |
| - bytes -= CHACHA_BLOCK_SIZE * 5; |
| - src += CHACHA_BLOCK_SIZE * 5; |
| - dst += CHACHA_BLOCK_SIZE * 5; |
| - state[12] += 5; |
| + bytes -= l; |
| + src += l; |
| + dst += l; |
| + state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); |
| } |
| } |
| |
| diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c |
| index c391a91364e9..fa43deda2660 100644 |
| |
| |
| @@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void) |
| && total_len <= 1 << 10; ++total_len) { |
| for (i = 0; i <= total_len; ++i) { |
| for (j = i; j <= total_len; ++j) { |
| + k = 0; |
| sg_init_table(sg_src, 3); |
| - sg_set_buf(&sg_src[0], input, i); |
| - sg_set_buf(&sg_src[1], input + i, j - i); |
| - sg_set_buf(&sg_src[2], input + j, total_len - j); |
| + if (i) |
| + sg_set_buf(&sg_src[k++], input, i); |
| + if (j - i) |
| + sg_set_buf(&sg_src[k++], input + i, j - i); |
| + if (total_len - j) |
| + sg_set_buf(&sg_src[k++], input + j, total_len - j); |
| + sg_init_marker(sg_src, k); |
| memset(computed_output, 0, total_len); |
| memset(input, 0, total_len); |
| |
| -- |
| 2.18.2 |
| |
| |
| From e0f9507b756594951d5d4ea1591387aa62d4caa0 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 20 Jan 2020 18:18:15 +0100 |
| Subject: [PATCH 054/100] crypto: x86/curve25519 - replace with formally |
| verified implementation |
| |
| commit 07b586fe06625b0b610dc3d3a969c51913d143d4 upstream. |
| |
| This comes from INRIA's HACL*/Vale. It implements the same algorithm and |
| implementation strategy as the code it replaces, only this code has been |
| formally verified, sans the base point multiplication, which uses code |
| similar to prior, only it uses the formally verified field arithmetic |
| alongside reproducible ladder generation steps. This doesn't have a |
| pure-bmi2 version, which means haswell no longer benefits, but the |
| increased (doubled) code complexity is not worth it for a single |
| generation of chips that's already old. |
| |
| Performance-wise, this is around 1% slower on older microarchitectures, |
| and slightly faster on newer microarchitectures, mainly 10nm ones or |
| backports of 10nm to 14nm. This implementation is "everest" below: |
| |
| Xeon E5-2680 v4 (Broadwell) |
| |
| armfazh: 133340 cycles per call |
| everest: 133436 cycles per call |
| |
| Xeon Gold 5120 (Sky Lake Server) |
| |
| armfazh: 112636 cycles per call |
| everest: 113906 cycles per call |
| |
| Core i5-6300U (Sky Lake Client) |
| |
| armfazh: 116810 cycles per call |
| everest: 117916 cycles per call |
| |
| Core i7-7600U (Kaby Lake) |
| |
| armfazh: 119523 cycles per call |
| everest: 119040 cycles per call |
| |
| Core i7-8750H (Coffee Lake) |
| |
| armfazh: 113914 cycles per call |
| everest: 113650 cycles per call |
| |
| Core i9-9880H (Coffee Lake Refresh) |
| |
| armfazh: 112616 cycles per call |
| everest: 114082 cycles per call |
| |
| Core i3-8121U (Cannon Lake) |
| |
| armfazh: 113202 cycles per call |
| everest: 111382 cycles per call |
| |
| Core i7-8265U (Whiskey Lake) |
| |
| armfazh: 127307 cycles per call |
| everest: 127697 cycles per call |
| |
| Core i7-8550U (Kaby Lake Refresh) |
| |
| armfazh: 127522 cycles per call |
| everest: 127083 cycles per call |
| |
| Xeon Platinum 8275CL (Cascade Lake) |
| |
| armfazh: 114380 cycles per call |
| everest: 114656 cycles per call |
| |
| Achieving these kinds of results with formally verified code is quite |
| remarkable, especially considering that performance is favorable for |
| newer chips. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/curve25519-x86_64.c | 3546 ++++++++++----------------- |
| 1 file changed, 1292 insertions(+), 2254 deletions(-) |
| |
| diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c |
| index eec7d2d24239..e4e58b8e9afe 100644 |
| |
| |
| @@ -1,8 +1,7 @@ |
| -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
| +// SPDX-License-Identifier: GPL-2.0 OR MIT |
| /* |
| - * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. |
| - * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| - * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. |
| + * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation |
| */ |
| |
| #include <crypto/curve25519.h> |
| @@ -16,2337 +15,1378 @@ |
| #include <asm/cpufeature.h> |
| #include <asm/processor.h> |
| |
| -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); |
| -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); |
| - |
| -enum { NUM_WORDS_ELTFP25519 = 4 }; |
| -typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; |
| -typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; |
| - |
| -#define mul_eltfp25519_1w_adx(c, a, b) do { \ |
| - mul_256x256_integer_adx(m.buffer, a, b); \ |
| - red_eltfp25519_1w_adx(c, m.buffer); \ |
| -} while (0) |
| - |
| -#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ |
| - mul_256x256_integer_bmi2(m.buffer, a, b); \ |
| - red_eltfp25519_1w_bmi2(c, m.buffer); \ |
| -} while (0) |
| - |
| -#define sqr_eltfp25519_1w_adx(a) do { \ |
| - sqr_256x256_integer_adx(m.buffer, a); \ |
| - red_eltfp25519_1w_adx(a, m.buffer); \ |
| -} while (0) |
| - |
| -#define sqr_eltfp25519_1w_bmi2(a) do { \ |
| - sqr_256x256_integer_bmi2(m.buffer, a); \ |
| - red_eltfp25519_1w_bmi2(a, m.buffer); \ |
| -} while (0) |
| - |
| -#define mul_eltfp25519_2w_adx(c, a, b) do { \ |
| - mul2_256x256_integer_adx(m.buffer, a, b); \ |
| - red_eltfp25519_2w_adx(c, m.buffer); \ |
| -} while (0) |
| - |
| -#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ |
| - mul2_256x256_integer_bmi2(m.buffer, a, b); \ |
| - red_eltfp25519_2w_bmi2(c, m.buffer); \ |
| -} while (0) |
| - |
| -#define sqr_eltfp25519_2w_adx(a) do { \ |
| - sqr2_256x256_integer_adx(m.buffer, a); \ |
| - red_eltfp25519_2w_adx(a, m.buffer); \ |
| -} while (0) |
| - |
| -#define sqr_eltfp25519_2w_bmi2(a) do { \ |
| - sqr2_256x256_integer_bmi2(m.buffer, a); \ |
| - red_eltfp25519_2w_bmi2(a, m.buffer); \ |
| -} while (0) |
| - |
| -#define sqrn_eltfp25519_1w_adx(a, times) do { \ |
| - int ____counter = (times); \ |
| - while (____counter-- > 0) \ |
| - sqr_eltfp25519_1w_adx(a); \ |
| -} while (0) |
| - |
| -#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ |
| - int ____counter = (times); \ |
| - while (____counter-- > 0) \ |
| - sqr_eltfp25519_1w_bmi2(a); \ |
| -} while (0) |
| - |
| -#define copy_eltfp25519_1w(C, A) do { \ |
| - (C)[0] = (A)[0]; \ |
| - (C)[1] = (A)[1]; \ |
| - (C)[2] = (A)[2]; \ |
| - (C)[3] = (A)[3]; \ |
| -} while (0) |
| - |
| -#define setzero_eltfp25519_1w(C) do { \ |
| - (C)[0] = 0; \ |
| - (C)[1] = 0; \ |
| - (C)[2] = 0; \ |
| - (C)[3] = 0; \ |
| -} while (0) |
| - |
| -__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { |
| - /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, |
| - 0xffffffffffffffffUL, 0x5fffffffffffffffUL, |
| - /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, |
| - 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, |
| - /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, |
| - 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, |
| - /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, |
| - 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, |
| - /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, |
| - 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, |
| - /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, |
| - 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, |
| - /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, |
| - 0xc1c20d06231f7614UL, 0x2938218da274f972UL, |
| - /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, |
| - 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, |
| - /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, |
| - 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, |
| - /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, |
| - 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, |
| - /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, |
| - 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, |
| - /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, |
| - 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, |
| - /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, |
| - 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, |
| - /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, |
| - 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, |
| - /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, |
| - 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, |
| - /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, |
| - 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, |
| - /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, |
| - 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, |
| - /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, |
| - 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, |
| - /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, |
| - 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, |
| - /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, |
| - 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, |
| - /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, |
| - 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, |
| - /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, |
| - 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, |
| - /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, |
| - 0x23758739f630a257UL, 0x295a407a01a78580UL, |
| - /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, |
| - 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, |
| - /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, |
| - 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, |
| - /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, |
| - 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, |
| - /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, |
| - 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, |
| - /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, |
| - 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, |
| - /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, |
| - 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, |
| - /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, |
| - 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, |
| - /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, |
| - 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, |
| - /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, |
| - 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, |
| - /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, |
| - 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, |
| - /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, |
| - 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, |
| - /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, |
| - 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, |
| - /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, |
| - 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, |
| - /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, |
| - 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, |
| - /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, |
| - 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, |
| - /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, |
| - 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, |
| - /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, |
| - 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, |
| - /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, |
| - 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, |
| - /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, |
| - 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, |
| - /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, |
| - 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, |
| - /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, |
| - 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, |
| - /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, |
| - 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, |
| - /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, |
| - 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, |
| - /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, |
| - 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, |
| - /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, |
| - 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, |
| - /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, |
| - 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, |
| - /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, |
| - 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, |
| - /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, |
| - 0xc189218075e91436UL, 0x6d9284169b3b8484UL, |
| - /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, |
| - 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, |
| - /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, |
| - 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, |
| - /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, |
| - 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, |
| - /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, |
| - 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, |
| - /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, |
| - 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, |
| - /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, |
| - 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, |
| - /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, |
| - 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, |
| - /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, |
| - 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, |
| - /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, |
| - 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, |
| - /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, |
| - 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, |
| - /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, |
| - 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, |
| - /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, |
| - 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, |
| - /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, |
| - 0x25232973322dbef4UL, 0x445dc4758c17f770UL, |
| - /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, |
| - 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, |
| - /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, |
| - 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, |
| - /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, |
| - 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, |
| - /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, |
| - 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, |
| - /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, |
| - 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, |
| - /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, |
| - 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, |
| - /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, |
| - 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, |
| - /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, |
| - 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, |
| - /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, |
| - 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, |
| - /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, |
| - 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, |
| - /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, |
| - 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, |
| - /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, |
| - 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, |
| - /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, |
| - 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, |
| - /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, |
| - 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, |
| - /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, |
| - 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, |
| - /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, |
| - 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, |
| - /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, |
| - 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, |
| - /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, |
| - 0x894d1d855ae52359UL, 0x68e122157b743d69UL, |
| - /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, |
| - 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, |
| - /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, |
| - 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, |
| - /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, |
| - 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, |
| - /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, |
| - 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, |
| - /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, |
| - 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, |
| - /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, |
| - 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, |
| - /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, |
| - 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, |
| - /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, |
| - 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, |
| - /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, |
| - 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, |
| - /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, |
| - 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, |
| - /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, |
| - 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, |
| - /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, |
| - 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, |
| - /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, |
| - 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, |
| - /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, |
| - 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, |
| - /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, |
| - 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, |
| - /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, |
| - 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, |
| - /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, |
| - 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, |
| - /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, |
| - 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, |
| - /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, |
| - 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, |
| - /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, |
| - 0x4a497962066e6043UL, 0x705b3aab41355b44UL, |
| - /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, |
| - 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, |
| - /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, |
| - 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, |
| - /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, |
| - 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, |
| - /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, |
| - 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, |
| - /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, |
| - 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, |
| - /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, |
| - 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, |
| - /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, |
| - 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, |
| - /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, |
| - 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, |
| - /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, |
| - 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, |
| - /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, |
| - 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, |
| - /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, |
| - 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, |
| - /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, |
| - 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, |
| - /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, |
| - 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, |
| - /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, |
| - 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, |
| - /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, |
| - 0x508e862f121692fcUL, 0x3a81907fa093c291UL, |
| - /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, |
| - 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, |
| - /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, |
| - 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, |
| - /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, |
| - 0xe488de11d761e352UL, 0x0e878a01a085545cUL, |
| - /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, |
| - 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, |
| - /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, |
| - 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, |
| - /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, |
| - 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, |
| - /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, |
| - 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, |
| - /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, |
| - 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, |
| - /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, |
| - 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, |
| - /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, |
| - 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, |
| - /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, |
| - 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, |
| - /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, |
| - 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, |
| - /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, |
| - 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, |
| - /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, |
| - 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, |
| - /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, |
| - 0x266fd5809208f294UL, 0x5c847085619a26b9UL, |
| - /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, |
| - 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, |
| - /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, |
| - 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, |
| - /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, |
| - 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, |
| - /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, |
| - 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, |
| - /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, |
| - 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, |
| - /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, |
| - 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, |
| - /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, |
| - 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, |
| - /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, |
| - 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, |
| - /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, |
| - 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, |
| - /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, |
| - 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, |
| - /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, |
| - 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, |
| - /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, |
| - 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, |
| - /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, |
| - 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, |
| - /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, |
| - 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, |
| - /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, |
| - 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, |
| - /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, |
| - 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, |
| - /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, |
| - 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, |
| - /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, |
| - 0x52d17436309d4253UL, 0x356f97e13efae576UL, |
| - /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, |
| - 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, |
| - /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, |
| - 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, |
| - /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, |
| - 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, |
| - /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, |
| - 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, |
| - /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, |
| - 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, |
| - /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, |
| - 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, |
| - /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, |
| - 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, |
| - /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, |
| - 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, |
| - /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, |
| - 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, |
| - /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, |
| - 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, |
| - /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, |
| - 0x497d723f802e88e1UL, 0x30684dea602f408dUL, |
| - /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, |
| - 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, |
| - /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, |
| - 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, |
| - /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, |
| - 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, |
| - /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, |
| - 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, |
| - /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, |
| - 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, |
| - /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, |
| - 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, |
| - /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, |
| - 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, |
| - /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, |
| - 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, |
| - /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, |
| - 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, |
| - /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, |
| - 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, |
| - /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, |
| - 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, |
| - /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, |
| - 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, |
| - /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, |
| - 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, |
| - /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, |
| - 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, |
| - /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, |
| - 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, |
| - /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, |
| - 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, |
| - /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, |
| - 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, |
| - /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, |
| - 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, |
| - /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, |
| - 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, |
| - /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, |
| - 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, |
| - /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, |
| - 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, |
| - /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, |
| - 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, |
| - /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, |
| - 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, |
| - /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, |
| - 0x81004b71e33cc191UL, 0x44e6be345122803cUL, |
| - /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, |
| - 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, |
| - /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, |
| - 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, |
| - /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, |
| - 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, |
| - /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, |
| - 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, |
| - /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, |
| - 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, |
| - /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, |
| - 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, |
| - /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, |
| - 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, |
| - /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, |
| - 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, |
| - /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, |
| - 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, |
| - /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, |
| - 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, |
| - /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, |
| - 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, |
| - /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, |
| - 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, |
| - /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, |
| - 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, |
| - /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, |
| - 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, |
| - /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, |
| - 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, |
| - /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, |
| - 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, |
| - /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, |
| - 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, |
| - /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, |
| - 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, |
| - /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, |
| - 0x33979624f0e917beUL, 0x2c018dc527356b30UL, |
| - /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, |
| - 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, |
| - /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, |
| - 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, |
| - /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, |
| - 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, |
| - /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, |
| - 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, |
| - /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, |
| - 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, |
| - /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, |
| - 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, |
| - /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, |
| - 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, |
| - /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, |
| - 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, |
| - /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, |
| - 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, |
| - /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, |
| - 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, |
| - /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, |
| - 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, |
| - /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, |
| - 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, |
| - /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, |
| - 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, |
| - /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, |
| - 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, |
| - /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, |
| - 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, |
| - /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, |
| - 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, |
| - /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, |
| - 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, |
| - /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, |
| - 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, |
| - /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, |
| - 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, |
| - /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, |
| - 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, |
| - /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, |
| - 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, |
| - /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, |
| - 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, |
| - /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, |
| - 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, |
| - /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, |
| - 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, |
| - /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, |
| - 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, |
| - /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, |
| - 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, |
| - /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, |
| - 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, |
| - /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, |
| - 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, |
| - /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, |
| - 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, |
| - /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, |
| - 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, |
| - /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, |
| - 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, |
| - /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, |
| - 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, |
| - /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, |
| - 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, |
| - /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, |
| - 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, |
| - /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, |
| - 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, |
| - /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, |
| - 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, |
| - /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, |
| - 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, |
| - /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, |
| - 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, |
| - /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, |
| - 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, |
| - /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, |
| - 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, |
| - /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, |
| - 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, |
| - /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, |
| - 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, |
| - /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, |
| - 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, |
| - /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, |
| - 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, |
| - /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, |
| - 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, |
| - /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, |
| - 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, |
| - /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, |
| - 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, |
| - /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, |
| - 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL |
| -}; |
| - |
| -/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] |
| - * a is two 256-bit integers: a0[0:3] and a1[4:7] |
| - * b is two 256-bit integers: b0[0:3] and b1[4:7] |
| - */ |
| -static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, |
| - const u64 *const b) |
| -{ |
| - asm volatile( |
| - "xorl %%r14d, %%r14d ;" |
| - "movq (%1), %%rdx; " /* A[0] */ |
| - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "movq %%r8, (%0) ;" |
| - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| - "adox %%r10, %%r15 ;" |
| - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| - "adox %%r8, %%rax ;" |
| - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| - "adox %%r10, %%rbx ;" |
| - /******************************************/ |
| - "adox %%r14, %%rcx ;" |
| - |
| - "movq 8(%1), %%rdx; " /* A[1] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| - "adox %%r15, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rax ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%rbx ;" |
| - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%rcx ;" |
| - /******************************************/ |
| - "adox %%r14, %%r15 ;" |
| - "adcx %%r14, %%r15 ;" |
| - |
| - "movq 16(%1), %%rdx; " /* A[2] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| - "adox %%rax, %%r8 ;" |
| - "movq %%r8, 16(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rbx ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%rcx ;" |
| - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%r15 ;" |
| - /******************************************/ |
| - "adox %%r14, %%rax ;" |
| - "adcx %%r14, %%rax ;" |
| - |
| - "movq 24(%1), %%rdx; " /* A[3] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| - "adox %%rbx, %%r8 ;" |
| - "movq %%r8, 24(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rcx ;" |
| - "movq %%rcx, 32(%0) ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%rax ;" |
| - "movq %%rax, 48(%0) ;" |
| - /******************************************/ |
| - "adox %%r14, %%rbx ;" |
| - "adcx %%r14, %%rbx ;" |
| - "movq %%rbx, 56(%0) ;" |
| - |
| - "movq 32(%1), %%rdx; " /* C[0] */ |
| - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "movq %%r8, 64(%0);" |
| - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ |
| - "adox %%r10, %%r15 ;" |
| - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ |
| - "adox %%r8, %%rax ;" |
| - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ |
| - "adox %%r10, %%rbx ;" |
| - /******************************************/ |
| - "adox %%r14, %%rcx ;" |
| - |
| - "movq 40(%1), %%rdx; " /* C[1] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ |
| - "adox %%r15, %%r8 ;" |
| - "movq %%r8, 72(%0);" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rax ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%rbx ;" |
| - "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%rcx ;" |
| - /******************************************/ |
| - "adox %%r14, %%r15 ;" |
| - "adcx %%r14, %%r15 ;" |
| - |
| - "movq 48(%1), %%rdx; " /* C[2] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ |
| - "adox %%rax, %%r8 ;" |
| - "movq %%r8, 80(%0);" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rbx ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%rcx ;" |
| - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%r15 ;" |
| - /******************************************/ |
| - "adox %%r14, %%rax ;" |
| - "adcx %%r14, %%rax ;" |
| - |
| - "movq 56(%1), %%rdx; " /* C[3] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ |
| - "adox %%rbx, %%r8 ;" |
| - "movq %%r8, 88(%0);" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ |
| - "adox %%r10, %%r9 ;" |
| - "adcx %%r9, %%rcx ;" |
| - "movq %%rcx, 96(%0) ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ |
| - "adox %%r8, %%r11 ;" |
| - "adcx %%r11, %%r15 ;" |
| - "movq %%r15, 104(%0) ;" |
| - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ |
| - "adox %%r10, %%r13 ;" |
| - "adcx %%r13, %%rax ;" |
| - "movq %%rax, 112(%0) ;" |
| - /******************************************/ |
| - "adox %%r14, %%rbx ;" |
| - "adcx %%r14, %%rbx ;" |
| - "movq %%rbx, 120(%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11", "%r13", "%r14", "%r15"); |
| -} |
| - |
| -static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, |
| - const u64 *const b) |
| +static __always_inline u64 eq_mask(u64 a, u64 b) |
| { |
| - asm volatile( |
| - "movq (%1), %%rdx; " /* A[0] */ |
| - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| - "movq %%r8, (%0) ;" |
| - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| - "addq %%r10, %%r15 ;" |
| - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| - "adcq %%r8, %%rax ;" |
| - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| - "adcq %%r10, %%rbx ;" |
| - /******************************************/ |
| - "adcq $0, %%rcx ;" |
| - |
| - "movq 8(%1), %%rdx; " /* A[1] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| - "addq %%r15, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%r15 ;" |
| - |
| - "addq %%r9, %%rax ;" |
| - "adcq %%r11, %%rbx ;" |
| - "adcq %%r13, %%rcx ;" |
| - "adcq $0, %%r15 ;" |
| - |
| - "movq 16(%1), %%rdx; " /* A[2] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| - "addq %%rax, %%r8 ;" |
| - "movq %%r8, 16(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rax ;" |
| - |
| - "addq %%r9, %%rbx ;" |
| - "adcq %%r11, %%rcx ;" |
| - "adcq %%r13, %%r15 ;" |
| - "adcq $0, %%rax ;" |
| - |
| - "movq 24(%1), %%rdx; " /* A[3] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| - "addq %%rbx, %%r8 ;" |
| - "movq %%r8, 24(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rbx ;" |
| - |
| - "addq %%r9, %%rcx ;" |
| - "movq %%rcx, 32(%0) ;" |
| - "adcq %%r11, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "adcq %%r13, %%rax ;" |
| - "movq %%rax, 48(%0) ;" |
| - "adcq $0, %%rbx ;" |
| - "movq %%rbx, 56(%0) ;" |
| - |
| - "movq 32(%1), %%rdx; " /* C[0] */ |
| - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ |
| - "movq %%r8, 64(%0) ;" |
| - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ |
| - "addq %%r10, %%r15 ;" |
| - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ |
| - "adcq %%r8, %%rax ;" |
| - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ |
| - "adcq %%r10, %%rbx ;" |
| - /******************************************/ |
| - "adcq $0, %%rcx ;" |
| - |
| - "movq 40(%1), %%rdx; " /* C[1] */ |
| - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ |
| - "addq %%r15, %%r8 ;" |
| - "movq %%r8, 72(%0) ;" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%r15 ;" |
| - |
| - "addq %%r9, %%rax ;" |
| - "adcq %%r11, %%rbx ;" |
| - "adcq %%r13, %%rcx ;" |
| - "adcq $0, %%r15 ;" |
| - |
| - "movq 48(%1), %%rdx; " /* C[2] */ |
| - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ |
| - "addq %%rax, %%r8 ;" |
| - "movq %%r8, 80(%0) ;" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rax ;" |
| - |
| - "addq %%r9, %%rbx ;" |
| - "adcq %%r11, %%rcx ;" |
| - "adcq %%r13, %%r15 ;" |
| - "adcq $0, %%rax ;" |
| - |
| - "movq 56(%1), %%rdx; " /* C[3] */ |
| - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ |
| - "addq %%rbx, %%r8 ;" |
| - "movq %%r8, 88(%0) ;" |
| - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rbx ;" |
| - |
| - "addq %%r9, %%rcx ;" |
| - "movq %%rcx, 96(%0) ;" |
| - "adcq %%r11, %%r15 ;" |
| - "movq %%r15, 104(%0) ;" |
| - "adcq %%r13, %%rax ;" |
| - "movq %%rax, 112(%0) ;" |
| - "adcq $0, %%rbx ;" |
| - "movq %%rbx, 120(%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11", "%r13", "%r15"); |
| + u64 x = a ^ b; |
| + u64 minus_x = ~x + (u64)1U; |
| + u64 x_or_minus_x = x | minus_x; |
| + u64 xnx = x_or_minus_x >> (u32)63U; |
| + return xnx - (u64)1U; |
| } |
| |
| -static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) |
| +static __always_inline u64 gte_mask(u64 a, u64 b) |
| { |
| - asm volatile( |
| - "movq (%1), %%rdx ;" /* A[0] */ |
| - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ |
| - "xorl %%r15d, %%r15d;" |
| - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ |
| - "adcx %%r14, %%r9 ;" |
| - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ |
| - "adcx %%rax, %%r10 ;" |
| - "movq 24(%1), %%rdx ;" /* A[3] */ |
| - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ |
| - "adcx %%rcx, %%r11 ;" |
| - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ |
| - "adcx %%rax, %%rbx ;" |
| - "movq 8(%1), %%rdx ;" /* A[1] */ |
| - "adcx %%r15, %%r13 ;" |
| - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ |
| - "movq $0, %%r14 ;" |
| - /******************************************/ |
| - "adcx %%r15, %%r14 ;" |
| - |
| - "xorl %%r15d, %%r15d;" |
| - "adox %%rax, %%r10 ;" |
| - "adcx %%r8, %%r8 ;" |
| - "adox %%rcx, %%r11 ;" |
| - "adcx %%r9, %%r9 ;" |
| - "adox %%r15, %%rbx ;" |
| - "adcx %%r10, %%r10 ;" |
| - "adox %%r15, %%r13 ;" |
| - "adcx %%r11, %%r11 ;" |
| - "adox %%r15, %%r14 ;" |
| - "adcx %%rbx, %%rbx ;" |
| - "adcx %%r13, %%r13 ;" |
| - "adcx %%r14, %%r14 ;" |
| - |
| - "movq (%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 0(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "movq 8(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 16(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 24(%0) ;" |
| - "movq 16(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 32(%0) ;" |
| - "adcq %%rcx, %%rbx ;" |
| - "movq %%rbx, 40(%0) ;" |
| - "movq 24(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 48(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 56(%0) ;" |
| - |
| - |
| - "movq 32(%1), %%rdx ;" /* B[0] */ |
| - "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ |
| - "xorl %%r15d, %%r15d;" |
| - "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ |
| - "adcx %%r14, %%r9 ;" |
| - "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ |
| - "adcx %%rax, %%r10 ;" |
| - "movq 56(%1), %%rdx ;" /* B[3] */ |
| - "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ |
| - "adcx %%rcx, %%r11 ;" |
| - "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ |
| - "adcx %%rax, %%rbx ;" |
| - "movq 40(%1), %%rdx ;" /* B[1] */ |
| - "adcx %%r15, %%r13 ;" |
| - "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ |
| - "movq $0, %%r14 ;" |
| - /******************************************/ |
| - "adcx %%r15, %%r14 ;" |
| - |
| - "xorl %%r15d, %%r15d;" |
| - "adox %%rax, %%r10 ;" |
| - "adcx %%r8, %%r8 ;" |
| - "adox %%rcx, %%r11 ;" |
| - "adcx %%r9, %%r9 ;" |
| - "adox %%r15, %%rbx ;" |
| - "adcx %%r10, %%r10 ;" |
| - "adox %%r15, %%r13 ;" |
| - "adcx %%r11, %%r11 ;" |
| - "adox %%r15, %%r14 ;" |
| - "adcx %%rbx, %%rbx ;" |
| - "adcx %%r13, %%r13 ;" |
| - "adcx %%r14, %%r14 ;" |
| - |
| - "movq 32(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 64(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 72(%0) ;" |
| - "movq 40(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 80(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 88(%0) ;" |
| - "movq 48(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 96(%0) ;" |
| - "adcq %%rcx, %%rbx ;" |
| - "movq %%rbx, 104(%0) ;" |
| - "movq 56(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 112(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 120(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11", "%r13", "%r14", "%r15"); |
| + u64 x = a; |
| + u64 y = b; |
| + u64 x_xor_y = x ^ y; |
| + u64 x_sub_y = x - y; |
| + u64 x_sub_y_xor_y = x_sub_y ^ y; |
| + u64 q = x_xor_y | x_sub_y_xor_y; |
| + u64 x_xor_q = x ^ q; |
| + u64 x_xor_q_ = x_xor_q >> (u32)63U; |
| + return x_xor_q_ - (u64)1U; |
| } |
| |
| -static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) |
| +/* Computes the addition of four-element f1 with value in f2 |
| + * and returns the carry (if any) */ |
| +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) |
| { |
| - asm volatile( |
| - "movq 8(%1), %%rdx ;" /* A[1] */ |
| - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ |
| - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ |
| - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ |
| - |
| - "movq 16(%1), %%rdx ;" /* A[2] */ |
| - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ |
| - "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ |
| - |
| - "addq %%rax, %%r9 ;" |
| - "adcq %%rdx, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq %%r14, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "movq $0, %%r14 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "movq (%1), %%rdx ;" /* A[0] */ |
| - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ |
| - |
| - "addq %%rax, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq $0, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "shldq $1, %%r13, %%r14 ;" |
| - "shldq $1, %%r15, %%r13 ;" |
| - "shldq $1, %%r11, %%r15 ;" |
| - "shldq $1, %%r10, %%r11 ;" |
| - "shldq $1, %%r9, %%r10 ;" |
| - "shldq $1, %%r8, %%r9 ;" |
| - "shlq $1, %%r8 ;" |
| - |
| - /*******************/ |
| - "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 0(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "movq 8(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 16(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 24(%0) ;" |
| - "movq 16(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 32(%0) ;" |
| - "adcq %%rcx, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "movq 24(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 48(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 56(%0) ;" |
| - |
| - "movq 40(%1), %%rdx ;" /* B[1] */ |
| - "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ |
| - "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ |
| - "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ |
| - |
| - "movq 48(%1), %%rdx ;" /* B[2] */ |
| - "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ |
| - "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ |
| - |
| - "addq %%rax, %%r9 ;" |
| - "adcq %%rdx, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq %%r14, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "movq $0, %%r14 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "movq 32(%1), %%rdx ;" /* B[0] */ |
| - "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ |
| - |
| - "addq %%rax, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq $0, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "shldq $1, %%r13, %%r14 ;" |
| - "shldq $1, %%r15, %%r13 ;" |
| - "shldq $1, %%r11, %%r15 ;" |
| - "shldq $1, %%r10, %%r11 ;" |
| - "shldq $1, %%r9, %%r10 ;" |
| - "shldq $1, %%r8, %%r9 ;" |
| - "shlq $1, %%r8 ;" |
| - |
| - /*******************/ |
| - "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 64(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 72(%0) ;" |
| - "movq 40(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 80(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 88(%0) ;" |
| - "movq 48(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 96(%0) ;" |
| - "adcq %%rcx, %%r15 ;" |
| - "movq %%r15, 104(%0) ;" |
| - "movq 56(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 112(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 120(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| - "%r11", "%r13", "%r14", "%r15"); |
| -} |
| + u64 carry_r; |
| |
| -static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) |
| -{ |
| asm volatile( |
| - "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ |
| - "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ |
| - "xorl %%ebx, %%ebx ;" |
| - "adox (%1), %%r8 ;" |
| - "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ |
| - "adcx %%r10, %%r9 ;" |
| - "adox 8(%1), %%r9 ;" |
| - "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ |
| - "adcx %%r11, %%r10 ;" |
| - "adox 16(%1), %%r10 ;" |
| - "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ |
| - "adcx %%rax, %%r11 ;" |
| - "adox 24(%1), %%r11 ;" |
| - /***************************************/ |
| - "adcx %%rbx, %%rcx ;" |
| - "adox %%rbx, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ |
| - "adcx %%rcx, %%r8 ;" |
| - "adcx %%rbx, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcx %%rbx, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcx %%rbx, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - |
| - "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ |
| - "xorl %%ebx, %%ebx ;" |
| - "adox 64(%1), %%r8 ;" |
| - "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ |
| - "adcx %%r10, %%r9 ;" |
| - "adox 72(%1), %%r9 ;" |
| - "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ |
| - "adcx %%r11, %%r10 ;" |
| - "adox 80(%1), %%r10 ;" |
| - "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ |
| - "adcx %%rax, %%r11 ;" |
| - "adox 88(%1), %%r11 ;" |
| - /****************************************/ |
| - "adcx %%rbx, %%rcx ;" |
| - "adox %%rbx, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ |
| - "adcx %%rcx, %%r8 ;" |
| - "adcx %%rbx, %%r9 ;" |
| - "movq %%r9, 40(%0) ;" |
| - "adcx %%rbx, %%r10 ;" |
| - "movq %%r10, 48(%0) ;" |
| - "adcx %%rbx, %%r11 ;" |
| - "movq %%r11, 56(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 32(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11"); |
| -} |
| + /* Clear registers to propagate the carry bit */ |
| + " xor %%r8, %%r8;" |
| + " xor %%r9, %%r9;" |
| + " xor %%r10, %%r10;" |
| + " xor %%r11, %%r11;" |
| + " xor %1, %1;" |
| + |
| + /* Begin addition chain */ |
| + " addq 0(%3), %0;" |
| + " movq %0, 0(%2);" |
| + " adcxq 8(%3), %%r8;" |
| + " movq %%r8, 8(%2);" |
| + " adcxq 16(%3), %%r9;" |
| + " movq %%r9, 16(%2);" |
| + " adcxq 24(%3), %%r10;" |
| + " movq %%r10, 24(%2);" |
| + |
| + /* Return the carry bit in a register */ |
| + " adcx %%r11, %1;" |
| + : "+&r" (f2), "=&r" (carry_r) |
| + : "r" (out), "r" (f1) |
| + : "%r8", "%r9", "%r10", "%r11", "memory", "cc" |
| + ); |
| |
| -static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) |
| -{ |
| - asm volatile( |
| - "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ |
| - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| - "addq %%r10, %%r9 ;" |
| - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| - "adcq %%r11, %%r10 ;" |
| - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| - "adcq %%rax, %%r11 ;" |
| - /***************************************/ |
| - "adcq $0, %%rcx ;" |
| - "addq (%1), %%r8 ;" |
| - "adcq 8(%1), %%r9 ;" |
| - "adcq 16(%1), %%r10 ;" |
| - "adcq 24(%1), %%r11 ;" |
| - "adcq $0, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ |
| - "addq %%rcx, %%r8 ;" |
| - "adcq $0, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcq $0, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcq $0, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - |
| - "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| - "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| - "addq %%r10, %%r9 ;" |
| - "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ |
| - "adcq %%r11, %%r10 ;" |
| - "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| - "adcq %%rax, %%r11 ;" |
| - /****************************************/ |
| - "adcq $0, %%rcx ;" |
| - "addq 64(%1), %%r8 ;" |
| - "adcq 72(%1), %%r9 ;" |
| - "adcq 80(%1), %%r10 ;" |
| - "adcq 88(%1), %%r11 ;" |
| - "adcq $0, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ |
| - "addq %%rcx, %%r8 ;" |
| - "adcq $0, %%r9 ;" |
| - "movq %%r9, 40(%0) ;" |
| - "adcq $0, %%r10 ;" |
| - "movq %%r10, 48(%0) ;" |
| - "adcq $0, %%r11 ;" |
| - "movq %%r11, 56(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 32(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| - "%r11"); |
| + return carry_r; |
| } |
| |
| -static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, |
| - const u64 *const b) |
| +/* Computes the field addition of two field elements */ |
| +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) |
| { |
| asm volatile( |
| - "movq (%1), %%rdx; " /* A[0] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "movq %%r8, (%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ |
| - "adox %%r9, %%r10 ;" |
| - "movq %%r10, 8(%0) ;" |
| - "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ |
| - "adox %%r11, %%r15 ;" |
| - "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ |
| - "adox %%r13, %%r14 ;" |
| - "movq $0, %%rax ;" |
| - /******************************************/ |
| - "adox %%rdx, %%rax ;" |
| - |
| - "movq 8(%1), %%rdx; " /* A[1] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "adcx 8(%0), %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| - "adox %%r9, %%r10 ;" |
| - "adcx %%r15, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ |
| - "adox %%r11, %%r15 ;" |
| - "adcx %%r14, %%r15 ;" |
| - "movq $0, %%r8 ;" |
| - "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ |
| - "adox %%r13, %%r14 ;" |
| - "adcx %%rax, %%r14 ;" |
| - "movq $0, %%rax ;" |
| - /******************************************/ |
| - "adox %%rdx, %%rax ;" |
| - "adcx %%r8, %%rax ;" |
| - |
| - "movq 16(%1), %%rdx; " /* A[2] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "adcx 16(%0), %%r8 ;" |
| - "movq %%r8, 16(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| - "adox %%r9, %%r10 ;" |
| - "adcx %%r15, %%r10 ;" |
| - "movq %%r10, 24(%0) ;" |
| - "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ |
| - "adox %%r11, %%r15 ;" |
| - "adcx %%r14, %%r15 ;" |
| - "movq $0, %%r8 ;" |
| - "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ |
| - "adox %%r13, %%r14 ;" |
| - "adcx %%rax, %%r14 ;" |
| - "movq $0, %%rax ;" |
| - /******************************************/ |
| - "adox %%rdx, %%rax ;" |
| - "adcx %%r8, %%rax ;" |
| - |
| - "movq 24(%1), %%rdx; " /* A[3] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| - "xorl %%r10d, %%r10d ;" |
| - "adcx 24(%0), %%r8 ;" |
| - "movq %%r8, 24(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| - "adox %%r9, %%r10 ;" |
| - "adcx %%r15, %%r10 ;" |
| - "movq %%r10, 32(%0) ;" |
| - "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ |
| - "adox %%r11, %%r15 ;" |
| - "adcx %%r14, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "movq $0, %%r8 ;" |
| - "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ |
| - "adox %%r13, %%r14 ;" |
| - "adcx %%rax, %%r14 ;" |
| - "movq %%r14, 48(%0) ;" |
| - "movq $0, %%rax ;" |
| - /******************************************/ |
| - "adox %%rdx, %%rax ;" |
| - "adcx %%r8, %%rax ;" |
| - "movq %%rax, 56(%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", |
| - "%r13", "%r14", "%r15"); |
| + /* Compute the raw addition of f1 + f2 */ |
| + " movq 0(%0), %%r8;" |
| + " addq 0(%2), %%r8;" |
| + " movq 8(%0), %%r9;" |
| + " adcxq 8(%2), %%r9;" |
| + " movq 16(%0), %%r10;" |
| + " adcxq 16(%2), %%r10;" |
| + " movq 24(%0), %%r11;" |
| + " adcxq 24(%2), %%r11;" |
| + |
| + /* Wrap the result back into the field */ |
| + |
| + /* Step 1: Compute carry*38 */ |
| + " mov $0, %%rax;" |
| + " mov $38, %0;" |
| + " cmovc %0, %%rax;" |
| + |
| + /* Step 2: Add carry*38 to the original sum */ |
| + " xor %%rcx, %%rcx;" |
| + " add %%rax, %%r8;" |
| + " adcx %%rcx, %%r9;" |
| + " movq %%r9, 8(%1);" |
| + " adcx %%rcx, %%r10;" |
| + " movq %%r10, 16(%1);" |
| + " adcx %%rcx, %%r11;" |
| + " movq %%r11, 24(%1);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %0, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%1);" |
| + : "+&r" (f2) |
| + : "r" (out), "r" (f1) |
| + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" |
| + ); |
| } |
| |
| -static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, |
| - const u64 *const b) |
| +/* Computes the field subtraction of two field elements */ |
| +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) |
| { |
| asm volatile( |
| - "movq (%1), %%rdx; " /* A[0] */ |
| - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ |
| - "movq %%r8, (%0) ;" |
| - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ |
| - "addq %%r10, %%r15 ;" |
| - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ |
| - "adcq %%r8, %%rax ;" |
| - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ |
| - "adcq %%r10, %%rbx ;" |
| - /******************************************/ |
| - "adcq $0, %%rcx ;" |
| - |
| - "movq 8(%1), %%rdx; " /* A[1] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ |
| - "addq %%r15, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%r15 ;" |
| - |
| - "addq %%r9, %%rax ;" |
| - "adcq %%r11, %%rbx ;" |
| - "adcq %%r13, %%rcx ;" |
| - "adcq $0, %%r15 ;" |
| - |
| - "movq 16(%1), %%rdx; " /* A[2] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ |
| - "addq %%rax, %%r8 ;" |
| - "movq %%r8, 16(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rax ;" |
| - |
| - "addq %%r9, %%rbx ;" |
| - "adcq %%r11, %%rcx ;" |
| - "adcq %%r13, %%r15 ;" |
| - "adcq $0, %%rax ;" |
| - |
| - "movq 24(%1), %%rdx; " /* A[3] */ |
| - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ |
| - "addq %%rbx, %%r8 ;" |
| - "movq %%r8, 24(%0) ;" |
| - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ |
| - "adcq %%r10, %%r9 ;" |
| - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ |
| - "adcq %%r8, %%r11 ;" |
| - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ |
| - "adcq %%r10, %%r13 ;" |
| - /******************************************/ |
| - "adcq $0, %%rbx ;" |
| - |
| - "addq %%r9, %%rcx ;" |
| - "movq %%rcx, 32(%0) ;" |
| - "adcq %%r11, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "adcq %%r13, %%rax ;" |
| - "movq %%rax, 48(%0) ;" |
| - "adcq $0, %%rbx ;" |
| - "movq %%rbx, 56(%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11", "%r13", "%r15"); |
| + /* Compute the raw subtraction of f1-f2 */ |
| + " movq 0(%1), %%r8;" |
| + " subq 0(%2), %%r8;" |
| + " movq 8(%1), %%r9;" |
| + " sbbq 8(%2), %%r9;" |
| + " movq 16(%1), %%r10;" |
| + " sbbq 16(%2), %%r10;" |
| + " movq 24(%1), %%r11;" |
| + " sbbq 24(%2), %%r11;" |
| + |
| + /* Wrap the result back into the field */ |
| + |
| + /* Step 1: Compute carry*38 */ |
| + " mov $0, %%rax;" |
| + " mov $38, %%rcx;" |
| + " cmovc %%rcx, %%rax;" |
| + |
| + /* Step 2: Subtract carry*38 from the original difference */ |
| + " sub %%rax, %%r8;" |
| + " sbb $0, %%r9;" |
| + " sbb $0, %%r10;" |
| + " sbb $0, %%r11;" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rcx, %%rax;" |
| + " sub %%rax, %%r8;" |
| + |
| + /* Store the result */ |
| + " movq %%r8, 0(%0);" |
| + " movq %%r9, 8(%0);" |
| + " movq %%r10, 16(%0);" |
| + " movq %%r11, 24(%0);" |
| + : |
| + : "r" (out), "r" (f1), "r" (f2) |
| + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" |
| + ); |
| } |
| |
| -static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) |
| +/* Computes a field multiplication: out <- f1 * f2 |
| + * Uses the 8-element buffer tmp for intermediate results */ |
| +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| { |
| asm volatile( |
| - "movq (%1), %%rdx ;" /* A[0] */ |
| - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ |
| - "xorl %%r15d, %%r15d;" |
| - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ |
| - "adcx %%r14, %%r9 ;" |
| - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ |
| - "adcx %%rax, %%r10 ;" |
| - "movq 24(%1), %%rdx ;" /* A[3] */ |
| - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ |
| - "adcx %%rcx, %%r11 ;" |
| - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ |
| - "adcx %%rax, %%rbx ;" |
| - "movq 8(%1), %%rdx ;" /* A[1] */ |
| - "adcx %%r15, %%r13 ;" |
| - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ |
| - "movq $0, %%r14 ;" |
| - /******************************************/ |
| - "adcx %%r15, %%r14 ;" |
| - |
| - "xorl %%r15d, %%r15d;" |
| - "adox %%rax, %%r10 ;" |
| - "adcx %%r8, %%r8 ;" |
| - "adox %%rcx, %%r11 ;" |
| - "adcx %%r9, %%r9 ;" |
| - "adox %%r15, %%rbx ;" |
| - "adcx %%r10, %%r10 ;" |
| - "adox %%r15, %%r13 ;" |
| - "adcx %%r11, %%r11 ;" |
| - "adox %%r15, %%r14 ;" |
| - "adcx %%rbx, %%rbx ;" |
| - "adcx %%r13, %%r13 ;" |
| - "adcx %%r14, %%r14 ;" |
| - |
| - "movq (%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 0(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "movq 8(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 16(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 24(%0) ;" |
| - "movq 16(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 32(%0) ;" |
| - "adcq %%rcx, %%rbx ;" |
| - "movq %%rbx, 40(%0) ;" |
| - "movq 24(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 48(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 56(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11", "%r13", "%r14", "%r15"); |
| + /* Compute the raw multiplication: tmp <- src1 * src2 */ |
| + |
| + /* Compute src1[0] * src2 */ |
| + " movq 0(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" |
| + /* Compute src1[1] * src2 */ |
| + " movq 8(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[2] * src2 */ |
| + " movq 16(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[3] * src2 */ |
| + " movq 24(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" |
| + /* Line up pointers */ |
| + " mov %0, %1;" |
| + " mov %2, %0;" |
| + |
| + /* Wrap the result back into the field */ |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 32(%1), %%r8, %%r13;" |
| + " xor %3, %3;" |
| + " adoxq 0(%1), %%r8;" |
| + " mulxq 40(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 8(%1), %%r9;" |
| + " mulxq 48(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 16(%1), %%r10;" |
| + " mulxq 56(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 24(%1), %%r11;" |
| + " adcx %3, %%rax;" |
| + " adox %3, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %3, %%r9;" |
| + " movq %%r9, 8(%0);" |
| + " adcx %3, %%r10;" |
| + " movq %%r10, 16(%0);" |
| + " adcx %3, %%r11;" |
| + " movq %%r11, 24(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%0);" |
| + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) |
| + : |
| + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" |
| + ); |
| } |
| |
| -static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) |
| +/* Computes two field multiplications: |
| + * out[0] <- f1[0] * f2[0] |
| + * out[1] <- f1[1] * f2[1] |
| + * Uses the 16-element buffer tmp for intermediate results. */ |
| +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| { |
| asm volatile( |
| - "movq 8(%1), %%rdx ;" /* A[1] */ |
| - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ |
| - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ |
| - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ |
| - |
| - "movq 16(%1), %%rdx ;" /* A[2] */ |
| - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ |
| - "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ |
| - |
| - "addq %%rax, %%r9 ;" |
| - "adcq %%rdx, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq %%r14, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "movq $0, %%r14 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "movq (%1), %%rdx ;" /* A[0] */ |
| - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ |
| - |
| - "addq %%rax, %%r10 ;" |
| - "adcq %%rcx, %%r11 ;" |
| - "adcq $0, %%r15 ;" |
| - "adcq $0, %%r13 ;" |
| - "adcq $0, %%r14 ;" |
| - |
| - "shldq $1, %%r13, %%r14 ;" |
| - "shldq $1, %%r15, %%r13 ;" |
| - "shldq $1, %%r11, %%r15 ;" |
| - "shldq $1, %%r10, %%r11 ;" |
| - "shldq $1, %%r9, %%r10 ;" |
| - "shldq $1, %%r8, %%r9 ;" |
| - "shlq $1, %%r8 ;" |
| - |
| - /*******************/ |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ |
| - /*******************/ |
| - "movq %%rax, 0(%0) ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, 8(%0) ;" |
| - "movq 8(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ |
| - "adcq %%rax, %%r9 ;" |
| - "movq %%r9, 16(%0) ;" |
| - "adcq %%rcx, %%r10 ;" |
| - "movq %%r10, 24(%0) ;" |
| - "movq 16(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ |
| - "adcq %%rax, %%r11 ;" |
| - "movq %%r11, 32(%0) ;" |
| - "adcq %%rcx, %%r15 ;" |
| - "movq %%r15, 40(%0) ;" |
| - "movq 24(%1), %%rdx ;" |
| - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ |
| - "adcq %%rax, %%r13 ;" |
| - "movq %%r13, 48(%0) ;" |
| - "adcq %%rcx, %%r14 ;" |
| - "movq %%r14, 56(%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| - "%r11", "%r13", "%r14", "%r15"); |
| + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ |
| + |
| + /* Compute src1[0] * src2 */ |
| + " movq 0(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" |
| + /* Compute src1[1] * src2 */ |
| + " movq 8(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[2] * src2 */ |
| + " movq 16(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[3] * src2 */ |
| + " movq 24(%1), %%rdx;" |
| + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" |
| + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" |
| + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" |
| + |
| + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ |
| + |
| + /* Compute src1[0] * src2 */ |
| + " movq 32(%1), %%rdx;" |
| + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" |
| + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" |
| + /* Compute src1[1] * src2 */ |
| + " movq 40(%1), %%rdx;" |
| + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" |
| + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[2] * src2 */ |
| + " movq 48(%1), %%rdx;" |
| + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" |
| + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| + /* Compute src1[3] * src2 */ |
| + " movq 56(%1), %%rdx;" |
| + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" |
| + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" |
| + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" |
| + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" |
| + /* Line up pointers */ |
| + " mov %0, %1;" |
| + " mov %2, %0;" |
| + |
| + /* Wrap the results back into the field */ |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 32(%1), %%r8, %%r13;" |
| + " xor %3, %3;" |
| + " adoxq 0(%1), %%r8;" |
| + " mulxq 40(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 8(%1), %%r9;" |
| + " mulxq 48(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 16(%1), %%r10;" |
| + " mulxq 56(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 24(%1), %%r11;" |
| + " adcx %3, %%rax;" |
| + " adox %3, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %3, %%r9;" |
| + " movq %%r9, 8(%0);" |
| + " adcx %3, %%r10;" |
| + " movq %%r10, 16(%0);" |
| + " adcx %3, %%r11;" |
| + " movq %%r11, 24(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%0);" |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 96(%1), %%r8, %%r13;" |
| + " xor %3, %3;" |
| + " adoxq 64(%1), %%r8;" |
| + " mulxq 104(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 72(%1), %%r9;" |
| + " mulxq 112(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 80(%1), %%r10;" |
| + " mulxq 120(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 88(%1), %%r11;" |
| + " adcx %3, %%rax;" |
| + " adox %3, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %3, %%r9;" |
| + " movq %%r9, 40(%0);" |
| + " adcx %3, %%r10;" |
| + " movq %%r10, 48(%0);" |
| + " adcx %3, %%r11;" |
| + " movq %%r11, 56(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 32(%0);" |
| + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) |
| + : |
| + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" |
| + ); |
| } |
| |
| -static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) |
| +/* Computes the field multiplication of four-element f1 with value in f2 */ |
| +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) |
| { |
| - asm volatile( |
| - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ |
| - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| - "xorl %%ebx, %%ebx ;" |
| - "adox (%1), %%r8 ;" |
| - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| - "adcx %%r10, %%r9 ;" |
| - "adox 8(%1), %%r9 ;" |
| - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| - "adcx %%r11, %%r10 ;" |
| - "adox 16(%1), %%r10 ;" |
| - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| - "adcx %%rax, %%r11 ;" |
| - "adox 24(%1), %%r11 ;" |
| - /***************************************/ |
| - "adcx %%rbx, %%rcx ;" |
| - "adox %%rbx, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ |
| - "adcx %%rcx, %%r8 ;" |
| - "adcx %%rbx, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcx %%rbx, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcx %%rbx, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", |
| - "%r10", "%r11"); |
| -} |
| + register u64 f2_r asm("rdx") = f2; |
| |
| -static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) |
| -{ |
| asm volatile( |
| - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ |
| - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ |
| - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ |
| - "addq %%r10, %%r9 ;" |
| - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ |
| - "adcq %%r11, %%r10 ;" |
| - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ |
| - "adcq %%rax, %%r11 ;" |
| - /***************************************/ |
| - "adcq $0, %%rcx ;" |
| - "addq (%1), %%r8 ;" |
| - "adcq 8(%1), %%r9 ;" |
| - "adcq 16(%1), %%r10 ;" |
| - "adcq 24(%1), %%r11 ;" |
| - "adcq $0, %%rcx ;" |
| - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ |
| - "addq %%rcx, %%r8 ;" |
| - "adcq $0, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcq $0, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcq $0, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a) |
| - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| - "%r11"); |
| + /* Compute the raw multiplication of f1*f2 */ |
| + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ |
| + " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ |
| + " add %%rcx, %%r9;" |
| + " mov $0, %%rcx;" |
| + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ |
| + " adcx %%r12, %%r10;" |
| + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ |
| + " adcx %%r13, %%r11;" |
| + " adcx %%rcx, %%rax;" |
| + |
| + /* Wrap the result back into the field */ |
| + |
| + /* Step 1: Compute carry*38 */ |
| + " mov $38, %%rdx;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %%rcx, %%r9;" |
| + " movq %%r9, 8(%1);" |
| + " adcx %%rcx, %%r10;" |
| + " movq %%r10, 16(%1);" |
| + " adcx %%rcx, %%r11;" |
| + " movq %%r11, 24(%1);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%1);" |
| + : "+&r" (f2_r) |
| + : "r" (out), "r" (f1) |
| + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" |
| + ); |
| } |
| |
| -static __always_inline void |
| -add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) |
| +/* Swaps p1[0..7] and p2[0..7] if bit is nonzero, in constant time */ |
| +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) |
| { |
| asm volatile( |
| - "mov $38, %%eax ;" |
| - "xorl %%ecx, %%ecx ;" |
| - "movq (%2), %%r8 ;" |
| - "adcx (%1), %%r8 ;" |
| - "movq 8(%2), %%r9 ;" |
| - "adcx 8(%1), %%r9 ;" |
| - "movq 16(%2), %%r10 ;" |
| - "adcx 16(%1), %%r10 ;" |
| - "movq 24(%2), %%r11 ;" |
| - "adcx 24(%1), %%r11 ;" |
| - "cmovc %%eax, %%ecx ;" |
| - "xorl %%eax, %%eax ;" |
| - "adcx %%rcx, %%r8 ;" |
| - "adcx %%rax, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcx %%rax, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcx %%rax, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $38, %%ecx ;" |
| - "cmovc %%ecx, %%eax ;" |
| - "addq %%rax, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| + /* Add 2^64-1 to bit: CF is set iff bit != 0, and the cmovc's below test CF */ |
| + " add $18446744073709551615, %0;" |
| + |
| + /* cswap p1[0], p2[0] */ |
| + " movq 0(%1), %%r8;" |
| + " movq 0(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 0(%1);" |
| + " movq %%r9, 0(%2);" |
| + |
| + /* cswap p1[1], p2[1] */ |
| + " movq 8(%1), %%r8;" |
| + " movq 8(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 8(%1);" |
| + " movq %%r9, 8(%2);" |
| + |
| + /* cswap p1[2], p2[2] */ |
| + " movq 16(%1), %%r8;" |
| + " movq 16(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 16(%1);" |
| + " movq %%r9, 16(%2);" |
| + |
| + /* cswap p1[3], p2[3] */ |
| + " movq 24(%1), %%r8;" |
| + " movq 24(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 24(%1);" |
| + " movq %%r9, 24(%2);" |
| + |
| + /* cswap p1[4], p2[4] */ |
| + " movq 32(%1), %%r8;" |
| + " movq 32(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 32(%1);" |
| + " movq %%r9, 32(%2);" |
| + |
| + /* cswap p1[5], p2[5] */ |
| + " movq 40(%1), %%r8;" |
| + " movq 40(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 40(%1);" |
| + " movq %%r9, 40(%2);" |
| + |
| + /* cswap p1[6], p2[6] */ |
| + " movq 48(%1), %%r8;" |
| + " movq 48(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 48(%1);" |
| + " movq %%r9, 48(%2);" |
| + |
| + /* cswap p1[7], p2[7] */ |
| + " movq 56(%1), %%r8;" |
| + " movq 56(%2), %%r9;" |
| + " mov %%r8, %%r10;" |
| + " cmovc %%r9, %%r8;" |
| + " cmovc %%r10, %%r9;" |
| + " movq %%r8, 56(%1);" |
| + " movq %%r9, 56(%2);" |
| + : "+&r" (bit) |
| + : "r" (p1), "r" (p2) |
| + : "%r8", "%r9", "%r10", "memory", "cc" |
| + ); |
| } |
| |
| -static __always_inline void |
| -add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) |
| +/* Computes the square of a field element: out <- f * f |
| + * Uses the 8-element buffer tmp for intermediate results */ |
| +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| { |
| asm volatile( |
| - "mov $38, %%eax ;" |
| - "movq (%2), %%r8 ;" |
| - "addq (%1), %%r8 ;" |
| - "movq 8(%2), %%r9 ;" |
| - "adcq 8(%1), %%r9 ;" |
| - "movq 16(%2), %%r10 ;" |
| - "adcq 16(%1), %%r10 ;" |
| - "movq 24(%2), %%r11 ;" |
| - "adcq 24(%1), %%r11 ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%eax, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "adcq $0, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcq $0, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcq $0, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%eax, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| + /* Compute the raw multiplication: tmp <- f * f */ |
| + |
| + /* Step 1: Compute all partial products */ |
| + " movq 0(%1), %%rdx;" /* f[0] */ |
| + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ |
| + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| + " movq 24(%1), %%rdx;" /* f[3] */ |
| + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f[1] */ |
| + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| + |
| + /* Step 2: Compute two parallel carry chains */ |
| + " xor %%r15, %%r15;" |
| + " adox %%rax, %%r10;" |
| + " adcx %%r8, %%r8;" |
| + " adox %%rcx, %%r11;" |
| + " adcx %%r9, %%r9;" |
| + " adox %%r15, %%r12;" |
| + " adcx %%r10, %%r10;" |
| + " adox %%r15, %%r13;" |
| + " adcx %%r11, %%r11;" |
| + " adox %%r15, %%r14;" |
| + " adcx %%r12, %%r12;" |
| + " adcx %%r13, %%r13;" |
| + " adcx %%r14, %%r14;" |
| + |
| + /* Step 3: Compute intermediate squares */ |
| + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| + " movq %%rax, 0(%0);" |
| + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" |
| + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" |
| + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" |
| + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" |
| + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" |
| + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" |
| + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" |
| + |
| + /* Line up pointers */ |
| + " mov %0, %1;" |
| + " mov %2, %0;" |
| + |
| + /* Wrap the result back into the field */ |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 32(%1), %%r8, %%r13;" |
| + " xor %%rcx, %%rcx;" |
| + " adoxq 0(%1), %%r8;" |
| + " mulxq 40(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 8(%1), %%r9;" |
| + " mulxq 48(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 16(%1), %%r10;" |
| + " mulxq 56(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 24(%1), %%r11;" |
| + " adcx %%rcx, %%rax;" |
| + " adox %%rcx, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %%rcx, %%r9;" |
| + " movq %%r9, 8(%0);" |
| + " adcx %%rcx, %%r10;" |
| + " movq %%r10, 16(%0);" |
| + " adcx %%rcx, %%r11;" |
| + " movq %%r11, 24(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%0);" |
| + : "+&r" (tmp), "+&r" (f), "+&r" (out) |
| + : |
| + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" |
| + ); |
| } |
| |
| -static __always_inline void |
| -sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) |
| +/* Computes two field squarings: |
| + * out[0] <- f[0] * f[0] |
| + * out[1] <- f[1] * f[1] |
| + * Uses the 16-element buffer tmp for intermediate results */ |
| +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| { |
| asm volatile( |
| - "mov $38, %%eax ;" |
| - "movq (%1), %%r8 ;" |
| - "subq (%2), %%r8 ;" |
| - "movq 8(%1), %%r9 ;" |
| - "sbbq 8(%2), %%r9 ;" |
| - "movq 16(%1), %%r10 ;" |
| - "sbbq 16(%2), %%r10 ;" |
| - "movq 24(%1), %%r11 ;" |
| - "sbbq 24(%2), %%r11 ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%eax, %%ecx ;" |
| - "subq %%rcx, %%r8 ;" |
| - "sbbq $0, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "sbbq $0, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "sbbq $0, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%eax, %%ecx ;" |
| - "subq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(b) |
| - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); |
| + /* Step 1: Compute all partial products */ |
| + " movq 0(%1), %%rdx;" /* f[0] */ |
| + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ |
| + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| + " movq 24(%1), %%rdx;" /* f[3] */ |
| + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f[1] */ |
| + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| + |
| + /* Step 2: Compute two parallel carry chains */ |
| + " xor %%r15, %%r15;" |
| + " adox %%rax, %%r10;" |
| + " adcx %%r8, %%r8;" |
| + " adox %%rcx, %%r11;" |
| + " adcx %%r9, %%r9;" |
| + " adox %%r15, %%r12;" |
| + " adcx %%r10, %%r10;" |
| + " adox %%r15, %%r13;" |
| + " adcx %%r11, %%r11;" |
| + " adox %%r15, %%r14;" |
| + " adcx %%r12, %%r12;" |
| + " adcx %%r13, %%r13;" |
| + " adcx %%r14, %%r14;" |
| + |
| + /* Step 3: Compute intermediate squares */ |
| + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| + " movq %%rax, 0(%0);" |
| + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" |
| + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" |
| + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" |
| + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" |
| + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" |
| + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" |
| + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" |
| + |
| + /* Step 1: Compute all partial products */ |
| + " movq 32(%1), %%rdx;" /* f[0] */ |
| + " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ |
| + " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| + " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| + " movq 56(%1), %%rdx;" /* f[3] */ |
| + " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f[1] */ |
| + " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| + |
| + /* Step 2: Compute two parallel carry chains */ |
| + " xor %%r15, %%r15;" |
| + " adox %%rax, %%r10;" |
| + " adcx %%r8, %%r8;" |
| + " adox %%rcx, %%r11;" |
| + " adcx %%r9, %%r9;" |
| + " adox %%r15, %%r12;" |
| + " adcx %%r10, %%r10;" |
| + " adox %%r15, %%r13;" |
| + " adcx %%r11, %%r11;" |
| + " adox %%r15, %%r14;" |
| + " adcx %%r12, %%r12;" |
| + " adcx %%r13, %%r13;" |
| + " adcx %%r14, %%r14;" |
| + |
| + /* Step 3: Compute intermediate squares */ |
| + " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| + " movq %%rax, 64(%0);" |
| + " add %%rcx, %%r8;" " movq %%r8, 72(%0);" |
| + " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| + " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" |
| + " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" |
| + " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| + " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" |
| + " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" |
| + " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| + " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" |
| + " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" |
| + |
| + /* Line up pointers */ |
| + " mov %0, %1;" |
| + " mov %2, %0;" |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 32(%1), %%r8, %%r13;" |
| + " xor %%rcx, %%rcx;" |
| + " adoxq 0(%1), %%r8;" |
| + " mulxq 40(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 8(%1), %%r9;" |
| + " mulxq 48(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 16(%1), %%r10;" |
| + " mulxq 56(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 24(%1), %%r11;" |
| + " adcx %%rcx, %%rax;" |
| + " adox %%rcx, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %%rcx, %%r9;" |
| + " movq %%r9, 8(%0);" |
| + " adcx %%rcx, %%r10;" |
| + " movq %%r10, 16(%0);" |
| + " adcx %%rcx, %%r11;" |
| + " movq %%r11, 24(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 0(%0);" |
| + |
| + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| + " mov $38, %%rdx;" |
| + " mulxq 96(%1), %%r8, %%r13;" |
| + " xor %%rcx, %%rcx;" |
| + " adoxq 64(%1), %%r8;" |
| + " mulxq 104(%1), %%r9, %%r12;" |
| + " adcx %%r13, %%r9;" |
| + " adoxq 72(%1), %%r9;" |
| + " mulxq 112(%1), %%r10, %%r13;" |
| + " adcx %%r12, %%r10;" |
| + " adoxq 80(%1), %%r10;" |
| + " mulxq 120(%1), %%r11, %%rax;" |
| + " adcx %%r13, %%r11;" |
| + " adoxq 88(%1), %%r11;" |
| + " adcx %%rcx, %%rax;" |
| + " adox %%rcx, %%rax;" |
| + " imul %%rdx, %%rax;" |
| + |
| + /* Step 2: Fold the carry back into dst */ |
| + " add %%rax, %%r8;" |
| + " adcx %%rcx, %%r9;" |
| + " movq %%r9, 40(%0);" |
| + " adcx %%rcx, %%r10;" |
| + " movq %%r10, 48(%0);" |
| + " adcx %%rcx, %%r11;" |
| + " movq %%r11, 56(%0);" |
| + |
| + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| + " mov $0, %%rax;" |
| + " cmovc %%rdx, %%rax;" |
| + " add %%rax, %%r8;" |
| + " movq %%r8, 32(%0);" |
| + : "+&r" (tmp), "+&r" (f), "+&r" (out) |
| + : |
| + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" |
| + ); |
| } |
| |
| -/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ |
| -static __always_inline void |
| -mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) |
| +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) |
| { |
| - const u64 a24 = 121666; |
| - asm volatile( |
| - "movq %2, %%rdx ;" |
| - "mulx (%1), %%r8, %%r10 ;" |
| - "mulx 8(%1), %%r9, %%r11 ;" |
| - "addq %%r10, %%r9 ;" |
| - "mulx 16(%1), %%r10, %%rax ;" |
| - "adcq %%r11, %%r10 ;" |
| - "mulx 24(%1), %%r11, %%rcx ;" |
| - "adcq %%rax, %%r11 ;" |
| - /**************************/ |
| - "adcq $0, %%rcx ;" |
| - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ |
| - "imul %%rdx, %%rcx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "adcq $0, %%r9 ;" |
| - "movq %%r9, 8(%0) ;" |
| - "adcq $0, %%r10 ;" |
| - "movq %%r10, 16(%0) ;" |
| - "adcq $0, %%r11 ;" |
| - "movq %%r11, 24(%0) ;" |
| - "mov $0, %%ecx ;" |
| - "cmovc %%edx, %%ecx ;" |
| - "addq %%rcx, %%r8 ;" |
| - "movq %%r8, (%0) ;" |
| - : |
| - : "r"(c), "r"(a), "r"(a24) |
| - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", |
| - "%r11"); |
| + u64 *nq = p01_tmp1; |
| + u64 *nq_p1 = p01_tmp1 + (u32)8U; |
| + u64 *tmp1 = p01_tmp1 + (u32)16U; |
| + u64 *x1 = q; |
| + u64 *x2 = nq; |
| + u64 *z2 = nq + (u32)4U; |
| + u64 *z3 = nq_p1 + (u32)4U; |
| + u64 *a = tmp1; |
| + u64 *b = tmp1 + (u32)4U; |
| + u64 *ab = tmp1; |
| + u64 *dc = tmp1 + (u32)8U; |
| + u64 *x3; |
| + u64 *z31; |
| + u64 *d0; |
| + u64 *c0; |
| + u64 *a1; |
| + u64 *b1; |
| + u64 *d; |
| + u64 *c; |
| + u64 *ab1; |
| + u64 *dc1; |
| + fadd(a, x2, z2); |
| + fsub(b, x2, z2); |
| + x3 = nq_p1; |
| + z31 = nq_p1 + (u32)4U; |
| + d0 = dc; |
| + c0 = dc + (u32)4U; |
| + fadd(c0, x3, z31); |
| + fsub(d0, x3, z31); |
| + fmul2(dc, dc, ab, tmp2); |
| + fadd(x3, d0, c0); |
| + fsub(z31, d0, c0); |
| + a1 = tmp1; |
| + b1 = tmp1 + (u32)4U; |
| + d = tmp1 + (u32)8U; |
| + c = tmp1 + (u32)12U; |
| + ab1 = tmp1; |
| + dc1 = tmp1 + (u32)8U; |
| + fsqr2(dc1, ab1, tmp2); |
| + fsqr2(nq_p1, nq_p1, tmp2); |
| + a1[0U] = c[0U]; |
| + a1[1U] = c[1U]; |
| + a1[2U] = c[2U]; |
| + a1[3U] = c[3U]; |
| + fsub(c, d, c); |
| + fmul_scalar(b1, c, (u64)121665U); |
| + fadd(b1, b1, d); |
| + fmul2(nq, dc1, ab1, tmp2); |
| + fmul(z3, z3, x1, tmp2); |
| } |
| |
| -static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) |
| +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) |
| { |
| - struct { |
| - eltfp25519_1w_buffer buffer; |
| - eltfp25519_1w x0, x1, x2; |
| - } __aligned(32) m; |
| - u64 *T[4]; |
| - |
| - T[0] = m.x0; |
| - T[1] = c; /* x^(-1) */ |
| - T[2] = m.x1; |
| - T[3] = m.x2; |
| - |
| - copy_eltfp25519_1w(T[1], a); |
| - sqrn_eltfp25519_1w_adx(T[1], 1); |
| - copy_eltfp25519_1w(T[2], T[1]); |
| - sqrn_eltfp25519_1w_adx(T[2], 2); |
| - mul_eltfp25519_1w_adx(T[0], a, T[2]); |
| - mul_eltfp25519_1w_adx(T[1], T[1], T[0]); |
| - copy_eltfp25519_1w(T[2], T[1]); |
| - sqrn_eltfp25519_1w_adx(T[2], 1); |
| - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_adx(T[2], 5); |
| - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_adx(T[2], 10); |
| - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); |
| - copy_eltfp25519_1w(T[3], T[2]); |
| - sqrn_eltfp25519_1w_adx(T[3], 20); |
| - mul_eltfp25519_1w_adx(T[3], T[3], T[2]); |
| - sqrn_eltfp25519_1w_adx(T[3], 10); |
| - mul_eltfp25519_1w_adx(T[3], T[3], T[0]); |
| - copy_eltfp25519_1w(T[0], T[3]); |
| - sqrn_eltfp25519_1w_adx(T[0], 50); |
| - mul_eltfp25519_1w_adx(T[0], T[0], T[3]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_adx(T[2], 100); |
| - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); |
| - sqrn_eltfp25519_1w_adx(T[2], 50); |
| - mul_eltfp25519_1w_adx(T[2], T[2], T[3]); |
| - sqrn_eltfp25519_1w_adx(T[2], 5); |
| - mul_eltfp25519_1w_adx(T[1], T[1], T[2]); |
| - |
| - memzero_explicit(&m, sizeof(m)); |
| + u64 *x2 = nq; |
| + u64 *z2 = nq + (u32)4U; |
| + u64 *a = tmp1; |
| + u64 *b = tmp1 + (u32)4U; |
| + u64 *d = tmp1 + (u32)8U; |
| + u64 *c = tmp1 + (u32)12U; |
| + u64 *ab = tmp1; |
| + u64 *dc = tmp1 + (u32)8U; |
| + fadd(a, x2, z2); |
| + fsub(b, x2, z2); |
| + fsqr2(dc, ab, tmp2); |
| + a[0U] = c[0U]; |
| + a[1U] = c[1U]; |
| + a[2U] = c[2U]; |
| + a[3U] = c[3U]; |
| + fsub(c, d, c); |
| + fmul_scalar(b, c, (u64)121665U); |
| + fadd(b, b, d); |
| + fmul2(nq, dc, ab, tmp2); |
| } |
| |
| -static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) |
| +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) |
| { |
| - struct { |
| - eltfp25519_1w_buffer buffer; |
| - eltfp25519_1w x0, x1, x2; |
| - } __aligned(32) m; |
| - u64 *T[5]; |
| - |
| - T[0] = m.x0; |
| - T[1] = c; /* x^(-1) */ |
| - T[2] = m.x1; |
| - T[3] = m.x2; |
| - |
| - copy_eltfp25519_1w(T[1], a); |
| - sqrn_eltfp25519_1w_bmi2(T[1], 1); |
| - copy_eltfp25519_1w(T[2], T[1]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 2); |
| - mul_eltfp25519_1w_bmi2(T[0], a, T[2]); |
| - mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); |
| - copy_eltfp25519_1w(T[2], T[1]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 1); |
| - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 5); |
| - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 10); |
| - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); |
| - copy_eltfp25519_1w(T[3], T[2]); |
| - sqrn_eltfp25519_1w_bmi2(T[3], 20); |
| - mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); |
| - sqrn_eltfp25519_1w_bmi2(T[3], 10); |
| - mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); |
| - copy_eltfp25519_1w(T[0], T[3]); |
| - sqrn_eltfp25519_1w_bmi2(T[0], 50); |
| - mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); |
| - copy_eltfp25519_1w(T[2], T[0]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 100); |
| - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 50); |
| - mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); |
| - sqrn_eltfp25519_1w_bmi2(T[2], 5); |
| - mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); |
| - |
| - memzero_explicit(&m, sizeof(m)); |
| + u64 tmp2[16U] = { 0U }; |
| + u64 p01_tmp1_swap[33U] = { 0U }; |
| + u64 *p0 = p01_tmp1_swap; |
| + u64 *p01 = p01_tmp1_swap; |
| + u64 *p03 = p01; |
| + u64 *p11 = p01 + (u32)8U; |
| + u64 *x0; |
| + u64 *z0; |
| + u64 *p01_tmp1; |
| + u64 *p01_tmp11; |
| + u64 *nq10; |
| + u64 *nq_p11; |
| + u64 *swap1; |
| + u64 sw0; |
| + u64 *nq1; |
| + u64 *tmp1; |
| + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); |
| + x0 = p03; |
| + z0 = p03 + (u32)4U; |
| + x0[0U] = (u64)1U; |
| + x0[1U] = (u64)0U; |
| + x0[2U] = (u64)0U; |
| + x0[3U] = (u64)0U; |
| + z0[0U] = (u64)0U; |
| + z0[1U] = (u64)0U; |
| + z0[2U] = (u64)0U; |
| + z0[3U] = (u64)0U; |
| + p01_tmp1 = p01_tmp1_swap; |
| + p01_tmp11 = p01_tmp1_swap; |
| + nq10 = p01_tmp1_swap; |
| + nq_p11 = p01_tmp1_swap + (u32)8U; |
| + swap1 = p01_tmp1_swap + (u32)32U; |
| + cswap2((u64)1U, nq10, nq_p11); |
| + point_add_and_double(init1, p01_tmp11, tmp2); |
| + swap1[0U] = (u64)1U; |
| + { |
| + u32 i; |
| + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { |
| + u64 *p01_tmp12 = p01_tmp1_swap; |
| + u64 *swap2 = p01_tmp1_swap + (u32)32U; |
| + u64 *nq2 = p01_tmp12; |
| + u64 *nq_p12 = p01_tmp12 + (u32)8U; |
| + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); |
| + u64 sw = swap2[0U] ^ bit; |
| + cswap2(sw, nq2, nq_p12); |
| + point_add_and_double(init1, p01_tmp12, tmp2); |
| + swap2[0U] = bit; |
| + } |
| + } |
| + sw0 = swap1[0U]; |
| + cswap2(sw0, nq10, nq_p11); |
| + nq1 = p01_tmp1; |
| + tmp1 = p01_tmp1 + (u32)16U; |
| + point_double(nq1, tmp1, tmp2); |
| + point_double(nq1, tmp1, tmp2); |
| + point_double(nq1, tmp1, tmp2); |
| + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); |
| + |
| + memzero_explicit(tmp2, sizeof(tmp2)); |
| + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); |
| } |
| |
| -/* Given c, a 256-bit number, fred_eltfp25519_1w updates c |
| - * with a number such that 0 <= C < 2**255-19. |
| - */ |
| -static __always_inline void fred_eltfp25519_1w(u64 *const c) |
| +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) |
| { |
| - u64 tmp0 = 38, tmp1 = 19; |
| - asm volatile( |
| - "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ |
| - "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ |
| - |
| - /* Add either 19 or 38 to c */ |
| - "addq %4, %0 ;" |
| - "adcq $0, %1 ;" |
| - "adcq $0, %2 ;" |
| - "adcq $0, %3 ;" |
| - |
| - /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ |
| - "movl $0, %k4 ;" |
| - "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ |
| - "btrq $63, %3 ;" /* Clear bit 255 */ |
| - |
| - /* Subtract 19 if necessary */ |
| - "subq %4, %0 ;" |
| - "sbbq $0, %1 ;" |
| - "sbbq $0, %2 ;" |
| - "sbbq $0, %3 ;" |
| - |
| - : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), |
| - "+r"(tmp1) |
| - : |
| - : "memory", "cc"); |
| + u32 i; |
| + fsqr(o, inp, tmp); |
| + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) |
| + fsqr(o, o, tmp); |
| } |
| |
| -static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) |
| +static void finv(u64 *o, const u64 *i, u64 *tmp) |
| { |
| - u64 temp; |
| - asm volatile( |
| - "test %9, %9 ;" |
| - "movq %0, %8 ;" |
| - "cmovnzq %4, %0 ;" |
| - "cmovnzq %8, %4 ;" |
| - "movq %1, %8 ;" |
| - "cmovnzq %5, %1 ;" |
| - "cmovnzq %8, %5 ;" |
| - "movq %2, %8 ;" |
| - "cmovnzq %6, %2 ;" |
| - "cmovnzq %8, %6 ;" |
| - "movq %3, %8 ;" |
| - "cmovnzq %7, %3 ;" |
| - "cmovnzq %8, %7 ;" |
| - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), |
| - "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), |
| - "=r"(temp) |
| - : "r"(bit) |
| - : "cc" |
| - ); |
| + u64 t1[16U] = { 0U }; |
| + u64 *a0 = t1; |
| + u64 *b = t1 + (u32)4U; |
| + u64 *c = t1 + (u32)8U; |
| + u64 *t00 = t1 + (u32)12U; |
| + u64 *tmp1 = tmp; |
| + u64 *a; |
| + u64 *t0; |
| + fsquare_times(a0, i, tmp1, (u32)1U); |
| + fsquare_times(t00, a0, tmp1, (u32)2U); |
| + fmul(b, t00, i, tmp); |
| + fmul(a0, b, a0, tmp); |
| + fsquare_times(t00, a0, tmp1, (u32)1U); |
| + fmul(b, t00, b, tmp); |
| + fsquare_times(t00, b, tmp1, (u32)5U); |
| + fmul(b, t00, b, tmp); |
| + fsquare_times(t00, b, tmp1, (u32)10U); |
| + fmul(c, t00, b, tmp); |
| + fsquare_times(t00, c, tmp1, (u32)20U); |
| + fmul(t00, t00, c, tmp); |
| + fsquare_times(t00, t00, tmp1, (u32)10U); |
| + fmul(b, t00, b, tmp); |
| + fsquare_times(t00, b, tmp1, (u32)50U); |
| + fmul(c, t00, b, tmp); |
| + fsquare_times(t00, c, tmp1, (u32)100U); |
| + fmul(t00, t00, c, tmp); |
| + fsquare_times(t00, t00, tmp1, (u32)50U); |
| + fmul(t00, t00, b, tmp); |
| + fsquare_times(t00, t00, tmp1, (u32)5U); |
| + a = t1; |
| + t0 = t1 + (u32)12U; |
| + fmul(o, t0, a, tmp); |
| } |
| |
| -static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) |
| +static void store_felem(u64 *b, u64 *f) |
| { |
| - asm volatile( |
| - "test %4, %4 ;" |
| - "cmovnzq %5, %0 ;" |
| - "cmovnzq %6, %1 ;" |
| - "cmovnzq %7, %2 ;" |
| - "cmovnzq %8, %3 ;" |
| - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) |
| - : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) |
| - : "cc" |
| - ); |
| + u64 f30 = f[3U]; |
| + u64 top_bit0 = f30 >> (u32)63U; |
| + u64 carry0; |
| + u64 f31; |
| + u64 top_bit; |
| + u64 carry; |
| + u64 f0; |
| + u64 f1; |
| + u64 f2; |
| + u64 f3; |
| + u64 m0; |
| + u64 m1; |
| + u64 m2; |
| + u64 m3; |
| + u64 mask; |
| + u64 f0_; |
| + u64 f1_; |
| + u64 f2_; |
| + u64 f3_; |
| + u64 o0; |
| + u64 o1; |
| + u64 o2; |
| + u64 o3; |
| + f[3U] = f30 & (u64)0x7fffffffffffffffU; |
| + carry0 = add_scalar(f, f, (u64)19U * top_bit0); |
| + f31 = f[3U]; |
| + top_bit = f31 >> (u32)63U; |
| + f[3U] = f31 & (u64)0x7fffffffffffffffU; |
| + carry = add_scalar(f, f, (u64)19U * top_bit); |
| + f0 = f[0U]; |
| + f1 = f[1U]; |
| + f2 = f[2U]; |
| + f3 = f[3U]; |
| + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); |
| + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); |
| + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); |
| + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); |
| + mask = ((m0 & m1) & m2) & m3; |
| + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); |
| + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); |
| + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); |
| + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); |
| + o0 = f0_; |
| + o1 = f1_; |
| + o2 = f2_; |
| + o3 = f3_; |
| + b[0U] = o0; |
| + b[1U] = o1; |
| + b[2U] = o2; |
| + b[3U] = o3; |
| } |
| |
| -static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], |
| - const u8 private_key[CURVE25519_KEY_SIZE], |
| - const u8 session_key[CURVE25519_KEY_SIZE]) |
| +static void encode_point(u8 *o, const u64 *i) |
| { |
| - struct { |
| - u64 buffer[4 * NUM_WORDS_ELTFP25519]; |
| - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| - u64 workspace[6 * NUM_WORDS_ELTFP25519]; |
| - u8 session[CURVE25519_KEY_SIZE]; |
| - u8 private[CURVE25519_KEY_SIZE]; |
| - } __aligned(32) m; |
| - |
| - int i = 0, j = 0; |
| - u64 prev = 0; |
| - u64 *const X1 = (u64 *)m.session; |
| - u64 *const key = (u64 *)m.private; |
| - u64 *const Px = m.coordinates + 0; |
| - u64 *const Pz = m.coordinates + 4; |
| - u64 *const Qx = m.coordinates + 8; |
| - u64 *const Qz = m.coordinates + 12; |
| - u64 *const X2 = Qx; |
| - u64 *const Z2 = Qz; |
| - u64 *const X3 = Px; |
| - u64 *const Z3 = Pz; |
| - u64 *const X2Z2 = Qx; |
| - u64 *const X3Z3 = Px; |
| - |
| - u64 *const A = m.workspace + 0; |
| - u64 *const B = m.workspace + 4; |
| - u64 *const D = m.workspace + 8; |
| - u64 *const C = m.workspace + 12; |
| - u64 *const DA = m.workspace + 16; |
| - u64 *const CB = m.workspace + 20; |
| - u64 *const AB = A; |
| - u64 *const DC = D; |
| - u64 *const DACB = DA; |
| - |
| - memcpy(m.private, private_key, sizeof(m.private)); |
| - memcpy(m.session, session_key, sizeof(m.session)); |
| - |
| - curve25519_clamp_secret(m.private); |
| - |
| - /* As in the draft: |
| - * When receiving such an array, implementations of curve25519 |
| - * MUST mask the most-significant bit in the final byte. This |
| - * is done to preserve compatibility with point formats which |
| - * reserve the sign bit for use in other protocols and to |
| - * increase resistance to implementation fingerprinting |
| - */ |
| - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; |
| - |
| - copy_eltfp25519_1w(Px, X1); |
| - setzero_eltfp25519_1w(Pz); |
| - setzero_eltfp25519_1w(Qx); |
| - setzero_eltfp25519_1w(Qz); |
| - |
| - Pz[0] = 1; |
| - Qx[0] = 1; |
| - |
| - /* main-loop */ |
| - prev = 0; |
| - j = 62; |
| - for (i = 3; i >= 0; --i) { |
| - while (j >= 0) { |
| - u64 bit = (key[i] >> j) & 0x1; |
| - u64 swap = bit ^ prev; |
| - prev = bit; |
| - |
| - add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ |
| - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ |
| - add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ |
| - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ |
| - mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ |
| - |
| - cselect(swap, A, C); |
| - cselect(swap, B, D); |
| - |
| - sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ |
| - add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ |
| - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ |
| - sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ |
| - |
| - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ |
| - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ |
| - |
| - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ |
| - add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ |
| - mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ |
| - mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ |
| - --j; |
| - } |
| - j = 63; |
| - } |
| - |
| - inv_eltfp25519_1w_adx(A, Qz); |
| - mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); |
| - fred_eltfp25519_1w((u64 *)shared); |
| - |
| - memzero_explicit(&m, sizeof(m)); |
| + const u64 *x = i; |
| + const u64 *z = i + (u32)4U; |
| + u64 tmp[4U] = { 0U }; |
| + u64 tmp_w[16U] = { 0U }; |
| + finv(tmp, z, tmp_w); |
| + fmul(tmp, tmp, x, tmp_w); |
| + store_felem((u64 *)o, tmp); |
| } |
| |
| -static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], |
| - const u8 private_key[CURVE25519_KEY_SIZE]) |
| +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) /* loads pub as four 64-bit words, calls montgomery_ladder() with priv, then encode_point() into out */ |
| { |
| - struct { |
| - u64 buffer[4 * NUM_WORDS_ELTFP25519]; |
| - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| - u64 workspace[4 * NUM_WORDS_ELTFP25519]; |
| - u8 private[CURVE25519_KEY_SIZE]; |
| - } __aligned(32) m; |
| - |
| - const int ite[4] = { 64, 64, 64, 63 }; |
| - const int q = 3; |
| - u64 swap = 1; |
| - |
| - int i = 0, j = 0, k = 0; |
| - u64 *const key = (u64 *)m.private; |
| - u64 *const Ur1 = m.coordinates + 0; |
| - u64 *const Zr1 = m.coordinates + 4; |
| - u64 *const Ur2 = m.coordinates + 8; |
| - u64 *const Zr2 = m.coordinates + 12; |
| - |
| - u64 *const UZr1 = m.coordinates + 0; |
| - u64 *const ZUr2 = m.coordinates + 8; |
| - |
| - u64 *const A = m.workspace + 0; |
| - u64 *const B = m.workspace + 4; |
| - u64 *const C = m.workspace + 8; |
| - u64 *const D = m.workspace + 12; |
| - |
| - u64 *const AB = m.workspace + 0; |
| - u64 *const CD = m.workspace + 8; |
| - |
| - const u64 *const P = table_ladder_8k; |
| - |
| - memcpy(m.private, private_key, sizeof(m.private)); |
| - |
| - curve25519_clamp_secret(m.private); |
| - |
| - setzero_eltfp25519_1w(Ur1); |
| - setzero_eltfp25519_1w(Zr1); |
| - setzero_eltfp25519_1w(Zr2); |
| - Ur1[0] = 1; |
| - Zr1[0] = 1; |
| - Zr2[0] = 1; |
| - |
| - /* G-S */ |
| - Ur2[3] = 0x1eaecdeee27cab34UL; |
| - Ur2[2] = 0xadc7a0b9235d48e2UL; |
| - Ur2[1] = 0xbbf095ae14b2edf8UL; |
| - Ur2[0] = 0x7e94e1fec82faabdUL; |
| - |
| - /* main-loop */ |
| - j = q; |
| - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { |
| - while (j < ite[i]) { |
| - u64 bit = (key[i] >> j) & 0x1; |
| - k = (64 * i + j - q); |
| - swap = swap ^ bit; |
| - cswap(swap, Ur1, Ur2); |
| - cswap(swap, Zr1, Zr2); |
| - swap = bit; |
| - /* Addition */ |
| - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| - mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ |
| - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ |
| - add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ |
| - sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ |
| - mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ |
| - ++j; |
| + u64 init1[8U] = { 0U }; /* two 4-word values back to back: x in words 0-3, z in words 4-7 */ |
| + u64 tmp[4U] = { 0U }; /* the 32 bytes at pub, as four 64-bit words */ |
| + u64 tmp3; |
| + u64 *x; |
| + u64 *z; |
| + { |
| + u32 i; |
| + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { |
| + u64 *os = tmp; |
| + const u8 *bj = pub + i * (u32)8U; /* start of the i-th 8-byte group of pub */ |
| + u64 u = *(u64 *)bj; /* direct 64-bit load through a cast: host byte order, no alignment handling; NOTE(review): fine on x86, confirm no other users */ |
| + u64 r = u; |
| + u64 x0 = r; |
| + os[i] = x0; |
| } |
| - j = 0; |
| } |
| - |
| - /* Doubling */ |
| - for (i = 0; i < q; ++i) { |
| - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| - sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ |
| - copy_eltfp25519_1w(C, B); /* C = B */ |
| - sub_eltfp25519_1w(B, A, B); /* B = A-B */ |
| - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ |
| - add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ |
| - mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ |
| - } |
| - |
| - /* Convert to affine coordinates */ |
| - inv_eltfp25519_1w_adx(A, Zr1); |
| - mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); |
| - fred_eltfp25519_1w((u64 *)session_key); |
| - |
| - memzero_explicit(&m, sizeof(m)); |
| + tmp3 = tmp[3U]; |
| + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; /* clear bit 63 of the last word, i.e. the top bit of the loaded value */ |
| + x = init1; |
| + z = init1 + (u32)4U; |
| + z[0U] = (u64)1U; /* z = 1 */ |
| + z[1U] = (u64)0U; |
| + z[2U] = (u64)0U; |
| + z[3U] = (u64)0U; |
| + x[0U] = tmp[0U]; /* x = masked words loaded from pub */ |
| + x[1U] = tmp[1U]; |
| + x[2U] = tmp[2U]; |
| + x[3U] = tmp[3U]; |
| + montgomery_ladder(init1, priv, init1); /* init1 is passed as both first and last argument; helper is defined outside this span */ |
| + encode_point(out, init1); /* presumably reduces the x/z pair in init1 to the bytes written at out; confirm against encode_point() */ |
| } |
| |
| -static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], |
| - const u8 private_key[CURVE25519_KEY_SIZE], |
| - const u8 session_key[CURVE25519_KEY_SIZE]) |
| -{ |
| - struct { |
| - u64 buffer[4 * NUM_WORDS_ELTFP25519]; |
| - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| - u64 workspace[6 * NUM_WORDS_ELTFP25519]; |
| - u8 session[CURVE25519_KEY_SIZE]; |
| - u8 private[CURVE25519_KEY_SIZE]; |
| - } __aligned(32) m; |
| - |
| - int i = 0, j = 0; |
| - u64 prev = 0; |
| - u64 *const X1 = (u64 *)m.session; |
| - u64 *const key = (u64 *)m.private; |
| - u64 *const Px = m.coordinates + 0; |
| - u64 *const Pz = m.coordinates + 4; |
| - u64 *const Qx = m.coordinates + 8; |
| - u64 *const Qz = m.coordinates + 12; |
| - u64 *const X2 = Qx; |
| - u64 *const Z2 = Qz; |
| - u64 *const X3 = Px; |
| - u64 *const Z3 = Pz; |
| - u64 *const X2Z2 = Qx; |
| - u64 *const X3Z3 = Px; |
| - |
| - u64 *const A = m.workspace + 0; |
| - u64 *const B = m.workspace + 4; |
| - u64 *const D = m.workspace + 8; |
| - u64 *const C = m.workspace + 12; |
| - u64 *const DA = m.workspace + 16; |
| - u64 *const CB = m.workspace + 20; |
| - u64 *const AB = A; |
| - u64 *const DC = D; |
| - u64 *const DACB = DA; |
| - |
| - memcpy(m.private, private_key, sizeof(m.private)); |
| - memcpy(m.session, session_key, sizeof(m.session)); |
| - |
| - curve25519_clamp_secret(m.private); |
| - |
| - /* As in the draft: |
| - * When receiving such an array, implementations of curve25519 |
| - * MUST mask the most-significant bit in the final byte. This |
| - * is done to preserve compatibility with point formats which |
| - * reserve the sign bit for use in other protocols and to |
| - * increase resistance to implementation fingerprinting |
| - */ |
| - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; |
| - |
| - copy_eltfp25519_1w(Px, X1); |
| - setzero_eltfp25519_1w(Pz); |
| - setzero_eltfp25519_1w(Qx); |
| - setzero_eltfp25519_1w(Qz); |
| - |
| - Pz[0] = 1; |
| - Qx[0] = 1; |
| - |
| - /* main-loop */ |
| - prev = 0; |
| - j = 62; |
| - for (i = 3; i >= 0; --i) { |
| - while (j >= 0) { |
| - u64 bit = (key[i] >> j) & 0x1; |
| - u64 swap = bit ^ prev; |
| - prev = bit; |
| - |
| - add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ |
| - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ |
| - add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ |
| - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ |
| - mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ |
| - |
| - cselect(swap, A, C); |
| - cselect(swap, B, D); |
| - |
| - sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ |
| - add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ |
| - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ |
| - sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ |
| - |
| - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ |
| - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ |
| - |
| - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ |
| - add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ |
| - mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ |
| - mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ |
| - --j; |
| - } |
| - j = 63; |
| - } |
| - |
| - inv_eltfp25519_1w_bmi2(A, Qz); |
| - mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); |
| - fred_eltfp25519_1w((u64 *)shared); |
| +/* The below constants were generated using this sage script: |
| + * |
| + * #!/usr/bin/env sage |
| + * import sys |
| + * from sage.all import * |
| + * def limbs(n): |
| + * n = int(n) |
| + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) |
| + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l |
| + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) |
| + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] |
| + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) |
| + * print("static const u64 table_ladder[] = {") |
| + * p = ec.lift_x(9) |
| + * for i in range(252): |
| + * l = (p[0] + p[2]) / (p[0] - p[2]) |
| + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) |
| + * p = p * 2 |
| + * print("};") |
| + * |
| + */ |
| |
| - memzero_explicit(&m, sizeof(m)); |
| -} |
| +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; /* first coordinate of ec.lift_x(9) - ec.lift_x(1) from the sage script above, as four 64-bit limbs, least significant first */ |
| + |
| +static const u64 table_ladder[] = { |
| + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, |
| + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, |
| + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, |
| + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, |
| + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, |
| + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, |
| + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, |
| + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, |
| + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, |
| + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, |
| + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, |
| + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, |
| + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, |
| + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, |
| + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, |
| + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, |
| + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, |
| + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, |
| + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, |
| + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, |
| + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, |
| + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, |
| + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, |
| + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, |
| + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, |
| + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, |
| + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, |
| + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, |
| + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, |
| + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, |
| + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, |
| + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, |
| + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, |
| + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, |
| + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, |
| + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, |
| + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, |
| + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, |
| + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, |
| + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, |
| + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, |
| + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, |
| + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, |
| + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, |
| + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, |
| + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, |
| + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, |
| + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, |
| + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, |
| + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, |
| + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, |
| + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, |
| + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, |
| + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, |
| + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, |
| + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, |
| + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, |
| + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, |
| + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, |
| + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, |
| + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, |
| + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, |
| + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, |
| + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, |
| + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, |
| + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, |
| + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, |
| + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, |
| + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, |
| + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, |
| + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, |
| + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, |
| + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, |
| + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, |
| + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, |
| + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, |
| + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, |
| + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, |
| + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, |
| + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, |
| + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, |
| + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, |
| + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, |
| + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, |
| + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, |
| + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, |
| + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, |
| + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, |
| + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, |
| + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, |
| + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, |
| + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, |
| + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, |
| + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, |
| + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, |
| + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, |
| + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, |
| + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, |
| + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, |
| + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, |
| + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, |
| + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, |
| + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, |
| + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, |
| + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, |
| + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, |
| + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, |
| + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, |
| + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, |
| + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, |
| + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, |
| + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, |
| + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, |
| + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, |
| + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, |
| + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, |
| + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, |
| + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, |
| + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, |
| + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, |
| + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, |
| + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, |
| + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, |
| + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, |
| + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, |
| + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, |
| + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, |
| + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, |
| + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, |
| + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, |
| + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, |
| + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, |
| + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, |
| + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, |
| + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, |
| + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, |
| + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, |
| + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, |
| + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, |
| + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, |
| + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, |
| + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, |
| + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, |
| + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, |
| + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, |
| + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, |
| + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, |
| + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, |
| + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, |
| + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, |
| + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, |
| + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, |
| + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, |
| + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, |
| + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, |
| + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, |
| + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, |
| + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, |
| + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, |
| + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, |
| + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, |
| + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, |
| + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, |
| + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, |
| + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, |
| + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, |
| + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, |
| + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, |
| + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, |
| + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, |
| + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, |
| + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, |
| + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, |
| + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, |
| + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, |
| + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, |
| + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, |
| + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, |
| + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, |
| + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, |
| + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, |
| + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, |
| + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, |
| + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, |
| + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, |
| + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, |
| + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, |
| + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, |
| + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, |
| + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, |
| + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, |
| + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, |
| + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, |
| + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, |
| + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, |
| + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, |
| + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, |
| + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, |
| + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, |
| + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, |
| + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, |
| + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, |
| + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, |
| + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, |
| + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, |
| + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, |
| + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, |
| + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, |
| + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, |
| + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, |
| + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, |
| + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, |
| + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, |
| + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, |
| + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, |
| + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, |
| + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, |
| + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, |
| + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, |
| + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, |
| + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, |
| + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, |
| + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, |
| + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, |
| + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, |
| + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, |
| + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, |
| + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, |
| + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, |
| + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, |
| + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, |
| + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, |
| + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, |
| + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, |
| + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, |
| + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, |
| + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, |
| + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, |
| + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, |
| + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, |
| + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, |
| + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, |
| + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, |
| + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, |
| + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, |
| + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, |
| + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, |
| + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, |
| + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, |
| + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, |
| + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, |
| + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL |
| +}; |
| |
| -static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], |
| - const u8 private_key[CURVE25519_KEY_SIZE]) |
| +static void curve25519_ever64_base(u8 *out, const u8 *priv) |
| { |
| - struct { |
| - u64 buffer[4 * NUM_WORDS_ELTFP25519]; |
| - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; |
| - u64 workspace[4 * NUM_WORDS_ELTFP25519]; |
| - u8 private[CURVE25519_KEY_SIZE]; |
| - } __aligned(32) m; |
| - |
| - const int ite[4] = { 64, 64, 64, 63 }; |
| - const int q = 3; |
| u64 swap = 1; |
| - |
| - int i = 0, j = 0, k = 0; |
| - u64 *const key = (u64 *)m.private; |
| - u64 *const Ur1 = m.coordinates + 0; |
| - u64 *const Zr1 = m.coordinates + 4; |
| - u64 *const Ur2 = m.coordinates + 8; |
| - u64 *const Zr2 = m.coordinates + 12; |
| - |
| - u64 *const UZr1 = m.coordinates + 0; |
| - u64 *const ZUr2 = m.coordinates + 8; |
| - |
| - u64 *const A = m.workspace + 0; |
| - u64 *const B = m.workspace + 4; |
| - u64 *const C = m.workspace + 8; |
| - u64 *const D = m.workspace + 12; |
| - |
| - u64 *const AB = m.workspace + 0; |
| - u64 *const CD = m.workspace + 8; |
| - |
| - const u64 *const P = table_ladder_8k; |
| - |
| - memcpy(m.private, private_key, sizeof(m.private)); |
| - |
| - curve25519_clamp_secret(m.private); |
| - |
| - setzero_eltfp25519_1w(Ur1); |
| - setzero_eltfp25519_1w(Zr1); |
| - setzero_eltfp25519_1w(Zr2); |
| - Ur1[0] = 1; |
| - Zr1[0] = 1; |
| - Zr2[0] = 1; |
| - |
| - /* G-S */ |
| - Ur2[3] = 0x1eaecdeee27cab34UL; |
| - Ur2[2] = 0xadc7a0b9235d48e2UL; |
| - Ur2[1] = 0xbbf095ae14b2edf8UL; |
| - Ur2[0] = 0x7e94e1fec82faabdUL; |
| - |
| - /* main-loop */ |
| - j = q; |
| - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { |
| - while (j < ite[i]) { |
| - u64 bit = (key[i] >> j) & 0x1; |
| - k = (64 * i + j - q); |
| + int i, j, k; |
| + u64 tmp[16 + 32 + 4]; |
| + u64 *x1 = &tmp[0]; |
| + u64 *z1 = &tmp[4]; |
| + u64 *x2 = &tmp[8]; |
| + u64 *z2 = &tmp[12]; |
| + u64 *xz1 = &tmp[0]; |
| + u64 *xz2 = &tmp[8]; |
| + u64 *a = &tmp[0 + 16]; |
| + u64 *b = &tmp[4 + 16]; |
| + u64 *c = &tmp[8 + 16]; |
| + u64 *ab = &tmp[0 + 16]; |
| + u64 *abcd = &tmp[0 + 16]; |
| + u64 *ef = &tmp[16 + 16]; |
| + u64 *efgh = &tmp[16 + 16]; |
| + u64 *key = &tmp[0 + 16 + 32]; |
| + |
| + memcpy(key, priv, 32); |
| + ((u8 *)key)[0] &= 248; |
| + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; |
| + |
| + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; |
| + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; |
| + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; |
| + memcpy(x2, p_minus_s, sizeof(p_minus_s)); |
| + |
| + j = 3; |
| + for (i = 0; i < 4; ++i) { |
| + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { |
| + u64 bit = (key[i] >> j) & 1; |
| + k = (64 * i + j - 3); |
| swap = swap ^ bit; |
| - cswap(swap, Ur1, Ur2); |
| - cswap(swap, Zr1, Zr2); |
| + cswap2(swap, xz1, xz2); |
| swap = bit; |
| - /* Addition */ |
| - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| - mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ |
| - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ |
| - add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ |
| - sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */ |
| - mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ |
| + fsub(b, x1, z1); |
| + fadd(a, x1, z1); |
| + fmul(c, &table_ladder[4 * k], b, ef); |
| + fsub(b, a, c); |
| + fadd(a, a, c); |
| + fsqr2(ab, ab, efgh); |
| + fmul2(xz1, xz2, ab, efgh); |
| ++j; |
| } |
| j = 0; |
| } |
| |
| - /* Doubling */ |
| - for (i = 0; i < q; ++i) { |
| - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ |
| - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ |
| - sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ |
| - copy_eltfp25519_1w(C, B); /* C = B */ |
| - sub_eltfp25519_1w(B, A, B); /* B = A-B */ |
| - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ |
| - add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ |
| - mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ |
| - } |
| + point_double(xz1, abcd, efgh); |
| + point_double(xz1, abcd, efgh); |
| + point_double(xz1, abcd, efgh); |
| + encode_point(out, xz1); |
| |
| - /* Convert to affine coordinates */ |
| - inv_eltfp25519_1w_bmi2(A, Zr1); |
| - mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); |
| - fred_eltfp25519_1w((u64 *)session_key); |
| - |
| - memzero_explicit(&m, sizeof(m)); |
| + memzero_explicit(tmp, sizeof(tmp)); |
| } |
| |
| +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); |
| + |
| void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], |
| const u8 secret[CURVE25519_KEY_SIZE], |
| const u8 basepoint[CURVE25519_KEY_SIZE]) |
| { |
| - if (static_branch_likely(&curve25519_use_adx)) |
| - curve25519_adx(mypublic, secret, basepoint); |
| - else if (static_branch_likely(&curve25519_use_bmi2)) |
| - curve25519_bmi2(mypublic, secret, basepoint); |
| + if (static_branch_likely(&curve25519_use_bmi2_adx)) |
| + curve25519_ever64(mypublic, secret, basepoint); |
| else |
| curve25519_generic(mypublic, secret, basepoint); |
| } |
| @@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch); |
| void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], |
| const u8 secret[CURVE25519_KEY_SIZE]) |
| { |
| - if (static_branch_likely(&curve25519_use_adx)) |
| - curve25519_adx_base(pub, secret); |
| - else if (static_branch_likely(&curve25519_use_bmi2)) |
| - curve25519_bmi2_base(pub, secret); |
| + if (static_branch_likely(&curve25519_use_bmi2_adx)) |
| + curve25519_ever64_base(pub, secret); |
| else |
| curve25519_generic(pub, secret, curve25519_base_point); |
| } |
| @@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = { |
| .max_size = curve25519_max_size, |
| }; |
| |
| + |
| static int __init curve25519_mod_init(void) |
| { |
| - if (boot_cpu_has(X86_FEATURE_BMI2)) |
| - static_branch_enable(&curve25519_use_bmi2); |
| - else if (boot_cpu_has(X86_FEATURE_ADX)) |
| - static_branch_enable(&curve25519_use_adx); |
| + if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) |
| + static_branch_enable(&curve25519_use_bmi2_adx); |
| else |
| return 0; |
| return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? |
| @@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit); |
| MODULE_ALIAS_CRYPTO("curve25519"); |
| MODULE_ALIAS_CRYPTO("curve25519-x86"); |
| MODULE_LICENSE("GPL v2"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| -- |
| 2.18.2 |
| |
| |
| From 1d46b9410c99a51d869bd4b7cd41ea32f2fea63d Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 1 Mar 2020 16:06:56 +0800 |
| Subject: [PATCH 055/100] crypto: x86/curve25519 - leave r12 as spare register |
| |
| commit dc7fc3a53ae158263196b1892b672aedf67796c5 upstream. |
| |
| This updates to the newer register selection proved by HACL*, which |
| leads to a more compact instruction encoding, and saves around 100 |
| cycles. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++-------------- |
| 1 file changed, 55 insertions(+), 55 deletions(-) |
| |
| diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c |
| index e4e58b8e9afe..8a17621f7d3a 100644 |
| |
| |
| @@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " movq 0(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" |
| " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" |
| /* Compute src1[1] * src2 */ |
| " movq 8(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[2] * src2 */ |
| " movq 16(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[3] * src2 */ |
| " movq 24(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" |
| /* Line up pointers */ |
| @@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " mulxq 32(%1), %%r8, %%r13;" |
| " xor %3, %3;" |
| " adoxq 0(%1), %%r8;" |
| - " mulxq 40(%1), %%r9, %%r12;" |
| + " mulxq 40(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 8(%1), %%r9;" |
| " mulxq 48(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 16(%1), %%r10;" |
| " mulxq 56(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " movq %%r8, 0(%0);" |
| : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) |
| : |
| - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" |
| + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" |
| ); |
| } |
| |
| @@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " movq 0(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" |
| " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" |
| /* Compute src1[1] * src2 */ |
| " movq 8(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[2] * src2 */ |
| " movq 16(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[3] * src2 */ |
| " movq 24(%1), %%rdx;" |
| " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" |
| - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" |
| - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" |
| + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" |
| + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" |
| " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" |
| |
| @@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " movq 32(%1), %%rdx;" |
| " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" |
| " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" |
| - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" |
| + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" |
| " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" |
| /* Compute src1[1] * src2 */ |
| " movq 40(%1), %%rdx;" |
| " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" |
| - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" |
| - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" |
| + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[2] * src2 */ |
| " movq 48(%1), %%rdx;" |
| " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" |
| - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" |
| - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" |
| + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" |
| " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" |
| /* Compute src1[3] * src2 */ |
| " movq 56(%1), %%rdx;" |
| " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" |
| - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" |
| - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" |
| + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" |
| + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" |
| " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" |
| " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" |
| /* Line up pointers */ |
| @@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " mulxq 32(%1), %%r8, %%r13;" |
| " xor %3, %3;" |
| " adoxq 0(%1), %%r8;" |
| - " mulxq 40(%1), %%r9, %%r12;" |
| + " mulxq 40(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 8(%1), %%r9;" |
| " mulxq 48(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 16(%1), %%r10;" |
| " mulxq 56(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " mulxq 96(%1), %%r8, %%r13;" |
| " xor %3, %3;" |
| " adoxq 64(%1), %%r8;" |
| - " mulxq 104(%1), %%r9, %%r12;" |
| + " mulxq 104(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 72(%1), %%r9;" |
| " mulxq 112(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 80(%1), %%r10;" |
| " mulxq 120(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| " movq %%r8, 32(%0);" |
| : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) |
| : |
| - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" |
| + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" |
| ); |
| } |
| |
| @@ -388,11 +388,11 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) |
| asm volatile( |
| /* Compute the raw multiplication of f1*f2 */ |
| " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ |
| - " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ |
| + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ |
| " add %%rcx, %%r9;" |
| " mov $0, %%rcx;" |
| " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ |
| " adcx %%r13, %%r11;" |
| " adcx %%rcx, %%rax;" |
| @@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) |
| " movq %%r8, 0(%1);" |
| : "+&r" (f2_r) |
| : "r" (out), "r" (f1) |
| - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" |
| + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc" |
| ); |
| } |
| |
| @@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| " movq 24(%1), %%rdx;" /* f[3] */ |
| - " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ |
| " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ |
| " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| |
| @@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%r8, %%r8;" |
| " adox %%rcx, %%r11;" |
| " adcx %%r9, %%r9;" |
| - " adox %%r15, %%r12;" |
| + " adox %%r15, %%rbx;" |
| " adcx %%r10, %%r10;" |
| " adox %%r15, %%r13;" |
| " adcx %%r11, %%r11;" |
| " adox %%r15, %%r14;" |
| - " adcx %%r12, %%r12;" |
| + " adcx %%rbx, %%rbx;" |
| " adcx %%r13, %%r13;" |
| " adcx %%r14, %%r14;" |
| |
| @@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" |
| " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" |
| - " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" |
| + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" |
| " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" |
| " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" |
| @@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 32(%1), %%r8, %%r13;" |
| " xor %%rcx, %%rcx;" |
| " adoxq 0(%1), %%r8;" |
| - " mulxq 40(%1), %%r9, %%r12;" |
| + " mulxq 40(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 8(%1), %%r9;" |
| " mulxq 48(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 16(%1), %%r10;" |
| " mulxq 56(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| " movq %%r8, 0(%0);" |
| : "+&r" (tmp), "+&r" (f), "+&r" (out) |
| : |
| - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" |
| + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" |
| ); |
| } |
| |
| @@ -611,8 +611,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| " movq 24(%1), %%rdx;" /* f[3] */ |
| - " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ |
| " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ |
| " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| |
| @@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%r8, %%r8;" |
| " adox %%rcx, %%r11;" |
| " adcx %%r9, %%r9;" |
| - " adox %%r15, %%r12;" |
| + " adox %%r15, %%rbx;" |
| " adcx %%r10, %%r10;" |
| " adox %%r15, %%r13;" |
| " adcx %%r11, %%r11;" |
| " adox %%r15, %%r14;" |
| - " adcx %%r12, %%r12;" |
| + " adcx %%rbx, %%rbx;" |
| " adcx %%r13, %%r13;" |
| " adcx %%r14, %%r14;" |
| |
| @@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" |
| " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" |
| - " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" |
| + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" |
| " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" |
| " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" |
| @@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ |
| " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ |
| " movq 56(%1), %%rdx;" /* f[3] */ |
| - " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| - " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ |
| + " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ |
| + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ |
| " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ |
| " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ |
| |
| @@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%r8, %%r8;" |
| " adox %%rcx, %%r11;" |
| " adcx %%r9, %%r9;" |
| - " adox %%r15, %%r12;" |
| + " adox %%r15, %%rbx;" |
| " adcx %%r10, %%r10;" |
| " adox %%r15, %%r13;" |
| " adcx %%r11, %%r11;" |
| " adox %%r15, %%r14;" |
| - " adcx %%r12, %%r12;" |
| + " adcx %%rbx, %%rbx;" |
| " adcx %%r13, %%r13;" |
| " adcx %%r14, %%r14;" |
| |
| @@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" |
| " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" |
| - " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" |
| + " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" |
| " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" |
| " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" |
| @@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 32(%1), %%r8, %%r13;" |
| " xor %%rcx, %%rcx;" |
| " adoxq 0(%1), %%r8;" |
| - " mulxq 40(%1), %%r9, %%r12;" |
| + " mulxq 40(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 8(%1), %%r9;" |
| " mulxq 48(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 16(%1), %%r10;" |
| " mulxq 56(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " mulxq 96(%1), %%r8, %%r13;" |
| " xor %%rcx, %%rcx;" |
| " adoxq 64(%1), %%r8;" |
| - " mulxq 104(%1), %%r9, %%r12;" |
| + " mulxq 104(%1), %%r9, %%rbx;" |
| " adcx %%r13, %%r9;" |
| " adoxq 72(%1), %%r9;" |
| " mulxq 112(%1), %%r10, %%r13;" |
| - " adcx %%r12, %%r10;" |
| + " adcx %%rbx, %%r10;" |
| " adoxq 80(%1), %%r10;" |
| " mulxq 120(%1), %%r11, %%rax;" |
| " adcx %%r13, %%r11;" |
| @@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| " movq %%r8, 32(%0);" |
| : "+&r" (tmp), "+&r" (f), "+&r" (out) |
| : |
| - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" |
| + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" |
| ); |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From 1b621c6bdc78e16880d8aaae55dceade812c56fd Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 19 Mar 2020 11:56:17 -0600 |
| Subject: [PATCH 056/100] crypto: arm[64]/poly1305 - add artifact to .gitignore |
| files |
| |
| commit 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 upstream. |
| |
| The .S_shipped yields a .S, and the pattern in these directories is to |
| add that to .gitignore so that git-status doesn't raise a fuss. |
| |
| Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") |
| Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") |
| Reported-by: Emil Renner Berthing <kernel@esmil.dk> |
| Cc: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/.gitignore | 1 + |
| arch/arm64/crypto/.gitignore | 1 + |
| 2 files changed, 2 insertions(+) |
| |
| diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore |
| index 31e1f538df7d..a3c7ad52a469 100644 |
| |
| |
| @@ -1,3 +1,4 @@ |
| aesbs-core.S |
| sha256-core.S |
| sha512-core.S |
| +poly1305-core.S |
| diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore |
| index 879df8781ed5..e403b1343328 100644 |
| |
| |
| @@ -1,2 +1,3 @@ |
| sha256-core.S |
| sha512-core.S |
| +poly1305-core.S |
| -- |
| 2.18.2 |
| |
| |
| From 04d656cc3b94e33050abc7df5cdc9b1e245aae84 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 23 Apr 2020 15:54:04 -0600 |
| Subject: [PATCH 057/100] crypto: arch/lib - limit simd usage to 4k chunks |
| |
| commit 706024a52c614b478b63f7728d202532ce6591a9 upstream. |
| |
| The initial Zinc patchset, after some mailing list discussion, contained |
| code to ensure that a kernel_fpu_begin() section would not be held for more than |
| a 4k chunk, since it disables preemption. The choice of 4k isn't totally |
| scientific, but it's not a bad guess either, and it's what's already used |
| in the x86 poly1305, blake2s, and nhpoly1305 code (in the form |
| of PAGE_SIZE, which this commit corrects to be explicitly 4k for the |
| former two). |
| |
| Ard did some back-of-the-envelope calculations and found that |
| at 5 cycles/byte (an overestimate) on a 1 GHz processor (pretty slow), 4k |
| means preemption is disabled for at most about 20us, which Sebastian |
| confirmed was probably a good limit. |
| |
| Unfortunately the chunking appears to have been left out of the final |
| patchset that added the glue code. So, this commit adds it back in. |
| |
| Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") |
| Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") |
| Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function") |
| Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel") |
| Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") |
| Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") |
| Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation") |
| Cc: Eric Biggers <ebiggers@google.com> |
| Cc: Ard Biesheuvel <ardb@kernel.org> |
| Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Reviewed-by: Ard Biesheuvel <ardb@kernel.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| arch/arm/crypto/chacha-glue.c | 14 +++++++++++--- |
| arch/arm/crypto/poly1305-glue.c | 15 +++++++++++---- |
| arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++--- |
| arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++---- |
| arch/x86/crypto/blake2s-glue.c | 10 ++++------ |
| arch/x86/crypto/chacha_glue.c | 14 +++++++++++--- |
| arch/x86/crypto/poly1305_glue.c | 13 ++++++------- |
| 7 files changed, 65 insertions(+), 30 deletions(-) |
| |
| diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c |
| index 893692ed12b7..cd131b454c2e 100644 |
| |
| |
| @@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| return; |
| } |
| |
| - kernel_neon_begin(); |
| - chacha_doneon(state, dst, src, bytes, nrounds); |
| - kernel_neon_end(); |
| + do { |
| + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| + |
| + kernel_neon_begin(); |
| + chacha_doneon(state, dst, src, todo, nrounds); |
| + kernel_neon_end(); |
| + |
| + bytes -= todo; |
| + src += todo; |
| + dst += todo; |
| + } while (bytes); |
| } |
| EXPORT_SYMBOL(chacha_crypt_arch); |
| |
| diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c |
| index ceec04ec2f40..13cfef4ae22e 100644 |
| |
| |
| @@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
| |
| if (static_branch_likely(&have_neon) && do_neon) { |
| - kernel_neon_begin(); |
| - poly1305_blocks_neon(&dctx->h, src, len, 1); |
| - kernel_neon_end(); |
| + do { |
| + unsigned int todo = min_t(unsigned int, len, SZ_4K); |
| + |
| + kernel_neon_begin(); |
| + poly1305_blocks_neon(&dctx->h, src, todo, 1); |
| + kernel_neon_end(); |
| + |
| + len -= todo; |
| + src += todo; |
| + } while (len); |
| } else { |
| poly1305_blocks_arm(&dctx->h, src, len, 1); |
| + src += len; |
| } |
| - src += len; |
| nbytes %= POLY1305_BLOCK_SIZE; |
| } |
| |
| diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c |
| index 218943612261..1d9824c4ae43 100644 |
| |
| |
| @@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| !crypto_simd_usable()) |
| return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| |
| - kernel_neon_begin(); |
| - chacha_doneon(state, dst, src, bytes, nrounds); |
| - kernel_neon_end(); |
| + do { |
| + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| + |
| + kernel_neon_begin(); |
| + chacha_doneon(state, dst, src, todo, nrounds); |
| + kernel_neon_end(); |
| + |
| + bytes -= todo; |
| + src += todo; |
| + dst += todo; |
| + } while (bytes); |
| } |
| EXPORT_SYMBOL(chacha_crypt_arch); |
| |
| diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c |
| index e97b092f56b8..f33ada70c4ed 100644 |
| |
| |
| @@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
| unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
| |
| if (static_branch_likely(&have_neon) && crypto_simd_usable()) { |
| - kernel_neon_begin(); |
| - poly1305_blocks_neon(&dctx->h, src, len, 1); |
| - kernel_neon_end(); |
| + do { |
| + unsigned int todo = min_t(unsigned int, len, SZ_4K); |
| + |
| + kernel_neon_begin(); |
| + poly1305_blocks_neon(&dctx->h, src, todo, 1); |
| + kernel_neon_end(); |
| + |
| + len -= todo; |
| + src += todo; |
| + } while (len); |
| } else { |
| poly1305_blocks(&dctx->h, src, len, 1); |
| + src += len; |
| } |
| - src += len; |
| nbytes %= POLY1305_BLOCK_SIZE; |
| } |
| |
| diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c |
| index 1d9ff8a45e1f..94ac5bdd9f6f 100644 |
| |
| |
| @@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state, |
| const u32 inc) |
| { |
| /* SIMD disables preemption, so relax after processing each page. */ |
| - BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); |
| + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); |
| |
| if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { |
| blake2s_compress_generic(state, block, nblocks, inc); |
| return; |
| } |
| |
| - for (;;) { |
| + do { |
| const size_t blocks = min_t(size_t, nblocks, |
| - PAGE_SIZE / BLAKE2S_BLOCK_SIZE); |
| + SZ_4K / BLAKE2S_BLOCK_SIZE); |
| |
| kernel_fpu_begin(); |
| if (IS_ENABLED(CONFIG_AS_AVX512) && |
| @@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state, |
| kernel_fpu_end(); |
| |
| nblocks -= blocks; |
| - if (!nblocks) |
| - break; |
| block += blocks * BLAKE2S_BLOCK_SIZE; |
| - } |
| + } while (nblocks); |
| } |
| EXPORT_SYMBOL(blake2s_compress_arch); |
| |
| diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c |
| index 1bebe11b9ec9..f3bfce21bc0d 100644 |
| |
| |
| @@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| bytes <= CHACHA_BLOCK_SIZE) |
| return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| |
| - kernel_fpu_begin(); |
| - chacha_dosimd(state, dst, src, bytes, nrounds); |
| - kernel_fpu_end(); |
| + do { |
| + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| + |
| + kernel_fpu_begin(); |
| + chacha_dosimd(state, dst, src, todo, nrounds); |
| + kernel_fpu_end(); |
| + |
| + bytes -= todo; |
| + src += todo; |
| + dst += todo; |
| + } while (bytes); |
| } |
| EXPORT_SYMBOL(chacha_crypt_arch); |
| |
| diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c |
| index 79bb58737d52..61b2bc8b6986 100644 |
| |
| |
| @@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, |
| struct poly1305_arch_internal *state = ctx; |
| |
| /* SIMD disables preemption, so relax after processing each page. */ |
| - BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || |
| - PAGE_SIZE % POLY1305_BLOCK_SIZE); |
| + BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || |
| + SZ_4K % POLY1305_BLOCK_SIZE); |
| |
| if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || |
| (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || |
| @@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, |
| return; |
| } |
| |
| - for (;;) { |
| - const size_t bytes = min_t(size_t, len, PAGE_SIZE); |
| + do { |
| + const size_t bytes = min_t(size_t, len, SZ_4K); |
| |
| kernel_fpu_begin(); |
| if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) |
| @@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, |
| else |
| poly1305_blocks_avx(ctx, inp, bytes, padbit); |
| kernel_fpu_end(); |
| + |
| len -= bytes; |
| - if (!len) |
| - break; |
| inp += bytes; |
| - } |
| + } while (len); |
| } |
| |
| static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |
| -- |
| 2.18.2 |
| |
| |
| From af068e66b2ae1e5fbef3ff78dcd10867ad5af096 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 11 Feb 2020 20:47:05 +0100 |
| Subject: [PATCH 058/100] icmp: introduce helper for nat'd source address in |
| network device context |
| |
| commit 0b41713b606694257b90d61ba7e2712d8457648b upstream. |
| |
| This introduces a helper function to be called only by network drivers |
| that wraps calls to icmp[v6]_send in a conntrack transformation, in case |
| NAT has been used. We don't want to pollute the non-driver path, though, |
| so we introduce this as a helper to be called by places that actually |
| make use of this, as suggested by Florian. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Cc: Florian Westphal <fw@strlen.de> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/linux/icmpv6.h | 10 ++++++++++ |
| include/net/icmp.h | 6 ++++++ |
| net/ipv4/icmp.c | 33 +++++++++++++++++++++++++++++++++ |
| net/ipv6/ip6_icmp.c | 34 ++++++++++++++++++++++++++++++++++ |
| 4 files changed, 83 insertions(+) |
| |
| diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h |
| index a8f888976137..024b7a4cd98e 100644 |
| |
| |
| @@ -22,12 +22,22 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); |
| int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, |
| unsigned int data_len); |
| |
| +#if IS_ENABLED(CONFIG_NF_NAT) |
| +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); |
| +#else |
| +#define icmpv6_ndo_send icmpv6_send |
| +#endif |
| + |
| #else |
| |
| static inline void icmpv6_send(struct sk_buff *skb, |
| u8 type, u8 code, __u32 info) |
| { |
| +} |
| |
| +static inline void icmpv6_ndo_send(struct sk_buff *skb, |
| + u8 type, u8 code, __u32 info) |
| +{ |
| } |
| #endif |
| |
| diff --git a/include/net/icmp.h b/include/net/icmp.h |
| index 5d4bfdba9adf..9ac2d2672a93 100644 |
| |
| |
| @@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 |
| __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); |
| } |
| |
| +#if IS_ENABLED(CONFIG_NF_NAT) |
| +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); |
| +#else |
| +#define icmp_ndo_send icmp_send |
| +#endif |
| + |
| int icmp_rcv(struct sk_buff *skb); |
| int icmp_err(struct sk_buff *skb, u32 info); |
| int icmp_init(void); |
| diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c |
| index ac95ba78b903..9eb05a8139cb 100644 |
| |
| |
| @@ -747,6 +747,39 @@ out:; |
| } |
| EXPORT_SYMBOL(__icmp_send); |
| |
| +#if IS_ENABLED(CONFIG_NF_NAT) |
| +#include <net/netfilter/nf_conntrack.h> |
| +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) |
| +{ |
| + struct sk_buff *cloned_skb = NULL; |
| + enum ip_conntrack_info ctinfo; |
| + struct nf_conn *ct; |
| + __be32 orig_ip; |
| + |
| + ct = nf_ct_get(skb_in, &ctinfo); |
| + if (!ct || !(ct->status & IPS_SRC_NAT)) { |
| + icmp_send(skb_in, type, code, info); |
| + return; |
| + } |
| + |
| + if (skb_shared(skb_in)) |
| + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); |
| + |
| + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || |
| + (skb_network_header(skb_in) + sizeof(struct iphdr)) > |
| + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, |
| + skb_network_offset(skb_in) + sizeof(struct iphdr)))) |
| + goto out; |
| + |
| + orig_ip = ip_hdr(skb_in)->saddr; |
| + ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; |
| + icmp_send(skb_in, type, code, info); |
| + ip_hdr(skb_in)->saddr = orig_ip; |
| +out: |
| + consume_skb(cloned_skb); |
| +} |
| +EXPORT_SYMBOL(icmp_ndo_send); |
| +#endif |
| |
| static void icmp_socket_deliver(struct sk_buff *skb, u32 info) |
| { |
| diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c |
| index 02045494c24c..e0086758b6ee 100644 |
| |
| |
| @@ -45,4 +45,38 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) |
| rcu_read_unlock(); |
| } |
| EXPORT_SYMBOL(icmpv6_send); |
| + |
| +#if IS_ENABLED(CONFIG_NF_NAT) |
| +#include <net/netfilter/nf_conntrack.h> |
| +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) |
| +{ |
| + struct sk_buff *cloned_skb = NULL; |
| + enum ip_conntrack_info ctinfo; |
| + struct in6_addr orig_ip; |
| + struct nf_conn *ct; |
| + |
| + ct = nf_ct_get(skb_in, &ctinfo); |
| + if (!ct || !(ct->status & IPS_SRC_NAT)) { |
| + icmpv6_send(skb_in, type, code, info); |
| + return; |
| + } |
| + |
| + if (skb_shared(skb_in)) |
| + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); |
| + |
| + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || |
| + (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > |
| + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, |
| + skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) |
| + goto out; |
| + |
| + orig_ip = ipv6_hdr(skb_in)->saddr; |
| + ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; |
| + icmpv6_send(skb_in, type, code, info); |
| + ipv6_hdr(skb_in)->saddr = orig_ip; |
| +out: |
| + consume_skb(cloned_skb); |
| +} |
| +EXPORT_SYMBOL(icmpv6_ndo_send); |
| +#endif |
| #endif |
| -- |
| 2.18.2 |
| |
| |
| From 93ca3bf7fc729df26023968388d2c063b177e828 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 9 Dec 2019 00:27:34 +0100 |
| Subject: [PATCH 059/100] net: WireGuard secure network tunnel |
| |
| commit e7096c131e5161fa3b8e52a650d7719d2857adfd upstream. |
| |
| WireGuard is a layer 3 secure networking tunnel made specifically for |
| the kernel, that aims to be much simpler and easier to audit than IPsec. |
| Extensive documentation and description of the protocol and |
| considerations, along with formal proofs of the cryptography, are |
| available at: |
| |
| * https://www.wireguard.com/ |
| * https://www.wireguard.com/papers/wireguard.pdf |
| |
| This commit implements WireGuard as a simple network device driver, |
| accessible in the usual RTNL way used by virtual network drivers. It |
| makes use of the udp_tunnel APIs, GRO, GSO, NAPI, and the usual set of |
| networking subsystem APIs. It has a somewhat novel multicore queueing |
| system designed for maximum throughput and minimal latency of encryption |
| operations, but it is implemented modestly using workqueues and NAPI. |
| Configuration is done via generic Netlink, and following a review from |
| the Netlink maintainer a year ago, several high profile userspace tools |
| have already implemented the API. |
| |
| This commit also comes with several different tests, both in-kernel |
| tests and out-of-kernel tests based on network namespaces, taking |
| advantage of the fact that sockets used by WireGuard intentionally stay |
| in the namespace in which the WireGuard interface was originally |
| created, exactly like the semantics of userspace tun devices. See |
| wireguard.com/netns/ for pictures and examples. |
| |
| The source code is fairly short, but rather than combining everything |
| into a single file, WireGuard is developed as cleanly separable files, |
| making auditing and comprehension easier. Things are laid out as |
| follows: |
| |
| * noise.[ch], cookie.[ch], messages.h: These implement the bulk of the |
| cryptographic aspects of the protocol, and are mostly data-only in |
| nature, taking in buffers of bytes and spitting out buffers of |
| bytes. They also handle reference counting for their various shared |
| pieces of data, like keys and key lists. |
| |
| * ratelimiter.[ch]: Used as an integral part of cookie.[ch] for |
| ratelimiting certain types of cryptographic operations in accordance |
| with particular WireGuard semantics. |
| |
| * allowedips.[ch], peerlookup.[ch]: The main lookup structures of |
| WireGuard, the former being trie-like with particular semantics, an |
| integral part of the design of the protocol, and the latter just |
| being nice helper functions around the various hashtables we use. |
| |
| * device.[ch]: Implementation of functions for the netdevice and for |
| rtnl, responsible for maintaining the life of a given interface and |
| wiring it up to the rest of WireGuard. |
| |
| * peer.[ch]: Each interface has a list of peers, with helper functions |
| available here for creation, destruction, and reference counting. |
| |
| * socket.[ch]: Implementation of functions related to udp_socket and |
| the general set of kernel socket APIs, for sending and receiving |
| ciphertext UDP packets, and taking care of WireGuard-specific sticky |
| socket routing semantics for the automatic roaming. |
| |
| * netlink.[ch]: Userspace API entry point for configuring WireGuard |
| peers and devices. The API has been implemented by several userspace |
| tools and network management utilities, and the WireGuard project |
| distributes the basic wg(8) tool. |
| |
| * queueing.[ch]: Shared functions on the rx and tx paths for handling |
| the various queues used in the multicore algorithms. |
| |
| * send.c: Handles encrypting outgoing packets in parallel on |
| multiple cores, before sending them in order on a single core, via |
| workqueues and ring buffers. Also handles sending handshake and cookie |
| messages as part of the protocol, in parallel. |
| |
| * receive.c: Handles decrypting incoming packets in parallel on |
| multiple cores, before passing them off in order to be ingested via |
| the rest of the networking subsystem with GRO via the typical NAPI |
| poll function. Also handles receiving handshake and cookie messages |
| as part of the protocol, in parallel. |
| |
| * timers.[ch]: Uses the timer wheel to implement protocol-specific |
| event timeouts, and gives a set of very simple event-driven entry |
| point functions for callers. |
| |
| * main.c, version.h: Initialization and deinitialization of the module. |
| |
| * selftest/*.h: Runtime unit tests for some of the most security |
| sensitive functions. |
| |
| * tools/testing/selftests/wireguard/netns.sh: Aforementioned testing |
| script using network namespaces. |
| |
| This commit aims to be as self-contained as possible, implementing |
| WireGuard as a standalone module not needing much special handling or |
| coordination from the network subsystem. I expect future |
| optimizations to the network stack to improve WireGuard, and |
| vice-versa, but for the time being, this exists as intentionally |
| standalone. |
| |
| We introduce a menu option for CONFIG_WIREGUARD, as well as providing a |
| verbose debug log and self-tests via CONFIG_WIREGUARD_DEBUG. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Cc: David Miller <davem@davemloft.net> |
| Cc: Greg KH <gregkh@linuxfoundation.org> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Cc: Herbert Xu <herbert@gondor.apana.org.au> |
| Cc: linux-crypto@vger.kernel.org |
| Cc: linux-kernel@vger.kernel.org |
| Cc: netdev@vger.kernel.org |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| [Jason: ported to 5.4 by doing the following: |
| - wg_get_device_start uses genl_family_attrbuf |
| - trivial skb_redirect_reset change from 2c64605b590e is folded in] |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| MAINTAINERS | 8 + |
| drivers/net/Kconfig | 41 + |
| drivers/net/Makefile | 1 + |
| drivers/net/wireguard/Makefile | 18 + |
| drivers/net/wireguard/allowedips.c | 381 +++++++++ |
| drivers/net/wireguard/allowedips.h | 59 ++ |
| drivers/net/wireguard/cookie.c | 236 ++++++ |
| drivers/net/wireguard/cookie.h | 59 ++ |
| drivers/net/wireguard/device.c | 458 ++++++++++ |
| drivers/net/wireguard/device.h | 73 ++ |
| drivers/net/wireguard/main.c | 64 ++ |
| drivers/net/wireguard/messages.h | 128 +++ |
| drivers/net/wireguard/netlink.c | 648 +++++++++++++++ |
| drivers/net/wireguard/netlink.h | 12 + |
| drivers/net/wireguard/noise.c | 828 +++++++++++++++++++ |
| drivers/net/wireguard/noise.h | 137 +++ |
| drivers/net/wireguard/peer.c | 240 ++++++ |
| drivers/net/wireguard/peer.h | 83 ++ |
| drivers/net/wireguard/peerlookup.c | 221 +++++ |
| drivers/net/wireguard/peerlookup.h | 64 ++ |
| drivers/net/wireguard/queueing.c | 53 ++ |
| drivers/net/wireguard/queueing.h | 197 +++++ |
| drivers/net/wireguard/ratelimiter.c | 223 +++++ |
| drivers/net/wireguard/ratelimiter.h | 19 + |
| drivers/net/wireguard/receive.c | 595 +++++++++++++ |
| drivers/net/wireguard/selftest/allowedips.c | 683 +++++++++++++++ |
| drivers/net/wireguard/selftest/counter.c | 104 +++ |
| drivers/net/wireguard/selftest/ratelimiter.c | 226 +++++ |
| drivers/net/wireguard/send.c | 413 +++++++++ |
| drivers/net/wireguard/socket.c | 437 ++++++++++ |
| drivers/net/wireguard/socket.h | 44 + |
| drivers/net/wireguard/timers.c | 243 ++++++ |
| drivers/net/wireguard/timers.h | 31 + |
| drivers/net/wireguard/version.h | 1 + |
| include/uapi/linux/wireguard.h | 196 +++++ |
| tools/testing/selftests/wireguard/netns.sh | 537 ++++++++++++ |
| 36 files changed, 7761 insertions(+) |
| create mode 100644 drivers/net/wireguard/Makefile |
| create mode 100644 drivers/net/wireguard/allowedips.c |
| create mode 100644 drivers/net/wireguard/allowedips.h |
| create mode 100644 drivers/net/wireguard/cookie.c |
| create mode 100644 drivers/net/wireguard/cookie.h |
| create mode 100644 drivers/net/wireguard/device.c |
| create mode 100644 drivers/net/wireguard/device.h |
| create mode 100644 drivers/net/wireguard/main.c |
| create mode 100644 drivers/net/wireguard/messages.h |
| create mode 100644 drivers/net/wireguard/netlink.c |
| create mode 100644 drivers/net/wireguard/netlink.h |
| create mode 100644 drivers/net/wireguard/noise.c |
| create mode 100644 drivers/net/wireguard/noise.h |
| create mode 100644 drivers/net/wireguard/peer.c |
| create mode 100644 drivers/net/wireguard/peer.h |
| create mode 100644 drivers/net/wireguard/peerlookup.c |
| create mode 100644 drivers/net/wireguard/peerlookup.h |
| create mode 100644 drivers/net/wireguard/queueing.c |
| create mode 100644 drivers/net/wireguard/queueing.h |
| create mode 100644 drivers/net/wireguard/ratelimiter.c |
| create mode 100644 drivers/net/wireguard/ratelimiter.h |
| create mode 100644 drivers/net/wireguard/receive.c |
| create mode 100644 drivers/net/wireguard/selftest/allowedips.c |
| create mode 100644 drivers/net/wireguard/selftest/counter.c |
| create mode 100644 drivers/net/wireguard/selftest/ratelimiter.c |
| create mode 100644 drivers/net/wireguard/send.c |
| create mode 100644 drivers/net/wireguard/socket.c |
| create mode 100644 drivers/net/wireguard/socket.h |
| create mode 100644 drivers/net/wireguard/timers.c |
| create mode 100644 drivers/net/wireguard/timers.h |
| create mode 100644 drivers/net/wireguard/version.h |
| create mode 100644 include/uapi/linux/wireguard.h |
| create mode 100755 tools/testing/selftests/wireguard/netns.sh |
| |
| diff --git a/MAINTAINERS b/MAINTAINERS |
| index fe6fa5d3a63e..d05f78261f33 100644 |
| |
| |
| @@ -17583,6 +17583,14 @@ L: linux-gpio@vger.kernel.org |
| S: Maintained |
| F: drivers/gpio/gpio-ws16c48.c |
| |
| +WIREGUARD SECURE NETWORK TUNNEL |
| +M: Jason A. Donenfeld <Jason@zx2c4.com> |
| +S: Maintained |
| +F: drivers/net/wireguard/ |
| +F: tools/testing/selftests/wireguard/ |
| +L: wireguard@lists.zx2c4.com |
| +L: netdev@vger.kernel.org |
| + |
| WISTRON LAPTOP BUTTON DRIVER |
| M: Miloslav Trmac <mitr@volny.cz> |
| S: Maintained |
| diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig |
| index df3cd2589bcf..16ad145e22c9 100644 |
| |
| |
| @@ -71,6 +71,47 @@ config DUMMY |
| To compile this driver as a module, choose M here: the module |
| will be called dummy. |
| |
| +config WIREGUARD |
| + tristate "WireGuard secure network tunnel" |
| + depends on NET && INET |
| + depends on IPV6 || !IPV6 |
| + select NET_UDP_TUNNEL |
| + select DST_CACHE |
| + select CRYPTO |
| + select CRYPTO_LIB_CURVE25519 |
| + select CRYPTO_LIB_CHACHA20POLY1305 |
| + select CRYPTO_LIB_BLAKE2S |
| + select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT |
| + select CRYPTO_POLY1305_X86_64 if X86 && 64BIT |
| + select CRYPTO_BLAKE2S_X86 if X86 && 64BIT |
| + select CRYPTO_CURVE25519_X86 if X86 && 64BIT |
| + select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON |
| + select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON |
| + select CRYPTO_POLY1305_ARM if ARM |
| + select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON |
| + select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 |
| + select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) |
| + help |
| + WireGuard is a secure, fast, and easy to use replacement for IPSec |
| + that uses modern cryptography and clever networking tricks. It's |
| + designed to be fairly general purpose and abstract enough to fit most |
| + use cases, while at the same time remaining extremely simple to |
| + configure. See www.wireguard.com for more info. |
| + |
| + It's safe to say Y or M here, as the driver is very lightweight and |
| + is only in use when an administrator chooses to add an interface. |
| + |
| +config WIREGUARD_DEBUG |
| + bool "Debugging checks and verbose messages" |
| + depends on WIREGUARD |
| + help |
| + This will write log messages for handshake and other events |
| + that occur for a WireGuard interface. It will also perform some |
| + extra validation checks and unit tests at various points. This is |
| + only useful for debugging. |
| + |
| + Say N here unless you know what you're doing. |
| + |
| config EQUALIZER |
| tristate "EQL (serial line load balancing) support" |
| ---help--- |
| diff --git a/drivers/net/Makefile b/drivers/net/Makefile |
| index 0d3ba056cda3..953b7c12f0b0 100644 |
| |
| |
| @@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/ |
| obj-$(CONFIG_IPVLAN) += ipvlan/ |
| obj-$(CONFIG_IPVTAP) += ipvlan/ |
| obj-$(CONFIG_DUMMY) += dummy.o |
| +obj-$(CONFIG_WIREGUARD) += wireguard/ |
| obj-$(CONFIG_EQUALIZER) += eql.o |
| obj-$(CONFIG_IFB) += ifb.o |
| obj-$(CONFIG_MACSEC) += macsec.o |
| diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile |
| new file mode 100644 |
| index 000000000000..fc52b2cb500b |
| |
| |
| @@ -0,0 +1,18 @@ |
| +ccflags-y := -O3 |
| +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' |
| +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG |
| +wireguard-y := main.o |
| +wireguard-y += noise.o |
| +wireguard-y += device.o |
| +wireguard-y += peer.o |
| +wireguard-y += timers.o |
| +wireguard-y += queueing.o |
| +wireguard-y += send.o |
| +wireguard-y += receive.o |
| +wireguard-y += socket.o |
| +wireguard-y += peerlookup.o |
| +wireguard-y += allowedips.o |
| +wireguard-y += ratelimiter.o |
| +wireguard-y += cookie.o |
| +wireguard-y += netlink.o |
| +obj-$(CONFIG_WIREGUARD) := wireguard.o |
| diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c |
| new file mode 100644 |
| index 000000000000..72667d5399c3 |
| |
| |
| @@ -0,0 +1,381 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "allowedips.h" |
| +#include "peer.h" |
| + |
| +static void swap_endian(u8 *dst, const u8 *src, u8 bits) |
| +{ |
| + if (bits == 32) { |
| + *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); |
| + } else if (bits == 128) { |
| + ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); |
| + ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); |
| + } |
| +} |
| + |
| +static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, |
| + u8 cidr, u8 bits) |
| +{ |
| + node->cidr = cidr; |
| + node->bit_at_a = cidr / 8U; |
| +#ifdef __LITTLE_ENDIAN |
| + node->bit_at_a ^= (bits / 8U - 1U) % 8U; |
| +#endif |
| + node->bit_at_b = 7U - (cidr % 8U); |
| + node->bitlen = bits; |
| + memcpy(node->bits, src, bits / 8U); |
| +} |
| +#define CHOOSE_NODE(parent, key) \ |
| + parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] |
| + |
| +static void node_free_rcu(struct rcu_head *rcu) |
| +{ |
| + kfree(container_of(rcu, struct allowedips_node, rcu)); |
| +} |
| + |
| +static void push_rcu(struct allowedips_node **stack, |
| + struct allowedips_node __rcu *p, unsigned int *len) |
| +{ |
| + if (rcu_access_pointer(p)) { |
| + WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); |
| + stack[(*len)++] = rcu_dereference_raw(p); |
| + } |
| +} |
| + |
| +static void root_free_rcu(struct rcu_head *rcu) |
| +{ |
| + struct allowedips_node *node, *stack[128] = { |
| + container_of(rcu, struct allowedips_node, rcu) }; |
| + unsigned int len = 1; |
| + |
| + while (len > 0 && (node = stack[--len])) { |
| + push_rcu(stack, node->bit[0], &len); |
| + push_rcu(stack, node->bit[1], &len); |
| + kfree(node); |
| + } |
| +} |
| + |
| +static void root_remove_peer_lists(struct allowedips_node *root) |
| +{ |
| + struct allowedips_node *node, *stack[128] = { root }; |
| + unsigned int len = 1; |
| + |
| + while (len > 0 && (node = stack[--len])) { |
| + push_rcu(stack, node->bit[0], &len); |
| + push_rcu(stack, node->bit[1], &len); |
| + if (rcu_access_pointer(node->peer)) |
| + list_del(&node->peer_list); |
| + } |
| +} |
| + |
| +static void walk_remove_by_peer(struct allowedips_node __rcu **top, |
| + struct wg_peer *peer, struct mutex *lock) |
| +{ |
| +#define REF(p) rcu_access_pointer(p) |
| +#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) |
| +#define PUSH(p) ({ \ |
| + WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ |
| + stack[len++] = p; \ |
| + }) |
| + |
| + struct allowedips_node __rcu **stack[128], **nptr; |
| + struct allowedips_node *node, *prev; |
| + unsigned int len; |
| + |
| + if (unlikely(!peer || !REF(*top))) |
| + return; |
| + |
| + for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { |
| + nptr = stack[len - 1]; |
| + node = DEREF(nptr); |
| + if (!node) { |
| + --len; |
| + continue; |
| + } |
| + if (!prev || REF(prev->bit[0]) == node || |
| + REF(prev->bit[1]) == node) { |
| + if (REF(node->bit[0])) |
| + PUSH(&node->bit[0]); |
| + else if (REF(node->bit[1])) |
| + PUSH(&node->bit[1]); |
| + } else if (REF(node->bit[0]) == prev) { |
| + if (REF(node->bit[1])) |
| + PUSH(&node->bit[1]); |
| + } else { |
| + if (rcu_dereference_protected(node->peer, |
| + lockdep_is_held(lock)) == peer) { |
| + RCU_INIT_POINTER(node->peer, NULL); |
| + list_del_init(&node->peer_list); |
| + if (!node->bit[0] || !node->bit[1]) { |
| + rcu_assign_pointer(*nptr, DEREF( |
| + &node->bit[!REF(node->bit[0])])); |
| + call_rcu(&node->rcu, node_free_rcu); |
| + node = DEREF(nptr); |
| + } |
| + } |
| + --len; |
| + } |
| + } |
| + |
| +#undef REF |
| +#undef DEREF |
| +#undef PUSH |
| +} |
| + |
| +static unsigned int fls128(u64 a, u64 b) |
| +{ |
| + return a ? fls64(a) + 64U : fls64(b); |
| +} |
| + |
| +static u8 common_bits(const struct allowedips_node *node, const u8 *key, |
| + u8 bits) |
| +{ |
| + if (bits == 32) |
| + return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); |
| + else if (bits == 128) |
| + return 128U - fls128( |
| + *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], |
| + *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); |
| + return 0; |
| +} |
| + |
| +static bool prefix_matches(const struct allowedips_node *node, const u8 *key, |
| + u8 bits) |
| +{ |
| + /* This could be much faster if it actually just compared the common |
| + * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and |
| + * the rest, but it turns out that common_bits is already super fast on |
| + * modern processors, even taking into account the unfortunate bswap. |
| + * So, we just inline it like this instead. |
| + */ |
| + return common_bits(node, key, bits) >= node->cidr; |
| +} |
| + |
| +static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, |
| + const u8 *key) |
| +{ |
| + struct allowedips_node *node = trie, *found = NULL; |
| + |
| + while (node && prefix_matches(node, key, bits)) { |
| + if (rcu_access_pointer(node->peer)) |
| + found = node; |
| + if (node->cidr == bits) |
| + break; |
| + node = rcu_dereference_bh(CHOOSE_NODE(node, key)); |
| + } |
| + return found; |
| +} |
| + |
| +/* Returns a strong reference to a peer */ |
| +static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, |
| + const void *be_ip) |
| +{ |
| + /* Aligned so it can be passed to fls/fls64 */ |
| + u8 ip[16] __aligned(__alignof(u64)); |
| + struct allowedips_node *node; |
| + struct wg_peer *peer = NULL; |
| + |
| + swap_endian(ip, be_ip, bits); |
| + |
| + rcu_read_lock_bh(); |
| +retry: |
| + node = find_node(rcu_dereference_bh(root), bits, ip); |
| + if (node) { |
| + peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); |
| + if (!peer) |
| + goto retry; |
| + } |
| + rcu_read_unlock_bh(); |
| + return peer; |
| +} |
| + |
| +static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, |
| + u8 cidr, u8 bits, struct allowedips_node **rnode, |
| + struct mutex *lock) |
| +{ |
| + struct allowedips_node *node = rcu_dereference_protected(trie, |
| + lockdep_is_held(lock)); |
| + struct allowedips_node *parent = NULL; |
| + bool exact = false; |
| + |
| + while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { |
| + parent = node; |
| + if (parent->cidr == cidr) { |
| + exact = true; |
| + break; |
| + } |
| + node = rcu_dereference_protected(CHOOSE_NODE(parent, key), |
| + lockdep_is_held(lock)); |
| + } |
| + *rnode = parent; |
| + return exact; |
| +} |
| + |
| +/* |
| + * Insert the prefix @key/@cidr into the trie rooted at @trie and bind it to |
| + * @peer. |
| + * |
| + * @trie: root slot of the trie; rewritten when the root changes |
| + * @bits: width of @key in bits (the callers in this file pass 32 or 128) |
| + * @key:  address bytes, already converted by swap_endian() |
| + * @cidr: prefix length; must not exceed @bits |
| + * @peer: peer that owns the prefix; must not be NULL |
| + * @lock: mutex named in the lockdep annotations; the caller holds it |
| + * |
| + * Returns 0 on success, -EINVAL for bad arguments, or -ENOMEM when a node |
| + * cannot be allocated. On failure neither the trie nor @peer's |
| + * allowedips_list is modified. |
| + */ |
| +static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, |
| +	       u8 cidr, struct wg_peer *peer, struct mutex *lock) |
| +{ |
| +	struct allowedips_node *node, *parent, *down, *newnode; |
| + |
| +	if (unlikely(cidr > bits || !peer)) |
| +		return -EINVAL; |
| + |
| +	/* Empty trie: the new node simply becomes the root. */ |
| +	if (!rcu_access_pointer(*trie)) { |
| +		node = kzalloc(sizeof(*node), GFP_KERNEL); |
| +		if (unlikely(!node)) |
| +			return -ENOMEM; |
| +		RCU_INIT_POINTER(node->peer, peer); |
| +		list_add_tail(&node->peer_list, &peer->allowedips_list); |
| +		copy_and_assign_cidr(node, key, cidr, bits); |
| +		rcu_assign_pointer(*trie, node); |
| +		return 0; |
| +	} |
| +	/* node_placement() found a node to reuse: hand it over to @peer. */ |
| +	if (node_placement(*trie, key, cidr, bits, &node, lock)) { |
| +		rcu_assign_pointer(node->peer, peer); |
| +		list_move_tail(&node->peer_list, &peer->allowedips_list); |
| +		return 0; |
| +	} |
| + |
| +	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); |
| +	if (unlikely(!newnode)) |
| +		return -ENOMEM; |
| +	RCU_INIT_POINTER(newnode->peer, peer); |
| +	list_add_tail(&newnode->peer_list, &peer->allowedips_list); |
| +	copy_and_assign_cidr(newnode, key, cidr, bits); |
| + |
| +	if (!node) { |
| +		down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); |
| +	} else { |
| +		down = rcu_dereference_protected(CHOOSE_NODE(node, key), |
| +						 lockdep_is_held(lock)); |
| +		if (!down) { |
| +			/* Free child slot under @node: hang newnode there. */ |
| +			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); |
| +			return 0; |
| +		} |
| +	} |
| +	cidr = min(cidr, common_bits(down, key, bits)); |
| +	parent = node; |
| + |
| +	if (newnode->cidr == cidr) { |
| +		/* newnode slots in directly between @parent and @down. */ |
| +		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); |
| +		if (!parent) |
| +			rcu_assign_pointer(*trie, newnode); |
| +		else |
| +			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), |
| +					   newnode); |
| +	} else { |
| +		/* A peer-less node with the shorter @cidr is placed above both |
| +		 * @down and newnode. |
| +		 */ |
| +		node = kzalloc(sizeof(*node), GFP_KERNEL); |
| +		if (unlikely(!node)) { |
| +			/* newnode is already linked on the peer's list; it |
| +			 * must be unlinked before it is freed, otherwise the |
| +			 * list keeps a dangling entry. |
| +			 */ |
| +			list_del(&newnode->peer_list); |
| +			kfree(newnode); |
| +			return -ENOMEM; |
| +		} |
| +		INIT_LIST_HEAD(&node->peer_list); |
| +		copy_and_assign_cidr(node, newnode->bits, cidr, bits); |
| + |
| +		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); |
| +		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); |
| +		if (!parent) |
| +			rcu_assign_pointer(*trie, node); |
| +		else |
| +			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), |
| +					   node); |
| +	} |
| +	return 0; |
| +} |
| + |
| +/* Put @table into its empty state: both roots NULL, sequence counter at 1. */ |
| +void wg_allowedips_init(struct allowedips *table) |
| +{ |
| +	table->root6 = NULL; |
| +	table->root4 = NULL; |
| +	table->seq = 1; |
| +} |
| + |
| +/* |
| + * Tear down both tries of @table. |
| + * |
| + * The sequence counter is bumped and both roots are replaced with NULL. Each |
| + * root that was non-NULL is then passed to root_remove_peer_lists() and |
| + * queued for root_free_rcu() through call_rcu(). |
| + * |
| + * @lock only appears in the lockdep annotation of rcu_dereference_protected(), |
| + * so the caller is expected to hold it. |
| + * NOTE(review): root_remove_peer_lists() and root_free_rcu() are defined |
| + * outside this view; presumably they cover the whole subtree - confirm. |
| + */ |
| +void wg_allowedips_free(struct allowedips *table, struct mutex *lock) |
| +{ |
| + struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; |
| + |
| + ++table->seq; |
| + RCU_INIT_POINTER(table->root4, NULL); |
| + RCU_INIT_POINTER(table->root6, NULL); |
| + if (rcu_access_pointer(old4)) { |
| + struct allowedips_node *node = rcu_dereference_protected(old4, |
| + lockdep_is_held(lock)); |
| + |
| + root_remove_peer_lists(node); |
| + call_rcu(&node->rcu, root_free_rcu); |
| + } |
| + if (rcu_access_pointer(old6)) { |
| + struct allowedips_node *node = rcu_dereference_protected(old6, |
| + lockdep_is_held(lock)); |
| + |
| + root_remove_peer_lists(node); |
| + call_rcu(&node->rcu, root_free_rcu); |
| + } |
| +} |
| + |
| +/* |
| + * Bind the IPv4 prefix @ip/@cidr to @peer. |
| + * |
| + * Bumps the table's sequence counter, converts the address with |
| + * swap_endian() and passes it to add() on the IPv4 root. Returns whatever |
| + * add() returns. |
| + */ |
| +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, |
| +			    u8 cidr, struct wg_peer *peer, struct mutex *lock) |
| +{ |
| +	enum { V4_KEY_BITS = 32 }; |
| +	/* Aligned so it can be passed to fls */ |
| +	u8 key[4] __aligned(__alignof(u32)); |
| + |
| +	table->seq++; |
| +	swap_endian(key, (const u8 *)ip, V4_KEY_BITS); |
| +	return add(&table->root4, V4_KEY_BITS, key, cidr, peer, lock); |
| +} |
| + |
| +/* |
| + * Bind the IPv6 prefix @ip/@cidr to @peer. |
| + * |
| + * Bumps the table's sequence counter, converts the address with |
| + * swap_endian() and passes it to add() on the IPv6 root. Returns whatever |
| + * add() returns. |
| + */ |
| +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, |
| +			    u8 cidr, struct wg_peer *peer, struct mutex *lock) |
| +{ |
| +	enum { V6_KEY_BITS = 128 }; |
| +	/* Aligned so it can be passed to fls64 */ |
| +	u8 key[16] __aligned(__alignof(u64)); |
| + |
| +	table->seq++; |
| +	swap_endian(key, (const u8 *)ip, V6_KEY_BITS); |
| +	return add(&table->root6, V6_KEY_BITS, key, cidr, peer, lock); |
| +} |
| + |
| +/* |
| + * Remove @peer's entries from both tries of @table. |
| + * |
| + * The sequence counter is bumped first; the IPv4 trie is walked before the |
| + * IPv6 one. @lock is forwarded to walk_remove_by_peer(). |
| + */ |
| +void wg_allowedips_remove_by_peer(struct allowedips *table, |
| +				  struct wg_peer *peer, struct mutex *lock) |
| +{ |
| +	table->seq++; |
| +	walk_remove_by_peer(&table->root4, peer, lock); |
| +	walk_remove_by_peer(&table->root6, peer, lock); |
| +} |
| + |
| +/* |
| + * Export the prefix stored in @node: the address goes to @ip and the prefix |
| + * length to @cidr. |
| + * |
| + * Bytes of @ip after the prefix are zeroed, and the bits of the last prefix |
| + * byte that lie beyond node->cidr are masked off. |
| + * |
| + * Returns AF_INET when node->bitlen is 32, AF_INET6 otherwise. |
| + */ |
| +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) |
| +{ |
| + /* Number of bytes that hold at least one prefix bit. */ |
| + const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); |
| + swap_endian(ip, node->bits, node->bitlen); |
| + /* Clear the whole bytes that follow the prefix, up to bitlen / 8. */ |
| + memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); |
| + /* -cidr % 8U is the number of non-prefix bits in the last prefix byte. */ |
| + if (node->cidr) |
| + ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); |
| + |
| + *cidr = node->cidr; |
| + return node->bitlen == 32 ? AF_INET : AF_INET6; |
| +} |
| + |
| +/* |
| + * Look up the peer whose allowed IPs cover the destination address of @skb. |
| + * Only ETH_P_IP and ETH_P_IPV6 packets are looked up; anything else yields |
| + * NULL. Returns a strong reference to a peer. |
| + */ |
| +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, |
| +					 struct sk_buff *skb) |
| +{ |
| +	switch (skb->protocol) { |
| +	case htons(ETH_P_IP): |
| +		return lookup(table->root4, 32, &ip_hdr(skb)->daddr); |
| +	case htons(ETH_P_IPV6): |
| +		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); |
| +	default: |
| +		return NULL; |
| +	} |
| +} |
| + |
| +/* |
| + * Look up the peer whose allowed IPs cover the source address of @skb. |
| + * Only ETH_P_IP and ETH_P_IPV6 packets are looked up; anything else yields |
| + * NULL. Returns a strong reference to a peer. |
| + */ |
| +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, |
| +					 struct sk_buff *skb) |
| +{ |
| +	switch (skb->protocol) { |
| +	case htons(ETH_P_IP): |
| +		return lookup(table->root4, 32, &ip_hdr(skb)->saddr); |
| +	case htons(ETH_P_IPV6): |
| +		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); |
| +	default: |
| +		return NULL; |
| +	} |
| +} |
| + |
| +#include "selftest/allowedips.c" |
| diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h |
| new file mode 100644 |
| index 000000000000..e5c83cafcef4 |
| |
| |
| @@ -0,0 +1,59 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_ALLOWEDIPS_H |
| +#define _WG_ALLOWEDIPS_H |
| + |
| +#include <linux/mutex.h> |
| +#include <linux/ip.h> |
| +#include <linux/ipv6.h> |
| + |
| +struct wg_peer; |
| + |
| +/* One node of an allowed-IPs trie (see struct allowedips for the roots). */ |
| +struct allowedips_node { |
| + /* Owning peer; stays NULL for the peer-less nodes add() creates. */ |
| + struct wg_peer __rcu *peer; |
| + /* Child links; add() selects a slot via CHOOSE_NODE(). */ |
| + struct allowedips_node __rcu *bit[2]; |
| + /* While it may seem scandalous that we waste space for v4, |
| + * we're alloc'ing to the nearest power of 2 anyway, so this |
| + * doesn't actually make a difference. |
| + */ |
| + u8 bits[16] __aligned(__alignof(u64)); |
| + /* cidr is the prefix length; bitlen is 32 for IPv4 nodes (see |
| + * wg_allowedips_read_node()). NOTE(review): bit_at_a/bit_at_b are |
| + * filled in outside this view - presumably they locate the bit |
| + * tested by CHOOSE_NODE(); confirm. |
| + */ |
| + u8 cidr, bit_at_a, bit_at_b, bitlen; |
| + |
| + /* Keep rarely used list at bottom to be beyond cache line. */ |
| + union { |
| + /* Entry on the owning peer's allowedips_list. */ |
| + struct list_head peer_list; |
| + /* Used by call_rcu() when the node is being freed. */ |
| + struct rcu_head rcu; |
| + }; |
| +}; |
| + |
| +/* |
| + * Allowed-IPs table: one trie root per address family plus a sequence |
| + * counter that every insert, remove and free operation increments. |
| + */ |
| +struct allowedips { |
| + struct allowedips_node __rcu *root4; |
| + struct allowedips_node __rcu *root6; |
| + u64 seq; |
| +}; |
| + |
| +/* Reset @table to the empty state. */ |
| +void wg_allowedips_init(struct allowedips *table); |
| +/* Detach and free both tries; @lock is the mutex the caller holds. */ |
| +void wg_allowedips_free(struct allowedips *table, struct mutex *lock); |
| +/* Bind a prefix to @peer; return 0 or a negative errno. */ |
| +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, |
| +			    u8 cidr, struct wg_peer *peer, struct mutex *lock); |
| +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, |
| +			    u8 cidr, struct wg_peer *peer, struct mutex *lock); |
| +/* Drop every prefix that belongs to @peer from both tries. */ |
| +void wg_allowedips_remove_by_peer(struct allowedips *table, |
| +				  struct wg_peer *peer, struct mutex *lock); |
| +/* The ip pointer passed in should be __aligned(__alignof(u64)). Returns |
| + * AF_INET or AF_INET6. |
| + */ |
| +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); |
| + |
| +/* These return a strong reference to a peer: */ |
| +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, |
| +					 struct sk_buff *skb); |
| +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, |
| +					 struct sk_buff *skb); |
| + |
| +#ifdef DEBUG |
| +bool wg_allowedips_selftest(void); |
| +#endif |
| + |
| +#endif /* _WG_ALLOWEDIPS_H */ |
| diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c |
| new file mode 100644 |
| index 000000000000..4956f0499c19 |
| |
| |
| @@ -0,0 +1,236 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "cookie.h" |
| +#include "peer.h" |
| +#include "device.h" |
| +#include "messages.h" |
| +#include "ratelimiter.h" |
| +#include "timers.h" |
| + |
| +#include <crypto/blake2s.h> |
| +#include <crypto/chacha20poly1305.h> |
| + |
| +#include <net/ipv6.h> |
| +#include <crypto/algapi.h> |
| + |
| +/* |
| + * Prepare @checker for use by @wg: initialise its lock, draw a fresh random |
| + * secret and stamp the secret's birthdate with the current coarse boottime. |
| + */ |
| +void wg_cookie_checker_init(struct cookie_checker *checker, |
| +			    struct wg_device *wg) |
| +{ |
| +	checker->device = wg; |
| +	init_rwsem(&checker->secret_lock); |
| +	checker->secret_birthdate = ktime_get_coarse_boottime_ns(); |
| +	get_random_bytes(checker->secret, NOISE_HASH_LEN); |
| +} |
| + |
| +/* |
| + * Labels hashed in front of a public key by precompute_key(). Each literal is |
| + * exactly COOKIE_KEY_LABEL_LEN characters, so the arrays carry no NUL. |
| + */ |
| +enum { COOKIE_KEY_LABEL_LEN = 8 }; |
| +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; |
| +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; |
| + |
| +/* |
| + * Derive @key as the unkeyed BLAKE2s hash of @label followed by @pubkey, |
| + * with an output length of NOISE_SYMMETRIC_KEY_LEN bytes. |
| + */ |
| +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| +			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN], |
| +			   const u8 label[COOKIE_KEY_LABEL_LEN]) |
| +{ |
| +	struct blake2s_state hash_state; |
| + |
| +	blake2s_init(&hash_state, NOISE_SYMMETRIC_KEY_LEN); |
| +	blake2s_update(&hash_state, label, COOKIE_KEY_LABEL_LEN); |
| +	blake2s_update(&hash_state, pubkey, NOISE_PUBLIC_KEY_LEN); |
| +	blake2s_final(&hash_state, key); |
| +} |
| + |
| +/* |
| + * Recompute the device-side cookie encryption key and mac1 key from the |
| + * device's static public key, or zero both when no identity is set. |
| + * |
| + * Must hold peer->handshake.static_identity->lock |
| + */ |
| +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) |
| +{ |
| +	const struct noise_static_identity *identity = |
| +		&checker->device->static_identity; |
| + |
| +	if (unlikely(!identity->has_identity)) { |
| +		memset(checker->cookie_encryption_key, 0, |
| +		       NOISE_SYMMETRIC_KEY_LEN); |
| +		memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); |
| +		return; |
| +	} |
| + |
| +	precompute_key(checker->cookie_encryption_key, |
| +		       identity->static_public, cookie_key_label); |
| +	precompute_key(checker->message_mac1_key, |
| +		       identity->static_public, mac1_key_label); |
| +} |
| + |
| +/* |
| + * Derive the peer-side cookie decryption key and mac1 key from the peer's |
| + * remote static public key. |
| + */ |
| +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) |
| +{ |
| +	const u8 *remote_static = peer->handshake.remote_static; |
| + |
| +	precompute_key(peer->latest_cookie.cookie_decryption_key, |
| +		       remote_static, cookie_key_label); |
| +	precompute_key(peer->latest_cookie.message_mac1_key, |
| +		       remote_static, mac1_key_label); |
| +} |
| + |
| +/* Zero the whole cookie state, then initialise its lock. */ |
| +void wg_cookie_init(struct cookie *cookie) |
| +{ |
| +	memset(cookie, 0, sizeof(struct cookie)); |
| +	init_rwsem(&cookie->lock); |
| +} |
| + |
| +/* |
| + * Write mac1 for @message into @mac1: keyed BLAKE2s over every byte of the |
| + * message that precedes the mac1 field of its trailing struct message_macs. |
| + * @len is the full message length including that trailer. |
| + */ |
| +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, |
| +			 const u8 key[NOISE_SYMMETRIC_KEY_LEN]) |
| +{ |
| +	const size_t covered = len - sizeof(struct message_macs) + |
| +			       offsetof(struct message_macs, mac1); |
| + |
| +	blake2s(mac1, message, key, COOKIE_LEN, covered, |
| +		NOISE_SYMMETRIC_KEY_LEN); |
| +} |
| + |
| +/* |
| + * Write mac2 for @message into @mac2: BLAKE2s keyed with @cookie over every |
| + * byte of the message that precedes the mac2 field of its trailing |
| + * struct message_macs. @len is the full message length including the trailer. |
| + */ |
| +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, |
| +			 const u8 cookie[COOKIE_LEN]) |
| +{ |
| +	const size_t covered = len - sizeof(struct message_macs) + |
| +			       offsetof(struct message_macs, mac2); |
| + |
| +	blake2s(mac2, message, cookie, COOKIE_LEN, covered, COOKIE_LEN); |
| +} |
| + |
| +/* |
| + * Compute the cookie for the sender of @skb into @cookie. |
| + * |
| + * The cookie is a BLAKE2s hash keyed with checker->secret over the packet's |
| + * source IP address (IPv4 or IPv6, chosen by skb->protocol) and its UDP |
| + * source port. The secret is regenerated first if it is older than |
| + * COOKIE_SECRET_MAX_AGE. |
| + */ |
| +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, |
| + struct cookie_checker *checker) |
| +{ |
| + struct blake2s_state state; |
| + |
| + /* The age check itself runs before secret_lock is taken. */ |
| + if (wg_birthdate_has_expired(checker->secret_birthdate, |
| + COOKIE_SECRET_MAX_AGE)) { |
| + down_write(&checker->secret_lock); |
| + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); |
| + get_random_bytes(checker->secret, NOISE_HASH_LEN); |
| + up_write(&checker->secret_lock); |
| + } |
| + |
| + /* Hold the secret stable while it keys the hash. */ |
| + down_read(&checker->secret_lock); |
| + |
| + blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); |
| + if (skb->protocol == htons(ETH_P_IP)) |
| + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, |
| + sizeof(struct in_addr)); |
| + else if (skb->protocol == htons(ETH_P_IPV6)) |
| + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, |
| + sizeof(struct in6_addr)); |
| + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); |
| + blake2s_final(&state, cookie); |
| + |
| + up_read(&checker->secret_lock); |
| +} |
| + |
| +/* |
| + * Classify the MACs found in the trailing struct message_macs of @skb. |
| + * |
| + * Returns INVALID_MAC when mac1 does not match. Returns |
| + * VALID_MAC_BUT_NO_COOKIE when mac1 matches and either @check_cookie is false |
| + * or mac2 does not match. Returns VALID_MAC_WITH_COOKIE_BUT_RATELIMITED when |
| + * both MACs match but wg_ratelimiter_allow() refuses the packet, and |
| + * VALID_MAC_WITH_COOKIE otherwise. Both comparisons use crypto_memneq(). |
| + */ |
| +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, |
| +						struct sk_buff *skb, |
| +						bool check_cookie) |
| +{ |
| +	struct message_macs *macs = (struct message_macs *) |
| +		(skb->data + skb->len - sizeof(*macs)); |
| +	u8 expected_mac[COOKIE_LEN]; |
| +	u8 cookie[COOKIE_LEN]; |
| + |
| +	compute_mac1(expected_mac, skb->data, skb->len, |
| +		     checker->message_mac1_key); |
| +	if (crypto_memneq(expected_mac, macs->mac1, COOKIE_LEN)) |
| +		return INVALID_MAC; |
| + |
| +	if (!check_cookie) |
| +		return VALID_MAC_BUT_NO_COOKIE; |
| + |
| +	make_cookie(cookie, skb, checker); |
| + |
| +	compute_mac2(expected_mac, skb->data, skb->len, cookie); |
| +	if (crypto_memneq(expected_mac, macs->mac2, COOKIE_LEN)) |
| +		return VALID_MAC_BUT_NO_COOKIE; |
| + |
| +	if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) |
| +		return VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; |
| + |
| +	return VALID_MAC_WITH_COOKIE; |
| +} |
| + |
| +/* |
| + * Fill in the struct message_macs that occupies the last bytes of @message |
| + * (@len is the full message length). |
| + * |
| + * mac1 is always computed, and a copy is kept in peer->latest_cookie so that a |
| + * later cookie reply can be authenticated against it. mac2 is computed only |
| + * while the peer holds a valid cookie that is younger than |
| + * COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY; otherwise it is zeroed. |
| + */ |
| +void wg_cookie_add_mac_to_packet(void *message, size_t len, |
| + struct wg_peer *peer) |
| +{ |
| + struct message_macs *macs = (struct message_macs *) |
| + ((u8 *)message + len - sizeof(*macs)); |
| + |
| + /* Write side: records the mac1 just sent. */ |
| + down_write(&peer->latest_cookie.lock); |
| + compute_mac1(macs->mac1, message, len, |
| + peer->latest_cookie.message_mac1_key); |
| + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); |
| + peer->latest_cookie.have_sent_mac1 = true; |
| + up_write(&peer->latest_cookie.lock); |
| + |
| + /* Read side: only consults the stored cookie. */ |
| + down_read(&peer->latest_cookie.lock); |
| + if (peer->latest_cookie.is_valid && |
| + !wg_birthdate_has_expired(peer->latest_cookie.birthdate, |
| + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) |
| + compute_mac2(macs->mac2, message, len, |
| + peer->latest_cookie.cookie); |
| + else |
| + memset(macs->mac2, 0, COOKIE_LEN); |
| + up_read(&peer->latest_cookie.lock); |
| +} |
| + |
| +/* |
| + * Build a cookie reply in @dst for the handshake message held in @skb. |
| + * |
| + * The header type and @index are stored, a random nonce is drawn, and the |
| + * cookie from make_cookie() is encrypted into dst->encrypted_cookie under the |
| + * checker's cookie_encryption_key, with the mac1 of the incoming message |
| + * passed to xchacha20poly1305_encrypt() alongside it. |
| + */ |
| +void wg_cookie_message_create(struct message_handshake_cookie *dst, |
| +			      struct sk_buff *skb, __le32 index, |
| +			      struct cookie_checker *checker) |
| +{ |
| +	struct message_macs *trailer = (struct message_macs *) |
| +		((u8 *)skb->data + skb->len - sizeof(*trailer)); |
| +	u8 plain_cookie[COOKIE_LEN]; |
| + |
| +	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); |
| +	dst->receiver_index = index; |
| +	get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); |
| + |
| +	make_cookie(plain_cookie, skb, checker); |
| +	xchacha20poly1305_encrypt(dst->encrypted_cookie, plain_cookie, |
| +				  COOKIE_LEN, trailer->mac1, COOKIE_LEN, |
| +				  dst->nonce, checker->cookie_encryption_key); |
| +} |
| + |
| +/* |
| + * Process a received cookie reply @src on device @wg. |
| + * |
| + * The peer is found through src->receiver_index; the function returns |
| + * silently if the lookup fails or if no mac1 has been sent to that peer. |
| + * Otherwise the cookie is decrypted and, on success, stored in |
| + * peer->latest_cookie with a fresh birthdate. A failed decryption is only |
| + * logged. |
| + */ |
| +void wg_cookie_message_consume(struct message_handshake_cookie *src, |
| + struct wg_device *wg) |
| +{ |
| + struct wg_peer *peer = NULL; |
| + u8 cookie[COOKIE_LEN]; |
| + bool ret; |
| + |
| + if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, |
| + INDEX_HASHTABLE_HANDSHAKE | |
| + INDEX_HASHTABLE_KEYPAIR, |
| + src->receiver_index, &peer))) |
| + return; |
| + |
| + down_read(&peer->latest_cookie.lock); |
| + if (unlikely(!peer->latest_cookie.have_sent_mac1)) { |
| + up_read(&peer->latest_cookie.lock); |
| + goto out; |
| + } |
| + /* Decrypt into a local buffer while only the read lock is held. */ |
| + ret = xchacha20poly1305_decrypt( |
| + cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), |
| + peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, |
| + peer->latest_cookie.cookie_decryption_key); |
| + up_read(&peer->latest_cookie.lock); |
| + |
| + if (ret) { |
| + /* Publish the new cookie and clear have_sent_mac1. */ |
| + down_write(&peer->latest_cookie.lock); |
| + memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); |
| + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); |
| + peer->latest_cookie.is_valid = true; |
| + peer->latest_cookie.have_sent_mac1 = false; |
| + up_write(&peer->latest_cookie.lock); |
| + } else { |
| + net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", |
| + wg->dev->name); |
| + } |
| + |
| +out: |
| + /* NOTE(review): presumably drops the reference taken by the lookup. */ |
| + wg_peer_put(peer); |
| +} |
| diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h |
| new file mode 100644 |
| index 000000000000..c4bd61ca03f2 |
| |
| |
| @@ -0,0 +1,59 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_COOKIE_H |
| +#define _WG_COOKIE_H |
| + |
| +#include "messages.h" |
| +#include <linux/rwsem.h> |
| + |
| +struct wg_peer; |
| + |
| +/* Per-device state used to issue cookies and to validate incoming MACs. */ |
| +struct cookie_checker { |
| + /* Random key for make_cookie(); regenerated once it is too old. */ |
| + u8 secret[NOISE_HASH_LEN]; |
| + /* Key that encrypts the cookie carried in a cookie reply. */ |
| + u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + /* Key that mac1 of incoming messages is checked against. */ |
| + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + /* ktime_get_coarse_boottime_ns() value taken when secret was drawn. */ |
| + u64 secret_birthdate; |
| + /* Guards secret and secret_birthdate. */ |
| + struct rw_semaphore secret_lock; |
| + /* Owning device. */ |
| + struct wg_device *device; |
| +}; |
| + |
| +/* Per-peer cookie state (held as peer->latest_cookie). */ |
| +struct cookie { |
| + /* ktime_get_coarse_boottime_ns() value taken when cookie was stored. */ |
| + u64 birthdate; |
| + /* True once a cookie reply has been decrypted and stored. */ |
| + bool is_valid; |
| + /* Most recent cookie received from the peer; keys mac2. */ |
| + u8 cookie[COOKIE_LEN]; |
| + /* Set when a mac1 is sent, cleared when a cookie reply is accepted. */ |
| + bool have_sent_mac1; |
| + /* Copy of the last mac1 sent; used when decrypting a cookie reply. */ |
| + u8 last_mac1_sent[COOKIE_LEN]; |
| + /* Both keys are derived from the peer's remote static public key. */ |
| + u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + /* Guards the fields above. */ |
| + struct rw_semaphore lock; |
| +}; |
| + |
| +/* Result of wg_cookie_validate_packet(). */ |
| +enum cookie_mac_state { |
| + /* mac1 did not match. */ |
| + INVALID_MAC, |
| + /* mac1 matched; mac2 was not checked or did not match. */ |
| + VALID_MAC_BUT_NO_COOKIE, |
| + /* Both MACs matched but the rate limiter refused the packet. */ |
| + VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, |
| + /* Both MACs matched and the rate limiter allowed the packet. */ |
| + VALID_MAC_WITH_COOKIE |
| +}; |
| + |
| +/* Initialise @checker for @wg with a fresh random secret. */ |
| +void wg_cookie_checker_init(struct cookie_checker *checker, |
| +			    struct wg_device *wg); |
| +/* Re-derive the device-side or peer-side keys from the static public key. */ |
| +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker); |
| +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer); |
| +/* Zero @cookie and initialise its lock. */ |
| +void wg_cookie_init(struct cookie *cookie); |
| + |
| +/* Check mac1 (and mac2 when @check_cookie is set) at the end of @skb. */ |
| +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, |
| +						struct sk_buff *skb, |
| +						bool check_cookie); |
| +/* Write mac1/mac2 into the trailer of an outgoing @message of @len bytes. */ |
| +void wg_cookie_add_mac_to_packet(void *message, size_t len, |
| +				 struct wg_peer *peer); |
| + |
| +/* Build a cookie reply into @dst for the handshake message in @skb. */ |
| +void wg_cookie_message_create(struct message_handshake_cookie *dst, |
| +			      struct sk_buff *skb, __le32 index, |
| +			      struct cookie_checker *checker); |
| +/* Process a received cookie reply @src. */ |
| +void wg_cookie_message_consume(struct message_handshake_cookie *src, |
| +			       struct wg_device *wg); |
| + |
| +#endif /* _WG_COOKIE_H */ |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| new file mode 100644 |
| index 000000000000..16b19824b9ad |
| |
| |
| @@ -0,0 +1,458 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "queueing.h" |
| +#include "socket.h" |
| +#include "timers.h" |
| +#include "device.h" |
| +#include "ratelimiter.h" |
| +#include "peer.h" |
| +#include "messages.h" |
| + |
| +#include <linux/module.h> |
| +#include <linux/rtnetlink.h> |
| +#include <linux/inet.h> |
| +#include <linux/netdevice.h> |
| +#include <linux/inetdevice.h> |
| +#include <linux/if_arp.h> |
| +#include <linux/icmp.h> |
| +#include <linux/suspend.h> |
| +#include <net/icmp.h> |
| +#include <net/rtnetlink.h> |
| +#include <net/ip_tunnels.h> |
| +#include <net/addrconf.h> |
| + |
| +/* |
| + * Every WireGuard device created by wg_newlink(). wg_destruct() removes |
| + * entries and wg_pm_notification() walks the list, both under rtnl_lock(). |
| + */ |
| +static LIST_HEAD(device_list); |
| + |
| +/* |
| + * ndo_open handler. |
| + * |
| + * Turns off ICMP redirects (for this device and for the "all" entry of its |
| + * network namespace) when the device has IPv4 configuration, sets the IPv6 |
| + * address generation mode to none, and opens the sockets on |
| + * wg->incoming_port. Each peer then has its staged packets sent, plus a |
| + * keepalive if it has a persistent keepalive interval. |
| + * |
| + * Returns 0, or the negative value returned by wg_socket_init(). |
| + */ |
| +static int wg_open(struct net_device *dev) |
| +{ |
| + struct in_device *dev_v4 = __in_dev_get_rtnl(dev); |
| + struct inet6_dev *dev_v6 = __in6_dev_get(dev); |
| + struct wg_device *wg = netdev_priv(dev); |
| + struct wg_peer *peer; |
| + int ret; |
| + |
| + if (dev_v4) { |
| + /* At some point we might put this check near the ip_rt_send_ |
| + * redirect call of ip_forward in net/ipv4/ip_forward.c, similar |
| + * to the current secpath check. |
| + */ |
| + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); |
| + IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; |
| + } |
| + if (dev_v6) |
| + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; |
| + |
| + ret = wg_socket_init(wg, wg->incoming_port); |
| + if (ret < 0) |
| + return ret; |
| + /* The peer list is walked under device_update_lock. */ |
| + mutex_lock(&wg->device_update_lock); |
| + list_for_each_entry(peer, &wg->peer_list, peer_list) { |
| + wg_packet_send_staged_packets(peer); |
| + if (peer->persistent_keepalive_interval) |
| + wg_packet_send_keepalive(peer); |
| + } |
| + mutex_unlock(&wg->device_update_lock); |
| + return 0; |
| +} |
| + |
| +#ifdef CONFIG_PM_SLEEP |
| +/* |
| + * Power-management notifier callback. |
| + * |
| + * On PM_HIBERNATION_PREPARE or PM_SUSPEND_PREPARE, every peer of every device |
| + * on device_list has its zero-key-material timer deleted and its handshake |
| + * and keypairs cleared. Nothing is done for other actions, or when the kernel |
| + * is built with CONFIG_PM_AUTOSLEEP or CONFIG_ANDROID. Always returns 0. |
| + */ |
| +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, |
| + void *data) |
| +{ |
| + struct wg_device *wg; |
| + struct wg_peer *peer; |
| + |
| + /* If the machine is constantly suspending and resuming, as part of |
| + * its normal operation rather than as a somewhat rare event, then we |
| + * don't actually want to clear keys. |
| + */ |
| + if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) |
| + return 0; |
| + |
| + if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) |
| + return 0; |
| + |
| + /* rtnl_lock covers device_list; device_update_lock covers peer_list. */ |
| + rtnl_lock(); |
| + list_for_each_entry(wg, &device_list, device_list) { |
| + mutex_lock(&wg->device_update_lock); |
| + list_for_each_entry(peer, &wg->peer_list, peer_list) { |
| + del_timer(&peer->timer_zero_key_material); |
| + wg_noise_handshake_clear(&peer->handshake); |
| + wg_noise_keypairs_clear(&peer->keypairs); |
| + } |
| + mutex_unlock(&wg->device_update_lock); |
| + } |
| + rtnl_unlock(); |
| + /* Wait for outstanding RCU callbacks before returning. */ |
| + rcu_barrier(); |
| + return 0; |
| +} |
| + |
| +/* Registered in wg_device_init(), unregistered in wg_device_uninit(). */ |
| +static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; |
| +#endif |
| + |
| +/* |
| + * ndo_stop handler. |
| + * |
| + * For every peer, under device_update_lock: purge staged packets, stop the |
| + * timers, clear the handshake and keypairs, and reset the last-sent-handshake |
| + * stamp. Afterwards the queue of incoming handshakes is purged and the sockets |
| + * are replaced with none. Always returns 0. |
| + */ |
| +static int wg_stop(struct net_device *dev) |
| +{ |
| + struct wg_device *wg = netdev_priv(dev); |
| + struct wg_peer *peer; |
| + |
| + mutex_lock(&wg->device_update_lock); |
| + list_for_each_entry(peer, &wg->peer_list, peer_list) { |
| + wg_packet_purge_staged_packets(peer); |
| + wg_timers_stop(peer); |
| + wg_noise_handshake_clear(&peer->handshake); |
| + wg_noise_keypairs_clear(&peer->keypairs); |
| + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); |
| + } |
| + mutex_unlock(&wg->device_update_lock); |
| + skb_queue_purge(&wg->incoming_handshakes); |
| + wg_socket_reinit(wg, NULL, NULL); |
| + return 0; |
| +} |
| + |
| +/* |
| + * ndo_start_xmit handler: stage an outgoing packet on the peer that owns its |
| + * destination address and kick transmission. |
| + * |
| + * Steps: |
| + *  1. Reject packets whose IP header does not agree with skb->protocol. |
| + *  2. Find the peer through the allowed-IPs table (destination address). |
| + *  3. Require the peer to have an AF_INET or AF_INET6 endpoint. |
| + *  4. Split GSO packets into segments. |
| + *  5. Unshare each packet, drop its dst, record the MTU in its control block |
| + *     and collect it on a local list. |
| + *  6. Under the staged-queue lock, drop the oldest staged packets while the |
| + *     queue exceeds MAX_STAGED_PACKETS, then append the new ones. |
| + *  7. Ask for the staged packets to be sent and release the peer reference. |
| + * |
| + * Returns NETDEV_TX_OK on success. On the error paths the packet is freed, |
| + * tx_errors is incremented, an ICMP/ICMPv6 unreachable is sent, and a |
| + * negative errno is returned. |
| + */ |
| +static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) |
| +{ |
| + struct wg_device *wg = netdev_priv(dev); |
| + struct sk_buff_head packets; |
| + struct wg_peer *peer; |
| + struct sk_buff *next; |
| + sa_family_t family; |
| + u32 mtu; |
| + int ret; |
| + |
| + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { |
| + ret = -EPROTONOSUPPORT; |
| + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); |
| + goto err; |
| + } |
| + |
| + peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); |
| + if (unlikely(!peer)) { |
| + ret = -ENOKEY; |
| + if (skb->protocol == htons(ETH_P_IP)) |
| + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", |
| + dev->name, &ip_hdr(skb)->daddr); |
| + else if (skb->protocol == htons(ETH_P_IPV6)) |
| + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", |
| + dev->name, &ipv6_hdr(skb)->daddr); |
| + goto err; |
| + } |
| + |
| + family = READ_ONCE(peer->endpoint.addr.sa_family); |
| + if (unlikely(family != AF_INET && family != AF_INET6)) { |
| + ret = -EDESTADDRREQ; |
| + net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", |
| + dev->name, peer->internal_id); |
| + goto err_peer; |
| + } |
| + |
| + /* Prefer the route's MTU; fall back to the device MTU without a dst. */ |
| + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
| + |
| + __skb_queue_head_init(&packets); |
| + if (!skb_is_gso(skb)) { |
| + skb_mark_not_on_list(skb); |
| + } else { |
| + struct sk_buff *segs = skb_gso_segment(skb, 0); |
| + |
| + if (unlikely(IS_ERR(segs))) { |
| + ret = PTR_ERR(segs); |
| + goto err_peer; |
| + } |
| + /* The original skb is replaced by its list of segments. */ |
| + dev_kfree_skb(skb); |
| + skb = segs; |
| + } |
| + |
| + skb_list_walk_safe(skb, skb, next) { |
| + skb_mark_not_on_list(skb); |
| + |
| + /* A packet that cannot be unshared is skipped. */ |
| + skb = skb_share_check(skb, GFP_ATOMIC); |
| + if (unlikely(!skb)) |
| + continue; |
| + |
| + /* We only need to keep the original dst around for icmp, |
| + * so at this point we're in a position to drop it. |
| + */ |
| + skb_dst_drop(skb); |
| + |
| + PACKET_CB(skb)->mtu = mtu; |
| + |
| + __skb_queue_tail(&packets, skb); |
| + } |
| + |
| + spin_lock_bh(&peer->staged_packet_queue.lock); |
| + /* If the queue is getting too big, we start removing the oldest packets |
| + * until it's small again. We do this before adding the new packet, so |
| + * we don't remove GSO segments that are in excess. |
| + */ |
| + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { |
| + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); |
| + ++dev->stats.tx_dropped; |
| + } |
| + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); |
| + spin_unlock_bh(&peer->staged_packet_queue.lock); |
| + |
| + wg_packet_send_staged_packets(peer); |
| + |
| + wg_peer_put(peer); |
| + return NETDEV_TX_OK; |
| + |
| +err_peer: |
| + wg_peer_put(peer); |
| +err: |
| + ++dev->stats.tx_errors; |
| + if (skb->protocol == htons(ETH_P_IP)) |
| + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
| + else if (skb->protocol == htons(ETH_P_IPV6)) |
| + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); |
| + kfree_skb(skb); |
| + return ret; |
| +} |
| + |
| +/* |
| + * Net device callbacks; wg_netdevice_notification() also compares against |
| + * this table to recognise WireGuard devices. |
| + */ |
| +static const struct net_device_ops netdev_ops = { |
| + .ndo_open = wg_open, |
| + .ndo_stop = wg_stop, |
| + .ndo_start_xmit = wg_xmit, |
| + .ndo_get_stats64 = ip_tunnel_get_stats64 |
| +}; |
| + |
| +/* |
| + * Final teardown of a device; installed as dev->priv_destructor by |
| + * wg_newlink(). |
| + * |
| + * Unlinks the device from device_list, closes the sockets, removes all peers, |
| + * destroys the workqueues and packet queues, waits for RCU callbacks, and |
| + * then releases the remaining per-device allocations before freeing the net |
| + * device itself. |
| + */ |
| +static void wg_destruct(struct net_device *dev) |
| +{ |
| + struct wg_device *wg = netdev_priv(dev); |
| + |
| + rtnl_lock(); |
| + list_del(&wg->device_list); |
| + rtnl_unlock(); |
| + mutex_lock(&wg->device_update_lock); |
| + wg->incoming_port = 0; |
| + wg_socket_reinit(wg, NULL, NULL); |
| + /* The final references are cleared in the below calls to destroy_workqueue. */ |
| + wg_peer_remove_all(wg); |
| + destroy_workqueue(wg->handshake_receive_wq); |
| + destroy_workqueue(wg->handshake_send_wq); |
| + destroy_workqueue(wg->packet_crypt_wq); |
| + wg_packet_queue_free(&wg->decrypt_queue, true); |
| + wg_packet_queue_free(&wg->encrypt_queue, true); |
| + rcu_barrier(); /* Wait for all the peers to be actually freed. */ |
| + wg_ratelimiter_uninit(); |
| + /* Wipe the static identity with a memset that cannot be elided. */ |
| + memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); |
| + skb_queue_purge(&wg->incoming_handshakes); |
| + free_percpu(dev->tstats); |
| + free_percpu(wg->incoming_handshakes_worker); |
| + /* Drop the namespace reference taken by wg_netdevice_notification(). */ |
| + if (wg->have_creating_net_ref) |
| + put_net(wg->creating_net); |
| + kvfree(wg->index_hashtable); |
| + kvfree(wg->peer_hashtable); |
| + mutex_unlock(&wg->device_update_lock); |
| + |
| + pr_debug("%s: Interface deleted\n", dev->name); |
| + free_netdev(dev); |
| +} |
| + |
| +/* Device type named after the module; attached to each device in wg_setup(). */ |
| +static const struct device_type device_type = { .name = KBUILD_MODNAME }; |
| + |
| +/* |
| + * rtnl_link_ops.setup handler: give a new net device its static properties |
| + * (ops table, no link-layer header or address, point-to-point, no ARP, no |
| + * queue, offload feature flags, default MTU) and zero the private |
| + * struct wg_device. |
| + */ |
| +static void wg_setup(struct net_device *dev) |
| +{ |
| + struct wg_device *wg = netdev_priv(dev); |
| + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | |
| + NETIF_F_SG | NETIF_F_GSO | |
| + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; |
| + |
| + dev->netdev_ops = &netdev_ops; |
| + dev->hard_header_len = 0; |
| + dev->addr_len = 0; |
| + dev->needed_headroom = DATA_PACKET_HEAD_ROOM; |
| + dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); |
| + dev->type = ARPHRD_NONE; |
| + dev->flags = IFF_POINTOPOINT | IFF_NOARP; |
| + dev->priv_flags |= IFF_NO_QUEUE; |
| + dev->features |= NETIF_F_LLTX; |
| + dev->features |= WG_NETDEV_FEATURES; |
| + dev->hw_features |= WG_NETDEV_FEATURES; |
| + dev->hw_enc_features |= WG_NETDEV_FEATURES; |
| + /* Ethernet payload minus the minimum message size, the UDP header and |
| + * the larger of the IPv4 and IPv6 headers. |
| + */ |
| + dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - |
| + sizeof(struct udphdr) - |
| + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); |
| + |
| + SET_NETDEV_DEVTYPE(dev, &device_type); |
| + |
| + /* We need to keep the dst around in case of icmp replies. */ |
| + netif_keep_dst(dev); |
| + |
| + memset(wg, 0, sizeof(*wg)); |
| + wg->dev = dev; |
| +} |
| + |
| +/* |
| + * rtnl_link_ops.newlink handler: allocate everything a device needs and |
| + * register it. |
| + * |
| + * Resources are acquired in a fixed order (hashtables, per-CPU stats, |
| + * handshake worker, three workqueues, encrypt/decrypt queues, rate limiter, |
| + * netdevice registration). On failure the labels at the bottom release them |
| + * in reverse order. |
| + * |
| + * Returns the result of register_netdevice() on success, -ENOMEM when an |
| + * allocation fails, or the negative value returned by the failing init call. |
| + */ |
| +static int wg_newlink(struct net *src_net, struct net_device *dev, |
| + struct nlattr *tb[], struct nlattr *data[], |
| + struct netlink_ext_ack *extack) |
| +{ |
| + struct wg_device *wg = netdev_priv(dev); |
| + int ret = -ENOMEM; |
| + |
| + wg->creating_net = src_net; |
| + init_rwsem(&wg->static_identity.lock); |
| + mutex_init(&wg->socket_update_lock); |
| + mutex_init(&wg->device_update_lock); |
| + skb_queue_head_init(&wg->incoming_handshakes); |
| + wg_allowedips_init(&wg->peer_allowedips); |
| + wg_cookie_checker_init(&wg->cookie_checker, wg); |
| + INIT_LIST_HEAD(&wg->peer_list); |
| + wg->device_update_gen = 1; |
| + |
| + wg->peer_hashtable = wg_pubkey_hashtable_alloc(); |
| + if (!wg->peer_hashtable) |
| + return ret; |
| + |
| + wg->index_hashtable = wg_index_hashtable_alloc(); |
| + if (!wg->index_hashtable) |
| + goto err_free_peer_hashtable; |
| + |
| + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); |
| + if (!dev->tstats) |
| + goto err_free_index_hashtable; |
| + |
| + wg->incoming_handshakes_worker = |
| + wg_packet_percpu_multicore_worker_alloc( |
| + wg_packet_handshake_receive_worker, wg); |
| + if (!wg->incoming_handshakes_worker) |
| + goto err_free_tstats; |
| + |
| + /* Note: both handshake workqueues use the "wg-kex-%s" name format. */ |
| + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", |
| + WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); |
| + if (!wg->handshake_receive_wq) |
| + goto err_free_incoming_handshakes; |
| + |
| + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", |
| + WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); |
| + if (!wg->handshake_send_wq) |
| + goto err_destroy_handshake_receive; |
| + |
| + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", |
| + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); |
| + if (!wg->packet_crypt_wq) |
| + goto err_destroy_handshake_send; |
| + |
| + /* From here on ret carries the error code of the failing call. */ |
| + ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, |
| + true, MAX_QUEUED_PACKETS); |
| + if (ret < 0) |
| + goto err_destroy_packet_crypt; |
| + |
| + ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, |
| + true, MAX_QUEUED_PACKETS); |
| + if (ret < 0) |
| + goto err_free_encrypt_queue; |
| + |
| + ret = wg_ratelimiter_init(); |
| + if (ret < 0) |
| + goto err_free_decrypt_queue; |
| + |
| + ret = register_netdevice(dev); |
| + if (ret < 0) |
| + goto err_uninit_ratelimiter; |
| + |
| + list_add(&wg->device_list, &device_list); |
| + |
| + /* We wait until the end to assign priv_destructor, so that |
| + * register_netdevice doesn't call it for us if it fails. |
| + */ |
| + dev->priv_destructor = wg_destruct; |
| + |
| + pr_debug("%s: Interface created\n", dev->name); |
| + return ret; |
| + |
| +err_uninit_ratelimiter: |
| + wg_ratelimiter_uninit(); |
| +err_free_decrypt_queue: |
| + wg_packet_queue_free(&wg->decrypt_queue, true); |
| +err_free_encrypt_queue: |
| + wg_packet_queue_free(&wg->encrypt_queue, true); |
| +err_destroy_packet_crypt: |
| + destroy_workqueue(wg->packet_crypt_wq); |
| +err_destroy_handshake_send: |
| + destroy_workqueue(wg->handshake_send_wq); |
| +err_destroy_handshake_receive: |
| + destroy_workqueue(wg->handshake_receive_wq); |
| +err_free_incoming_handshakes: |
| + free_percpu(wg->incoming_handshakes_worker); |
| +err_free_tstats: |
| + free_percpu(dev->tstats); |
| +err_free_index_hashtable: |
| + kvfree(wg->index_hashtable); |
| +err_free_peer_hashtable: |
| + kvfree(wg->peer_hashtable); |
| + return ret; |
| +} |
| + |
| +/* |
| + * rtnetlink link type named after the module; each device gets a |
| + * struct wg_device as its private area. |
| + */ |
| +static struct rtnl_link_ops link_ops __read_mostly = { |
| + .kind = KBUILD_MODNAME, |
| + .priv_size = sizeof(struct wg_device), |
| + .setup = wg_setup, |
| + .newlink = wg_newlink, |
| +}; |
| + |
| +/* |
| + * Netdevice notifier callback; only NETDEV_REGISTER events for WireGuard |
| + * devices are acted on. |
| + * |
| + * A reference on wg->creating_net is held exactly while the device lives in |
| + * a different network namespace: it is taken when the device is registered |
| + * elsewhere without one, and dropped when the device is registered in |
| + * creating_net while holding one. Always returns 0. |
| + */ |
| +static int wg_netdevice_notification(struct notifier_block *nb, |
| +				     unsigned long action, void *data) |
| +{ |
| +	struct netdev_notifier_info *info = data; |
| +	struct net_device *dev = info->dev; |
| +	struct wg_device *wg = netdev_priv(dev); |
| +	bool in_creating_net; |
| + |
| +	ASSERT_RTNL(); |
| + |
| +	if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) |
| +		return 0; |
| + |
| +	in_creating_net = dev_net(dev) == wg->creating_net; |
| +	if (in_creating_net && wg->have_creating_net_ref) { |
| +		put_net(wg->creating_net); |
| +		wg->have_creating_net_ref = false; |
| +	} else if (!in_creating_net && !wg->have_creating_net_ref) { |
| +		wg->have_creating_net_ref = true; |
| +		get_net(wg->creating_net); |
| +	} |
| +	return 0; |
| +} |
| + |
| +/* Registered in wg_device_init(), unregistered in wg_device_uninit(). */ |
| +static struct notifier_block netdevice_notifier = { |
| + .notifier_call = wg_netdevice_notification |
| +}; |
| + |
| +/* |
| + * Module-level setup: register the PM notifier (CONFIG_PM_SLEEP only), the |
| + * netdevice notifier and the rtnetlink link type, in that order. |
| + * |
| + * Returns 0 on success. If a step fails, the steps already completed are |
| + * undone in reverse order and that step's error code is returned. |
| + */ |
| +int __init wg_device_init(void) |
| +{ |
| +	int ret; |
| + |
| +#ifdef CONFIG_PM_SLEEP |
| +	ret = register_pm_notifier(&pm_notifier); |
| +	if (ret) |
| +		return ret; |
| +#endif |
| + |
| +	ret = register_netdevice_notifier(&netdevice_notifier); |
| +	if (!ret) { |
| +		ret = rtnl_link_register(&link_ops); |
| +		if (!ret) |
| +			return 0; |
| +		unregister_netdevice_notifier(&netdevice_notifier); |
| +	} |
| + |
| +#ifdef CONFIG_PM_SLEEP |
| +	unregister_pm_notifier(&pm_notifier); |
| +#endif |
| +	return ret; |
| +} |
| + |
| +/* |
| + * Module-level teardown: undo wg_device_init() in reverse order, then wait |
| + * for outstanding RCU callbacks. |
| + */ |
| +void wg_device_uninit(void) |
| +{ |
| +	rtnl_link_unregister(&link_ops); |
| +	unregister_netdevice_notifier(&netdevice_notifier); |
| +#ifdef CONFIG_PM_SLEEP |
| +	unregister_pm_notifier(&pm_notifier); |
| +#endif |
| +	rcu_barrier(); |
| +} |
| diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h |
| new file mode 100644 |
| index 000000000000..c91f3051c5c7 |
| |
| |
| @@ -0,0 +1,73 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_DEVICE_H |
| +#define _WG_DEVICE_H |
| + |
| +#include "noise.h" |
| +#include "allowedips.h" |
| +#include "peerlookup.h" |
| +#include "cookie.h" |
| + |
| +#include <linux/types.h> |
| +#include <linux/netdevice.h> |
| +#include <linux/workqueue.h> |
| +#include <linux/mutex.h> |
| +#include <linux/net.h> |
| +#include <linux/ptr_ring.h> |
| + |
| +struct wg_device; |
| + |
| +/* |
| + * A work item paired with an opaque context pointer; used as a per-CPU |
| + * object by struct crypt_queue and struct wg_device. |
| + */ |
| +struct multicore_worker { |
| + void *ptr; |
| + struct work_struct work; |
| +}; |
| + |
| +struct crypt_queue { |
| + struct ptr_ring ring; |
| + union { /* anonymous union: the per-CPU worker form and the single work item share storage, so only one is valid per queue */ |
| + struct { |
| + struct multicore_worker __percpu *worker; |
| + int last_cpu; |
| + }; |
| + struct work_struct work; |
| + }; |
| +}; |
| + |
| +struct wg_device { /* per-interface state; obtained from a net_device via netdev_priv() in lookup_interface() */ |
| + struct net_device *dev; |
| + struct crypt_queue encrypt_queue, decrypt_queue; |
| + struct sock __rcu *sock4, *sock6; |
| + struct net *creating_net; /* its user_ns is what wg_set_device() checks CAP_NET_ADMIN against */ |
| + struct noise_static_identity static_identity; |
| + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; |
| + struct workqueue_struct *packet_crypt_wq; |
| + struct sk_buff_head incoming_handshakes; |
| + int incoming_handshake_cpu; |
| + struct multicore_worker __percpu *incoming_handshakes_worker; |
| + struct cookie_checker cookie_checker; |
| + struct pubkey_hashtable *peer_hashtable; /* peers looked up by public key */ |
| + struct index_hashtable *index_hashtable; |
| + struct allowedips peer_allowedips; |
| + struct mutex device_update_lock, socket_update_lock; /* device_update_lock is held by the netlink dump and set handlers */ |
| + struct list_head device_list, peer_list; |
| + unsigned int num_peers, device_update_gen; /* device_update_gen is bumped by every wg_set_device() and reported as the dump sequence */ |
| + u32 fwmark; |
| + u16 incoming_port; /* reported to userspace as WGDEVICE_A_LISTEN_PORT */ |
| + bool have_creating_net_ref; /* set once get_net(creating_net) has been taken */ |
| +}; |
| + |
| +int wg_device_init(void); /* returns 0, or the error of the first registration that failed */ |
| +void wg_device_uninit(void); /* unregisters everything wg_device_init() registered */ |
| + |
| +/* Walk an skb list linked through ->next. The successor is sampled before the |
| + * body runs, so the body may unlink or free 'skb'; a NULL 'first' runs zero times. |
| + * Later this can move into include/linux/skbuff.h, where ~30 drivers that walk GSO segs could share it. |
| + */ |
| +#define skb_list_walk_safe(first, skb, next_skb) \ |
| + for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ |
| + (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) |
| + |
| +#endif /* _WG_DEVICE_H */ |
| diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c |
| new file mode 100644 |
| index 000000000000..10c0a40f6a9e |
| |
| |
| @@ -0,0 +1,64 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "version.h" |
| +#include "device.h" |
| +#include "noise.h" |
| +#include "queueing.h" |
| +#include "ratelimiter.h" |
| +#include "netlink.h" |
| + |
| +#include <uapi/linux/wireguard.h> |
| + |
| +#include <linux/version.h> |
| +#include <linux/init.h> |
| +#include <linux/module.h> |
| +#include <linux/genetlink.h> |
| +#include <net/rtnetlink.h> |
| + |
| +/* Module entry point: noise constants first, then device ops, then genetlink. */ |
| +static int __init mod_init(void) |
| +{ |
| + int err; |
| + |
| +#ifdef DEBUG |
| + /* Debug builds refuse to load unless every selftest passes. */ |
| + if (!(wg_allowedips_selftest() && wg_packet_counter_selftest() && |
| + wg_ratelimiter_selftest())) |
| + return -ENOTRECOVERABLE; |
| +#endif |
| + wg_noise_init(); |
| + |
| + err = wg_device_init(); |
| + if (err < 0) |
| + return err; |
| + |
| + err = wg_genetlink_init(); |
| + if (err < 0) { |
| + /* Undo wg_device_init() before reporting the failure. */ |
| + wg_device_uninit(); |
| + return err; |
| + } |
| + |
| + pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); |
| + pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n"); |
| + |
| + return 0; |
| +} |
| + |
| +static void __exit mod_exit(void) |
| +{ |
| + wg_genetlink_uninit(); /* tear down in the reverse order of mod_init() */ |
| + wg_device_uninit(); |
| +} |
| + |
| +module_init(mod_init); /* run when the module is loaded */ |
| +module_exit(mod_exit); /* run when the module is unloaded */ |
| +MODULE_LICENSE("GPL v2"); |
| +MODULE_DESCRIPTION("WireGuard secure network tunnel"); |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); |
| +MODULE_VERSION(WIREGUARD_VERSION); |
| +MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME); /* same name lookup_interface() compares rtnl_link_ops->kind against */ |
| +MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME); /* same name the generic netlink family is registered under */ |
| diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h |
| new file mode 100644 |
| index 000000000000..b8a7b9ce32ba |
| |
| |
| @@ -0,0 +1,128 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_MESSAGES_H |
| +#define _WG_MESSAGES_H |
| + |
| +#include <crypto/curve25519.h> |
| +#include <crypto/chacha20poly1305.h> |
| +#include <crypto/blake2s.h> |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/param.h> |
| +#include <linux/skbuff.h> |
| + |
| +enum noise_lengths { /* byte sizes, mostly taken from the crypto library headers */ |
| + NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE, |
| + NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE, |
| + NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), /* 12 bytes: the size of one u64 plus one u32 */ |
| + NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE, |
| + NOISE_HASH_LEN = BLAKE2S_HASH_SIZE |
| +}; |
| + |
| +#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) /* ciphertext size: plaintext length plus one authentication tag */ |
| + |
| +enum cookie_values { |
| + COOKIE_SECRET_MAX_AGE = 2 * 60, /* NOTE(review): presumably seconds (two minutes) -- confirm at the use site */ |
| + COOKIE_SECRET_LATENCY = 5, |
| + COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE, |
| + COOKIE_LEN = 16 /* bytes; also the size of mac1 and mac2 in struct message_macs */ |
| +}; |
| + |
| +enum counter_values { |
| + COUNTER_BITS_TOTAL = 2048, |
| + COUNTER_REDUNDANT_BITS = BITS_PER_LONG, |
| + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS /* 2048 minus one machine word's worth of bits */ |
| +}; |
| + |
| +enum limits { |
| + REKEY_AFTER_MESSAGES = 1ULL << 60, |
| + REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, |
| + REKEY_TIMEOUT = 5, /* NOTE(review): the time limits here look like seconds, except the one named *_JIFFIES -- confirm at the use sites */ |
| + REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, /* a third of a second, expressed in jiffies */ |
| + REKEY_AFTER_TIME = 120, |
| + REJECT_AFTER_TIME = 180, |
| + INITIATIONS_PER_SECOND = 50, |
| + MAX_PEERS_PER_DEVICE = 1U << 20, /* 1048576 */ |
| + KEEPALIVE_TIMEOUT = 10, |
| + MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT, /* 18 while REKEY_TIMEOUT is 5 */ |
| + MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ |
| + MAX_STAGED_PACKETS = 128, |
| + MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ |
| +}; |
| + |
| +enum message_type { /* NOTE(review): presumably the values carried in message_header.type -- confirm */ |
| + MESSAGE_INVALID = 0, |
| + MESSAGE_HANDSHAKE_INITIATION = 1, |
| + MESSAGE_HANDSHAKE_RESPONSE = 2, |
| + MESSAGE_HANDSHAKE_COOKIE = 3, |
| + MESSAGE_DATA = 4 |
| +}; |
| + |
| +struct message_header { |
| + /* The actual layout of this that we want is: |
| + * u8 type |
| + * u8 reserved_zero[3] |
| + * |
| + * Storing the type as one little-endian 32-bit word gives exactly that |
| + * byte layout for types below 256, and it makes checking faster. |
| + */ |
| + __le32 type; |
| +}; |
| + |
| +struct message_macs { /* trailing MAC pair of the initiation and response messages */ |
| + u8 mac1[COOKIE_LEN]; |
| + u8 mac2[COOKIE_LEN]; |
| +}; |
| + |
| +struct message_handshake_initiation { |
| + struct message_header header; |
| + __le32 sender_index; |
| + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; |
| + u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)]; /* public-key-sized plaintext plus auth tag */ |
| + u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)]; /* timestamp-sized plaintext plus auth tag */ |
| + struct message_macs macs; |
| +}; |
| + |
| +struct message_handshake_response { |
| + struct message_header header; |
| + __le32 sender_index; |
| + __le32 receiver_index; |
| + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; |
| + u8 encrypted_nothing[noise_encrypted_len(0)]; /* empty plaintext: holds only the auth tag */ |
| + struct message_macs macs; |
| +}; |
| + |
| +struct message_handshake_cookie { /* unlike the two messages above, carries no message_macs */ |
| + struct message_header header; |
| + __le32 receiver_index; |
| + u8 nonce[COOKIE_NONCE_LEN]; |
| + u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)]; /* cookie-sized plaintext plus auth tag */ |
| +}; |
| + |
| +struct message_data { |
| + struct message_header header; |
| + __le32 key_idx; |
| + __le64 counter; |
| + u8 encrypted_data[]; /* flexible array member: the ciphertext follows the fixed fields */ |
| +}; |
| + |
| +#define message_data_len(plain_len) \ |
| + (noise_encrypted_len(plain_len) + sizeof(struct message_data)) /* fixed fields + plaintext length + auth tag */ |
| + |
| +enum message_alignments { |
| + MESSAGE_PADDING_MULTIPLE = 16, |
| + MESSAGE_MINIMUM_LENGTH = message_data_len(0) /* a data message with empty plaintext: fixed fields plus a bare auth tag */ |
| +}; |
| + |
| +#define SKB_HEADER_LEN \ |
| + (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \ |
| + sizeof(struct udphdr) + NET_SKB_PAD) /* larger of the IPv4/IPv6 headers, plus UDP, plus NET_SKB_PAD */ |
| +#define DATA_PACKET_HEAD_ROOM \ |
| + ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4) /* rounded up to a multiple of 4 */ |
| + |
| +enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ }; |
| + |
| +#endif /* _WG_MESSAGES_H */ |
| diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c |
| new file mode 100644 |
| index 000000000000..0739a2cd1920 |
| |
| |
| @@ -0,0 +1,648 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "netlink.h" |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "socket.h" |
| +#include "queueing.h" |
| +#include "messages.h" |
| + |
| +#include <uapi/linux/wireguard.h> |
| + |
| +#include <linux/if.h> |
| +#include <net/genetlink.h> |
| +#include <net/sock.h> |
| +#include <crypto/algapi.h> |
| + |
| +static struct genl_family genl_family; /* forward declaration; the initialized definition follows genl_ops */ |
| + |
| +static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { |
| + [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, |
| + [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, |
| + [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, /* private keys reuse the public-key length constant */ |
| + [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, |
| + [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, |
| + [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, |
| + [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, |
| + [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } /* each nested entry is parsed with peer_policy in wg_set_device() */ |
| +}; |
| + |
| +static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { |
| + [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, |
| + [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, |
| + [WGPEER_A_FLAGS] = { .type = NLA_U32 }, |
| + [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, /* only a lower bound; set_peer() checks for an exact sockaddr_in / sockaddr_in6 size */ |
| + [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, |
| + [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, |
| + [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, |
| + [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, |
| + [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, /* each nested entry is parsed with allowedip_policy in set_peer() */ |
| + [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } /* set_peer() accepts only the value 1 */ |
| +}; |
| + |
| +static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { |
| + [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, |
| + [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, /* only a lower bound; set_allowedip() checks for an exact in_addr / in6_addr size */ |
| + [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } |
| +}; |
| + |
| +static struct wg_device *lookup_interface(struct nlattr **attrs, |
| + struct sk_buff *skb) |
| +{ |
| + const bool by_index = attrs[WGDEVICE_A_IFINDEX] != NULL; |
| + const bool by_name = attrs[WGDEVICE_A_IFNAME] != NULL; |
| + struct net_device *netdev; |
| + |
| + /* Exactly one of ifindex / ifname must be supplied. */ |
| + if (by_index == by_name) |
| + return ERR_PTR(-EBADR); |
| + netdev = by_index ? |
| + dev_get_by_index(sock_net(skb->sk), nla_get_u32(attrs[WGDEVICE_A_IFINDEX])) : |
| + dev_get_by_name(sock_net(skb->sk), nla_data(attrs[WGDEVICE_A_IFNAME])); |
| + if (!netdev) |
| + return ERR_PTR(-ENODEV); |
| + /* Only hand back devices whose link kind is this module's name. */ |
| + if (netdev->rtnl_link_ops && netdev->rtnl_link_ops->kind && |
| + !strcmp(netdev->rtnl_link_ops->kind, KBUILD_MODNAME)) |
| + return netdev_priv(netdev); /* the device reference stays held for the caller to dev_put() */ |
| + dev_put(netdev); |
| + return ERR_PTR(-EOPNOTSUPP); |
| +} |
| + |
| +static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, |
| + int family) |
| +{ |
| + const size_t ip_len = family == AF_INET6 ? sizeof(struct in6_addr) : |
| + sizeof(struct in_addr); |
| + struct nlattr *nest = nla_nest_start(skb, 0); |
| + |
| + if (!nest) |
| + return -EMSGSIZE; |
| + |
| + /* Emit mask, family and address; cancel the whole nest if any put fails. */ |
| + if (!nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) && |
| + !nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) && |
| + !nla_put(skb, WGALLOWEDIP_A_IPADDR, ip_len, ip)) { |
| + nla_nest_end(skb, nest); |
| + return 0; |
| + } |
| + nla_nest_cancel(skb, nest); |
| + return -EMSGSIZE; |
| +} |
| + |
| +struct dump_ctx { /* state carried between calls of one WG_CMD_GET_DEVICE dump */ |
| + struct wg_device *wg; /* set by wg_get_device_start(); its net_device reference is dropped in wg_get_device_done() */ |
| + struct wg_peer *next_peer; /* last peer fully dumped (reference held); the next call continues after it. NULL at the start */ |
| + u64 allowedips_seq; /* peer_allowedips.seq sampled when the current peer's allowed-IP listing began; 0 = not sampled */ |
| + struct allowedips_node *next_allowedip; /* where to resume inside the current peer's allowed-IP list, or NULL */ |
| +}; |
| + |
| +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) /* reinterprets the callback's args storage as a dump_ctx */ |
| + |
| +static int |
| +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) /* emit one peer nest into skb; 0 when the peer is complete, -EMSGSIZE when skb ran out of room */ |
| +{ |
| + |
| + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); |
| + struct allowedips_node *allowedips_node = ctx->next_allowedip; /* non-NULL when resuming a peer that was cut short by a full skb */ |
| + bool fail; |
| + |
| + if (!peer_nest) |
| + return -EMSGSIZE; |
| + |
| + down_read(&peer->handshake.lock); |
| + fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, /* the public key is emitted on every pass, including resumed ones */ |
| + peer->handshake.remote_static); |
| + up_read(&peer->handshake.lock); |
| + if (fail) |
| + goto err; |
| + |
| + if (!allowedips_node) { /* first pass for this peer: emit its fixed attributes */ |
| + const struct __kernel_timespec last_handshake = { |
| + .tv_sec = peer->walltime_last_handshake.tv_sec, |
| + .tv_nsec = peer->walltime_last_handshake.tv_nsec |
| + }; |
| + |
| + down_read(&peer->handshake.lock); |
| + fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, |
| + NOISE_SYMMETRIC_KEY_LEN, |
| + peer->handshake.preshared_key); |
| + up_read(&peer->handshake.lock); |
| + if (fail) |
| + goto err; |
| + |
| + if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, |
| + sizeof(last_handshake), &last_handshake) || |
| + nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, |
| + peer->persistent_keepalive_interval) || |
| + nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, |
| + WGPEER_A_UNSPEC) || |
| + nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, |
| + WGPEER_A_UNSPEC) || |
| + nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) |
| + goto err; |
| + |
| + read_lock_bh(&peer->endpoint_lock); |
| + if (peer->endpoint.addr.sa_family == AF_INET) /* an endpoint of any other family is simply not reported */ |
| + fail = nla_put(skb, WGPEER_A_ENDPOINT, |
| + sizeof(peer->endpoint.addr4), |
| + &peer->endpoint.addr4); |
| + else if (peer->endpoint.addr.sa_family == AF_INET6) |
| + fail = nla_put(skb, WGPEER_A_ENDPOINT, |
| + sizeof(peer->endpoint.addr6), |
| + &peer->endpoint.addr6); |
| + read_unlock_bh(&peer->endpoint_lock); |
| + if (fail) |
| + goto err; |
| + allowedips_node = |
| + list_first_entry_or_null(&peer->allowedips_list, |
| + struct allowedips_node, peer_list); |
| + } |
| + if (!allowedips_node) |
| + goto no_allowedips; |
| + if (!ctx->allowedips_seq) |
| + ctx->allowedips_seq = peer->device->peer_allowedips.seq; |
| + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) /* seq differs from the sample taken when listing began: stop listing this peer's allowed IPs */ |
| + goto no_allowedips; |
| + |
| + allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); |
| + if (!allowedips_nest) |
| + goto err; |
| + |
| + list_for_each_entry_from(allowedips_node, &peer->allowedips_list, |
| + peer_list) { |
| + u8 cidr, ip[16] __aligned(__alignof(u64)); |
| + int family; |
| + |
| + family = wg_allowedips_read_node(allowedips_node, ip, &cidr); |
| + if (get_allowedips(skb, ip, cidr, family)) { |
| + nla_nest_end(skb, allowedips_nest); /* keep what already fit instead of cancelling */ |
| + nla_nest_end(skb, peer_nest); |
| + ctx->next_allowedip = allowedips_node; /* resume from the entry that did not fit */ |
| + return -EMSGSIZE; |
| + } |
| + } |
| + nla_nest_end(skb, allowedips_nest); |
| +no_allowedips: |
| + nla_nest_end(skb, peer_nest); |
| + ctx->next_allowedip = NULL; /* peer complete: reset the per-peer resume state */ |
| + ctx->allowedips_seq = 0; |
| + return 0; |
| +err: |
| + nla_nest_cancel(skb, peer_nest); /* nothing is emitted for this peer on this path */ |
| + return -EMSGSIZE; |
| +} |
| + |
| +static int wg_get_device_start(struct netlink_callback *cb) |
| +{ |
| + struct nlattr **tb = genl_family_attrbuf(&genl_family); |
| + struct wg_device *wg; |
| + int err = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, tb, |
| + genl_family.maxattr, device_policy, NULL); |
| + |
| + if (err < 0) |
| + return err; |
| + /* The device found here is parked in the dump context; wg_get_device_done() drops its reference. */ |
| + wg = lookup_interface(tb, cb->skb); |
| + if (IS_ERR(wg)) |
| + return PTR_ERR(wg); |
| + DUMP_CTX(cb)->wg = wg; |
| + return 0; |
| +} |
| + |
| +static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) /* fills one dump message; returns skb->len while more remains, 0 when done, negative on error */ |
| +{ |
| + struct wg_peer *peer, *next_peer_cursor; |
| + struct dump_ctx *ctx = DUMP_CTX(cb); |
| + struct wg_device *wg = ctx->wg; |
| + struct nlattr *peers_nest; |
| + int ret = -EMSGSIZE; |
| + bool done = true; |
| + void *hdr; |
| + |
| + rtnl_lock(); |
| + mutex_lock(&wg->device_update_lock); |
| + cb->seq = wg->device_update_gen; /* generation number consumed by genl_dump_check_consistent() below */ |
| + next_peer_cursor = ctx->next_peer; |
| + |
| + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, |
| + &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); |
| + if (!hdr) |
| + goto out; |
| + genl_dump_check_consistent(cb, hdr); |
| + |
| + if (!ctx->next_peer) { /* no cursor yet: first message of the dump, so emit the device-level attributes */ |
| + if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, |
| + wg->incoming_port) || |
| + nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || |
| + nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || |
| + nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) |
| + goto out; |
| + |
| + down_read(&wg->static_identity.lock); |
| + if (wg->static_identity.has_identity) { |
| + if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, |
| + NOISE_PUBLIC_KEY_LEN, |
| + wg->static_identity.static_private) || |
| + nla_put(skb, WGDEVICE_A_PUBLIC_KEY, |
| + NOISE_PUBLIC_KEY_LEN, |
| + wg->static_identity.static_public)) { |
| + up_read(&wg->static_identity.lock); |
| + goto out; |
| + } |
| + } |
| + up_read(&wg->static_identity.lock); |
| + } |
| + |
| + peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); |
| + if (!peers_nest) |
| + goto out; |
| + ret = 0; /* from here on, running out of room is not an error: the dump continues in the next call */ |
| + /* If the last cursor was removed via list_del_init in peer_remove, then |
| + * we just treat this the same as there being no more peers left. The |
| + * reason is that seq_nr should indicate to userspace that this isn't a |
| + * coherent dump anyway, so they'll try again. |
| + */ |
| + if (list_empty(&wg->peer_list) || |
| + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { |
| + nla_nest_cancel(skb, peers_nest); |
| + goto out; |
| + } |
| + lockdep_assert_held(&wg->device_update_lock); |
| + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); |
| + list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { /* starts after the cursor, or at the list head when there is none */ |
| + if (get_peer(peer, skb, ctx)) { |
| + done = false; /* this peer did not fit completely; it is retried on the next call */ |
| + break; |
| + } |
| + next_peer_cursor = peer; |
| + } |
| + nla_nest_end(skb, peers_nest); |
| + |
| +out: |
| + if (!ret && !done && next_peer_cursor) /* take the new cursor's reference before dropping the old one */ |
| + wg_peer_get(next_peer_cursor); |
| + wg_peer_put(ctx->next_peer); |
| + mutex_unlock(&wg->device_update_lock); |
| + rtnl_unlock(); |
| + |
| + if (ret) { |
| + genlmsg_cancel(skb, hdr); |
| + return ret; |
| + } |
| + genlmsg_end(skb, hdr); |
| + if (done) { |
| + ctx->next_peer = NULL; |
| + return 0; |
| + } |
| + ctx->next_peer = next_peer_cursor; |
| + return skb->len; |
| + |
| + /* At this point, we can't really deal ourselves with safely zeroing out |
| + * the private key material after usage. This will need an additional API |
| + * in the kernel for marking skbs as zero_on_free. |
| + */ |
| +} |
| + |
| +static int wg_get_device_done(struct netlink_callback *cb) |
| +{ |
| + struct wg_device *wg = DUMP_CTX(cb)->wg; |
| + |
| + if (wg) /* release the net_device reference parked by wg_get_device_start() */ |
| + dev_put(wg->dev); |
| + wg_peer_put(DUMP_CTX(cb)->next_peer); /* release the dump cursor's peer reference */ |
| + return 0; |
| +} |
| + |
| +static int set_port(struct wg_device *wg, u16 port) |
| +{ |
| + struct wg_peer *cur; |
| + |
| + if (port == wg->incoming_port) |
| + return 0; /* nothing to change */ |
| + list_for_each_entry(cur, &wg->peer_list, peer_list) |
| + wg_socket_clear_peer_endpoint_src(cur); |
| + /* A running interface goes through wg_socket_init(); otherwise only the stored port changes. */ |
| + if (netif_running(wg->dev)) |
| + return wg_socket_init(wg, port); |
| + wg->incoming_port = port; |
| + return 0; |
| +} |
| + |
| +static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) |
| +{ |
| + const struct nlattr *addr = attrs[WGALLOWEDIP_A_IPADDR]; |
| + u16 family; |
| + u8 cidr; |
| + |
| + if (!attrs[WGALLOWEDIP_A_FAMILY] || !addr || |
| + !attrs[WGALLOWEDIP_A_CIDR_MASK]) |
| + return -EINVAL; |
| + family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); |
| + cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); |
| + |
| + /* The address size and the prefix length must both fit the family. */ |
| + if (family == AF_INET && cidr <= 32 && |
| + nla_len(addr) == sizeof(struct in_addr)) |
| + return wg_allowedips_insert_v4( |
| + &peer->device->peer_allowedips, nla_data(addr), cidr, |
| + peer, &peer->device->device_update_lock); |
| + if (family == AF_INET6 && cidr <= 128 && |
| + nla_len(addr) == sizeof(struct in6_addr)) |
| + return wg_allowedips_insert_v6( |
| + &peer->device->peer_allowedips, nla_data(addr), cidr, |
| + peer, &peer->device->device_update_lock); |
| + |
| + /* Unknown family, or a size / prefix length that does not fit it. */ |
| + return -EINVAL; |
| +} |
| + |
| +static int set_peer(struct wg_device *wg, struct nlattr **attrs) /* create, update or remove the peer described by one parsed WGDEVICE_A_PEERS entry; 0 or -errno */ |
| +{ |
| + u8 *public_key = NULL, *preshared_key = NULL; |
| + struct wg_peer *peer = NULL; |
| + u32 flags = 0; |
| + int ret; |
| + |
| + ret = -EINVAL; |
| + if (attrs[WGPEER_A_PUBLIC_KEY] && |
| + nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) |
| + public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); |
| + else |
| + goto out; /* the public key is mandatory: it identifies the peer */ |
| + if (attrs[WGPEER_A_PRESHARED_KEY] && |
| + nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) |
| + preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); |
| + |
| + if (attrs[WGPEER_A_FLAGS]) |
| + flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); |
| + ret = -EOPNOTSUPP; |
| + if (flags & ~__WGPEER_F_ALL) /* reject flag bits outside __WGPEER_F_ALL */ |
| + goto out; |
| + |
| + ret = -EPFNOSUPPORT; |
| + if (attrs[WGPEER_A_PROTOCOL_VERSION]) { |
| + if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) |
| + goto out; |
| + } |
| + |
| + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, |
| + nla_data(attrs[WGPEER_A_PUBLIC_KEY])); |
| + ret = 0; |
| + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ |
| + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) /* removing or update-only on an unknown peer is a successful no-op */ |
| + goto out; |
| + |
| + /* The peer is new, so there aren't allowed IPs to remove. */ |
| + flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; |
| + |
| + down_read(&wg->static_identity.lock); |
| + if (wg->static_identity.has_identity && |
| + !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), |
| + wg->static_identity.static_public, |
| + NOISE_PUBLIC_KEY_LEN)) { |
| + /* We silently ignore peers that have the same public |
| + * key as the device. The reason we do it silently is |
| + * that we'd like for people to be able to reuse the |
| + * same set of API calls across peers. |
| + */ |
| + up_read(&wg->static_identity.lock); |
| + ret = 0; |
| + goto out; |
| + } |
| + up_read(&wg->static_identity.lock); |
| + |
| + peer = wg_peer_create(wg, public_key, preshared_key); |
| + if (IS_ERR(peer)) { |
| + /* Similar to the above, if the key is invalid, we skip |
| + * it without fanfare, so that services don't need to |
| + * worry about doing key validation themselves. |
| + */ |
| + ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); |
| + peer = NULL; /* so the wg_peer_put() at 'out' does not see an ERR_PTR */ |
| + goto out; |
| + } |
| + /* Take additional reference, as though we've just been |
| + * looked up. |
| + */ |
| + wg_peer_get(peer); |
| + } |
| + |
| + if (flags & WGPEER_F_REMOVE_ME) { |
| + wg_peer_remove(peer); |
| + goto out; |
| + } |
| + |
| + if (preshared_key) { |
| + down_write(&peer->handshake.lock); |
| + memcpy(&peer->handshake.preshared_key, preshared_key, |
| + NOISE_SYMMETRIC_KEY_LEN); |
| + up_write(&peer->handshake.lock); |
| + } |
| + |
| + if (attrs[WGPEER_A_ENDPOINT]) { |
| + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); |
| + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); |
| + |
| + if ((len == sizeof(struct sockaddr_in) && /* an endpoint whose size does not match its family is silently ignored */ |
| + addr->sa_family == AF_INET) || |
| + (len == sizeof(struct sockaddr_in6) && |
| + addr->sa_family == AF_INET6)) { |
| + struct endpoint endpoint = { { { 0 } } }; |
| + |
| + memcpy(&endpoint.addr, addr, len); |
| + wg_socket_set_peer_endpoint(peer, &endpoint); |
| + } |
| + } |
| + |
| + if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) |
| + wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, |
| + &wg->device_update_lock); |
| + |
| + if (attrs[WGPEER_A_ALLOWEDIPS]) { |
| + struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; |
| + int rem; |
| + |
| + nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { |
| + ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, |
| + attr, allowedip_policy, NULL); |
| + if (ret < 0) |
| + goto out; |
| + ret = set_allowedip(peer, allowedip); |
| + if (ret < 0) /* entries inserted before the failing one stay in place */ |
| + goto out; |
| + } |
| + } |
| + |
| + if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { |
| + const u16 persistent_keepalive_interval = nla_get_u16( |
| + attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); |
| + const bool send_keepalive = /* only when the interval goes from zero to non-zero on a running interface */ |
| + !peer->persistent_keepalive_interval && |
| + persistent_keepalive_interval && |
| + netif_running(wg->dev); |
| + |
| + peer->persistent_keepalive_interval = persistent_keepalive_interval; |
| + if (send_keepalive) |
| + wg_packet_send_keepalive(peer); |
| + } |
| + |
| + if (netif_running(wg->dev)) |
| + wg_packet_send_staged_packets(peer); |
| + |
| +out: |
| + wg_peer_put(peer); /* peer is NULL on several paths here -- NOTE(review): relies on wg_peer_put() accepting NULL */ |
| + if (attrs[WGPEER_A_PRESHARED_KEY]) |
| + memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), /* wipe the key from the request buffer on every path */ |
| + nla_len(attrs[WGPEER_A_PRESHARED_KEY])); |
| + return ret; |
| +} |
| + |
| +static int wg_set_device(struct sk_buff *skb, struct genl_info *info) /* WG_CMD_SET_DEVICE handler; 0 or -errno */ |
| +{ |
| + struct wg_device *wg = lookup_interface(info->attrs, skb); |
| + u32 flags = 0; |
| + int ret; |
| + |
| + if (IS_ERR(wg)) { |
| + ret = PTR_ERR(wg); |
| + goto out_nodev; /* no locks and no device reference are held yet */ |
| + } |
| + |
| + rtnl_lock(); |
| + mutex_lock(&wg->device_update_lock); |
| + |
| + if (info->attrs[WGDEVICE_A_FLAGS]) |
| + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); |
| + ret = -EOPNOTSUPP; |
| + if (flags & ~__WGDEVICE_F_ALL) /* reject flag bits outside __WGDEVICE_F_ALL */ |
| + goto out; |
| + |
| + ret = -EPERM; |
| + if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || /* port and fwmark changes need CAP_NET_ADMIN in creating_net's user namespace */ |
| + info->attrs[WGDEVICE_A_FWMARK]) && |
| + !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) |
| + goto out; |
| + |
| + ++wg->device_update_gen; /* wg_get_device_dump() reports this value as cb->seq */ |
| + |
| + if (info->attrs[WGDEVICE_A_FWMARK]) { |
| + struct wg_peer *peer; |
| + |
| + wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); |
| + list_for_each_entry(peer, &wg->peer_list, peer_list) |
| + wg_socket_clear_peer_endpoint_src(peer); |
| + } |
| + |
| + if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { |
| + ret = set_port(wg, |
| + nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); |
| + if (ret) |
| + goto out; |
| + } |
| + |
| + if (flags & WGDEVICE_F_REPLACE_PEERS) |
| + wg_peer_remove_all(wg); |
| + |
| + if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && |
| + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == |
| + NOISE_PUBLIC_KEY_LEN) { |
| + u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); |
| + u8 public_key[NOISE_PUBLIC_KEY_LEN]; |
| + struct wg_peer *peer, *temp; |
| + |
| + if (!crypto_memneq(wg->static_identity.static_private, /* key unchanged: skip the whole update */ |
| + private_key, NOISE_PUBLIC_KEY_LEN)) |
| + goto skip_set_private_key; |
| + |
| + /* We remove before setting, to prevent race, which means doing |
| + * two 25519-genpub ops. |
| + */ |
| + if (curve25519_generate_public(public_key, private_key)) { |
| + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, |
| + public_key); |
| + if (peer) { /* a peer carrying what is about to become our own public key */ |
| + wg_peer_put(peer); /* drop the lookup reference, then remove that peer */ |
| + wg_peer_remove(peer); |
| + } |
| + } |
| + |
| + down_write(&wg->static_identity.lock); |
| + wg_noise_set_static_identity_private_key(&wg->static_identity, |
| + private_key); |
| + list_for_each_entry_safe(peer, temp, &wg->peer_list, /* _safe variant: the loop body may remove the current peer */ |
| + peer_list) { |
| + if (wg_noise_precompute_static_static(peer)) |
| + wg_noise_expire_current_peer_keypairs(peer); |
| + else |
| + wg_peer_remove(peer); |
| + } |
| + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); |
| + up_write(&wg->static_identity.lock); |
| + } |
| +skip_set_private_key: |
| + |
| + if (info->attrs[WGDEVICE_A_PEERS]) { |
| + struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; |
| + int rem; |
| + |
| + nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { |
| + ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, |
| + peer_policy, NULL); |
| + if (ret < 0) |
| + goto out; |
| + ret = set_peer(wg, peer); |
| + if (ret < 0) /* peers applied before the failing one stay applied */ |
| + goto out; |
| + } |
| + } |
| + ret = 0; |
| + |
| +out: |
| + mutex_unlock(&wg->device_update_lock); |
| + rtnl_unlock(); |
| + dev_put(wg->dev); /* release the reference taken by lookup_interface() */ |
| +out_nodev: |
| + if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) |
| + memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), /* wipe the key from the request buffer on every path */ |
| + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); |
| + return ret; |
| +} |
| + |
| +static const struct genl_ops genl_ops[] = { |
| + { |
| + .cmd = WG_CMD_GET_DEVICE, /* served as a multi-part dump: start, then dumpit until it returns 0, then done */ |
| + .start = wg_get_device_start, |
| + .dumpit = wg_get_device_dump, |
| + .done = wg_get_device_done, |
| + .flags = GENL_UNS_ADMIN_PERM |
| + }, { |
| + .cmd = WG_CMD_SET_DEVICE, |
| + .doit = wg_set_device, |
| + .flags = GENL_UNS_ADMIN_PERM /* NOTE(review): presumably requires CAP_NET_ADMIN in the socket's user namespace -- confirm against genetlink docs */ |
| + } |
| +}; |
| + |
| +static struct genl_family genl_family __ro_after_init = { |
| + .ops = genl_ops, |
| + .n_ops = ARRAY_SIZE(genl_ops), |
| + .name = WG_GENL_NAME, |
| + .version = WG_GENL_VERSION, |
| + .maxattr = WGDEVICE_A_MAX, /* top-level attributes are the WGDEVICE_A_* set */ |
| + .module = THIS_MODULE, |
| + .policy = device_policy, |
| + .netnsok = true |
| +}; |
| + |
| +int __init wg_genetlink_init(void) |
| +{ |
| + return genl_register_family(&genl_family); /* result is passed straight back to mod_init() */ |
| +} |
| + |
| +void __exit wg_genetlink_uninit(void) |
| +{ |
| + genl_unregister_family(&genl_family); /* counterpart of wg_genetlink_init(); called from mod_exit() */ |
| +} |
| diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h |
| new file mode 100644 |
| index 000000000000..15100d92e2e3 |
| |
| |
| @@ -0,0 +1,12 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_NETLINK_H |
| +#define _WG_NETLINK_H |
| + |
| +int wg_genetlink_init(void); /* registers the generic netlink family; returns genl_register_family()'s result */ |
| +void wg_genetlink_uninit(void); /* unregisters the generic netlink family */ |
| + |
| +#endif /* _WG_NETLINK_H */ |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| new file mode 100644 |
| index 000000000000..d71c8db68a8c |
| |
| |
| @@ -0,0 +1,828 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "noise.h" |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "messages.h" |
| +#include "queueing.h" |
| +#include "peerlookup.h" |
| + |
| +#include <linux/rcupdate.h> |
| +#include <linux/slab.h> |
| +#include <linux/bitmap.h> |
| +#include <linux/scatterlist.h> |
| +#include <linux/highmem.h> |
| +#include <crypto/algapi.h> |
| + |
| +/* This implements Noise_IKpsk2: |
| + * |
| + * <- s |
| + * ****** |
| + * -> e, es, s, ss, {t} |
| + * <- e, ee, se, psk, {} |
| + */ |
| + |
| +static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; /* exactly 37 characters: no trailing NUL is stored, so sizeof() is the string length */ |
| +static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com"; /* exactly 34 characters, likewise without a trailing NUL */ |
| +static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; /* computed once by wg_noise_init() */ |
| +static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; /* computed once by wg_noise_init() */ |
| +static atomic64_t keypair_counter = ATOMIC64_INIT(0); /* source of keypair internal_id values, see keypair_create() */ |
| + |
| +void __init wg_noise_init(void) |
| +{ |
| + struct blake2s_state hash_state; |
| + |
| + blake2s(handshake_init_chaining_key, handshake_name, NULL, |
| + NOISE_HASH_LEN, sizeof(handshake_name), 0); /* chaining key = unkeyed BLAKE2s of the handshake name */ |
| + blake2s_init(&hash_state, NOISE_HASH_LEN); /* initial hash = BLAKE2s(chaining key || identifier name) */ |
| + blake2s_update(&hash_state, handshake_init_chaining_key, NOISE_HASH_LEN); |
| + blake2s_update(&hash_state, identifier_name, sizeof(identifier_name)); |
| + blake2s_final(&hash_state, handshake_init_hash); |
| +} |
| + |
| +/* Caller must hold peer->handshake.static_identity->lock. */ |
| +bool wg_noise_precompute_static_static(struct wg_peer *peer) |
| +{ |
| + struct noise_handshake *hs = &peer->handshake; |
| + bool ok = true; |
| + |
| + down_write(&hs->lock); |
| + if (!hs->static_identity->has_identity) { |
| + memset(hs->precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN); /* no local key: clear the stored value */ |
| + } else { |
| + ok = curve25519(hs->precomputed_static_static, |
| + hs->static_identity->static_private, |
| + hs->remote_static); |
| + } |
| + up_write(&hs->lock); |
| + return ok; /* false only when curve25519() reports failure */ |
| +} |
| + |
| +bool wg_noise_handshake_init(struct noise_handshake *handshake, |
| + struct noise_static_identity *static_identity, |
| + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| + struct wg_peer *peer) |
| +{ |
| + memset(handshake, 0, sizeof(*handshake)); /* start from all-zero state, so a NULL preshared key leaves that field zeroed */ |
| + init_rwsem(&handshake->lock); |
| + handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; |
| + handshake->entry.peer = peer; |
| + memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); |
| + if (peer_preshared_key) |
| + memcpy(handshake->preshared_key, peer_preshared_key, |
| + NOISE_SYMMETRIC_KEY_LEN); |
| + handshake->static_identity = static_identity; |
| + handshake->state = HANDSHAKE_ZEROED; |
| + return wg_noise_precompute_static_static(peer); /* operates on peer->handshake -- NOTE(review): assumes 'handshake' is that same object; false if the curve25519 step fails */ |
| +} |
| + |
| +/* Wipe the per-exchange state (ephemeral keys, hash, chaining key and remote |
| + * index) and return to HANDSHAKE_ZEROED. The static keys, the preshared key |
| + * and latest_timestamp are not touched. Takes no lock itself; both callers in |
| + * this file hold handshake->lock for writing. |
| + */ |
| +static void handshake_zero(struct noise_handshake *handshake) |
| +{ |
| + memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); |
| + memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); |
| + memset(&handshake->hash, 0, NOISE_HASH_LEN); |
| + memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); |
| + handshake->remote_index = 0; |
| + handshake->state = HANDSHAKE_ZEROED; |
| +} |
| + |
| +/* Remove @handshake from the device's index hashtable and wipe its |
| + * per-exchange state. |
| + * |
| + * The removal is done while handshake->lock is held for writing. The entry is |
| + * inserted by wg_noise_handshake_create_initiation() and |
| + * wg_noise_handshake_create_response(), and replaced by |
| + * wg_noise_handshake_begin_session(), all under that same lock, so taking it |
| + * here serializes the removal against them and a single removal suffices. |
| + */ |
| +void wg_noise_handshake_clear(struct noise_handshake *handshake) |
| +{ |
| + down_write(&handshake->lock); |
| + wg_index_hashtable_remove( |
| + handshake->entry.peer->device->index_hashtable, |
| + &handshake->entry); |
| + handshake_zero(handshake); |
| + up_write(&handshake->lock); |
| +} |
| + |
| +/* Allocate a zeroed keypair owned by @peer, give it a fresh internal_id and |
| + * initialize its reference count. Allocates with GFP_KERNEL. Returns NULL if |
| + * the allocation fails. |
| + */ |
| +static struct noise_keypair *keypair_create(struct wg_peer *peer) |
| +{ |
| + struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); |
| + |
| + if (unlikely(!keypair)) |
| + return NULL; |
| + keypair->internal_id = atomic64_inc_return(&keypair_counter); |
| + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; |
| + keypair->entry.peer = peer; |
| + kref_init(&keypair->refcount); |
| + return keypair; |
| +} |
| + |
| +/* RCU callback: zero and free the keypair that embeds @rcu. */ |
| +static void keypair_free_rcu(struct rcu_head *rcu) |
| +{ |
| + kzfree(container_of(rcu, struct noise_keypair, rcu)); |
| +} |
| + |
| +/* kref release callback: log the destruction, drop the keypair's entry from |
| + * the device's index hashtable, and defer the actual free to |
| + * keypair_free_rcu() via call_rcu(). |
| + */ |
| +static void keypair_free_kref(struct kref *kref) |
| +{ |
| + struct noise_keypair *keypair = |
| + container_of(kref, struct noise_keypair, refcount); |
| + |
| + net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", |
| + keypair->entry.peer->device->dev->name, |
| + keypair->internal_id, |
| + keypair->entry.peer->internal_id); |
| + wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, |
| + &keypair->entry); |
| + call_rcu(&keypair->rcu, keypair_free_rcu); |
| +} |
| + |
| +/* Drop one reference on @keypair; a NULL @keypair is ignored. |
| + * |
| + * With @unreference_now set, the keypair's index hashtable entry is removed |
| + * right away instead of only when the last reference goes away in |
| + * keypair_free_kref(). |
| + */ |
| +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) |
| +{ |
| + if (unlikely(!keypair)) |
| + return; |
| + if (unlikely(unreference_now)) |
| + wg_index_hashtable_remove( |
| + keypair->entry.peer->device->index_hashtable, |
| + &keypair->entry); |
| + kref_put(&keypair->refcount, keypair_free_kref); |
| +} |
| + |
| +/* Try to take a reference on @keypair. Returns @keypair on success, or NULL if |
| + * @keypair is NULL or its reference count has already reached zero. Lockdep |
| + * warns when this is called without the RCU BH read lock held. |
| + */ |
| +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) |
| +{ |
| + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), |
| + "Taking noise keypair reference without holding the RCU BH read lock"); |
| + if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) |
| + return NULL; |
| + return keypair; |
| +} |
| + |
| +/* Detach all three keypair slots of @keypairs and drop the reference each slot |
| + * held, removing every keypair from the index hashtable immediately. Runs |
| + * entirely under keypair_update_lock. |
| + */ |
| +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) |
| +{ |
| + struct noise_keypair *old; |
| + |
| + spin_lock_bh(&keypairs->keypair_update_lock); |
| + |
| + /* We zero the next_keypair before zeroing the others, so that |
| + * wg_noise_received_with_keypair returns early before subsequent ones |
| + * are zeroed. |
| + */ |
| + old = rcu_dereference_protected(keypairs->next_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + RCU_INIT_POINTER(keypairs->next_keypair, NULL); |
| + wg_noise_keypair_put(old, true); |
| + |
| + old = rcu_dereference_protected(keypairs->previous_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); |
| + wg_noise_keypair_put(old, true); |
| + |
| + old = rcu_dereference_protected(keypairs->current_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + RCU_INIT_POINTER(keypairs->current_keypair, NULL); |
| + wg_noise_keypair_put(old, true); |
| + |
| + spin_unlock_bh(&keypairs->keypair_update_lock); |
| +} |
| + |
| +/* Expire @peer's sending state: clear the in-progress handshake, back-date the |
| + * last-sent-handshake timestamp, and mark the sending keys of the next and |
| + * current keypairs invalid. The previous keypair and all receiving keys are |
| + * left untouched. |
| + */ |
| +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) |
| +{ |
| + struct noise_keypair *keypair; |
| + |
| + wg_noise_handshake_clear(&peer->handshake); |
| + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); |
| + |
| + spin_lock_bh(&peer->keypairs.keypair_update_lock); |
| + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, |
| + lockdep_is_held(&peer->keypairs.keypair_update_lock)); |
| + if (keypair) |
| + keypair->sending.is_valid = false; |
| + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, |
| + lockdep_is_held(&peer->keypairs.keypair_update_lock)); |
| + if (keypair) |
| + keypair->sending.is_valid = false; |
| + spin_unlock_bh(&peer->keypairs.keypair_update_lock); |
| +} |
| + |
| +/* Install @new_keypair into @keypairs under keypair_update_lock. |
| + * |
| + * As initiator the new keypair becomes current straight away; as responder it |
| + * is parked in the next slot. Keypairs displaced by the rotation are released |
| + * with wg_noise_keypair_put(..., true). |
| + */ |
| +static void add_new_keypair(struct noise_keypairs *keypairs, |
| + struct noise_keypair *new_keypair) |
| +{ |
| + struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; |
| + |
| + spin_lock_bh(&keypairs->keypair_update_lock); |
| + previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + next_keypair = rcu_dereference_protected(keypairs->next_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + current_keypair = rcu_dereference_protected(keypairs->current_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + if (new_keypair->i_am_the_initiator) { |
| + /* If we're the initiator, it means we've sent a handshake, and |
| + * received a confirmation response, which means this new |
| + * keypair can now be used. |
| + */ |
| + if (next_keypair) { |
| + /* If there already was a next keypair pending, we |
| + * demote it to be the previous keypair, and free the |
| + * existing current. Note that this means KCI can result |
| + * in this transition. It would perhaps be more sound to |
| + * always just get rid of the unused next keypair |
| + * instead of putting it in the previous slot, but this |
| + * might be a bit less robust. Something to think about |
| + * for the future. |
| + */ |
| + RCU_INIT_POINTER(keypairs->next_keypair, NULL); |
| + rcu_assign_pointer(keypairs->previous_keypair, |
| + next_keypair); |
| + wg_noise_keypair_put(current_keypair, true); |
| + } else /* If there wasn't an existing next keypair, we replace |
| + * the previous with the current one. |
| + */ |
| + rcu_assign_pointer(keypairs->previous_keypair, |
| + current_keypair); |
| + /* At this point we can get rid of the old previous keypair, and |
| + * set up the new keypair. |
| + */ |
| + wg_noise_keypair_put(previous_keypair, true); |
| + rcu_assign_pointer(keypairs->current_keypair, new_keypair); |
| + } else { |
| + /* If we're the responder, it means we can't use the new keypair |
| + * until we receive confirmation via the first data packet, so |
| + * we get rid of the existing previous one, the possibly |
| + * existing next one, and slide in the new next one. |
| + */ |
| + rcu_assign_pointer(keypairs->next_keypair, new_keypair); |
| + wg_noise_keypair_put(next_keypair, true); |
| + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); |
| + wg_noise_keypair_put(previous_keypair, true); |
| + } |
| + spin_unlock_bh(&keypairs->keypair_update_lock); |
| +} |
| + |
| +/* Promote @received_keypair to current if it is the pending next keypair. |
| + * |
| + * The common case (it is not the next keypair) is decided without taking the |
| + * spinlock; otherwise the check is repeated under keypair_update_lock before |
| + * the slots are rotated. Returns true only when the promotion took place. |
| + */ |
| +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, |
| + struct noise_keypair *received_keypair) |
| +{ |
| + struct noise_keypair *old_keypair; |
| + bool key_is_new; |
| + |
| + /* We first check without taking the spinlock. */ |
| + key_is_new = received_keypair == |
| + rcu_access_pointer(keypairs->next_keypair); |
| + if (likely(!key_is_new)) |
| + return false; |
| + |
| + spin_lock_bh(&keypairs->keypair_update_lock); |
| + /* After locking, we double check that things didn't change from |
| + * beneath us. |
| + */ |
| + if (unlikely(received_keypair != |
| + rcu_dereference_protected(keypairs->next_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)))) { |
| + spin_unlock_bh(&keypairs->keypair_update_lock); |
| + return false; |
| + } |
| + |
| + /* When we've finally received the confirmation, we slide the next |
| + * into the current, the current into the previous, and get rid of |
| + * the old previous. |
| + */ |
| + old_keypair = rcu_dereference_protected(keypairs->previous_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock)); |
| + rcu_assign_pointer(keypairs->previous_keypair, |
| + rcu_dereference_protected(keypairs->current_keypair, |
| + lockdep_is_held(&keypairs->keypair_update_lock))); |
| + wg_noise_keypair_put(old_keypair, true); |
| + rcu_assign_pointer(keypairs->current_keypair, received_keypair); |
| + RCU_INIT_POINTER(keypairs->next_keypair, NULL); |
| + |
| + spin_unlock_bh(&keypairs->keypair_update_lock); |
| + return true; |
| +} |
| + |
| +/* Store a clamped copy of @private_key in @static_identity and derive the |
| + * matching public key. has_identity is set to the result of |
| + * curve25519_generate_public(), so a failed derivation leaves the identity |
| + * marked as absent. |
| + */ |
| +/* Must hold static_identity->lock */ |
| +void wg_noise_set_static_identity_private_key( |
| + struct noise_static_identity *static_identity, |
| + const u8 private_key[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + memcpy(static_identity->static_private, private_key, |
| + NOISE_PUBLIC_KEY_LEN); |
| + curve25519_clamp_secret(static_identity->static_private); |
| + static_identity->has_identity = curve25519_generate_public( |
| + static_identity->static_public, private_key); |
| +} |
| + |
| +/* This is Hugo Krawczyk's HKDF: |
| + * - https://eprint.iacr.org/2010/264.pdf |
| + * - https://tools.ietf.org/html/rfc5869 |
| + * |
| + * Extract: secret = HMAC-BLAKE2s keyed with @chaining_key over @data. |
| + * Expand: up to three outputs, each the HMAC under secret of the previous full |
| + * output (nothing for the first) followed by a counter byte 1, 2 or 3. The |
| + * leading *_len bytes of each output are copied to the matching destination. |
| + * Expansion stops at the first destination that is NULL or has length zero. |
| + * |
| + * @first_dst may alias @chaining_key, because @chaining_key is only read |
| + * during extraction, before any destination is written. |
| + */ |
| +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, |
| + size_t first_len, size_t second_len, size_t third_len, |
| + size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) |
| +{ |
| + u8 output[BLAKE2S_HASH_SIZE + 1]; |
| + u8 secret[BLAKE2S_HASH_SIZE]; |
| + |
| + /* Debug-only sanity check: lengths fit in one hash output, and a later |
| + * output is never requested without the earlier ones. |
| + */ |
| + WARN_ON(IS_ENABLED(DEBUG) && |
| + (first_len > BLAKE2S_HASH_SIZE || |
| + second_len > BLAKE2S_HASH_SIZE || |
| + third_len > BLAKE2S_HASH_SIZE || |
| + ((second_len || second_dst || third_len || third_dst) && |
| + (!first_len || !first_dst)) || |
| + ((third_len || third_dst) && (!second_len || !second_dst)))); |
| + |
| + /* Extract entropy from data into secret */ |
| + blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); |
| + |
| + if (!first_dst || !first_len) |
| + goto out; |
| + |
| + /* Expand first key: key = secret, data = 0x1 */ |
| + output[0] = 1; |
| + blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); |
| + memcpy(first_dst, output, first_len); |
| + |
| + if (!second_dst || !second_len) |
| + goto out; |
| + |
| + /* Expand second key: key = secret, data = first-key || 0x2 */ |
| + output[BLAKE2S_HASH_SIZE] = 2; |
| + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, |
| + BLAKE2S_HASH_SIZE); |
| + memcpy(second_dst, output, second_len); |
| + |
| + if (!third_dst || !third_len) |
| + goto out; |
| + |
| + /* Expand third key: key = secret, data = second-key || 0x3 */ |
| + output[BLAKE2S_HASH_SIZE] = 3; |
| + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, |
| + BLAKE2S_HASH_SIZE); |
| + memcpy(third_dst, output, third_len); |
| + |
| +out: |
| + /* Clear sensitive data from stack */ |
| + memzero_explicit(secret, BLAKE2S_HASH_SIZE); |
| + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); |
| +} |
| + |
| +/* Put @key's bookkeeping into its initial state: lock initialized, counter at |
| + * zero, backtrack bitmap cleared, birthdate set to the current coarse boottime |
| + * and is_valid set. The key bytes themselves are not touched. |
| + */ |
| +static void symmetric_key_init(struct noise_symmetric_key *key) |
| +{ |
| + spin_lock_init(&key->counter.receive.lock); |
| + atomic64_set(&key->counter.counter, 0); |
| + memset(key->counter.receive.backtrack, 0, |
| + sizeof(key->counter.receive.backtrack)); |
| + key->birthdate = ktime_get_coarse_boottime_ns(); |
| + key->is_valid = true; |
| +} |
| + |
| +/* Derive two symmetric keys from @chaining_key with kdf() over empty input |
| + * data, writing them to @first_dst and @second_dst in that order, then |
| + * initialize both keys' bookkeeping. |
| + */ |
| +static void derive_keys(struct noise_symmetric_key *first_dst, |
| + struct noise_symmetric_key *second_dst, |
| + const u8 chaining_key[NOISE_HASH_LEN]) |
| +{ |
| + kdf(first_dst->key, second_dst->key, NULL, NULL, |
| + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, |
| + chaining_key); |
| + symmetric_key_init(first_dst); |
| + symmetric_key_init(second_dst); |
| +} |
| + |
| +/* Mix the DH of @private and @public into @chaining_key via kdf(), writing the |
| + * second KDF output to @key. @key may be NULL: kdf() stops after the first |
| + * output when the second destination is NULL. Returns false, leaving |
| + * @chaining_key and @key untouched, if curve25519() fails. After a successful |
| + * mix the DH result is wiped from the stack. |
| + */ |
| +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| + const u8 private[NOISE_PUBLIC_KEY_LEN], |
| + const u8 public[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; |
| + |
| + if (unlikely(!curve25519(dh_calculation, private, public))) |
| + return false; |
| + kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, |
| + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); |
| + memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); |
| + return true; |
| +} |
| + |
| +/* Update @hash in place to BLAKE2s(hash || src[0..src_len)). */ |
| +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) |
| +{ |
| + struct blake2s_state blake; |
| + |
| + blake2s_init(&blake, NOISE_HASH_LEN); |
| + blake2s_update(&blake, hash, NOISE_HASH_LEN); |
| + blake2s_update(&blake, src, src_len); |
| + blake2s_final(&blake, hash); |
| +} |
| + |
| +/* Mix the preshared key @psk into the handshake: kdf() yields a new |
| + * @chaining_key, a temporary value that is hashed into @hash, and @key. The |
| + * temporary is wiped from the stack afterwards. |
| + */ |
| +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| + const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) |
| +{ |
| + u8 temp_hash[NOISE_HASH_LEN]; |
| + |
| + kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, |
| + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); |
| + mix_hash(hash, temp_hash, NOISE_HASH_LEN); |
| + memzero_explicit(temp_hash, NOISE_HASH_LEN); |
| +} |
| + |
| +/* Start a handshake transcript: load the constants precomputed by |
| + * wg_noise_init() into @hash and @chaining_key, then hash @remote_static (the |
| + * responder's static public key) into @hash. |
| + */ |
| +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], |
| + u8 hash[NOISE_HASH_LEN], |
| + const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); |
| + memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); |
| + mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); |
| +} |
| + |
| +/* Encrypt @src_plaintext into @dst_ciphertext with ChaCha20-Poly1305 under |
| + * @key, using @hash as the associated data and a nonce of zero, then hash the |
| + * resulting ciphertext (noise_encrypted_len(src_len) bytes) into @hash. |
| + */ |
| +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, |
| + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| + u8 hash[NOISE_HASH_LEN]) |
| +{ |
| + chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, |
| + NOISE_HASH_LEN, |
| + 0 /* Always zero for Noise_IK */, key); |
| + mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); |
| +} |
| + |
| +/* Counterpart of message_encrypt(): decrypt @src_len bytes of ciphertext with |
| + * @hash as the associated data. On success the ciphertext is hashed into @hash |
| + * and true is returned; on failure @hash is left unchanged and false is |
| + * returned. |
| + */ |
| +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, |
| + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| + u8 hash[NOISE_HASH_LEN]) |
| +{ |
| + if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, |
| + hash, NOISE_HASH_LEN, |
| + 0 /* Always zero for Noise_IK */, key)) |
| + return false; |
| + mix_hash(hash, src_ciphertext, src_len); |
| + return true; |
| +} |
| + |
| +/* Process an ephemeral public key: copy it to @ephemeral_dst (skipped when |
| + * source and destination are the same buffer), hash it into @hash, and mix it |
| + * into @chaining_key through kdf(). |
| + */ |
| +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], |
| + const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], |
| + u8 chaining_key[NOISE_HASH_LEN], |
| + u8 hash[NOISE_HASH_LEN]) |
| +{ |
| + if (ephemeral_dst != ephemeral_src) |
| + memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); |
| + mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); |
| + kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, |
| + NOISE_PUBLIC_KEY_LEN, chaining_key); |
| +} |
| + |
| +/* Write the current wall-clock time to @output: 8 bytes of big-endian seconds |
| + * offset by 0x400000000000000a, followed by 4 bytes of big-endian nanoseconds |
| + * (rounded down, see below). |
| + * NOTE(review): the stores go through __be64/__be32 casts of a u8 pointer, so |
| + * this relies on unaligned stores being acceptable here -- confirm. |
| + */ |
| +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) |
| +{ |
| + struct timespec64 now; |
| + |
| + ktime_get_real_ts64(&now); |
| + |
| + /* In order to prevent some sort of infoleak from precise timers, we |
| + * round down the nanoseconds part to the closest rounded-down power of |
| + * two to the maximum initiations per second allowed anyway by the |
| + * implementation. |
| + */ |
| + now.tv_nsec = ALIGN_DOWN(now.tv_nsec, |
| + rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); |
| + |
| + /* https://cr.yp.to/libtai/tai64.html */ |
| + *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); |
| + *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); |
| +} |
| + |
| +/* Build the handshake initiation message (-> e, es, s, ss, {t}) into @dst. |
| + * |
| + * Works directly on @handshake's hash and chaining key while holding the |
| + * static identity lock for reading and the handshake lock for writing. On |
| + * success the handshake is inserted into the index hashtable, the returned |
| + * index is stored in dst->sender_index, and the state becomes |
| + * HANDSHAKE_CREATED_INITIATION. Returns false if the device has no identity |
| + * or a Curve25519 operation fails. The temporary symmetric key is wiped |
| + * before returning. |
| + */ |
| +bool |
| +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, |
| + struct noise_handshake *handshake) |
| +{ |
| + u8 timestamp[NOISE_TIMESTAMP_LEN]; |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| + bool ret = false; |
| + |
| + /* We need to wait for crng _before_ taking any locks, since |
| + * curve25519_generate_secret uses get_random_bytes_wait. |
| + */ |
| + wait_for_random_bytes(); |
| + |
| + down_read(&handshake->static_identity->lock); |
| + down_write(&handshake->lock); |
| + |
| + if (unlikely(!handshake->static_identity->has_identity)) |
| + goto out; |
| + |
| + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); |
| + |
| + handshake_init(handshake->chaining_key, handshake->hash, |
| + handshake->remote_static); |
| + |
| + /* e */ |
| + curve25519_generate_secret(handshake->ephemeral_private); |
| + if (!curve25519_generate_public(dst->unencrypted_ephemeral, |
| + handshake->ephemeral_private)) |
| + goto out; |
| + message_ephemeral(dst->unencrypted_ephemeral, |
| + dst->unencrypted_ephemeral, handshake->chaining_key, |
| + handshake->hash); |
| + |
| + /* es */ |
| + if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, |
| + handshake->remote_static)) |
| + goto out; |
| + |
| + /* s */ |
| + message_encrypt(dst->encrypted_static, |
| + handshake->static_identity->static_public, |
| + NOISE_PUBLIC_KEY_LEN, key, handshake->hash); |
| + |
| + /* ss */ |
| + kdf(handshake->chaining_key, key, NULL, |
| + handshake->precomputed_static_static, NOISE_HASH_LEN, |
| + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, |
| + handshake->chaining_key); |
| + |
| + /* {t} */ |
| + tai64n_now(timestamp); |
| + message_encrypt(dst->encrypted_timestamp, timestamp, |
| + NOISE_TIMESTAMP_LEN, key, handshake->hash); |
| + |
| + dst->sender_index = wg_index_hashtable_insert( |
| + handshake->entry.peer->device->index_hashtable, |
| + &handshake->entry); |
| + |
| + handshake->state = HANDSHAKE_CREATED_INITIATION; |
| + ret = true; |
| + |
| +out: |
| + up_write(&handshake->lock); |
| + up_read(&handshake->static_identity->lock); |
| + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); |
| + return ret; |
| +} |
| + |
| +/* Process a received handshake initiation (-> e, es, s, ss, {t}). |
| + * |
| + * All cryptographic work is done on stack copies of the hash and chaining |
| + * key. The peer's handshake is only updated after both decryptions succeed |
| + * and the replay and flood checks pass; the state then becomes |
| + * HANDSHAKE_CONSUMED_INITIATION. Returns the peer identified by the decrypted |
| + * static public key on success, or NULL on any failure. Stack copies of the |
| + * key, hash and chaining key are wiped before returning. |
| + */ |
| +struct wg_peer * |
| +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, |
| + struct wg_device *wg) |
| +{ |
| + struct wg_peer *peer = NULL, *ret_peer = NULL; |
| + struct noise_handshake *handshake; |
| + bool replay_attack, flood_attack; |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| + u8 chaining_key[NOISE_HASH_LEN]; |
| + u8 hash[NOISE_HASH_LEN]; |
| + u8 s[NOISE_PUBLIC_KEY_LEN]; |
| + u8 e[NOISE_PUBLIC_KEY_LEN]; |
| + u8 t[NOISE_TIMESTAMP_LEN]; |
| + u64 initiation_consumption; |
| + |
| + down_read(&wg->static_identity.lock); |
| + if (unlikely(!wg->static_identity.has_identity)) |
| + goto out; |
| + |
| + handshake_init(chaining_key, hash, wg->static_identity.static_public); |
| + |
| + /* e */ |
| + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); |
| + |
| + /* es */ |
| + if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) |
| + goto out; |
| + |
| + /* s */ |
| + if (!message_decrypt(s, src->encrypted_static, |
| + sizeof(src->encrypted_static), key, hash)) |
| + goto out; |
| + |
| + /* Lookup which peer we're actually talking to */ |
| + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); |
| + if (!peer) |
| + goto out; |
| + handshake = &peer->handshake; |
| + |
| + /* ss */ |
| + kdf(chaining_key, key, NULL, handshake->precomputed_static_static, |
| + NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, |
| + chaining_key); |
| + |
| + /* {t} */ |
| + if (!message_decrypt(t, src->encrypted_timestamp, |
| + sizeof(src->encrypted_timestamp), key, hash)) |
| + goto out; |
| + |
| + /* Replay: the timestamp must be strictly greater than the latest one |
| + * accepted. Flood: at least NSEC_PER_SEC / INITIATIONS_PER_SECOND must |
| + * have passed since the last consumed initiation. |
| + */ |
| + down_read(&handshake->lock); |
| + replay_attack = memcmp(t, handshake->latest_timestamp, |
| + NOISE_TIMESTAMP_LEN) <= 0; |
| + flood_attack = (s64)handshake->last_initiation_consumption + |
| + NSEC_PER_SEC / INITIATIONS_PER_SECOND > |
| + (s64)ktime_get_coarse_boottime_ns(); |
| + up_read(&handshake->lock); |
| + if (replay_attack || flood_attack) |
| + goto out; |
| + |
| + /* Success! Copy everything to peer */ |
| + down_write(&handshake->lock); |
| + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); |
| + /* The read lock was dropped above, so re-compare before storing: the |
| + * stored timestamp and consumption time only ever move forward. |
| + */ |
| + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) |
| + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); |
| + memcpy(handshake->hash, hash, NOISE_HASH_LEN); |
| + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); |
| + handshake->remote_index = src->sender_index; |
| + if ((s64)(handshake->last_initiation_consumption - |
| + (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) |
| + handshake->last_initiation_consumption = initiation_consumption; |
| + handshake->state = HANDSHAKE_CONSUMED_INITIATION; |
| + up_write(&handshake->lock); |
| + ret_peer = peer; |
| + |
| +out: |
| + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); |
| + memzero_explicit(hash, NOISE_HASH_LEN); |
| + memzero_explicit(chaining_key, NOISE_HASH_LEN); |
| + up_read(&wg->static_identity.lock); |
| + /* NOTE(review): peer can still be NULL here; presumably wg_peer_put() |
| + * tolerates that -- confirm in peer.c. |
| + */ |
| + if (!ret_peer) |
| + wg_peer_put(peer); |
| + return ret_peer; |
| +} |
| + |
| +/* Build the handshake response message (<- e, ee, se, psk, {}) into @dst. |
| + * |
| + * Only proceeds when the handshake state is HANDSHAKE_CONSUMED_INITIATION. On |
| + * success the handshake is inserted into the index hashtable, the returned |
| + * index is stored in dst->sender_index, and the state becomes |
| + * HANDSHAKE_CREATED_RESPONSE. Returns false on a wrong state or when a |
| + * Curve25519 operation fails. |
| + * |
| + * The steps operate in place on handshake->hash and handshake->chaining_key, |
| + * so a failure part-way through leaves those partially advanced while the |
| + * state is unchanged. |
| + */ |
| +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, |
| + struct noise_handshake *handshake) |
| +{ |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| + bool ret = false; |
| + |
| + /* We need to wait for crng _before_ taking any locks, since |
| + * curve25519_generate_secret uses get_random_bytes_wait. |
| + */ |
| + wait_for_random_bytes(); |
| + |
| + down_read(&handshake->static_identity->lock); |
| + down_write(&handshake->lock); |
| + |
| + if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) |
| + goto out; |
| + |
| + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); |
| + dst->receiver_index = handshake->remote_index; |
| + |
| + /* e */ |
| + curve25519_generate_secret(handshake->ephemeral_private); |
| + if (!curve25519_generate_public(dst->unencrypted_ephemeral, |
| + handshake->ephemeral_private)) |
| + goto out; |
| + message_ephemeral(dst->unencrypted_ephemeral, |
| + dst->unencrypted_ephemeral, handshake->chaining_key, |
| + handshake->hash); |
| + |
| + /* ee */ |
| + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, |
| + handshake->remote_ephemeral)) |
| + goto out; |
| + |
| + /* se */ |
| + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, |
| + handshake->remote_static)) |
| + goto out; |
| + |
| + /* psk */ |
| + mix_psk(handshake->chaining_key, handshake->hash, key, |
| + handshake->preshared_key); |
| + |
| + /* {} */ |
| + message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); |
| + |
| + dst->sender_index = wg_index_hashtable_insert( |
| + handshake->entry.peer->device->index_hashtable, |
| + &handshake->entry); |
| + |
| + handshake->state = HANDSHAKE_CREATED_RESPONSE; |
| + ret = true; |
| + |
| +out: |
| + up_write(&handshake->lock); |
| + up_read(&handshake->static_identity->lock); |
| + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); |
| + return ret; |
| +} |
| + |
| +/* Process a received handshake response (<- e, ee, se, psk, {}) addressed to |
| + * one of our pending initiations. |
| + * |
| + * The handshake is looked up by src->receiver_index. Its state, hash, |
| + * chaining key, ephemeral private key and preshared key are snapshotted under |
| + * the handshake read lock; all cryptographic work then runs on those stack |
| + * copies with the lock released. The result is committed under the write lock |
| + * only if the state has not changed in the meantime, and the state becomes |
| + * HANDSHAKE_CONSUMED_RESPONSE. |
| + * |
| + * Returns the peer found by the lookup on success, or NULL on failure, in |
| + * which case the peer obtained from the lookup is released with |
| + * wg_peer_put(). Every stack copy of secret material is wiped before |
| + * returning. |
| + */ |
| +struct wg_peer * |
| +wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| + struct wg_device *wg) |
| +{ |
| + enum noise_handshake_state state = HANDSHAKE_ZEROED; |
| + struct wg_peer *peer = NULL, *ret_peer = NULL; |
| + struct noise_handshake *handshake; |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| + u8 hash[NOISE_HASH_LEN]; |
| + u8 chaining_key[NOISE_HASH_LEN]; |
| + u8 e[NOISE_PUBLIC_KEY_LEN]; |
| + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; |
| + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + |
| + down_read(&wg->static_identity.lock); |
| + |
| + if (unlikely(!wg->static_identity.has_identity)) |
| + goto out; |
| + |
| + handshake = (struct noise_handshake *)wg_index_hashtable_lookup( |
| + wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, |
| + src->receiver_index, &peer); |
| + if (unlikely(!handshake)) |
| + goto out; |
| + |
| + /* Snapshot everything that handshake->lock protects, including the |
| + * preshared key, so that nothing is read from the handshake after the |
| + * lock has been dropped. |
| + */ |
| + down_read(&handshake->lock); |
| + state = handshake->state; |
| + memcpy(hash, handshake->hash, NOISE_HASH_LEN); |
| + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); |
| + memcpy(ephemeral_private, handshake->ephemeral_private, |
| + NOISE_PUBLIC_KEY_LEN); |
| + memcpy(preshared_key, handshake->preshared_key, |
| + NOISE_SYMMETRIC_KEY_LEN); |
| + up_read(&handshake->lock); |
| + |
| + if (state != HANDSHAKE_CREATED_INITIATION) |
| + goto fail; |
| + |
| + /* e */ |
| + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); |
| + |
| + /* ee */ |
| + if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) |
| + goto fail; |
| + |
| + /* se */ |
| + if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) |
| + goto fail; |
| + |
| + /* psk */ |
| + mix_psk(chaining_key, hash, key, preshared_key); |
| + |
| + /* {} */ |
| + if (!message_decrypt(NULL, src->encrypted_nothing, |
| + sizeof(src->encrypted_nothing), key, hash)) |
| + goto fail; |
| + |
| + /* Success! Copy everything to peer */ |
| + down_write(&handshake->lock); |
| + /* It's important to check that the state is still the same, while we |
| + * have an exclusive lock. |
| + */ |
| + if (handshake->state != state) { |
| + up_write(&handshake->lock); |
| + goto fail; |
| + } |
| + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); |
| + memcpy(handshake->hash, hash, NOISE_HASH_LEN); |
| + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); |
| + handshake->remote_index = src->sender_index; |
| + handshake->state = HANDSHAKE_CONSUMED_RESPONSE; |
| + up_write(&handshake->lock); |
| + ret_peer = peer; |
| + goto out; |
| + |
| +fail: |
| + wg_peer_put(peer); |
| +out: |
| + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); |
| + memzero_explicit(hash, NOISE_HASH_LEN); |
| + memzero_explicit(chaining_key, NOISE_HASH_LEN); |
| + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); |
| + memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); |
| + up_read(&wg->static_identity.lock); |
| + return ret_peer; |
| +} |
| + |
| +/* Turn a completed handshake into a transport keypair and install it in |
| + * @keypairs. |
| + * |
| + * Requires the state to be HANDSHAKE_CREATED_RESPONSE (we are the responder) |
| + * or HANDSHAKE_CONSUMED_RESPONSE (we are the initiator). The initiator gets |
| + * the first derived key for sending and the second for receiving; the |
| + * responder gets them the other way round. The handshake is zeroed once the |
| + * keys are derived. Returns false on a wrong state, on allocation failure, |
| + * when the peer is marked dead, or when wg_index_hashtable_replace() fails. |
| + */ |
| +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, |
| + struct noise_keypairs *keypairs) |
| +{ |
| + struct noise_keypair *new_keypair; |
| + bool ret = false; |
| + |
| + down_write(&handshake->lock); |
| + if (handshake->state != HANDSHAKE_CREATED_RESPONSE && |
| + handshake->state != HANDSHAKE_CONSUMED_RESPONSE) |
| + goto out; |
| + |
| + new_keypair = keypair_create(handshake->entry.peer); |
| + if (!new_keypair) |
| + goto out; |
| + new_keypair->i_am_the_initiator = handshake->state == |
| + HANDSHAKE_CONSUMED_RESPONSE; |
| + new_keypair->remote_index = handshake->remote_index; |
| + |
| + if (new_keypair->i_am_the_initiator) |
| + derive_keys(&new_keypair->sending, &new_keypair->receiving, |
| + handshake->chaining_key); |
| + else |
| + derive_keys(&new_keypair->receiving, &new_keypair->sending, |
| + handshake->chaining_key); |
| + |
| + handshake_zero(handshake); |
| + rcu_read_lock_bh(); |
| + if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, |
| + handshake)->is_dead))) { |
| + add_new_keypair(keypairs, new_keypair); |
| + net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", |
| + handshake->entry.peer->device->dev->name, |
| + new_keypair->internal_id, |
| + handshake->entry.peer->internal_id); |
| + /* Swap the handshake's index hashtable entry for the keypair's. */ |
| + ret = wg_index_hashtable_replace( |
| + handshake->entry.peer->device->index_hashtable, |
| + &handshake->entry, &new_keypair->entry); |
| + } else { |
| + /* Never published, so it can be wiped and freed directly. */ |
| + kzfree(new_keypair); |
| + } |
| + rcu_read_unlock_bh(); |
| + |
| +out: |
| + up_write(&handshake->lock); |
| + return ret; |
| +} |
| diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h |
| new file mode 100644 |
| index 000000000000..138a07bb817c |
| |
| |
| @@ -0,0 +1,137 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| +#ifndef _WG_NOISE_H |
| +#define _WG_NOISE_H |
| + |
| +#include "messages.h" |
| +#include "peerlookup.h" |
| + |
| +#include <linux/types.h> |
| +#include <linux/spinlock.h> |
| +#include <linux/atomic.h> |
| +#include <linux/rwsem.h> |
| +#include <linux/mutex.h> |
| +#include <linux/kref.h> |
| + |
| +/* Packet counter for one symmetric key, with two views over the same storage: |
| + * "receive" holds a plain counter, a backtrack bitmap of COUNTER_BITS_TOTAL |
| + * bits and a spinlock; "counter" is a bare atomic64. |
| + * NOTE(review): presumably the receive view serves the receiving key and the |
| + * atomic view the sending key -- confirm in the send/receive paths. |
| + */ |
| +union noise_counter { |
| + struct { |
| + u64 counter; |
| + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; |
| + spinlock_t lock; |
| + } receive; |
| + atomic64_t counter; |
| +}; |
| + |
| +/* One direction of a session: the key bytes, its packet counter, the coarse |
| + * boottime (ns) at which it was initialized, and a validity flag. |
| + */ |
| +struct noise_symmetric_key { |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| + union noise_counter counter; |
| + u64 birthdate; |
| + bool is_valid; |
| +}; |
| + |
| +/* A pair of sending/receiving keys produced by one handshake. It is |
| + * reference counted through refcount, freed after an RCU grace period through |
| + * rcu, and reachable from the device's index hashtable through entry. |
| + */ |
| +struct noise_keypair { |
| + struct index_hashtable_entry entry; |
| + struct noise_symmetric_key sending; |
| + struct noise_symmetric_key receiving; |
| + __le32 remote_index; |
| + bool i_am_the_initiator; |
| + struct kref refcount; |
| + struct rcu_head rcu; |
| + u64 internal_id; |
| +}; |
| + |
| +/* The three keypair slots of a peer. The pointers are RCU-protected; writers |
| + * update them while holding keypair_update_lock. |
| + */ |
| +struct noise_keypairs { |
| + struct noise_keypair __rcu *current_keypair; |
| + struct noise_keypair __rcu *previous_keypair; |
| + struct noise_keypair __rcu *next_keypair; |
| + spinlock_t keypair_update_lock; |
| +}; |
| + |
| +/* The device's long-term Curve25519 key pair. has_identity is set by |
| + * wg_noise_set_static_identity_private_key() according to whether the public |
| + * key could be derived. |
| + */ |
| +struct noise_static_identity { |
| + u8 static_public[NOISE_PUBLIC_KEY_LEN]; |
| + u8 static_private[NOISE_PUBLIC_KEY_LEN]; |
| + struct rw_semaphore lock; |
| + bool has_identity; |
| +}; |
| + |
| +/* Progress of a handshake. The initiator goes ZEROED -> CREATED_INITIATION -> |
| + * CONSUMED_RESPONSE; the responder goes CONSUMED_INITIATION -> |
| + * CREATED_RESPONSE. Beginning a session or clearing the handshake returns it |
| + * to ZEROED. |
| + */ |
| +enum noise_handshake_state { |
| + HANDSHAKE_ZEROED, |
| + HANDSHAKE_CREATED_INITIATION, |
| + HANDSHAKE_CONSUMED_INITIATION, |
| + HANDSHAKE_CREATED_RESPONSE, |
| + HANDSHAKE_CONSUMED_RESPONSE |
| +}; |
| + |
| +/* Per-peer handshake state: the index hashtable entry, the state machine |
| + * position, the key material of the exchange in progress, the running |
| + * transcript hash and chaining key, and the anti-replay bookkeeping |
| + * (latest_timestamp, last_initiation_consumption). |
| + */ |
| +struct noise_handshake { |
| + struct index_hashtable_entry entry; |
| + |
| + enum noise_handshake_state state; |
| + u64 last_initiation_consumption; |
| + |
| + struct noise_static_identity *static_identity; |
| + |
| + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; |
| + u8 remote_static[NOISE_PUBLIC_KEY_LEN]; |
| + u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; |
| + u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; |
| + |
| + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; |
| + |
| + u8 hash[NOISE_HASH_LEN]; |
| + u8 chaining_key[NOISE_HASH_LEN]; |
| + |
| + u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; |
| + __le32 remote_index; |
| + |
| + /* Protects all members except the immutable (after noise_handshake_ |
| + * init): remote_static, precomputed_static_static, static_identity. |
| + */ |
| + struct rw_semaphore lock; |
| +}; |
| + |
| +/* Only pointers to the device are needed here, so a forward declaration is |
| + * enough. |
| + */ |
| +struct wg_device; |
| + |
| +/* Module-level setup and per-peer handshake lifetime. */ |
| +void wg_noise_init(void); |
| +bool wg_noise_handshake_init(struct noise_handshake *handshake, |
| + struct noise_static_identity *static_identity, |
| + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| + struct wg_peer *peer); |
| +void wg_noise_handshake_clear(struct noise_handshake *handshake); |
| +/* Back-date *@handshake_ns to REKEY_TIMEOUT + 1 seconds before the current |
| + * coarse boottime. |
| + * NOTE(review): presumably so that a rate limit keyed on this timestamp does |
| + * not hold back the next handshake -- confirm against the sender. |
| + */ |
| +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) |
| +{ |
| + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - |
| + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); |
| +} |
| + |
| +/* Keypair reference counting and slot management. */ |
| +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); |
| +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); |
| +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); |
| +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, |
| + struct noise_keypair *received_keypair); |
| +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); |
| + |
| +/* Static identity management; see the locking notes at the definitions. */ |
| +void wg_noise_set_static_identity_private_key( |
| + struct noise_static_identity *static_identity, |
| + const u8 private_key[NOISE_PUBLIC_KEY_LEN]); |
| +bool wg_noise_precompute_static_static(struct wg_peer *peer); |
| + |
| +/* Handshake messages: the create functions return false on failure, the |
| + * consume functions return the peer on success and NULL on failure. |
| + */ |
| +bool |
| +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, |
| + struct noise_handshake *handshake); |
| +struct wg_peer * |
| +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, |
| + struct wg_device *wg); |
| + |
| +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, |
| + struct noise_handshake *handshake); |
| +struct wg_peer * |
| +wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| + struct wg_device *wg); |
| + |
| +/* Converts a completed handshake into a keypair installed in @keypairs. */ |
| +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, |
| + struct noise_keypairs *keypairs); |
| + |
| +#endif /* _WG_NOISE_H */ |
| diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c |
| new file mode 100644 |
| index 000000000000..071eedf33f5a |
| |
| |
| @@ -0,0 +1,240 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "peer.h" |
| +#include "device.h" |
| +#include "queueing.h" |
| +#include "timers.h" |
| +#include "peerlookup.h" |
| +#include "noise.h" |
| + |
| +#include <linux/kref.h> |
| +#include <linux/lockdep.h> |
| +#include <linux/rcupdate.h> |
| +#include <linux/list.h> |
| + |
| +static atomic64_t peer_counter = ATOMIC64_INIT(0); |
| + |
| +/* Allocate and fully initialize a new peer for @wg, identified by @public_key |
| + * and keyed with @preshared_key, then publish it on the device's peer list |
| + * and public-key hashtable. |
| + * |
| + * The caller must hold wg->device_update_lock. Returns the new peer on |
| + * success, ERR_PTR(-ENOMEM) if the device already has MAX_PEERS_PER_DEVICE |
| + * peers or an allocation fails, or ERR_PTR(-EKEYREJECTED) if |
| + * wg_noise_handshake_init() rejects the key material. |
| + */ |
| +struct wg_peer *wg_peer_create(struct wg_device *wg, |
| + const u8 public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) |
| +{ |
| + struct wg_peer *peer; |
| + int ret = -ENOMEM; |
| + |
| + lockdep_assert_held(&wg->device_update_lock); |
| + |
| + /* Hitting the peer limit is reported as -ENOMEM, exactly like an |
| + * allocation failure. |
| + */ |
| + if (wg->num_peers >= MAX_PEERS_PER_DEVICE) |
| + return ERR_PTR(ret); |
| + |
| + peer = kzalloc(sizeof(*peer), GFP_KERNEL); |
| + if (unlikely(!peer)) |
| + return ERR_PTR(ret); |
| + peer->device = wg; |
| + |
| + if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, |
| + public_key, preshared_key, peer)) { |
| + ret = -EKEYREJECTED; |
| + goto err_1; |
| + } |
| + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) |
| + goto err_1; |
| + if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, |
| + MAX_QUEUED_PACKETS)) |
| + goto err_2; |
| + if (wg_packet_queue_init(&peer->rx_queue, NULL, false, |
| + MAX_QUEUED_PACKETS)) |
| + goto err_3; |
| + |
| + /* Nothing below is checked for failure, so no unwinding is needed |
| + * past this point. |
| + */ |
| + peer->internal_id = atomic64_inc_return(&peer_counter); |
| + /* nr_cpumask_bits is the "no CPU chosen yet" sentinel that |
| + * wg_cpumask_choose_online() tests for. |
| + */ |
| + peer->serial_work_cpu = nr_cpumask_bits; |
| + wg_cookie_init(&peer->latest_cookie); |
| + wg_timers_init(peer); |
| + wg_cookie_checker_precompute_peer_keys(peer); |
| + spin_lock_init(&peer->keypairs.keypair_update_lock); |
| + INIT_WORK(&peer->transmit_handshake_work, |
| + wg_packet_handshake_send_worker); |
| + rwlock_init(&peer->endpoint_lock); |
| + /* This initial reference is the one peer_remove_after_dead() drops. */ |
| + kref_init(&peer->refcount); |
| + skb_queue_head_init(&peer->staged_packet_queue); |
| + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); |
| + set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); |
| + netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, |
| + NAPI_POLL_WEIGHT); |
| + napi_enable(&peer->napi); |
| + /* Publish the peer: from here on it is reachable through the device's |
| + * peer list and public-key hashtable. |
| + */ |
| + list_add_tail(&peer->peer_list, &wg->peer_list); |
| + INIT_LIST_HEAD(&peer->allowedips_list); |
| + wg_pubkey_hashtable_add(wg->peer_hashtable, peer); |
| + ++wg->num_peers; |
| + pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); |
| + return peer; |
| + |
| + /* Unwind in reverse order of acquisition. */ |
| +err_3: |
| + wg_packet_queue_free(&peer->tx_queue, false); |
| +err_2: |
| + dst_cache_destroy(&peer->endpoint_cache); |
| +err_1: |
| + kfree(peer); |
| + return ERR_PTR(ret); |
| +} |
| + |
| +/* Try to take a reference on @peer from inside an RCU-bh read-side section. |
| + * Returns @peer with its refcount raised, or NULL if @peer is NULL or its |
| + * refcount has already reached zero. |
| + */ |
| +struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) |
| +{ |
| + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), |
| + "Taking peer reference without holding the RCU read lock"); |
| + if (unlikely(!peer)) |
| + return NULL; |
| + if (unlikely(!kref_get_unless_zero(&peer->refcount))) |
| + return NULL; |
| + return peer; |
| +} |
| + |
| +/* First half of peer removal: unlink @peer from the device's peer list, the |
| + * allowedips structure and the public-key hashtable, then flag it as dead. |
| + * The second half, peer_remove_after_dead(), is only run after the caller has |
| + * waited for an RCU grace period. |
| + */ |
| +static void peer_make_dead(struct wg_peer *peer) |
| +{ |
| + /* Remove from configuration-time lookup structures. */ |
| + list_del_init(&peer->peer_list); |
| + wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, |
| + &peer->device->device_update_lock); |
| + wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); |
| + |
| + /* Mark as dead, so that we don't allow jumping contexts after. */ |
| + WRITE_ONCE(peer->is_dead, true); |
| + |
| + /* The caller must now synchronize_rcu() for this to take effect. */ |
| +} |
| + |
| +/* Second half of peer removal, run after peer_make_dead() and an RCU grace |
| + * period: destroys keypairs, stops timers, drains the work and NAPI contexts |
| + * that may still reference @peer, decrements the device's peer count and |
| + * drops one reference via wg_peer_put(). |
| + */ |
| +static void peer_remove_after_dead(struct wg_peer *peer) |
| +{ |
| + WARN_ON(!peer->is_dead); |
| + |
| + /* No more keypairs can be created for this peer, since is_dead protects |
| + * add_new_keypair, so we can now destroy existing ones. |
| + */ |
| + wg_noise_keypairs_clear(&peer->keypairs); |
| + |
| + /* Destroy all ongoing timers that were in-flight at the beginning of |
| + * this function. |
| + */ |
| + wg_timers_stop(peer); |
| + |
| + /* The transition between packet encryption/decryption queues isn't |
| + * guarded by is_dead, but each reference's life is strictly bounded by |
| + * two generations: once for parallel crypto and once for serial |
| + * ingestion, so we can simply flush twice, and be sure that we no |
| + * longer have references inside these queues. |
| + */ |
| + |
| + /* a) For encrypt/decrypt. */ |
| + flush_workqueue(peer->device->packet_crypt_wq); |
| + /* b.1) For send (but not receive, since that's napi). */ |
| + flush_workqueue(peer->device->packet_crypt_wq); |
| + /* b.2.1) For receive (but not send, since that's wq). */ |
| + napi_disable(&peer->napi); |
| + /* b.2.2) It's now safe to remove the napi struct, which must be done |
| + * here from process context. |
| + */ |
| + netif_napi_del(&peer->napi); |
| + |
| + /* Ensure any workstructs we own (like transmit_handshake_work or |
| + * clear_peer_work) no longer are in use. |
| + */ |
| + flush_workqueue(peer->device->handshake_send_wq); |
| + |
| + /* After the above flushes, a peer might still be active in a few |
| + * different contexts: 1) from xmit(), before hitting is_dead and |
| + * returning, 2) from wg_packet_consume_data(), before hitting is_dead |
| + * and returning, 3) from wg_receive_handshake_packet() after a point |
| + * where it has processed an incoming handshake packet, but where |
| + * all calls to pass it off to timers fails because of is_dead. We won't |
| + * have new references in (1) eventually, because we're removed from |
| + * allowedips; we won't have new references in (2) eventually, because |
| + * wg_index_hashtable_lookup will always return NULL, since we removed |
| + * all existing keypairs and no more can be created; we won't have new |
| + * references in (3) eventually, because we're removed from the pubkey |
| + * hash table, which allows for a maximum of one handshake response, |
| + * via the still-uncleared index hashtable entry, but not more than one, |
| + * and in wg_cookie_message_consume, the lookup eventually gets a peer |
| + * with a refcount of zero, so no new reference is taken. |
| + */ |
| + |
| + --peer->device->num_peers; |
| + wg_peer_put(peer); |
| +} |
| + |
| +/* We have a separate "remove" function to make sure that all active places |
| + * where a peer is currently operating will eventually come to an end and not |
| + * pass their reference onto another context. |
| + * |
| + * A NULL @peer is a no-op. Otherwise the caller must hold the owning device's |
| + * device_update_lock. This blocks in synchronize_rcu() between the two |
| + * removal phases. |
| + */ |
| +void wg_peer_remove(struct wg_peer *peer) |
| +{ |
| + if (unlikely(!peer)) |
| + return; |
| + lockdep_assert_held(&peer->device->device_update_lock); |
| + |
| + peer_make_dead(peer); |
| + synchronize_rcu(); |
| + peer_remove_after_dead(peer); |
| +} |
| + |
| +/* Remove every peer of @wg. Functionally this is wg_peer_remove() applied to |
| + * each peer, but all peers are marked dead first so that a single |
| + * synchronize_rcu() covers the whole batch. The caller must hold |
| + * wg->device_update_lock. |
| + */ |
| +void wg_peer_remove_all(struct wg_device *wg) |
| +{ |
| + struct wg_peer *peer, *temp; |
| + LIST_HEAD(dead_peers); |
| + |
| + lockdep_assert_held(&wg->device_update_lock); |
| + |
| + /* Avoid having to traverse individually for each one. */ |
| + wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); |
| + |
| + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { |
| + peer_make_dead(peer); |
| + /* peer_make_dead() unlinked peer_list with list_del_init(), so |
| + * the node can be reused to park the peer on the local list. |
| + */ |
| + list_add_tail(&peer->peer_list, &dead_peers); |
| + } |
| + synchronize_rcu(); |
| + list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) |
| + peer_remove_after_dead(peer); |
| +} |
| + |
| +/* RCU callback queued by kref_release(): destroys the peer's endpoint cache |
| + * and both packet queues, then frees the peer itself. |
| + */ |
| +static void rcu_release(struct rcu_head *head) |
| +{ |
| + struct wg_peer *doomed = container_of(head, struct wg_peer, rcu); |
| + |
| + dst_cache_destroy(&doomed->endpoint_cache); |
| + wg_packet_queue_free(&doomed->rx_queue, false); |
| + wg_packet_queue_free(&doomed->tx_queue, false); |
| + |
| + /* kzfree() zeroes the memory before releasing it, which wipes any |
| + * handshake key material or other sensitive data still stored in the |
| + * peer. |
| + */ |
| + kzfree(doomed); |
| +} |
| + |
| +/* kref release callback, invoked through wg_peer_put() when the last |
| + * reference to a peer is dropped. Unhooks the peer from the index hashtable, |
| + * purges its staged packets and defers the actual freeing to rcu_release() |
| + * via call_rcu(). |
| + */ |
| +static void kref_release(struct kref *refcount) |
| +{ |
| + struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); |
| + |
| + pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr); |
| + |
| + /* Remove ourself from dynamic runtime lookup structures, now that the |
| + * last reference is gone. |
| + */ |
| + wg_index_hashtable_remove(peer->device->index_hashtable, |
| + &peer->handshake.entry); |
| + |
| + /* Remove any lingering packets that didn't have a chance to be |
| + * transmitted. |
| + */ |
| + wg_packet_purge_staged_packets(peer); |
| + |
| + /* Free the memory used. */ |
| + call_rcu(&peer->rcu, rcu_release); |
| +} |
| + |
| +/* Drop one reference on @peer; the final put runs kref_release(). A NULL |
| + * @peer is silently ignored. |
| + */ |
| +void wg_peer_put(struct wg_peer *peer) |
| +{ |
| + if (likely(peer)) |
| + kref_put(&peer->refcount, kref_release); |
| +} |
| diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h |
| new file mode 100644 |
| index 000000000000..23af40922997 |
| |
| |
| @@ -0,0 +1,83 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_PEER_H |
| +#define _WG_PEER_H |
| + |
| +#include "device.h" |
| +#include "noise.h" |
| +#include "cookie.h" |
| + |
| +#include <linux/types.h> |
| +#include <linux/netfilter.h> |
| +#include <linux/spinlock.h> |
| +#include <linux/kref.h> |
| +#include <net/dst_cache.h> |
| + |
| +struct wg_device; |
| + |
| +/* A peer's remote socket address together with a source hint. Both parts are |
| + * unions holding an IPv4 and an IPv6 representation in the same storage. |
| + * NOTE(review): presumably addr.sa_family selects which members are valid -- |
| + * confirm against the users of this struct. |
| + */ |
| +struct endpoint { |
| + union { |
| + struct sockaddr addr; |
| + struct sockaddr_in addr4; |
| + struct sockaddr_in6 addr6; |
| + }; |
| + union { |
| + struct { |
| + struct in_addr src4; |
| + /* Essentially the same as addr6->scope_id */ |
| + int src_if4; |
| + }; |
| + struct in6_addr src6; |
| + }; |
| +}; |
| + |
| +/* Per-peer state. Instances are created by wg_peer_create(), reference |
| + * counted through @refcount (wg_peer_get*() / wg_peer_put()) and freed from an |
| + * RCU callback once the last reference is gone. |
| + */ |
| +struct wg_peer { |
| + struct wg_device *device; |
| + struct crypt_queue tx_queue, rx_queue; |
| + struct sk_buff_head staged_packet_queue; |
| + /* CPU cached by wg_cpumask_choose_online(); nr_cpumask_bits until one |
| + * has been chosen. |
| + */ |
| + int serial_work_cpu; |
| + struct noise_keypairs keypairs; |
| + struct endpoint endpoint; |
| + struct dst_cache endpoint_cache; |
| + rwlock_t endpoint_lock; |
| + struct noise_handshake handshake; |
| + atomic64_t last_sent_handshake; |
| + struct work_struct transmit_handshake_work, clear_peer_work; |
| + struct cookie latest_cookie; |
| + /* Linkage into the device's pubkey_hashtable. */ |
| + struct hlist_node pubkey_hash; |
| + u64 rx_bytes, tx_bytes; |
| + struct timer_list timer_retransmit_handshake, timer_send_keepalive; |
| + struct timer_list timer_new_handshake, timer_zero_key_material; |
| + struct timer_list timer_persistent_keepalive; |
| + unsigned int timer_handshake_attempts; |
| + u16 persistent_keepalive_interval; |
| + bool timer_need_another_keepalive; |
| + bool sent_lastminute_handshake; |
| + struct timespec64 walltime_last_handshake; |
| + struct kref refcount; |
| + /* Used by call_rcu() to defer freeing of the peer. */ |
| + struct rcu_head rcu; |
| + /* Linkage into wg_device::peer_list. */ |
| + struct list_head peer_list; |
| + struct list_head allowedips_list; |
| + /* Value taken from a global counter at creation; used in log messages |
| + * and as the seed for CPU selection. |
| + */ |
| + u64 internal_id; |
| + struct napi_struct napi; |
| + /* Set once the peer has been unlinked from the lookup structures |
| + * during removal. |
| + */ |
| + bool is_dead; |
| +}; |
| + |
| +struct wg_peer *wg_peer_create(struct wg_device *wg, |
| + const u8 public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); |
| + |
| +struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); |
| +/* Unconditionally take an additional reference on @peer and return it. Unlike |
| + * wg_peer_get_maybe_zero(), this checks neither for NULL nor for a refcount |
| + * that has already dropped to zero, so @peer must be non-NULL. |
| + */ |
| +static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) |
| +{ |
| + kref_get(&peer->refcount); |
| + return peer; |
| +} |
| +void wg_peer_put(struct wg_peer *peer); |
| +void wg_peer_remove(struct wg_peer *peer); |
| +void wg_peer_remove_all(struct wg_device *wg); |
| + |
| +#endif /* _WG_PEER_H */ |
| diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c |
| new file mode 100644 |
| index 000000000000..e4deb331476b |
| |
| |
| @@ -0,0 +1,221 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "peerlookup.h" |
| +#include "peer.h" |
| +#include "noise.h" |
| + |
| +/* Map @pubkey to its bucket in @table. */ |
| +static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, |
| + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + const u64 bucket_mask = HASH_SIZE(table->hashtable) - 1; |
| + /* siphash, keyed with the table's random key, yields a secure 64-bit |
| + * value whose bits are uniformly distributed, so masking off the low |
| + * bits is enough to select a bucket. |
| + */ |
| + const u64 digest = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); |
| + |
| + return &table->hashtable[digest & bucket_mask]; |
| +} |
| + |
| +/* Allocate a public-key hashtable with empty buckets, a fresh random siphash |
| + * key and an initialized mutex. Returns NULL if the allocation fails. |
| + */ |
| +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) |
| +{ |
| + struct pubkey_hashtable *table; |
| + |
| + table = kvmalloc(sizeof(*table), GFP_KERNEL); |
| + if (table) { |
| + get_random_bytes(&table->key, sizeof(table->key)); |
| + hash_init(table->hashtable); |
| + mutex_init(&table->lock); |
| + } |
| + return table; |
| +} |
| + |
| +/* Insert @peer into @table, keyed by the peer's remote static public key. |
| + * Writers are serialized by table->lock. |
| + */ |
| +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, |
| + struct wg_peer *peer) |
| +{ |
| + struct hlist_head *bucket = |
| + pubkey_bucket(table, peer->handshake.remote_static); |
| + |
| + mutex_lock(&table->lock); |
| + hlist_add_head_rcu(&peer->pubkey_hash, bucket); |
| + mutex_unlock(&table->lock); |
| +} |
| + |
| +/* Unlink @peer from @table under table->lock. hlist_del_init_rcu() leaves the |
| + * node in the unhashed state afterwards. |
| + */ |
| +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, |
| + struct wg_peer *peer) |
| +{ |
| + mutex_lock(&table->lock); |
| + hlist_del_init_rcu(&peer->pubkey_hash); |
| + mutex_unlock(&table->lock); |
| +} |
| + |
| +/* Look up the peer whose remote static public key equals @pubkey. Returns a |
| + * strong reference to that peer, or NULL if there is no such peer or its |
| + * refcount has already dropped to zero. |
| + */ |
| +struct wg_peer * |
| +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, |
| + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + struct wg_peer *candidate, *found = NULL; |
| + |
| + rcu_read_lock_bh(); |
| + hlist_for_each_entry_rcu_bh(candidate, pubkey_bucket(table, pubkey), |
| + pubkey_hash) { |
| + if (memcmp(pubkey, candidate->handshake.remote_static, |
| + NOISE_PUBLIC_KEY_LEN)) |
| + continue; |
| + found = candidate; |
| + break; |
| + } |
| + /* Take the reference while still inside the read-side section. */ |
| + found = wg_peer_get_maybe_zero(found); |
| + rcu_read_unlock_bh(); |
| + return found; |
| +} |
| + |
| +/* Map @index to its bucket in @table. */ |
| +static struct hlist_head *index_bucket(struct index_hashtable *table, |
| + const __le32 index) |
| +{ |
| + /* Indices are random, so every bit is uniformly distributed and a |
| + * plain mask of the raw value is sufficient to pick a bucket. |
| + */ |
| + const u32 slot = (__force u32)index & |
| + (HASH_SIZE(table->hashtable) - 1); |
| + |
| + return &table->hashtable[slot]; |
| +} |
| + |
| +/* Allocate an index hashtable with empty buckets and an initialized spinlock. |
| + * Returns NULL if the allocation fails. |
| + */ |
| +struct index_hashtable *wg_index_hashtable_alloc(void) |
| +{ |
| + struct index_hashtable *table; |
| + |
| + table = kvmalloc(sizeof(*table), GFP_KERNEL); |
| + if (table) { |
| + hash_init(table->hashtable); |
| + spin_lock_init(&table->lock); |
| + } |
| + return table; |
| +} |
| + |
| +/* At the moment, we limit ourselves to 2^20 total peers, which generally might |
| + * amount to 2^20*3 items in this hashtable. The algorithm below works by |
| + * picking a random number and testing it. We can see that these limits mean we |
| + * usually succeed pretty quickly: |
| + * |
| + * >>> def calculation(tries, size): |
| + * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) |
| + * ... |
| + * >>> calculation(1, 2**20 * 3) |
| + * 0.999267578125 |
| + * >>> calculation(2, 2**20 * 3) |
| + * 0.0007318854331970215 |
| + * >>> calculation(3, 2**20 * 3) |
| + * 5.360489012673497e-07 |
| + * >>> calculation(4, 2**20 * 3) |
| + * 3.9261394135792216e-10 |
| + * |
| + * At the moment, we don't do any masking, so this algorithm isn't exactly |
| + * constant time in either the random guessing or in the hash list lookup. We |
| + * could require a minimum of 3 tries, which would successfully mask the |
| + * guessing. This would not, however, help with the growing hash lengths, which |
| + * is another thing to consider moving forward. |
| + */ |
| + |
| +/* Assign @entry a fresh random index that is not yet present in @table, insert |
| + * it, and return that index. If @entry is currently hashed it is unlinked |
| + * first, so the call also serves to re-index an existing entry. |
| + */ |
| +__le32 wg_index_hashtable_insert(struct index_hashtable *table, |
| + struct index_hashtable_entry *entry) |
| +{ |
| + struct index_hashtable_entry *existing_entry; |
| + |
| + /* Drop any previous membership before picking a new index. */ |
| + spin_lock_bh(&table->lock); |
| + hlist_del_init_rcu(&entry->index_hash); |
| + spin_unlock_bh(&table->lock); |
| + |
| + rcu_read_lock_bh(); |
| + |
| +search_unused_slot: |
| + /* First we try to find an unused slot, randomly, while unlocked. */ |
| + entry->index = (__force __le32)get_random_u32(); |
| + hlist_for_each_entry_rcu_bh(existing_entry, |
| + index_bucket(table, entry->index), |
| + index_hash) { |
| + if (existing_entry->index == entry->index) |
| + /* If it's already in use, we continue searching. */ |
| + goto search_unused_slot; |
| + } |
| + |
| + /* Once we've found an unused slot, we lock it, and then double-check |
| + * that nobody else stole it from us. |
| + */ |
| + spin_lock_bh(&table->lock); |
| + hlist_for_each_entry_rcu_bh(existing_entry, |
| + index_bucket(table, entry->index), |
| + index_hash) { |
| + if (existing_entry->index == entry->index) { |
| + spin_unlock_bh(&table->lock); |
| + /* If it was stolen, we start over. */ |
| + goto search_unused_slot; |
| + } |
| + } |
| + /* Otherwise, we know we have it exclusively (since we're locked), |
| + * so we insert. |
| + */ |
| + hlist_add_head_rcu(&entry->index_hash, |
| + index_bucket(table, entry->index)); |
| + spin_unlock_bh(&table->lock); |
| + |
| + rcu_read_unlock_bh(); |
| + |
| + return entry->index; |
| +} |
| + |
| +/* Make @new take over the index and the hashtable slot currently held by |
| + * @old. Returns true once @new has replaced @old, or false -- leaving @new |
| + * untouched -- if @old is not hashed at the moment. |
| + */ |
| +bool wg_index_hashtable_replace(struct index_hashtable *table, |
| + struct index_hashtable_entry *old, |
| + struct index_hashtable_entry *new) |
| +{ |
| + bool ret; |
| + |
| + /* The "is old still hashed?" test has to be made under the lock. If it |
| + * were made before locking, a concurrent wg_index_hashtable_remove() |
| + * could unhash old between the test and hlist_replace_rcu(), and new |
| + * would then be spliced in through a stale, already-unlinked node. |
| + */ |
| + spin_lock_bh(&table->lock); |
| + ret = !hlist_unhashed(&old->index_hash); |
| + if (unlikely(!ret)) |
| + goto out; |
| + |
| + new->index = old->index; |
| + hlist_replace_rcu(&old->index_hash, &new->index_hash); |
| + |
| + /* Calling init here NULLs out index_hash, and in fact after this |
| + * function returns, it's theoretically possible for this to get |
| + * reinserted elsewhere. That means the RCU lookup below might either |
| + * terminate early or jump between buckets, in which case the packet |
| + * simply gets dropped, which isn't terrible. |
| + */ |
| + INIT_HLIST_NODE(&old->index_hash); |
| +out: |
| + spin_unlock_bh(&table->lock); |
| + return ret; |
| +} |
| + |
| +/* Unlink @entry from @table under table->lock. hlist_del_init_rcu() leaves the |
| + * node in the unhashed state afterwards. |
| + */ |
| +void wg_index_hashtable_remove(struct index_hashtable *table, |
| + struct index_hashtable_entry *entry) |
| +{ |
| + spin_lock_bh(&table->lock); |
| + hlist_del_init_rcu(&entry->index_hash); |
| + spin_unlock_bh(&table->lock); |
| +} |
| + |
| +/* Returns a strong reference to an entry->peer. |
| + * |
| + * Finds the entry in @table whose index equals @index and whose type is |
| + * included in @type_mask. On success the entry is returned and *@peer is set |
| + * to its peer, with a reference taken. Returns NULL, leaving *@peer untouched, |
| + * if no entry has that index, if the entry's type is not in @type_mask, or if |
| + * the peer's refcount has already dropped to zero. |
| + */ |
| +struct index_hashtable_entry * |
| +wg_index_hashtable_lookup(struct index_hashtable *table, |
| + const enum index_hashtable_type type_mask, |
| + const __le32 index, struct wg_peer **peer) |
| +{ |
| + struct index_hashtable_entry *iter_entry, *entry = NULL; |
| + |
| + rcu_read_lock_bh(); |
| + hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), |
| + index_hash) { |
| + if (iter_entry->index == index) { |
| + /* An index match of the wrong type still ends the |
| + * search; entry simply stays NULL. |
| + */ |
| + if (likely(iter_entry->type & type_mask)) |
| + entry = iter_entry; |
| + break; |
| + } |
| + } |
| + if (likely(entry)) { |
| + /* Note that this stores the result back into the shared entry: |
| + * if the reference cannot be taken, entry->peer becomes NULL. |
| + */ |
| + entry->peer = wg_peer_get_maybe_zero(entry->peer); |
| + if (likely(entry->peer)) |
| + *peer = entry->peer; |
| + else |
| + entry = NULL; |
| + } |
| + rcu_read_unlock_bh(); |
| + return entry; |
| +} |
| diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h |
| new file mode 100644 |
| index 000000000000..ced811797680 |
| |
| |
| @@ -0,0 +1,64 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_PEERLOOKUP_H |
| +#define _WG_PEERLOOKUP_H |
| + |
| +#include "messages.h" |
| + |
| +#include <linux/hashtable.h> |
| +#include <linux/mutex.h> |
| +#include <linux/siphash.h> |
| + |
| +struct wg_peer; |
| + |
| +/* Hashtable of peers keyed by their public key: 2^11 buckets, selected by a |
| + * siphash of the key under the per-table random @key. @lock serializes adds |
| + * and removes; lookups walk the buckets under RCU. |
| + */ |
| +struct pubkey_hashtable { |
| + /* TODO: move to rhashtable */ |
| + DECLARE_HASHTABLE(hashtable, 11); |
| + siphash_key_t key; |
| + struct mutex lock; |
| +}; |
| + |
| +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); |
| +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, |
| + struct wg_peer *peer); |
| +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, |
| + struct wg_peer *peer); |
| +struct wg_peer * |
| +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, |
| + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); |
| + |
| +/* Hashtable of index_hashtable_entry objects keyed by their random 32-bit |
| + * index: 2^13 buckets, selected by masking the index. @lock serializes |
| + * inserts, replaces and removes; lookups walk the buckets under RCU. |
| + */ |
| +struct index_hashtable { |
| + /* TODO: move to rhashtable */ |
| + DECLARE_HASHTABLE(hashtable, 13); |
| + spinlock_t lock; |
| +}; |
| + |
| +/* Kind of object an index_hashtable_entry belongs to. The values are distinct |
| + * bits so that they can be OR-ed into the type_mask argument of |
| + * wg_index_hashtable_lookup(). |
| + */ |
| +enum index_hashtable_type { |
| + INDEX_HASHTABLE_HANDSHAKE = 1U << 0, |
| + INDEX_HASHTABLE_KEYPAIR = 1U << 1 |
| +}; |
| + |
| +/* One element of an index_hashtable. */ |
| +struct index_hashtable_entry { |
| + /* Peer this entry resolves to; wg_index_hashtable_lookup() takes a |
| + * reference on it before handing it out. |
| + */ |
| + struct wg_peer *peer; |
| + /* Linkage into index_hashtable::hashtable. */ |
| + struct hlist_node index_hash; |
| + enum index_hashtable_type type; |
| + /* Random index assigned by wg_index_hashtable_insert(), or inherited |
| + * from the replaced entry in wg_index_hashtable_replace(). |
| + */ |
| + __le32 index; |
| +}; |
| + |
| +struct index_hashtable *wg_index_hashtable_alloc(void); |
| +__le32 wg_index_hashtable_insert(struct index_hashtable *table, |
| + struct index_hashtable_entry *entry); |
| +bool wg_index_hashtable_replace(struct index_hashtable *table, |
| + struct index_hashtable_entry *old, |
| + struct index_hashtable_entry *new); |
| +void wg_index_hashtable_remove(struct index_hashtable *table, |
| + struct index_hashtable_entry *entry); |
| +struct index_hashtable_entry * |
| +wg_index_hashtable_lookup(struct index_hashtable *table, |
| + const enum index_hashtable_type type_mask, |
| + const __le32 index, struct wg_peer **peer); |
| + |
| +#endif /* _WG_PEERLOOKUP_H */ |
| diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c |
| new file mode 100644 |
| index 000000000000..5c964fcb994e |
| |
| |
| @@ -0,0 +1,53 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "queueing.h" |
| + |
| +/* Allocate one multicore_worker per possible CPU, each carrying @ptr and a |
| + * work item bound to @function. Returns NULL if the per-CPU allocation fails. |
| + */ |
| +struct multicore_worker __percpu * |
| +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) |
| +{ |
| + struct multicore_worker __percpu *workers; |
| + int cpu; |
| + |
| + workers = alloc_percpu(struct multicore_worker); |
| + if (!workers) |
| + return NULL; |
| + |
| + for_each_possible_cpu(cpu) { |
| + struct multicore_worker *slot = per_cpu_ptr(workers, cpu); |
| + |
| + slot->ptr = ptr; |
| + INIT_WORK(&slot->work, function); |
| + } |
| + return workers; |
| +} |
| + |
| +/* Initialize @queue with a ptr_ring of @len slots. If @function is non-NULL, |
| + * also set up the work that drains the queue: per-CPU workers when @multicore |
| + * is true, otherwise the single work item embedded in the queue. |
| + * |
| + * Returns 0 on success or a negative errno. On failure nothing remains |
| + * allocated, so the caller must not call wg_packet_queue_free(). |
| + */ |
| +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, |
| + bool multicore, unsigned int len) |
| +{ |
| + int ret; |
| + |
| + memset(queue, 0, sizeof(*queue)); |
| + ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); |
| + if (ret) |
| + return ret; |
| + if (function) { |
| + if (multicore) { |
| + queue->worker = wg_packet_percpu_multicore_worker_alloc( |
| + function, queue); |
| + if (!queue->worker) { |
| + /* Undo ptr_ring_init(); otherwise the ring's |
| + * backing array would be leaked. |
| + */ |
| + ptr_ring_cleanup(&queue->ring, NULL); |
| + return -ENOMEM; |
| + } |
| + } else { |
| + INIT_WORK(&queue->work, function); |
| + } |
| + } |
| + return 0; |
| +} |
| + |
| +/* Release the resources of @queue: the per-CPU workers when @multicore is |
| + * true, and the ptr_ring in all cases. The ring is expected to be empty by |
| + * now; a non-empty ring triggers a WARN and its remaining entries are not |
| + * individually destroyed (no destructor is passed to ptr_ring_cleanup()). |
| + * NOTE(review): @multicore presumably has to match the value given to |
| + * wg_packet_queue_init() -- confirm at the call sites. |
| + */ |
| +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) |
| +{ |
| + if (multicore) |
| + free_percpu(queue->worker); |
| + WARN_ON(!__ptr_ring_empty(&queue->ring)); |
| + ptr_ring_cleanup(&queue->ring, NULL); |
| +} |
| diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h |
| new file mode 100644 |
| index 000000000000..58fdd630b246 |
| |
| |
| @@ -0,0 +1,197 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_QUEUEING_H |
| +#define _WG_QUEUEING_H |
| + |
| +#include "peer.h" |
| +#include <linux/types.h> |
| +#include <linux/skbuff.h> |
| +#include <linux/ip.h> |
| +#include <linux/ipv6.h> |
| + |
| +struct wg_device; |
| +struct wg_peer; |
| +struct multicore_worker; |
| +struct crypt_queue; |
| +struct sk_buff; |
| + |
| +/* queueing.c APIs: */ |
| +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, |
| + bool multicore, unsigned int len); |
| +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); |
| +struct multicore_worker __percpu * |
| +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); |
| + |
| +/* receive.c APIs: */ |
| +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); |
| +void wg_packet_handshake_receive_worker(struct work_struct *work); |
| +/* NAPI poll function: */ |
| +int wg_packet_rx_poll(struct napi_struct *napi, int budget); |
| +/* Workqueue worker: */ |
| +void wg_packet_decrypt_worker(struct work_struct *work); |
| + |
| +/* send.c APIs: */ |
| +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, |
| + bool is_retry); |
| +void wg_packet_send_handshake_response(struct wg_peer *peer); |
| +void wg_packet_send_handshake_cookie(struct wg_device *wg, |
| + struct sk_buff *initiating_skb, |
| + __le32 sender_index); |
| +void wg_packet_send_keepalive(struct wg_peer *peer); |
| +void wg_packet_purge_staged_packets(struct wg_peer *peer); |
| +void wg_packet_send_staged_packets(struct wg_peer *peer); |
| +/* Workqueue workers: */ |
| +void wg_packet_handshake_send_worker(struct work_struct *work); |
| +void wg_packet_tx_worker(struct work_struct *work); |
| +void wg_packet_encrypt_worker(struct work_struct *work); |
| + |
| +/* Value stored in packet_cb::state. A packet is enqueued as UNCRYPTED; the |
| + * per-peer consumer waits until the state has changed to CRYPTED or DEAD |
| + * before handling it. |
| + */ |
| +enum packet_state { |
| + PACKET_STATE_UNCRYPTED, |
| + PACKET_STATE_CRYPTED, |
| + PACKET_STATE_DEAD |
| +}; |
| + |
| +/* Per-packet private data, overlaid on skb->cb and accessed via PACKET_CB(). */ |
| +struct packet_cb { |
| + u64 nonce; |
| + struct noise_keypair *keypair; |
| + /* Holds an enum packet_state; written with release semantics by the |
| + * enqueue helpers below. |
| + */ |
| + atomic_t state; |
| + u32 mtu; |
| + u8 ds; |
| +}; |
| + |
| +/* View of skb->cb as a struct packet_cb. */ |
| +#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) |
| +/* Peer owning the packet, reached through the keypair stored in its control |
| + * block; only meaningful once PACKET_CB(skb)->keypair has been set. |
| + */ |
| +#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) |
| + |
| +/* Classify an untrusted packet by its IP version nibble. Returns the matching |
| + * skb->protocol value (ETH_P_IP or ETH_P_IPV6 in network byte order), or 0 if |
| + * the network header lies outside the buffer, is too short for the respective |
| + * fixed header, or carries neither version. |
| + */ |
| +static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) |
| +{ |
| + const unsigned char *hdr = skb_network_header(skb); |
| + const unsigned char *tail = skb_tail_pointer(skb); |
| + |
| + /* Both candidates require the header to start inside the buffer. */ |
| + if (hdr < skb->head) |
| + return 0; |
| + if (hdr + sizeof(struct iphdr) <= tail && ip_hdr(skb)->version == 4) |
| + return htons(ETH_P_IP); |
| + if (hdr + sizeof(struct ipv6hdr) <= tail && |
| + ipv6_hdr(skb)->version == 6) |
| + return htons(ETH_P_IPV6); |
| + return 0; |
| +} |
| + |
| +/* Scrub @skb and reset its metadata: everything between headers_start and |
| + * headers_end is zeroed, a handful of fields outside that range are cleared |
| + * explicitly, and the MAC, network, transport and inner header offsets are |
| + * re-derived from the current data pointer. |
| + */ |
| +static inline void wg_reset_packet(struct sk_buff *skb) |
| +{ |
| + /* Saved here and restored after the memset below. |
| + * NOTE(review): presumably pfmemalloc lives inside the zeroed |
| + * headers_start..headers_end range -- confirm against struct sk_buff. |
| + */ |
| + const int pfmemalloc = skb->pfmemalloc; |
| + |
| + skb_scrub_packet(skb, true); |
| + memset(&skb->headers_start, 0, |
| + offsetof(struct sk_buff, headers_end) - |
| + offsetof(struct sk_buff, headers_start)); |
| + skb->pfmemalloc = pfmemalloc; |
| + skb->queue_mapping = 0; |
| + skb->nohdr = 0; |
| + skb->peeked = 0; |
| + skb->mac_len = 0; |
| + skb->dev = NULL; |
| +#ifdef CONFIG_NET_SCHED |
| + skb->tc_index = 0; |
| +#endif |
| + skb_reset_redirect(skb); |
| + skb->hdr_len = skb_headroom(skb); |
| + skb_reset_mac_header(skb); |
| + skb_reset_network_header(skb); |
| + skb_reset_transport_header(skb); |
| + skb_probe_transport_header(skb); |
| + skb_reset_inner_headers(skb); |
| +} |
| + |
| +/* Return the CPU cached in *stored_cpu if it is still online. Otherwise pick |
| + * the (id % number-of-online-CPUs)-th online CPU, cache it in *stored_cpu and |
| + * return it. A cached value of nr_cpumask_bits means "nothing chosen yet". |
| + */ |
| +static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) |
| +{ |
| + unsigned int cpu = *stored_cpu, hops; |
| + |
| + if (likely(cpu != nr_cpumask_bits && |
| + cpumask_test_cpu(cpu, cpu_online_mask))) |
| + return cpu; |
| + |
| + /* Walk the online mask from its first CPU, advancing once per hop. */ |
| + hops = id % cpumask_weight(cpu_online_mask); |
| + cpu = cpumask_first(cpu_online_mask); |
| + while (hops--) |
| + cpu = cpumask_next(cpu, cpu_online_mask); |
| + *stored_cpu = cpu; |
| + return cpu; |
| +} |
| + |
| +/* Return an online CPU, starting the search at *next, and store in *next the |
| + * CPU that follows the returned one in the online mask (wrapping modulo |
| + * nr_cpumask_bits), so that successive calls rotate through the online CPUs. |
| + * |
| + * This function is racy, in the sense that next is unlocked, so it could return |
| + * the same CPU twice. A race-free version of this would be to instead store an |
| + * atomic sequence number, do an increment-and-return, and then iterate through |
| + * every possible CPU until we get to that index -- choose_cpu. However that's |
| + * a bit slower, and it doesn't seem like this potential race actually |
| + * introduces any performance loss, so we live with it. |
| + */ |
| +static inline int wg_cpumask_next_online(int *next) |
| +{ |
| + int cpu = *next; |
| + |
| + /* *next may name a CPU that is not online (the stored successor is not |
| + * checked when written below); skip forward until one that is. |
| + */ |
| + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) |
| + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; |
| + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; |
| + return cpu; |
| +} |
| + |
| +/* Queue @skb on both @peer_queue and @device_queue, then kick the device |
| + * queue's per-CPU worker on the next online CPU (tracked through @next_cpu) |
| + * using workqueue @wq. |
| + * |
| + * Returns 0 on success, -ENOSPC if the peer ring is full (the skb is on |
| + * neither ring), or -EPIPE if the device ring is full (the skb is by then |
| + * already on the peer ring, still in state UNCRYPTED). |
| + */ |
| +static inline int wg_queue_enqueue_per_device_and_peer( |
| + struct crypt_queue *device_queue, struct crypt_queue *peer_queue, |
| + struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) |
| +{ |
| + int cpu; |
| + |
| + atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); |
| + /* We first queue this up for the peer ingestion, but the consumer |
| + * will wait for the state to change to CRYPTED or DEAD before. |
| + */ |
| + if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) |
| + return -ENOSPC; |
| + /* Then we queue it up in the device queue, which consumes the |
| + * packet as soon as it can. |
| + */ |
| + cpu = wg_cpumask_next_online(next_cpu); |
| + if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) |
| + return -EPIPE; |
| + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); |
| + return 0; |
| +} |
| + |
| +/* Publish @state for @skb and schedule @queue's work item on the owning |
| + * peer's serial-work CPU, using the device's packet_crypt_wq. |
| + */ |
| +static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, |
| + struct sk_buff *skb, |
| + enum packet_state state) |
| +{ |
| + /* Hold our own reference for the duration of this function: once the |
| + * state has been published with atomic_set, the peer can be freed from |
| + * below us. |
| + */ |
| + struct wg_peer *owner = wg_peer_get(PACKET_PEER(skb)); |
| + int cpu; |
| + |
| + atomic_set_release(&PACKET_CB(skb)->state, state); |
| + cpu = wg_cpumask_choose_online(&owner->serial_work_cpu, |
| + owner->internal_id); |
| + queue_work_on(cpu, owner->device->packet_crypt_wq, &queue->work); |
| + wg_peer_put(owner); |
| +} |
| + |
| +/* Publish @state for @skb and schedule the owning peer's NAPI instance, which |
| + * is the receive-side counterpart of wg_queue_enqueue_per_peer(). |
| + */ |
| +static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, |
| + enum packet_state state) |
| +{ |
| + /* We take a reference, because as soon as we call atomic_set, the |
| + * peer can be freed from below us. |
| + */ |
| + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); |
| + |
| + atomic_set_release(&PACKET_CB(skb)->state, state); |
| + napi_schedule(&peer->napi); |
| + wg_peer_put(peer); |
| +} |
| + |
| +#ifdef DEBUG |
| +bool wg_packet_counter_selftest(void); |
| +#endif |
| + |
| +#endif /* _WG_QUEUEING_H */ |
| diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c |
| new file mode 100644 |
| index 000000000000..3fedd1d21f5e |
| |
| |
| @@ -0,0 +1,223 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "ratelimiter.h" |
| +#include <linux/siphash.h> |
| +#include <linux/mm.h> |
| +#include <linux/slab.h> |
| +#include <net/ip.h> |
| + |
| +static struct kmem_cache *entry_cache; |
| +static hsiphash_key_t key; |
| +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); |
| +static DEFINE_MUTEX(init_lock); |
| +static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */ |
| +static atomic_t total_entries = ATOMIC_INIT(0); |
| +static unsigned int max_entries, table_size; |
| +static void wg_ratelimiter_gc_entries(struct work_struct *); |
| +static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); |
| +static struct hlist_head *table_v4; |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +static struct hlist_head *table_v6; |
| +#endif |
| + |
| +/* One tracked source in the ratelimiter tables; allocated from entry_cache. */ |
| +struct ratelimiter_entry { |
| + /* last_time_ns is compared against the coarse boottime clock by the |
| + * garbage collector: entries older than one second are dropped. |
| + * NOTE(review): tokens/ip/net presumably hold the token-bucket balance |
| + * and the (address, netns) identity -- confirm in wg_ratelimiter_allow. |
| + */ |
| + u64 last_time_ns, tokens, ip; |
| + void *net; |
| + spinlock_t lock; |
| + /* Linkage into a table_v4/table_v6 bucket. */ |
| + struct hlist_node hash; |
| + /* Used by call_rcu() to defer freeing (see entry_uninit()). */ |
| + struct rcu_head rcu; |
| +}; |
| + |
| +/* Rate-limiting parameters. PACKET_COST is the number of nanoseconds one |
| + * packet is worth at PACKETS_PER_SECOND, and TOKEN_MAX is the cost of |
| + * PACKETS_BURSTABLE packets. |
| + */ |
| +enum { |
| + PACKETS_PER_SECOND = 20, |
| + PACKETS_BURSTABLE = 5, |
| + PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, |
| + TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE |
| +}; |
| + |
| +/* RCU callback queued by entry_uninit(): returns the entry to entry_cache and |
| + * then decrements the global entry count. |
| + */ |
| +static void entry_free(struct rcu_head *rcu) |
| +{ |
| + struct ratelimiter_entry *stale = |
| + container_of(rcu, struct ratelimiter_entry, rcu); |
| + |
| + kmem_cache_free(entry_cache, stale); |
| + atomic_dec(&total_entries); |
| +} |
| +/* Unlinks the entry from its bucket and defers the free to entry_free() via call_rcu(). */ |
| +static void entry_uninit(struct ratelimiter_entry *entry) |
| +{ |
| +	hlist_del_rcu(&entry->hash); /* The caller in this file holds table_lock. */ |
| +	call_rcu(&entry->rcu, entry_free); |
| +} |
| +/* Removes entries not updated for more than NSEC_PER_SEC, then re-queues gc_work. */ |
| +/* Calling this function with a NULL work uninits all entries and does not re-queue. */ |
| +static void wg_ratelimiter_gc_entries(struct work_struct *work) |
| +{ |
| +	const u64 now = ktime_get_coarse_boottime_ns(); |
| +	struct ratelimiter_entry *entry; |
| +	struct hlist_node *temp; |
| +	unsigned int i; |
| + |
| +	for (i = 0; i < table_size; ++i) { |
| +		spin_lock(&table_lock); /* Taken per bucket index, so it is dropped between buckets. */ |
| +		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { |
| +			if (unlikely(!work) || |
| +			    now - entry->last_time_ns > NSEC_PER_SEC) |
| +				entry_uninit(entry); |
| +		} |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { |
| +			if (unlikely(!work) || |
| +			    now - entry->last_time_ns > NSEC_PER_SEC) |
| +				entry_uninit(entry); |
| +		} |
| +#endif |
| +		spin_unlock(&table_lock); |
| +		if (likely(work)) |
| +			cond_resched(); |
| +	} |
| +	if (likely(work)) |
| +		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); /* Run again in HZ jiffies. */ |
| +} |
| +/* Returns true if this packet's (net, source address) pair is within its rate limit, else false. */ |
| +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) |
| +{ |
| +	/* We only take the bottom half of the net pointer, so that we can hash |
| +	 * 3 words in the end. This way, siphash's len param fits into the final |
| +	 * u32, and we don't incur an extra round. |
| +	 */ |
| +	const u32 net_word = (unsigned long)net; |
| +	struct ratelimiter_entry *entry; |
| +	struct hlist_head *bucket; |
| +	u64 ip; |
| + |
| +	if (skb->protocol == htons(ETH_P_IP)) { |
| +		ip = (u64 __force)ip_hdr(skb)->saddr; |
| +		bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & |
| +				   (table_size - 1)]; |
| +	} |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +	else if (skb->protocol == htons(ETH_P_IPV6)) { |
| +		/* Only use 64 bits, so as to ratelimit the whole /64. */ |
| +		memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); |
| +		bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & |
| +				   (table_size - 1)]; |
| +	} |
| +#endif |
| +	else |
| +		return false; /* Protocol not handled by this build: never allowed. */ |
| +	rcu_read_lock(); |
| +	hlist_for_each_entry_rcu(entry, bucket, hash) { |
| +		if (entry->net == net && entry->ip == ip) { |
| +			u64 now, tokens; |
| +			bool ret; |
| +			/* Quasi-inspired by nft_limit.c, but this is actually a |
| +			 * slightly different algorithm. Namely, we incorporate |
| +			 * the burst as part of the maximum tokens, rather than |
| +			 * as part of the rate. |
| +			 */ |
| +			spin_lock(&entry->lock); |
| +			now = ktime_get_coarse_boottime_ns(); |
| +			tokens = min_t(u64, TOKEN_MAX, |
| +				       entry->tokens + now - |
| +					       entry->last_time_ns); |
| +			entry->last_time_ns = now; |
| +			ret = tokens >= PACKET_COST; |
| +			entry->tokens = ret ? tokens - PACKET_COST : tokens; /* Only an allowed packet spends tokens. */ |
| +			spin_unlock(&entry->lock); |
| +			rcu_read_unlock(); |
| +			return ret; |
| +		} |
| +	} |
| +	rcu_read_unlock(); |
| + |
| +	if (atomic_inc_return(&total_entries) > max_entries) /* Count the new entry first; undone at err_oom. */ |
| +		goto err_oom; |
| + |
| +	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); |
| +	if (unlikely(!entry)) |
| +		goto err_oom; |
| + |
| +	entry->net = net; |
| +	entry->ip = ip; |
| +	INIT_HLIST_NODE(&entry->hash); |
| +	spin_lock_init(&entry->lock); |
| +	entry->last_time_ns = ktime_get_coarse_boottime_ns(); |
| +	entry->tokens = TOKEN_MAX - PACKET_COST; /* A new entry starts full, minus this packet. */ |
| +	spin_lock(&table_lock); |
| +	hlist_add_head_rcu(&entry->hash, bucket); |
| +	spin_unlock(&table_lock); |
| +	return true; |
| + |
| +err_oom: |
| +	atomic_dec(&total_entries); |
| +	return false; /* Table full or allocation failed: the packet is refused. */ |
| +} |
| +/* Refcounted init: only the first caller allocates state and starts GC. Returns 0 or -ENOMEM. */ |
| +int wg_ratelimiter_init(void) |
| +{ |
| +	mutex_lock(&init_lock); |
| +	if (++init_refcnt != 1) /* Already initialized by an earlier caller. */ |
| +		goto out; |
| + |
| +	entry_cache = KMEM_CACHE(ratelimiter_entry, 0); |
| +	if (!entry_cache) |
| +		goto err; |
| + |
| +	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, |
| +	 * but what it shares in common is that it uses a massive hashtable. So, |
| +	 * we borrow their wisdom about good table sizes on different systems |
| +	 * dependent on RAM. This calculation here comes from there. |
| +	 */ |
| +	table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 : |
| +		max_t(unsigned long, 16, roundup_pow_of_two( |
| +			(totalram_pages() << PAGE_SHIFT) / |
| +			(1U << 14) / sizeof(struct hlist_head))); |
| +	max_entries = table_size * 8; |
| + |
| +	table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); /* Overflow-checked n * size. */ |
| +	if (unlikely(!table_v4)) |
| +		goto err_kmemcache; |
| + |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +	table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); /* Overflow-checked n * size. */ |
| +	if (unlikely(!table_v6)) { |
| +		kvfree(table_v4); |
| +		goto err_kmemcache; |
| +	} |
| +#endif |
| + |
| +	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); |
| +	get_random_bytes(&key, sizeof(key)); |
| +out: |
| +	mutex_unlock(&init_lock); |
| +	return 0; |
| + |
| +err_kmemcache: |
| +	kmem_cache_destroy(entry_cache); |
| +err: |
| +	--init_refcnt; /* Undo the increment so a later init retries from scratch. */ |
| +	mutex_unlock(&init_lock); |
| +	return -ENOMEM; |
| +} |
| +/* Drops one init reference; the last one stops GC and frees all entries, tables and the cache. */ |
| +void wg_ratelimiter_uninit(void) |
| +{ |
| +	mutex_lock(&init_lock); |
| +	if (!init_refcnt || --init_refcnt) /* Not initialized, or other users remain. */ |
| +		goto out; |
| + |
| +	cancel_delayed_work_sync(&gc_work); |
| +	wg_ratelimiter_gc_entries(NULL); /* NULL work: uninit every entry, no re-queue. */ |
| +	rcu_barrier(); /* Wait for the queued entry_free() callbacks, which use entry_cache. */ |
| +	kvfree(table_v4); |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +	kvfree(table_v6); |
| +#endif |
| +	kmem_cache_destroy(entry_cache); |
| +out: |
| +	mutex_unlock(&init_lock); |
| +} |
| + |
| +#include "selftest/ratelimiter.c" |
| diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h |
| new file mode 100644 |
| index 000000000000..83067f71ea99 |
| |
| |
| @@ -0,0 +1,19 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_RATELIMITER_H |
| +#define _WG_RATELIMITER_H |
| + |
| +#include <linux/skbuff.h> |
| + |
| +int wg_ratelimiter_init(void); /* Refcounted; returns 0 or -ENOMEM. */ |
| +void wg_ratelimiter_uninit(void); /* Drops one reference taken by wg_ratelimiter_init(). */ |
| +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); /* true if the source is within its limit. */ |
| + |
| +#ifdef DEBUG |
| +bool wg_ratelimiter_selftest(void); |
| +#endif |
| + |
| +#endif /* _WG_RATELIMITER_H */ |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| new file mode 100644 |
| index 000000000000..7e675f541491 |
| |
| |
| @@ -0,0 +1,595 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "queueing.h" |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "timers.h" |
| +#include "messages.h" |
| +#include "cookie.h" |
| +#include "socket.h" |
| + |
| +#include <linux/ip.h> |
| +#include <linux/ipv6.h> |
| +#include <linux/udp.h> |
| +#include <net/ip_tunnels.h> |
| + |
| +/* Adds one packet of len bytes to the device's per-CPU rx stats and to peer->rx_bytes. Must be called with bh disabled. */ |
| +static void update_rx_stats(struct wg_peer *peer, size_t len) |
| +{ |
| +	struct pcpu_sw_netstats *tstats = |
| +		get_cpu_ptr(peer->device->dev->tstats); |
| + |
| +	u64_stats_update_begin(&tstats->syncp); |
| +	++tstats->rx_packets; |
| +	tstats->rx_bytes += len; |
| +	peer->rx_bytes += len; |
| +	u64_stats_update_end(&tstats->syncp); |
| +	put_cpu_ptr(tstats); |
| +} |
| +/* The raw (little-endian) type field of the message header at skb->data. */ |
| +#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) |
| +/* Returns the header size for the message type at skb->data if skb->len fits that type, else 0. */ |
| +static size_t validate_header_len(struct sk_buff *skb) |
| +{ |
| +	if (unlikely(skb->len < sizeof(struct message_header))) |
| +		return 0; |
| +	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && |
| +	    skb->len >= MESSAGE_MINIMUM_LENGTH) /* Data messages are variable length. */ |
| +		return sizeof(struct message_data); |
| +	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && |
| +	    skb->len == sizeof(struct message_handshake_initiation)) /* Handshake messages must match exactly. */ |
| +		return sizeof(struct message_handshake_initiation); |
| +	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && |
| +	    skb->len == sizeof(struct message_handshake_response)) |
| +		return sizeof(struct message_handshake_response); |
| +	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && |
| +	    skb->len == sizeof(struct message_handshake_cookie)) |
| +		return sizeof(struct message_handshake_cookie); |
| +	return 0; /* Unknown type or wrong length. */ |
| +} |
| +/* Validates the outer IP/UDP framing and leaves skb->data at the message, trimmed to the UDP length. Returns 0 or -EINVAL. */ |
| +static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) |
| +{ |
| +	size_t data_offset, data_len, header_len; |
| +	struct udphdr *udp; |
| + |
| +	if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || |
| +		     skb_transport_header(skb) < skb->head || |
| +		     (skb_transport_header(skb) + sizeof(struct udphdr)) > |
| +			     skb_tail_pointer(skb))) |
| +		return -EINVAL; /* Bogus IP header */ |
| +	udp = udp_hdr(skb); |
| +	data_offset = (u8 *)udp - skb->data; |
| +	if (unlikely(data_offset > U16_MAX || |
| +		     data_offset + sizeof(struct udphdr) > skb->len)) |
| +		/* Packet has offset at impossible location or isn't big enough |
| +		 * to have UDP fields. |
| +		 */ |
| +		return -EINVAL; |
| +	data_len = ntohs(udp->len); |
| +	if (unlikely(data_len < sizeof(struct udphdr) || |
| +		     data_len > skb->len - data_offset)) |
| +		/* UDP packet is reporting too small of a size or lying about |
| +		 * its size. |
| +		 */ |
| +		return -EINVAL; |
| +	data_len -= sizeof(struct udphdr); /* From here on: length of the UDP payload only. */ |
| +	data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; /* Offset of the UDP payload. */ |
| +	if (unlikely(!pskb_may_pull(skb, |
| +				data_offset + sizeof(struct message_header)) || |
| +		     pskb_trim(skb, data_len + data_offset) < 0)) |
| +		return -EINVAL; |
| +	skb_pull(skb, data_offset); |
| +	if (unlikely(skb->len != data_len)) |
| +		/* Final len does not agree with calculated len */ |
| +		return -EINVAL; |
| +	header_len = validate_header_len(skb); |
| +	if (unlikely(!header_len)) |
| +		return -EINVAL; |
| +	__skb_push(skb, data_offset); /* Step back so the pull check below is measured from the old data start. */ |
| +	if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) |
| +		return -EINVAL; |
| +	__skb_pull(skb, data_offset); |
| +	return 0; |
| +} |
| +/* Processes one handshake-queue message (initiation, response or cookie reply); does not free skb. */ |
| +static void wg_receive_handshake_packet(struct wg_device *wg, |
| +					struct sk_buff *skb) |
| +{ |
| +	enum cookie_mac_state mac_state; |
| +	struct wg_peer *peer = NULL; |
| +	/* This is global, so that our load calculation applies to the whole |
| +	 * system. We don't care about races with it at all. |
| +	 */ |
| +	static u64 last_under_load; |
| +	bool packet_needs_cookie; |
| +	bool under_load; |
| + |
| +	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { |
| +		net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", |
| +					wg->dev->name, skb); |
| +		wg_cookie_message_consume( |
| +			(struct message_handshake_cookie *)skb->data, wg); |
| +		return; |
| +	} |
| + |
| +	under_load = skb_queue_len(&wg->incoming_handshakes) >= |
| +		     MAX_QUEUED_INCOMING_HANDSHAKES / 8; /* Under load once the queue is 1/8 full. */ |
| +	if (under_load) |
| +		last_under_load = ktime_get_coarse_boottime_ns(); |
| +	else if (last_under_load) |
| +		under_load = !wg_birthdate_has_expired(last_under_load, 1); /* Stays set until the timestamp expires (arg 1, presumably seconds). */ |
| +	mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, |
| +					      under_load); |
| +	if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || |
| +	    (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { |
| +		packet_needs_cookie = false; |
| +	} else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { |
| +		packet_needs_cookie = true; /* Valid MAC but no cookie while under load: reply with a cookie. */ |
| +	} else { |
| +		net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", |
| +					wg->dev->name, skb); |
| +		return; |
| +	} |
| + |
| +	switch (SKB_TYPE_LE32(skb)) { |
| +	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { |
| +		struct message_handshake_initiation *message = |
| +			(struct message_handshake_initiation *)skb->data; |
| + |
| +		if (packet_needs_cookie) { |
| +			wg_packet_send_handshake_cookie(wg, skb, |
| +							message->sender_index); |
| +			return; |
| +		} |
| +		peer = wg_noise_handshake_consume_initiation(message, wg); |
| +		if (unlikely(!peer)) { |
| +			net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", |
| +						wg->dev->name, skb); |
| +			return; |
| +		} |
| +		wg_socket_set_peer_endpoint_from_skb(peer, skb); |
| +		net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", |
| +				    wg->dev->name, peer->internal_id, |
| +				    &peer->endpoint.addr); |
| +		wg_packet_send_handshake_response(peer); |
| +		break; |
| +	} |
| +	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { |
| +		struct message_handshake_response *message = |
| +			(struct message_handshake_response *)skb->data; |
| + |
| +		if (packet_needs_cookie) { |
| +			wg_packet_send_handshake_cookie(wg, skb, |
| +							message->sender_index); |
| +			return; |
| +		} |
| +		peer = wg_noise_handshake_consume_response(message, wg); |
| +		if (unlikely(!peer)) { |
| +			net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", |
| +						wg->dev->name, skb); |
| +			return; |
| +		} |
| +		wg_socket_set_peer_endpoint_from_skb(peer, skb); |
| +		net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", |
| +				    wg->dev->name, peer->internal_id, |
| +				    &peer->endpoint.addr); |
| +		if (wg_noise_handshake_begin_session(&peer->handshake, |
| +						     &peer->keypairs)) { |
| +			wg_timers_session_derived(peer); |
| +			wg_timers_handshake_complete(peer); |
| +			/* Calling this function will either send any existing |
| +			 * packets in the queue and not send a keepalive, which |
| +			 * is the best case. Or, if there's nothing in the |
| +			 * queue, it will send a keepalive, in order to give |
| +			 * immediate confirmation of the session. |
| +			 */ |
| +			wg_packet_send_keepalive(peer); |
| +		} |
| +		break; |
| +	} |
| +	} |
| + |
| +	if (unlikely(!peer)) { /* Reached only when the type matched neither case above. */ |
| +		WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); |
| +		return; |
| +	} |
| + |
| +	local_bh_disable(); /* update_rx_stats() must be called with bh disabled. */ |
| +	update_rx_stats(peer, skb->len); |
| +	local_bh_enable(); |
| + |
| +	wg_timers_any_authenticated_packet_received(peer); |
| +	wg_timers_any_authenticated_packet_traversal(peer); |
| +	wg_peer_put(peer); /* Drop the reference returned by the consume call above. */ |
| +} |
| +/* Work handler: drains wg->incoming_handshakes, processing and then freeing each skb. */ |
| +void wg_packet_handshake_receive_worker(struct work_struct *work) |
| +{ |
| +	struct wg_device *wg = container_of(work, struct multicore_worker, |
| +					    work)->ptr; |
| +	struct sk_buff *skb; |
| + |
| +	while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { |
| +		wg_receive_handshake_packet(wg, skb); |
| +		dev_kfree_skb(skb); /* The handler above never frees skb itself. */ |
| +		cond_resched(); |
| +	} |
| +} |
| +/* Starts one new handshake when we initiated the current keypair and its sending key is near expiry. */ |
| +static void keep_key_fresh(struct wg_peer *peer) |
| +{ |
| +	struct noise_keypair *keypair; |
| +	bool send = false; |
| + |
| +	if (peer->sent_lastminute_handshake) /* Already triggered; this function never clears the flag. */ |
| +		return; |
| + |
| +	rcu_read_lock_bh(); |
| +	keypair = rcu_dereference_bh(peer->keypairs.current_keypair); |
| +	if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && |
| +	    keypair->i_am_the_initiator && |
| +	    unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, |
| +			REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) |
| +		send = true; |
| +	rcu_read_unlock_bh(); |
| + |
| +	if (send) { /* Acted on only after the RCU read section has ended. */ |
| +		peer->sent_lastminute_handshake = true; |
| +		wg_packet_send_queued_handshake_initiation(peer, false); |
| +	} |
| +} |
| +/* Decrypts the data message in skb in place with key; on success skb->data is the plaintext. Returns false on failure. */ |
| +static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) |
| +{ |
| +	struct scatterlist sg[MAX_SKB_FRAGS + 8]; |
| +	struct sk_buff *trailer; |
| +	unsigned int offset; |
| +	int num_frags; |
| + |
| +	if (unlikely(!key)) |
| +		return false; |
| + |
| +	if (unlikely(!READ_ONCE(key->is_valid) || |
| +		  wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || |
| +		  key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { |
| +		WRITE_ONCE(key->is_valid, false); /* Expired or exhausted keys are invalidated for good. */ |
| +		return false; |
| +	} |
| + |
| +	PACKET_CB(skb)->nonce = |
| +		le64_to_cpu(((struct message_data *)skb->data)->counter); |
| + |
| +	/* We ensure that the network header is part of the packet before we |
| +	 * call skb_cow_data, so that there's no chance that data is removed |
| +	 * from the skb, so that later we can extract the original endpoint. |
| +	 */ |
| +	offset = skb->data - skb_network_header(skb); |
| +	skb_push(skb, offset); |
| +	num_frags = skb_cow_data(skb, 0, &trailer); |
| +	offset += sizeof(struct message_data); /* offset now also skips the message_data header. */ |
| +	skb_pull(skb, offset); |
| +	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) |
| +		return false; |
| + |
| +	sg_init_table(sg, num_frags); |
| +	if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) |
| +		return false; |
| + |
| +	if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, |
| +						 PACKET_CB(skb)->nonce, |
| +						 key->key)) |
| +		return false; |
| + |
| +	/* Another ugly situation of pushing and pulling the header so as to |
| +	 * keep endpoint information intact. |
| +	 */ |
| +	skb_push(skb, offset); |
| +	if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) /* Drops the trailing bytes, presumably the auth tag. */ |
| +		return false; |
| +	skb_pull(skb, offset); |
| + |
| +	return true; |
| +} |
| +/* Returns true and records their_counter if it has not been seen and is inside the window; false otherwise. */ |
| +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ |
| +static bool counter_validate(union noise_counter *counter, u64 their_counter) |
| +{ |
| +	unsigned long index, index_current, top, i; |
| +	bool ret = false; |
| + |
| +	spin_lock_bh(&counter->receive.lock); |
| + |
| +	if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || |
| +		     their_counter >= REJECT_AFTER_MESSAGES)) |
| +		goto out; |
| + |
| +	++their_counter; /* The stored counter is the highest accepted value plus one. */ |
| + |
| +	if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < |
| +		     counter->receive.counter)) |
| +		goto out; /* Too far behind the newest counter to be tracked. */ |
| + |
| +	index = their_counter >> ilog2(BITS_PER_LONG); /* Which bitmap word this counter falls in. */ |
| + |
| +	if (likely(their_counter > counter->receive.counter)) { |
| +		index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); |
| +		top = min_t(unsigned long, index - index_current, |
| +			    COUNTER_BITS_TOTAL / BITS_PER_LONG); |
| +		for (i = 1; i <= top; ++i) /* Clear the words the window advances over. */ |
| +			counter->receive.backtrack[(i + index_current) & |
| +				((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; |
| +		counter->receive.counter = their_counter; |
| +	} |
| + |
| +	index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; |
| +	ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), |
| +				&counter->receive.backtrack[index]); /* A bit that was already set means a replay. */ |
| + |
| +out: |
| +	spin_unlock_bh(&counter->receive.lock); |
| +	return ret; |
| +} |
| + |
| +#include "selftest/counter.c" |
| +/* Handles one decrypted data packet: updates endpoint/timers, validates the inner IP packet, passes it to GRO. Consumes skb. */ |
| +static void wg_packet_consume_data_done(struct wg_peer *peer, |
| +					struct sk_buff *skb, |
| +					struct endpoint *endpoint) |
| +{ |
| +	struct net_device *dev = peer->device->dev; |
| +	unsigned int len, len_before_trim; |
| +	struct wg_peer *routed_peer; |
| + |
| +	wg_socket_set_peer_endpoint(peer, endpoint); |
| + |
| +	if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, |
| +						    PACKET_CB(skb)->keypair))) { |
| +		wg_timers_handshake_complete(peer); |
| +		wg_packet_send_staged_packets(peer); |
| +	} |
| + |
| +	keep_key_fresh(peer); |
| + |
| +	wg_timers_any_authenticated_packet_received(peer); |
| +	wg_timers_any_authenticated_packet_traversal(peer); |
| + |
| +	/* A packet with length 0 is a keepalive packet */ |
| +	if (unlikely(!skb->len)) { |
| +		update_rx_stats(peer, message_data_len(0)); |
| +		net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", |
| +				    dev->name, peer->internal_id, |
| +				    &peer->endpoint.addr); |
| +		goto packet_processed; |
| +	} |
| + |
| +	wg_timers_data_received(peer); |
| + |
| +	if (unlikely(skb_network_header(skb) < skb->head)) |
| +		goto dishonest_packet_size; |
| +	if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && |
| +		       (ip_hdr(skb)->version == 4 || |
| +			(ip_hdr(skb)->version == 6 && |
| +			 pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) |
| +		goto dishonest_packet_type; |
| + |
| +	skb->dev = dev; |
| +	/* We've already verified the Poly1305 auth tag, which means this packet |
| +	 * was not modified in transit. We can therefore tell the networking |
| +	 * stack that all checksums of every layer of encapsulation have already |
| +	 * been checked "by the hardware" and it is therefore unnecessary to check |
| +	 * again in software. |
| +	 */ |
| +	skb->ip_summed = CHECKSUM_UNNECESSARY; |
| +	skb->csum_level = ~0; /* All levels */ |
| +	skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); |
| +	if (skb->protocol == htons(ETH_P_IP)) { |
| +		len = ntohs(ip_hdr(skb)->tot_len); |
| +		if (unlikely(len < sizeof(struct iphdr))) |
| +			goto dishonest_packet_size; |
| +		if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) /* Copy a CE mark from the saved outer ds field. */ |
| +			IP_ECN_set_ce(ip_hdr(skb)); |
| +	} else if (skb->protocol == htons(ETH_P_IPV6)) { |
| +		len = ntohs(ipv6_hdr(skb)->payload_len) + |
| +		      sizeof(struct ipv6hdr); |
| +		if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) |
| +			IP6_ECN_set_ce(skb, ipv6_hdr(skb)); |
| +	} else { |
| +		goto dishonest_packet_type; |
| +	} |
| + |
| +	if (unlikely(len > skb->len)) |
| +		goto dishonest_packet_size; |
| +	len_before_trim = skb->len; /* Stats below use the pre-trim length. */ |
| +	if (unlikely(pskb_trim(skb, len))) |
| +		goto packet_processed; |
| + |
| +	routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, |
| +					       skb); |
| +	wg_peer_put(routed_peer); /* We don't need the extra reference. */ |
| + |
| +	if (unlikely(routed_peer != peer)) /* The inner source address must belong to the sending peer. */ |
| +		goto dishonest_packet_peer; |
| + |
| +	if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { |
| +		++dev->stats.rx_dropped; |
| +		net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", |
| +				    dev->name, peer->internal_id, |
| +				    &peer->endpoint.addr); |
| +	} else { |
| +		update_rx_stats(peer, message_data_len(len_before_trim)); |
| +	} |
| +	return; /* skb was handed to napi_gro_receive(); it is not freed here. */ |
| + |
| +dishonest_packet_peer: |
| +	net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", |
| +				dev->name, skb, peer->internal_id, |
| +				&peer->endpoint.addr); |
| +	++dev->stats.rx_errors; |
| +	++dev->stats.rx_frame_errors; |
| +	goto packet_processed; |
| +dishonest_packet_type: |
| +	net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", |
| +			    dev->name, peer->internal_id, &peer->endpoint.addr); |
| +	++dev->stats.rx_errors; |
| +	++dev->stats.rx_frame_errors; |
| +	goto packet_processed; |
| +dishonest_packet_size: |
| +	net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", |
| +			    dev->name, peer->internal_id, &peer->endpoint.addr); |
| +	++dev->stats.rx_errors; |
| +	++dev->stats.rx_length_errors; |
| +	goto packet_processed; |
| +packet_processed: |
| +	dev_kfree_skb(skb); |
| +} |
| +/* NAPI poll: handles finished packets from the head of the peer's rx_queue, in order, up to budget. Returns the count. */ |
| +int wg_packet_rx_poll(struct napi_struct *napi, int budget) |
| +{ |
| +	struct wg_peer *peer = container_of(napi, struct wg_peer, napi); |
| +	struct crypt_queue *queue = &peer->rx_queue; |
| +	struct noise_keypair *keypair; |
| +	struct endpoint endpoint; |
| +	enum packet_state state; |
| +	struct sk_buff *skb; |
| +	int work_done = 0; |
| +	bool free; |
| + |
| +	if (unlikely(budget <= 0)) |
| +		return 0; |
| + |
| +	while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && |
| +	       (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != |
| +		       PACKET_STATE_UNCRYPTED) { /* Stop at the first packet still awaiting decryption. */ |
| +		__ptr_ring_discard_one(&queue->ring); |
| +		peer = PACKET_PEER(skb); |
| +		keypair = PACKET_CB(skb)->keypair; |
| +		free = true; |
| + |
| +		if (unlikely(state != PACKET_STATE_CRYPTED)) /* Not decrypted successfully: just release it. */ |
| +			goto next; |
| + |
| +		if (unlikely(!counter_validate(&keypair->receiving.counter, |
| +					       PACKET_CB(skb)->nonce))) { |
| +			net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", |
| +					    peer->device->dev->name, |
| +					    PACKET_CB(skb)->nonce, |
| +					    keypair->receiving.counter.receive.counter); |
| +			goto next; |
| +		} |
| + |
| +		if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) |
| +			goto next; |
| + |
| +		wg_reset_packet(skb); |
| +		wg_packet_consume_data_done(peer, skb, &endpoint); |
| +		free = false; /* wg_packet_consume_data_done() consumed skb. */ |
| + |
| +next: |
| +		wg_noise_keypair_put(keypair, false); |
| +		wg_peer_put(peer); |
| +		if (unlikely(free)) |
| +			dev_kfree_skb(skb); |
| + |
| +		if (++work_done >= budget) |
| +			break; |
| +	} |
| + |
| +	if (work_done < budget) |
| +		napi_complete_done(napi, work_done); |
| + |
| +	return work_done; |
| +} |
| +/* Work handler: decrypts each packet in the queue's ring and marks it CRYPTED or DEAD for the peer's NAPI. */ |
| +void wg_packet_decrypt_worker(struct work_struct *work) |
| +{ |
| +	struct crypt_queue *queue = container_of(work, struct multicore_worker, |
| +						 work)->ptr; |
| +	struct sk_buff *skb; |
| + |
| +	while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { |
| +		enum packet_state state = likely(decrypt_packet(skb, |
| +				&PACKET_CB(skb)->keypair->receiving)) ? |
| +				PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; |
| +		wg_queue_enqueue_per_peer_napi(skb, state); /* Failed packets are still passed on, marked DEAD. */ |
| +	} |
| +} |
| +/* Looks up the keypair for the message's key_idx, takes a reference, and queues skb for decryption; frees skb on failure. */ |
| +static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) |
| +{ |
| +	__le32 idx = ((struct message_data *)skb->data)->key_idx; |
| +	struct wg_peer *peer = NULL; |
| +	int ret; |
| + |
| +	rcu_read_lock_bh(); |
| +	PACKET_CB(skb)->keypair = |
| +		(struct noise_keypair *)wg_index_hashtable_lookup( |
| +			wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, |
| +			&peer); |
| +	if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) |
| +		goto err_keypair; |
| + |
| +	if (unlikely(READ_ONCE(peer->is_dead))) |
| +		goto err; |
| + |
| +	ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, |
| +						   &peer->rx_queue, skb, |
| +						   wg->packet_crypt_wq, |
| +						   &wg->decrypt_queue.last_cpu); |
| +	if (unlikely(ret == -EPIPE)) /* On -EPIPE the packet is passed to the peer's NAPI marked DEAD. */ |
| +		wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); |
| +	if (likely(!ret || ret == -EPIPE)) { |
| +		rcu_read_unlock_bh(); |
| +		return; /* skb and the references taken above are now owned by the queue path. */ |
| +	} |
| +err: |
| +	wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); |
| +err_keypair: |
| +	rcu_read_unlock_bh(); |
| +	wg_peer_put(peer); /* peer is still NULL here if the lookup did not set it. */ |
| +	dev_kfree_skb(skb); |
| +} |
| +/* Receive entry point: validates the packet and dispatches on message type. Consumes skb on every path. */ |
| +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) |
| +{ |
| +	if (unlikely(prepare_skb_header(skb, wg) < 0)) |
| +		goto err; |
| +	switch (SKB_TYPE_LE32(skb)) { |
| +	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): |
| +	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): |
| +	case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { |
| +		int cpu; |
| + |
| +		if (skb_queue_len(&wg->incoming_handshakes) > |
| +			    MAX_QUEUED_INCOMING_HANDSHAKES || |
| +		    unlikely(!rng_is_initialized())) { /* Queue over its limit, or RNG not initialized yet. */ |
| +			net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", |
| +						wg->dev->name, skb); |
| +			goto err; |
| +		} |
| +		skb_queue_tail(&wg->incoming_handshakes, skb); |
| +		/* Schedule the per-CPU handshake worker (presumably |
| +		 * wg_packet_handshake_receive_worker) on the next online CPU: |
| +		 */ |
| +		cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); |
| +		queue_work_on(cpu, wg->handshake_receive_wq, |
| +			&per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); |
| +		break; |
| +	} |
| +	case cpu_to_le32(MESSAGE_DATA): |
| +		PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); /* Saved for the later ECN CE check. */ |
| +		wg_packet_consume_data(wg, skb); |
| +		break; |
| +	default: |
| +		net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", |
| +					wg->dev->name, skb); |
| +		goto err; |
| +	} |
| +	return; |
| + |
| +err: |
| +	dev_kfree_skb(skb); |
| +} |
| diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c |
| new file mode 100644 |
| index 000000000000..846db14cb046 |
| |
| |
| @@ -0,0 +1,683 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * This contains some basic static unit tests for the allowedips data structure. |
| + * It also has two additional modes that are disabled and meant to be used by |
| + * folks directly playing with this file. If you define the macro |
| + * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in |
| + * memory, it will be printed out as KERN_DEBUG in a format that can be passed |
| + * to graphviz (the dot command) to visualize it. If you define the macro |
| + * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of |
| + * randomized tests done against a trivial implementation, which may take |
| + * upwards of a half-hour to complete. There's no set of users who should be |
| + * enabling these, and the only developers that should go anywhere near these |
| + * knobs are the ones who are reading this comment. |
| + */ |
| + |
| +#ifdef DEBUG |
| + |
| +#include <linux/siphash.h> |
| +/* Copies src to dst through swap_endian(), then zeroes every bit after the first cidr bits. */ |
| +static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, |
| +					      u8 cidr) |
| +{ |
| +	swap_endian(dst, src, bits); |
| +	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); /* Clear whole bytes past the prefix. */ |
| +	if (cidr) |
| +		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); /* Mask the last, partly used byte. */ |
| +} |
| +/* Prints node and its subtree as graphviz statements: one declaration per node, one edge per child. */ |
| +static __init void print_node(struct allowedips_node *node, u8 bits) |
| +{ |
| +	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; |
| +	char *fmt_declaration = KERN_DEBUG |
| +		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; |
| +	char *style = "dotted"; |
| +	u8 ip1[16], ip2[16]; |
| +	u32 color = 0; |
| + |
| +	if (bits == 32) { |
| +		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; |
| +		fmt_declaration = KERN_DEBUG |
| +			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; |
| +	} else if (bits == 128) { |
| +		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; |
| +		fmt_declaration = KERN_DEBUG |
| +			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; |
| +	} |
| +	if (node->peer) { /* Nodes with a peer are drawn bold, in a color derived from the peer pointer. */ |
| +		hsiphash_key_t key = { { 0 } }; |
| + |
| +		memcpy(&key, &node->peer, sizeof(node->peer)); |
| +		color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | |
| +			hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | |
| +			hsiphash_1u32(0xabad1dea, &key) % 200; |
| +		style = "bold"; |
| +	} |
| +	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); |
| +	printk(fmt_declaration, ip1, node->cidr, style, color); |
| +	if (node->bit[0]) { |
| +		swap_endian_and_apply_cidr(ip2, |
| +				rcu_dereference_raw(node->bit[0])->bits, bits, |
| +				rcu_dereference_raw(node->bit[0])->cidr); /* Child's own cidr, so the edge target matches its declaration. */ |
| +		printk(fmt_connection, ip1, node->cidr, ip2, |
| +		       rcu_dereference_raw(node->bit[0])->cidr); |
| +		print_node(rcu_dereference_raw(node->bit[0]), bits); |
| +	} |
| +	if (node->bit[1]) { |
| +		swap_endian_and_apply_cidr(ip2, |
| +				rcu_dereference_raw(node->bit[1])->bits, |
| +				bits, rcu_dereference_raw(node->bit[1])->cidr); /* Child's own cidr, as above. */ |
| +		printk(fmt_connection, ip1, node->cidr, ip2, |
| +		       rcu_dereference_raw(node->bit[1])->cidr); |
| +		print_node(rcu_dereference_raw(node->bit[1]), bits); |
| +	} |
| +} |
| +/* Prints the trie rooted at top as a graphviz "digraph" at KERN_DEBUG level. */ |
| +static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) |
| +{ |
| +	printk(KERN_DEBUG "digraph trie {\n"); |
| +	print_node(rcu_dereference_raw(top), bits); /* print_node() dereferences its argument unconditionally. */ |
| +	printk(KERN_DEBUG "}\n"); |
| +} |
| +/* Size constants, presumably for the randomized trie test; their uses are outside this view. */ |
| +enum { |
| +	NUM_PEERS = 2000, |
| +	NUM_RAND_ROUTES = 400, |
| +	NUM_MUTATED_ROUTES = 100, |
| +	NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 |
| +}; |
| +/* A plain linked-list table of horrible_allowedips_node; the file header calls it a "trivial implementation". */ |
| +struct horrible_allowedips { |
| +	struct hlist_head head; /* Kept in descending prefix-length order by horrible_insert_ordered(). */ |
| +}; |
| +/* One (ip, mask) -> value entry of struct horrible_allowedips. */ |
| +struct horrible_allowedips_node { |
| +	struct hlist_node table; /* Linkage in horrible_allowedips.head. */ |
| +	union nf_inet_addr ip; |
| +	union nf_inet_addr mask; |
| +	u8 ip_version; /* 4 or 6, as tested in horrible_mask_self(). */ |
| +	void *value; |
| +}; |
| +/* Initializes table to an empty list. */ |
| +static __init void horrible_allowedips_init(struct horrible_allowedips *table) |
| +{ |
| +	INIT_HLIST_HEAD(&table->head); |
| +} |
| +/* Unlinks and kfree()s every node in table; the table structure itself is not freed. */ |
| +static __init void horrible_allowedips_free(struct horrible_allowedips *table) |
| +{ |
| +	struct horrible_allowedips_node *node; |
| +	struct hlist_node *h; |
| + |
| +	hlist_for_each_entry_safe(node, h, &table->head, table) { /* _safe: nodes are deleted while iterating. */ |
| +		hlist_del(&node->table); |
| +		kfree(node); |
| +	} |
| +} |
| +/* Builds a 128-bit mask whose leading cidr bits, in network byte order, are set. */ |
| +static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) |
| +{ |
| +	union nf_inet_addr mask; |
| + |
| +	memset(&mask, 0x00, 128 / 8); |
| +	memset(&mask, 0xff, cidr / 8); /* Whole bytes covered by the prefix. */ |
| +	if (cidr % 32) |
| +		mask.all[cidr / 32] = (__force u32)htonl( |
| +			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); /* Rewrites the partly covered 32-bit word. */ |
| +	return mask; |
| +} |
| +/* Returns the number of set bits across the four 32-bit words of subnet. */ |
| +static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) |
| +{ |
| +	return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + |
| +	       hweight32(subnet.all[2]) + hweight32(subnet.all[3]); |
| +} |
| +/* ANDs node->ip with node->mask in place; does nothing if ip_version is neither 4 nor 6. */ |
| +static __init inline void |
| +horrible_mask_self(struct horrible_allowedips_node *node) |
| +{ |
| +	if (node->ip_version == 4) { |
| +		node->ip.ip &= node->mask.ip; |
| +	} else if (node->ip_version == 6) { |
| +		node->ip.ip6[0] &= node->mask.ip6[0]; |
| +		node->ip.ip6[1] &= node->mask.ip6[1]; |
| +		node->ip.ip6[2] &= node->mask.ip6[2]; |
| +		node->ip.ip6[3] &= node->mask.ip6[3]; |
| +	} |
| +} |
| + |
| +/* True when the IPv4 address @ip, masked with @node's netmask, equals the |
| + * address stored in @node. |
| + */ |
| +static __init inline bool |
| +horrible_match_v4(const struct horrible_allowedips_node *node, |
| + struct in_addr *ip) |
| +{ |
| + return (ip->s_addr & node->mask.ip) == node->ip.ip; |
| +} |
| + |
| +/* True when every 32-bit word of the IPv6 address @ip, masked with @node's |
| + * netmask, equals the matching word stored in @node. |
| + */ |
| +static __init inline bool |
| +horrible_match_v6(const struct horrible_allowedips_node *node, |
| + struct in6_addr *ip) |
| +{ |
| + unsigned int w; |
| + |
| + for (w = 0; w < 4; ++w) { |
| + if ((ip->in6_u.u6_addr32[w] & node->mask.ip6[w]) != |
| + node->ip.ip6[w]) |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +/* Insert @node into @table, keeping the list ordered from longest to |
| + * shortest prefix so that a front-to-back scan finds the most specific |
| + * match first. If an entry with the same mask, address and version already |
| + * exists, only its value is replaced and @node is freed. In every case |
| + * ownership of @node passes to this function. |
| + */ |
| +static __init void |
| +horrible_insert_ordered(struct horrible_allowedips *table, |
| + struct horrible_allowedips_node *node) |
| +{ |
| + struct horrible_allowedips_node *other = NULL, *where = NULL; |
| + u8 my_cidr = horrible_mask_to_cidr(node->mask); |
| + |
| + hlist_for_each_entry(other, &table->head, table) { |
| + if (!memcmp(&other->mask, &node->mask, |
| + sizeof(union nf_inet_addr)) && |
| + !memcmp(&other->ip, &node->ip, |
| + sizeof(union nf_inet_addr)) && |
| + other->ip_version == node->ip_version) { |
| + other->value = node->value; |
| + kfree(node); |
| + return; |
| + } |
| + where = other; |
| + /* Stop at the first entry that is not more specific. */ |
| + if (horrible_mask_to_cidr(other->mask) <= my_cidr) |
| + break; |
| + } |
| + /* other == NULL means the loop ran off the end of the list (or the |
| + * list was empty); otherwise it stopped early at where. |
| + */ |
| + if (!other && !where) |
| + hlist_add_head(&node->table, &table->head); |
| + else if (!other) |
| + hlist_add_behind(&node->table, &where->table); |
| + else |
| + hlist_add_before(&node->table, &where->table); |
| +} |
| + |
| +/* Add the IPv4 route @ip/@cidr with payload @value to the reference table. |
| + * Returns 0 on success or -ENOMEM if the node cannot be allocated. |
| + */ |
| +static __init int |
| +horrible_allowedips_insert_v4(struct horrible_allowedips *table, |
| + struct in_addr *ip, u8 cidr, void *value) |
| +{ |
| + struct horrible_allowedips_node *entry; |
| + |
| + entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
| + if (unlikely(!entry)) |
| + return -ENOMEM; |
| + |
| + entry->ip_version = 4; |
| + entry->value = value; |
| + entry->ip.in = *ip; |
| + entry->mask = horrible_cidr_to_mask(cidr); |
| + horrible_mask_self(entry); |
| + horrible_insert_ordered(table, entry); |
| + return 0; |
| +} |
| + |
| +/* Add the IPv6 route @ip/@cidr with payload @value to the reference table. |
| + * Returns 0 on success or -ENOMEM if the node cannot be allocated. |
| + */ |
| +static __init int |
| +horrible_allowedips_insert_v6(struct horrible_allowedips *table, |
| + struct in6_addr *ip, u8 cidr, void *value) |
| +{ |
| + struct horrible_allowedips_node *entry; |
| + |
| + entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
| + if (unlikely(!entry)) |
| + return -ENOMEM; |
| + |
| + entry->ip_version = 6; |
| + entry->value = value; |
| + entry->ip.in6 = *ip; |
| + entry->mask = horrible_cidr_to_mask(cidr); |
| + horrible_mask_self(entry); |
| + horrible_insert_ordered(table, entry); |
| + return 0; |
| +} |
| + |
| +/* Return the value of the first IPv4 entry in list order that matches @ip, |
| + * or NULL when none does. |
| + */ |
| +static __init void * |
| +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, |
| + struct in_addr *ip) |
| +{ |
| + struct horrible_allowedips_node *entry; |
| + |
| + hlist_for_each_entry(entry, &table->head, table) { |
| + if (entry->ip_version == 4 && horrible_match_v4(entry, ip)) |
| + return entry->value; |
| + } |
| + return NULL; |
| +} |
| + |
| +/* Return the value of the first IPv6 entry in list order that matches @ip, |
| + * or NULL when none does. |
| + */ |
| +static __init void * |
| +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, |
| + struct in6_addr *ip) |
| +{ |
| + struct horrible_allowedips_node *entry; |
| + |
| + hlist_for_each_entry(entry, &table->head, table) { |
| + if (entry->ip_version == 6 && horrible_match_v6(entry, ip)) |
| + return entry->value; |
| + } |
| + return NULL; |
| +} |
| + |
| +/* Cross-check the trie against the list-based reference table: the same |
| + * random routes, and mutated variants of them, are inserted into both, and |
| + * then NUM_QUERIES random lookups per address family must return the same |
| + * peer from each. Returns true when every lookup agrees, false on an |
| + * allocation failure or on the first mismatch. |
| + */ |
| +static __init bool randomized_test(void) |
| +{ |
| + unsigned int i, j, k, mutate_amount, cidr; |
| + u8 ip[16], mutate_mask[16], mutated[16]; |
| + struct wg_peer **peers, *peer; |
| + struct horrible_allowedips h; |
| + DEFINE_MUTEX(mutex); |
| + struct allowedips t; |
| + bool ret = false; |
| + |
| + mutex_init(&mutex); |
| + |
| + wg_allowedips_init(&t); |
| + horrible_allowedips_init(&h); |
| + |
| + peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); |
| + if (unlikely(!peers)) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free; |
| + } |
| + for (i = 0; i < NUM_PEERS; ++i) { |
| + peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); |
| + if (unlikely(!peers[i])) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free; |
| + } |
| + kref_init(&peers[i]->refcount); |
| + } |
| + |
| + mutex_lock(&mutex); |
| + |
| + /* IPv4: one random base route, then variants that keep a random |
| + * number of its leading bytes and randomize the rest. |
| + */ |
| + for (i = 0; i < NUM_RAND_ROUTES; ++i) { |
| + prandom_bytes(ip, 4); |
| + cidr = prandom_u32_max(32) + 1; |
| + peer = peers[prandom_u32_max(NUM_PEERS)]; |
| + if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, |
| + peer, &mutex) < 0) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, |
| + cidr, peer) < 0) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { |
| + memcpy(mutated, ip, 4); |
| + prandom_bytes(mutate_mask, 4); |
| + mutate_amount = prandom_u32_max(32); |
| + for (k = 0; k < mutate_amount / 8; ++k) |
| + mutate_mask[k] = 0xff; |
| + mutate_mask[k] = 0xff |
| + << ((8 - (mutate_amount % 8)) % 8); |
| + for (; k < 4; ++k) |
| + mutate_mask[k] = 0; |
| + for (k = 0; k < 4; ++k) |
| + mutated[k] = (mutated[k] & mutate_mask[k]) | |
| + (~mutate_mask[k] & |
| + prandom_u32_max(256)); |
| + cidr = prandom_u32_max(32) + 1; |
| + peer = peers[prandom_u32_max(NUM_PEERS)]; |
| + if (wg_allowedips_insert_v4(&t, |
| + (struct in_addr *)mutated, |
| + cidr, peer, &mutex) < 0) { |
| + pr_err("allowedips random malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + if (horrible_allowedips_insert_v4(&h, |
| + (struct in_addr *)mutated, cidr, peer)) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + } |
| + } |
| + |
| + /* IPv6: same scheme over all 16 address bytes. */ |
| + for (i = 0; i < NUM_RAND_ROUTES; ++i) { |
| + prandom_bytes(ip, 16); |
| + cidr = prandom_u32_max(128) + 1; |
| + peer = peers[prandom_u32_max(NUM_PEERS)]; |
| + if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, |
| + peer, &mutex) < 0) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, |
| + cidr, peer) < 0) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { |
| + memcpy(mutated, ip, 16); |
| + prandom_bytes(mutate_mask, 16); |
| + mutate_amount = prandom_u32_max(128); |
| + for (k = 0; k < mutate_amount / 8; ++k) |
| + mutate_mask[k] = 0xff; |
| + mutate_mask[k] = 0xff |
| + << ((8 - (mutate_amount % 8)) % 8); |
| + /* The bound is 16, not the IPv4 value of 4: with 4, |
| + * bytes 4..15 were never mutated at all. |
| + */ |
| + for (; k < 16; ++k) |
| + mutate_mask[k] = 0; |
| + for (k = 0; k < 16; ++k) |
| + mutated[k] = (mutated[k] & mutate_mask[k]) | |
| + (~mutate_mask[k] & |
| + prandom_u32_max(256)); |
| + cidr = prandom_u32_max(128) + 1; |
| + peer = peers[prandom_u32_max(NUM_PEERS)]; |
| + if (wg_allowedips_insert_v6(&t, |
| + (struct in6_addr *)mutated, |
| + cidr, peer, &mutex) < 0) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + if (horrible_allowedips_insert_v6( |
| + &h, (struct in6_addr *)mutated, cidr, |
| + peer)) { |
| + pr_err("allowedips random self-test malloc: FAIL\n"); |
| + goto free_locked; |
| + } |
| + } |
| + } |
| + |
| + mutex_unlock(&mutex); |
| + |
| + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { |
| + print_tree(t.root4, 32); |
| + print_tree(t.root6, 128); |
| + } |
| + |
| + for (i = 0; i < NUM_QUERIES; ++i) { |
| + prandom_bytes(ip, 4); |
| + if (lookup(t.root4, 32, ip) != |
| + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { |
| + pr_err("allowedips random self-test: FAIL\n"); |
| + goto free; |
| + } |
| + } |
| + |
| + for (i = 0; i < NUM_QUERIES; ++i) { |
| + prandom_bytes(ip, 16); |
| + if (lookup(t.root6, 128, ip) != |
| + horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { |
| + pr_err("allowedips random self-test: FAIL\n"); |
| + goto free; |
| + } |
| + } |
| + ret = true; |
| + |
| + /* "free" is entered with the mutex released, "free_locked" with it |
| + * already held. |
| + */ |
| +free: |
| + mutex_lock(&mutex); |
| +free_locked: |
| + wg_allowedips_free(&t, &mutex); |
| + mutex_unlock(&mutex); |
| + horrible_allowedips_free(&h); |
| + if (peers) { |
| + for (i = 0; i < NUM_PEERS; ++i) |
| + kfree(peers[i]); |
| + } |
| + kfree(peers); |
| + return ret; |
| +} |
| + |
| +/* Build the IPv4 address a.b.c.d in static storage and return a pointer to |
| + * it; every call overwrites the result of the previous one. |
| + */ |
| +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) |
| +{ |
| + static struct in_addr addr; |
| + const u8 octets[4] = { a, b, c, d }; |
| + |
| + memcpy(&addr, octets, sizeof(octets)); |
| + return &addr; |
| +} |
| + |
| +/* Build an IPv6 address from four host-order 32-bit words, each stored |
| + * big-endian, in static storage and return a pointer to it; every call |
| + * overwrites the result of the previous one. |
| + */ |
| +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) |
| +{ |
| + static struct in6_addr addr; |
| + const __be32 words[4] = { |
| + cpu_to_be32(a), cpu_to_be32(b), cpu_to_be32(c), cpu_to_be32(d) |
| + }; |
| + |
| + memcpy(&addr, words, sizeof(words)); |
| + return &addr; |
| +} |
| + |
| +/* Allocate a zeroed dummy peer with its refcount and allowedips list |
| + * initialized. Returns NULL if the allocation fails. |
| + */ |
| +static __init struct wg_peer *init_peer(void) |
| +{ |
| + struct wg_peer *fresh = kzalloc(sizeof(*fresh), GFP_KERNEL); |
| + |
| + if (fresh) { |
| + kref_init(&fresh->refcount); |
| + INIT_LIST_HEAD(&fresh->allowedips_list); |
| + } |
| + return fresh; |
| +} |
| + |
| +/* Helper macros for wg_allowedips_selftest(). They expect the locals `t`, |
| + * `mutex`, `i` and `success` to be in scope at the point of use. |
| + */ |
| + |
| +/* Insert the route built by ip4()/ip6() from the four address arguments, |
| + * with prefix length `cidr`, for peer `mem`. |
| + */ |
| +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ |
| + wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ |
| + cidr, mem, &mutex) |
| + |
| +/* Bump the test counter `i`; if the enclosing macro's `_s` is false, log the |
| + * test number and clear `success`. |
| + */ |
| +#define maybe_fail() do { \ |
| + ++i; \ |
| + if (!_s) { \ |
| + pr_info("allowedips self-test %zu: FAIL\n", i); \ |
| + success = false; \ |
| + } \ |
| + } while (0) |
| + |
| +/* Passes when looking up the address in t.root<version> yields `mem`. */ |
| +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ |
| + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ |
| + ip##version(ipa, ipb, ipc, ipd)) == (mem); \ |
| + maybe_fail(); \ |
| + } while (0) |
| + |
| +/* Passes when looking up the address does NOT yield `mem`. */ |
| +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ |
| + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ |
| + ip##version(ipa, ipb, ipc, ipd)) != (mem); \ |
| + maybe_fail(); \ |
| + } while (0) |
| + |
| +/* Passes when `cond` evaluates to true. */ |
| +#define test_boolean(cond) do { \ |
| + bool _s = (cond); \ |
| + maybe_fail(); \ |
| + } while (0) |
| + |
| +/* Self-test for the allowedips trie. Inserts a fixed set of IPv4/IPv6 |
| + * routes for eight dummy peers, checks lookups against the expected peer, |
| + * exercises removal by peer and table teardown, verifies the per-peer node |
| + * list as reported by wg_allowedips_read_node(), and finally runs |
| + * randomized_test() when DEBUG_RANDOM_TRIE is enabled. Returns true only if |
| + * no check failed; returns false immediately if a peer allocation failed. |
| + */ |
| +bool __init wg_allowedips_selftest(void) |
| +{ |
| + bool found_a = false, found_b = false, found_c = false, found_d = false, |
| + found_e = false, found_other = false; |
| + struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), |
| + *d = init_peer(), *e = init_peer(), *f = init_peer(), |
| + *g = init_peer(), *h = init_peer(); |
| + struct allowedips_node *iter_node; |
| + bool success = false; |
| + struct allowedips t; |
| + DEFINE_MUTEX(mutex); |
| + struct in6_addr ip; |
| + size_t i = 0, count = 0; |
| + __be64 part; |
| + |
| + mutex_init(&mutex); |
| + mutex_lock(&mutex); |
| + wg_allowedips_init(&t); |
| + |
| + if (!a || !b || !c || !d || !e || !f || !g || !h) { |
| + pr_err("allowedips self-test malloc: FAIL\n"); |
| + goto free; |
| + } |
| + |
| + insert(4, a, 192, 168, 4, 0, 24); |
| + insert(4, b, 192, 168, 4, 4, 32); |
| + insert(4, c, 192, 168, 0, 0, 16); |
| + insert(4, d, 192, 95, 5, 64, 27); |
| + /* replaces previous entry, and maskself is required */ |
| + insert(4, c, 192, 95, 5, 65, 27); |
| + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); |
| + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); |
| + insert(4, e, 0, 0, 0, 0, 0); |
| + insert(6, e, 0, 0, 0, 0, 0); |
| + /* replaces previous entry */ |
| + insert(6, f, 0, 0, 0, 0, 0); |
| + insert(6, g, 0x24046800, 0, 0, 0, 32); |
| + /* maskself is required */ |
| + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); |
| + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); |
| + insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); |
| + insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); |
| + insert(4, g, 64, 15, 112, 0, 20); |
| + /* maskself is required */ |
| + insert(4, h, 64, 15, 123, 211, 25); |
| + insert(4, a, 10, 0, 0, 0, 25); |
| + insert(4, b, 10, 0, 0, 128, 25); |
| + insert(4, a, 10, 1, 0, 0, 30); |
| + insert(4, b, 10, 1, 0, 4, 30); |
| + insert(4, c, 10, 1, 0, 8, 29); |
| + insert(4, d, 10, 1, 0, 16, 29); |
| + |
| + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { |
| + print_tree(t.root4, 32); |
| + print_tree(t.root6, 128); |
| + } |
| + |
| + /* From here on, any failing check clears success again. */ |
| + success = true; |
| + |
| + test(4, a, 192, 168, 4, 20); |
| + test(4, a, 192, 168, 4, 0); |
| + test(4, b, 192, 168, 4, 4); |
| + test(4, c, 192, 168, 200, 182); |
| + test(4, c, 192, 95, 5, 68); |
| + test(4, e, 192, 95, 5, 96); |
| + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); |
| + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); |
| + test(6, f, 0x26075300, 0x60006b01, 0, 0); |
| + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); |
| + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); |
| + test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); |
| + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); |
| + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); |
| + test(6, h, 0x24046800, 0x40040800, 0, 0); |
| + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); |
| + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); |
| + test(4, g, 64, 15, 116, 26); |
| + test(4, g, 64, 15, 127, 3); |
| + test(4, g, 64, 15, 123, 1); |
| + test(4, h, 64, 15, 123, 128); |
| + test(4, h, 64, 15, 123, 129); |
| + test(4, a, 10, 0, 0, 52); |
| + test(4, b, 10, 0, 0, 220); |
| + test(4, a, 10, 1, 0, 2); |
| + test(4, b, 10, 1, 0, 6); |
| + test(4, c, 10, 1, 0, 10); |
| + test(4, d, 10, 1, 0, 20); |
| + |
| + /* Routes spread across the address space must all disappear when |
| + * their peer is removed. |
| + */ |
| + insert(4, a, 1, 0, 0, 0, 32); |
| + insert(4, a, 64, 0, 0, 0, 32); |
| + insert(4, a, 128, 0, 0, 0, 32); |
| + insert(4, a, 192, 0, 0, 0, 32); |
| + insert(4, a, 255, 0, 0, 0, 32); |
| + wg_allowedips_remove_by_peer(&t, a, &mutex); |
| + test_negative(4, a, 1, 0, 0, 0); |
| + test_negative(4, a, 64, 0, 0, 0); |
| + test_negative(4, a, 128, 0, 0, 0); |
| + test_negative(4, a, 192, 0, 0, 0); |
| + test_negative(4, a, 255, 0, 0, 0); |
| + |
| + wg_allowedips_free(&t, &mutex); |
| + wg_allowedips_init(&t); |
| + insert(4, a, 192, 168, 0, 0, 16); |
| + insert(4, a, 192, 168, 0, 0, 24); |
| + wg_allowedips_remove_by_peer(&t, a, &mutex); |
| + test_negative(4, a, 192, 168, 0, 1); |
| + |
| + /* These will hit the WARN_ON(len >= 128) in free_node if something |
| + * goes wrong. |
| + * |
| + * Note that this loop reuses the test counter i, so checks reported |
| + * after it are numbered from 129 onwards. |
| + */ |
| + for (i = 0; i < 128; ++i) { |
| + part = cpu_to_be64(~(1LLU << (i % 64))); |
| + memset(&ip, 0xff, 16); |
| + memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); |
| + wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); |
| + } |
| + |
| + wg_allowedips_free(&t, &mutex); |
| + |
| + /* Insert five routes for peer a, then walk its node list and check |
| + * that each is reported once, with its address masked to its prefix. |
| + */ |
| + wg_allowedips_init(&t); |
| + insert(4, a, 192, 95, 5, 93, 27); |
| + insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); |
| + insert(4, a, 10, 1, 0, 20, 29); |
| + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); |
| + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); |
| + list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { |
| + /* This ip shadows the function-scope struct in6_addr ip. */ |
| + u8 cidr, ip[16] __aligned(__alignof(u64)); |
| + int family = wg_allowedips_read_node(iter_node, ip, &cidr); |
| + |
| + count++; |
| + |
| + if (cidr == 27 && family == AF_INET && |
| + !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) |
| + found_a = true; |
| + else if (cidr == 128 && family == AF_INET6 && |
| + !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), |
| + sizeof(struct in6_addr))) |
| + found_b = true; |
| + else if (cidr == 29 && family == AF_INET && |
| + !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) |
| + found_c = true; |
| + else if (cidr == 83 && family == AF_INET6 && |
| + !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), |
| + sizeof(struct in6_addr))) |
| + found_d = true; |
| + else if (cidr == 21 && family == AF_INET6 && |
| + !memcmp(ip, ip6(0x26075000, 0, 0, 0), |
| + sizeof(struct in6_addr))) |
| + found_e = true; |
| + else |
| + found_other = true; |
| + } |
| + test_boolean(count == 5); |
| + test_boolean(found_a); |
| + test_boolean(found_b); |
| + test_boolean(found_c); |
| + test_boolean(found_d); |
| + test_boolean(found_e); |
| + test_boolean(!found_other); |
| + |
| + if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) |
| + success = randomized_test(); |
| + |
| + if (success) |
| + pr_info("allowedips self-tests: pass\n"); |
| + |
| +free: |
| + wg_allowedips_free(&t, &mutex); |
| + kfree(a); |
| + kfree(b); |
| + kfree(c); |
| + kfree(d); |
| + kfree(e); |
| + kfree(f); |
| + kfree(g); |
| + kfree(h); |
| + mutex_unlock(&mutex); |
| + |
| + return success; |
| +} |
| + |
| +#undef test_negative |
| +#undef test |
| +#undef remove |
| +#undef insert |
| +#undef init_peer |
| + |
| +#endif |
| diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c |
| new file mode 100644 |
| index 000000000000..f4fbb9072ed7 |
| |
| |
| @@ -0,0 +1,104 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifdef DEBUG |
| +/* Self-test for counter_validate(): feeds scripted sequences of nonce |
| + * values to a freshly reset counter and compares each verdict with the |
| + * expected one. Every mismatch is logged with its test number. Returns |
| + * true when all verdicts matched. |
| + */ |
| +bool __init wg_packet_counter_selftest(void) |
| +{ |
| + unsigned int test_num = 0, i; |
| + union noise_counter counter; |
| + bool success = true; |
| + |
| +/* T_INIT zeroes the counter and re-initializes its receive lock. |
| + * T_LIM is one past the window size. |
| + * T(n, v) expects counter_validate(&counter, n) to return v. |
| + */ |
| +#define T_INIT do { \ |
| + memset(&counter, 0, sizeof(union noise_counter)); \ |
| + spin_lock_init(&counter.receive.lock); \ |
| + } while (0) |
| +#define T_LIM (COUNTER_WINDOW_SIZE + 1) |
| +#define T(n, v) do { \ |
| + ++test_num; \ |
| + if (counter_validate(&counter, n) != (v)) { \ |
| + pr_err("nonce counter self-test %u: FAIL\n", \ |
| + test_num); \ |
| + success = false; \ |
| + } \ |
| + } while (0) |
| + |
| + T_INIT; |
| + /* 1 */ T(0, true); |
| + /* 2 */ T(1, true); |
| + /* 3 */ T(1, false); |
| + /* 4 */ T(9, true); |
| + /* 5 */ T(8, true); |
| + /* 6 */ T(7, true); |
| + /* 7 */ T(7, false); |
| + /* 8 */ T(T_LIM, true); |
| + /* 9 */ T(T_LIM - 1, true); |
| + /* 10 */ T(T_LIM - 1, false); |
| + /* 11 */ T(T_LIM - 2, true); |
| + /* 12 */ T(2, true); |
| + /* 13 */ T(2, false); |
| + /* 14 */ T(T_LIM + 16, true); |
| + /* 15 */ T(3, false); |
| + /* 16 */ T(T_LIM + 16, false); |
| + /* 17 */ T(T_LIM * 4, true); |
| + /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); |
| + /* 19 */ T(10, false); |
| + /* 20 */ T(T_LIM * 4 - T_LIM, false); |
| + /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); |
| + /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); |
| + /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); |
| + /* 24 */ T(0, false); |
| + /* 25 */ T(REJECT_AFTER_MESSAGES, false); |
| + /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); |
| + /* 27 */ T(REJECT_AFTER_MESSAGES, false); |
| + /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); |
| + /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); |
| + /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); |
| + /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); |
| + /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); |
| + /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); |
| + /* 34 */ T(0, false); |
| + |
| + /* Ascending 1..WINDOW, then 0 is accepted once and rejected after. */ |
| + T_INIT; |
| + for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) |
| + T(i, true); |
| + T(0, true); |
| + T(0, false); |
| + |
| + /* Ascending 2..WINDOW+1: 1 is accepted, 0 is rejected. */ |
| + T_INIT; |
| + for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) |
| + T(i, true); |
| + T(1, true); |
| + T(0, false); |
| + |
| + /* Descending WINDOW..0: every value is accepted. */ |
| + T_INIT; |
| + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;) |
| + T(i, true); |
| + |
| + /* Descending WINDOW+1..1: 0 is rejected afterwards. */ |
| + T_INIT; |
| + for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) |
| + T(i, true); |
| + T(0, false); |
| + |
| + /* Descending WINDOW..1, then WINDOW+1 is accepted and 0 rejected. */ |
| + T_INIT; |
| + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) |
| + T(i, true); |
| + T(COUNTER_WINDOW_SIZE + 1, true); |
| + T(0, false); |
| + |
| + /* Descending WINDOW..1, then 0 and WINDOW+1 are both accepted. */ |
| + T_INIT; |
| + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) |
| + T(i, true); |
| + T(0, true); |
| + T(COUNTER_WINDOW_SIZE + 1, true); |
| + |
| +#undef T |
| +#undef T_LIM |
| +#undef T_INIT |
| + |
| + if (success) |
| + pr_info("nonce counter self-tests: pass\n"); |
| + return success; |
| +} |
| +#endif |
| diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c |
| new file mode 100644 |
| index 000000000000..bcd6462e4540 |
| |
| |
| @@ -0,0 +1,226 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifdef DEBUG |
| + |
| +#include <linux/jiffies.h> |
| + |
| +/* Script for timings_test(): for each step, how long to sleep first and |
| + * whether wg_ratelimiter_allow() is then expected to accept a packet from |
| + * the fixed source address. |
| + */ |
| +static const struct { |
| + bool result; |
| + unsigned int msec_to_sleep_before; |
| +} expected_results[] __initconst = { |
| + [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, |
| + [PACKETS_BURSTABLE] = { false, 0 }, |
| + [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, |
| + [PACKETS_BURSTABLE + 2] = { false, 0 }, |
| + [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, |
| + [PACKETS_BURSTABLE + 4] = { true, 0 }, |
| + [PACKETS_BURSTABLE + 5] = { false, 0 } |
| +}; |
| + |
| +/* Time budget, in jiffies, for reaching step @index of expected_results: |
| + * the sum of all scripted sleeps up to and including that step, plus a |
| + * slack of two thirds of one packet interval. |
| + */ |
| +static __init unsigned int maximum_jiffies_at_index(int index) |
| +{ |
| + unsigned int budget_ms = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; |
| + int step = 0; |
| + |
| + while (step <= index) { |
| + budget_ms += expected_results[step].msec_to_sleep_before; |
| + ++step; |
| + } |
| + return msecs_to_jiffies(budget_ms); |
| +} |
| + |
| +/* Run the expected_results script against the rate limiter. At each step |
| + * the fixed source address must get the scripted verdict, while a source |
| + * address altered for that step must be allowed; the same is repeated for |
| + * IPv6 when CONFIG_IPV6 is enabled. *@test is incremented after every |
| + * passed check. |
| + * |
| + * Returns 0 on success, -ETIMEDOUT if the run fell behind its time budget |
| + * (so its verdicts would not be meaningful), or -EXFULL on a wrong verdict. |
| + */ |
| +static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, |
| + struct sk_buff *skb6, struct ipv6hdr *hdr6, |
| + int *test) |
| +{ |
| + unsigned long loop_start_time; |
| + int i; |
| + |
| + wg_ratelimiter_gc_entries(NULL); |
| + rcu_barrier(); |
| + loop_start_time = jiffies; |
| + |
| + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { |
| + if (expected_results[i].msec_to_sleep_before) |
| + msleep(expected_results[i].msec_to_sleep_before); |
| + |
| + if (time_is_before_jiffies(loop_start_time + |
| + maximum_jiffies_at_index(i))) |
| + return -ETIMEDOUT; |
| + if (wg_ratelimiter_allow(skb4, &init_net) != |
| + expected_results[i].result) |
| + return -EXFULL; |
| + ++(*test); |
| + |
| + /* Temporarily switch to a different source address. */ |
| + hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); |
| + if (time_is_before_jiffies(loop_start_time + |
| + maximum_jiffies_at_index(i))) |
| + return -ETIMEDOUT; |
| + if (!wg_ratelimiter_allow(skb4, &init_net)) |
| + return -EXFULL; |
| + ++(*test); |
| + |
| + /* Restore the original source address. */ |
| + hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); |
| + |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + /* Only words 2 and 3 of the address change here; the scripted |
| + * verdict is still expected. |
| + */ |
| + hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); |
| + hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); |
| + if (time_is_before_jiffies(loop_start_time + |
| + maximum_jiffies_at_index(i))) |
| + return -ETIMEDOUT; |
| + if (wg_ratelimiter_allow(skb6, &init_net) != |
| + expected_results[i].result) |
| + return -EXFULL; |
| + ++(*test); |
| + |
| + /* Changing word 0 must be allowed unconditionally. */ |
| + hdr6->saddr.in6_u.u6_addr32[0] = |
| + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); |
| + if (time_is_before_jiffies(loop_start_time + |
| + maximum_jiffies_at_index(i))) |
| + return -ETIMEDOUT; |
| + if (!wg_ratelimiter_allow(skb6, &init_net)) |
| + return -EXFULL; |
| + ++(*test); |
| + |
| + hdr6->saddr.in6_u.u6_addr32[0] = |
| + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); |
| + |
| + if (time_is_before_jiffies(loop_start_time + |
| + maximum_jiffies_at_index(i))) |
| + return -ETIMEDOUT; |
| +#endif |
| + } |
| + return 0; |
| +} |
| + |
| +/* Check the rate limiter's entry limit: after a garbage-collection pass the |
| + * table must be empty, then packets from max_entries distinct IPv4 sources |
| + * must be allowed and the one after that refused. *@test is incremented |
| + * after every passed check. Returns 0 on success or -EXFULL on the first |
| + * failed check. |
| + */ |
| +static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, |
| + int *test) |
| +{ |
| + int n; |
| + |
| + wg_ratelimiter_gc_entries(NULL); |
| + rcu_barrier(); |
| + |
| + if (atomic_read(&total_entries) != 0) |
| + return -EXFULL; |
| + ++(*test); |
| + |
| + for (n = 0; n <= max_entries; ++n) { |
| + const bool want_allowed = n != max_entries; |
| + |
| + hdr4->saddr = htonl(n); |
| + if (wg_ratelimiter_allow(skb4, &init_net) != want_allowed) |
| + return -EXFULL; |
| + ++(*test); |
| + } |
| + return 0; |
| +} |
| + |
| +/* Self-test for the rate limiter. Initializes the limiter three times, |
| + * builds minimal IPv4 (and, with CONFIG_IPV6, IPv6) packets, then runs |
| + * timings_test() and capacity_test(), retrying each up to |
| + * TRIALS_BEFORE_GIVING_UP times. Only a timed-out timings run is retried; |
| + * any capacity failure is. The limiter is then uninitialized one time more |
| + * than it was initialized. |
| + * |
| + * Returns true on success; on failure the number of the last check reached |
| + * is logged. The test is skipped (reported as passed) when KASAN or UBSAN |
| + * is enabled. |
| + */ |
| +bool __init wg_ratelimiter_selftest(void) |
| +{ |
| + enum { TRIALS_BEFORE_GIVING_UP = 5000 }; |
| + bool success = false; |
| + int test = 0, trials; |
| + /* The IPv6 pointers are only assigned under CONFIG_IPV6 but are passed |
| + * to timings_test() unconditionally, so give them a defined value. |
| + */ |
| + struct sk_buff *skb4, *skb6 = NULL; |
| + struct iphdr *hdr4; |
| + struct ipv6hdr *hdr6 = NULL; |
| + |
| + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) |
| + return true; |
| + |
| + BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); |
| + |
| + /* Three nested inits; each failure path undoes the earlier ones. */ |
| + if (wg_ratelimiter_init()) |
| + goto out; |
| + ++test; |
| + if (wg_ratelimiter_init()) { |
| + wg_ratelimiter_uninit(); |
| + goto out; |
| + } |
| + ++test; |
| + if (wg_ratelimiter_init()) { |
| + wg_ratelimiter_uninit(); |
| + wg_ratelimiter_uninit(); |
| + goto out; |
| + } |
| + ++test; |
| + |
| + skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); |
| + if (unlikely(!skb4)) |
| + goto err_nofree; |
| + skb4->protocol = htons(ETH_P_IP); |
| + hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4)); |
| + hdr4->saddr = htonl(8182); |
| + skb_reset_network_header(skb4); |
| + ++test; |
| + |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); |
| + if (unlikely(!skb6)) { |
| + kfree_skb(skb4); |
| + goto err_nofree; |
| + } |
| + skb6->protocol = htons(ETH_P_IPV6); |
| + hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); |
| + hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); |
| + hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); |
| + skb_reset_network_header(skb6); |
| + ++test; |
| +#endif |
| + |
| + /* A timeout only means the run was too slow to judge; retry it. */ |
| + for (trials = TRIALS_BEFORE_GIVING_UP;;) { |
| + int test_count = 0, ret; |
| + |
| + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); |
| + if (ret == -ETIMEDOUT) { |
| + if (!trials--) { |
| + test += test_count; |
| + goto err; |
| + } |
| + msleep(500); |
| + continue; |
| + } else if (ret < 0) { |
| + test += test_count; |
| + goto err; |
| + } else { |
| + test += test_count; |
| + break; |
| + } |
| + } |
| + |
| + for (trials = TRIALS_BEFORE_GIVING_UP;;) { |
| + int test_count = 0; |
| + |
| + if (capacity_test(skb4, hdr4, &test_count) < 0) { |
| + if (!trials--) { |
| + test += test_count; |
| + goto err; |
| + } |
| + msleep(50); |
| + continue; |
| + } |
| + test += test_count; |
| + break; |
| + } |
| + |
| + success = true; |
| + |
| +err: |
| + kfree_skb(skb4); |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + kfree_skb(skb6); |
| +#endif |
| +err_nofree: |
| + wg_ratelimiter_uninit(); |
| + wg_ratelimiter_uninit(); |
| + wg_ratelimiter_uninit(); |
| + /* Uninit one extra time to check underflow detection. */ |
| + wg_ratelimiter_uninit(); |
| +out: |
| + if (success) |
| + pr_info("ratelimiter self-tests: pass\n"); |
| + else |
| + pr_err("ratelimiter self-test %d: FAIL\n", test); |
| + |
| + return success; |
| +} |
| +#endif |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| new file mode 100644 |
| index 000000000000..c13260563446 |
| |
| |
| @@ -0,0 +1,413 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "queueing.h" |
| +#include "timers.h" |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "socket.h" |
| +#include "messages.h" |
| +#include "cookie.h" |
| + |
| +#include <linux/uio.h> |
| +#include <linux/inetdevice.h> |
| +#include <linux/socket.h> |
| +#include <net/ip_tunnels.h> |
| +#include <net/udp.h> |
| +#include <net/sock.h> |
| + |
| +/* Create and transmit a handshake initiation to @peer, unless |
| + * last_sent_handshake has not yet expired relative to REKEY_TIMEOUT. The |
| + * timestamp is set before the message is created and again just before it |
| + * is sent. |
| + */ |
| +static void wg_packet_send_handshake_initiation(struct wg_peer *peer) |
| +{ |
| + struct message_handshake_initiation packet; |
| + |
| + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), |
| + REKEY_TIMEOUT)) |
| + return; /* This function is rate limited. */ |
| + |
| + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); |
| + net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr); |
| + |
| + if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { |
| + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); |
| + wg_timers_any_authenticated_packet_traversal(peer); |
| + wg_timers_any_authenticated_packet_sent(peer); |
| + atomic64_set(&peer->last_sent_handshake, |
| + ktime_get_coarse_boottime_ns()); |
| + wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), |
| + HANDSHAKE_DSCP); |
| + wg_timers_handshake_initiated(peer); |
| + } |
| +} |
| + |
| +/* Work item for transmit_handshake_work: sends a handshake initiation to |
| + * the peer embedding @work, then drops one peer reference (the one taken |
| + * by wg_packet_send_queued_handshake_initiation() when it queues the work). |
| + */ |
| +void wg_packet_handshake_send_worker(struct work_struct *work) |
| +{ |
| + struct wg_peer *peer = container_of(work, struct wg_peer, |
| + transmit_handshake_work); |
| + |
| + wg_packet_send_handshake_initiation(peer); |
| + wg_peer_put(peer); |
| +} |
| + |
| +/* Queue a handshake initiation for @peer on the device's |
| + * handshake_send_wq. When @is_retry is false the attempt counter is reset |
| + * first. Nothing is queued if a handshake was sent within REKEY_TIMEOUT or |
| + * the peer is marked dead. |
| + */ |
| +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, |
| + bool is_retry) |
| +{ |
| + if (!is_retry) |
| + peer->timer_handshake_attempts = 0; |
| + |
| + rcu_read_lock_bh(); |
| + /* We check last_sent_handshake here in addition to the actual function |
| + * we're queueing up, so that we don't queue things if not strictly |
| + * necessary: |
| + */ |
| + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), |
| + REKEY_TIMEOUT) || |
| + unlikely(READ_ONCE(peer->is_dead))) |
| + goto out; |
| + |
| + wg_peer_get(peer); |
| + /* Queues up wg_packet_handshake_send_worker(), which does a |
| + * wg_peer_put(peer) when it is done: |
| + */ |
| + if (!queue_work(peer->device->handshake_send_wq, |
| + &peer->transmit_handshake_work)) |
| + /* If the work was already queued, we want to drop the |
| + * extra reference: |
| + */ |
| + wg_peer_put(peer); |
| +out: |
| + rcu_read_unlock_bh(); |
| +} |
| + |
| +/* Create a handshake response for @peer and, if a session can be begun |
| + * from the handshake, transmit it. Nothing is sent when either the |
| + * response creation or the session setup fails. |
| + */ |
| +void wg_packet_send_handshake_response(struct wg_peer *peer) |
| +{ |
| + struct message_handshake_response packet; |
| + |
| + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); |
| + net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr); |
| + |
| + if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { |
| + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); |
| + if (wg_noise_handshake_begin_session(&peer->handshake, |
| + &peer->keypairs)) { |
| + wg_timers_session_derived(peer); |
| + wg_timers_any_authenticated_packet_traversal(peer); |
| + wg_timers_any_authenticated_packet_sent(peer); |
| + atomic64_set(&peer->last_sent_handshake, |
| + ktime_get_coarse_boottime_ns()); |
| + wg_socket_send_buffer_to_peer(peer, &packet, |
| + sizeof(packet), |
| + HANDSHAKE_DSCP); |
| + } |
| + } |
| +} |
| + |
| +/* Build a cookie message for @sender_index using @wg's cookie checker and |
| + * send it as a reply to @initiating_skb. |
| + */ |
| +void wg_packet_send_handshake_cookie(struct wg_device *wg, |
| + struct sk_buff *initiating_skb, |
| + __le32 sender_index) |
| +{ |
| + struct message_handshake_cookie packet; |
| + |
| + net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", |
| + wg->dev->name, initiating_skb); |
| + wg_cookie_message_create(&packet, initiating_skb, sender_index, |
| + &wg->cookie_checker); |
| + wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, |
| + sizeof(packet)); |
| +} |
| + |
| +/* Queue a new handshake initiation when the current sending key is valid |
| + * and either its counter exceeds REKEY_AFTER_MESSAGES or, if we were the |
| + * initiator, it is older than REKEY_AFTER_TIME. The decision is made under |
| + * rcu_read_lock_bh(); the queueing happens after it is released. |
| + */ |
| +static void keep_key_fresh(struct wg_peer *peer) |
| +{ |
| + struct noise_keypair *keypair; |
| + bool send = false; |
| + |
| + rcu_read_lock_bh(); |
| + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); |
| + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && |
| + (unlikely(atomic64_read(&keypair->sending.counter.counter) > |
| + REKEY_AFTER_MESSAGES) || |
| + (keypair->i_am_the_initiator && |
| + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, |
| + REKEY_AFTER_TIME))))) |
| + send = true; |
| + rcu_read_unlock_bh(); |
| + |
| + if (send) |
| + wg_packet_send_queued_handshake_initiation(peer, false); |
| +} |
| + |
| +/* Number of zero bytes to append to @skb so that its final MTU-sized unit |
| + * is padded up to a multiple of MESSAGE_PADDING_MULTIPLE, without the |
| + * padded unit ever exceeding the MTU recorded in the packet's control |
| + * block. |
| + */ |
| +static unsigned int calculate_skb_padding(struct sk_buff *skb) |
| +{ |
| + unsigned int mtu = PACKET_CB(skb)->mtu; |
| + unsigned int last_unit, padded_size; |
| + |
| + /* A recorded MTU of zero would make the modulo below divide by zero. |
| + * With nothing to clamp against, just pad to the next multiple. |
| + */ |
| + if (unlikely(!mtu)) |
| + return ALIGN(skb->len, MESSAGE_PADDING_MULTIPLE) - skb->len; |
| + |
| + /* We do this modulo business with the MTU, just in case the networking |
| + * layer gives us a packet that's bigger than the MTU. In that case, we |
| + * wouldn't want the final subtraction to overflow in the case of the |
| + * padded_size being clamped. |
| + */ |
| + last_unit = skb->len % mtu; |
| + padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); |
| + |
| + if (padded_size > mtu) |
| + padded_size = mtu; |
| + return padded_size - last_unit; |
| +} |
| + |
| +/* Pad, frame and encrypt one outgoing packet in place with keypair's |
| + * sending key, using the nonce stored in the packet's control block. |
| + * |
| + * On success the skb consists of a message_data header followed by the |
| + * encrypted, padded payload and the auth tag. Returns false if the skb |
| + * could not be expanded, finishing the inner checksum failed, the |
| + * scatterlist could not be built, or the encryption routine returned |
| + * false. This function never frees the skb itself. |
| + */ |
| +static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) |
| +{ |
| + unsigned int padding_len, plaintext_len, trailer_len; |
| + struct scatterlist sg[MAX_SKB_FRAGS + 8]; |
| + struct message_data *header; |
| + struct sk_buff *trailer; |
| + int num_frags; |
| + |
| + /* Calculate lengths. */ |
| + padding_len = calculate_skb_padding(skb); |
| + trailer_len = padding_len + noise_encrypted_len(0); |
| + plaintext_len = skb->len + padding_len; |
| + |
| + /* Expand data section to have room for padding and auth tag. */ |
| + num_frags = skb_cow_data(skb, trailer_len, &trailer); |
| + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) |
| + return false; |
| + |
| + /* Set the padding to zeros, and make sure it and the auth tag are part |
| + * of the skb. |
| + */ |
| + memset(skb_tail_pointer(trailer), 0, padding_len); |
| + |
| + /* Expand head section to have room for our header and the network |
| + * stack's headers. |
| + */ |
| + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) |
| + return false; |
| + |
| + /* Finalize checksum calculation for the inner packet, if required. */ |
| + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && |
| + skb_checksum_help(skb))) |
| + return false; |
| + |
| + /* Only after checksumming can we safely add on the padding at the end |
| + * and the header. |
| + */ |
| + skb_set_inner_network_header(skb, 0); |
| + header = (struct message_data *)skb_push(skb, sizeof(*header)); |
| + header->header.type = cpu_to_le32(MESSAGE_DATA); |
| + header->key_idx = keypair->remote_index; |
| + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); |
| + pskb_put(skb, trailer, trailer_len); |
| + |
| + /* Now we can encrypt the scattergather segments */ |
| + sg_init_table(sg, num_frags); |
| + /* The scatterlist starts after the header, so the header itself is |
| + * left unencrypted. |
| + */ |
| + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), |
| + noise_encrypted_len(plaintext_len)) <= 0) |
| + return false; |
| + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, |
| + PACKET_CB(skb)->nonce, |
| + keypair->sending.key); |
| +} |
| + |
| +/* Send a keepalive to peer. |
| + * |
| + * If nothing is currently staged, an skb with no payload is allocated and |
| + * staged, so that flushing the staged queue emits a packet. If packets are |
| + * already staged, no empty packet is created and the staged packets are |
| + * sent instead. An allocation failure silently aborts the whole call. |
| + */ |
| +void wg_packet_send_keepalive(struct wg_peer *peer) |
| +{ |
| + struct sk_buff *skb; |
| + |
| + if (skb_queue_empty(&peer->staged_packet_queue)) { |
| + skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, |
| + GFP_ATOMIC); |
| + if (unlikely(!skb)) |
| + return; |
| + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); |
| + skb->dev = peer->device->dev; |
| + /* Record the device MTU; it is used later for padding. */ |
| + PACKET_CB(skb)->mtu = skb->dev->mtu; |
| + skb_queue_tail(&peer->staged_packet_queue, skb); |
| + net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr); |
| + } |
| + |
| + wg_packet_send_staged_packets(peer); |
| +} |
| + |
| +/* Transmit an already-encrypted list of packets to peer and update the |
| + * peer's timers accordingly. |
| + * |
| + * Packets whose length equals message_data_len(0) are treated as |
| + * keepalives: they are sent, but do not count as data having been sent. |
| + * Afterwards the sending key is checked for freshness. |
| + */ |
| +static void wg_packet_create_data_done(struct sk_buff *first, |
| + struct wg_peer *peer) |
| +{ |
| + struct sk_buff *skb, *next; |
| + bool is_keepalive, data_sent = false; |
| + |
| + wg_timers_any_authenticated_packet_traversal(peer); |
| + wg_timers_any_authenticated_packet_sent(peer); |
| + skb_list_walk_safe(first, skb, next) { |
| + /* The length is read before the skb is handed off for sending. */ |
| + is_keepalive = skb->len == message_data_len(0); |
| + if (likely(!wg_socket_send_skb_to_peer(peer, skb, |
| + PACKET_CB(skb)->ds) && !is_keepalive)) |
| + data_sent = true; |
| + } |
| + |
| + if (likely(data_sent)) |
| + wg_timers_data_sent(peer); |
| + |
| + keep_key_fresh(peer); |
| +} |
| + |
| +/* Work item that drains a tx queue in order. |
| + * |
| + * Processing stops at the first packet list whose state is still |
| + * PACKET_STATE_UNCRYPTED, so packets leave in the order they were queued. |
| + * Lists in PACKET_STATE_CRYPTED are sent; lists in any other state are |
| + * freed. In both cases one keypair reference and one peer reference are |
| + * dropped per list. |
| + */ |
| +void wg_packet_tx_worker(struct work_struct *work) |
| +{ |
| + struct crypt_queue *queue = container_of(work, struct crypt_queue, |
| + work); |
| + struct noise_keypair *keypair; |
| + enum packet_state state; |
| + struct sk_buff *first; |
| + struct wg_peer *peer; |
| + |
| + while ((first = __ptr_ring_peek(&queue->ring)) != NULL && |
| + (state = atomic_read_acquire(&PACKET_CB(first)->state)) != |
| + PACKET_STATE_UNCRYPTED) { |
| + __ptr_ring_discard_one(&queue->ring); |
| + /* Fetch peer and keypair now: `first` is sent or freed below. */ |
| + peer = PACKET_PEER(first); |
| + keypair = PACKET_CB(first)->keypair; |
| + |
| + if (likely(state == PACKET_STATE_CRYPTED)) |
| + wg_packet_create_data_done(first, peer); |
| + else |
| + kfree_skb_list(first); |
| + |
| + wg_noise_keypair_put(keypair, false); |
| + wg_peer_put(peer); |
| + } |
| +} |
| + |
| +/* Work item that encrypts queued packet lists. |
| + * |
| + * Each list taken from the ring is encrypted packet by packet with the |
| + * keypair stored in the first packet's control block. The list is then |
| + * handed to the owning peer's tx queue, marked PACKET_STATE_CRYPTED if |
| + * every packet was encrypted, or PACKET_STATE_DEAD as soon as one fails. |
| + */ |
| +void wg_packet_encrypt_worker(struct work_struct *work) |
| +{ |
| + struct multicore_worker *worker = container_of(work, |
| + struct multicore_worker, |
| + work); |
| + struct crypt_queue *queue = worker->ptr; |
| + struct sk_buff *first, *skb, *next; |
| + enum packet_state state; |
| + |
| + for (;;) { |
| + first = ptr_ring_consume_bh(&queue->ring); |
| + if (first == NULL) |
| + break; |
| + |
| + state = PACKET_STATE_CRYPTED; |
| + skb_list_walk_safe(first, skb, next) { |
| + if (unlikely(!encrypt_packet(skb, |
| + PACKET_CB(first)->keypair))) { |
| + /* One failure condemns the whole list. */ |
| + state = PACKET_STATE_DEAD; |
| + break; |
| + } |
| + wg_reset_packet(skb); |
| + } |
| + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, |
| + state); |
| + } |
| +} |
| + |
| +/* Queue a list of packets (starting at first) for encryption. |
| + * |
| + * On success the list has been queued and nothing more is done here. If |
| + * the enqueue returns -EPIPE, the list is put on the peer's tx queue as |
| + * PACKET_STATE_DEAD, leaving cleanup to the tx worker. For a dead peer, or |
| + * any other enqueue error, the keypair reference, the peer reference and |
| + * the packets themselves are released here. |
| + */ |
| +static void wg_packet_create_data(struct sk_buff *first) |
| +{ |
| + struct wg_peer *peer = PACKET_PEER(first); |
| + struct wg_device *wg = peer->device; |
| + int ret = -EINVAL; |
| + |
| + rcu_read_lock_bh(); |
| + /* ret is still -EINVAL here, so a dead peer takes the cleanup path. */ |
| + if (unlikely(READ_ONCE(peer->is_dead))) |
| + goto err; |
| + |
| + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, |
| + &peer->tx_queue, first, |
| + wg->packet_crypt_wq, |
| + &wg->encrypt_queue.last_cpu); |
| + if (unlikely(ret == -EPIPE)) |
| + wg_queue_enqueue_per_peer(&peer->tx_queue, first, |
| + PACKET_STATE_DEAD); |
| +err: |
| + rcu_read_unlock_bh(); |
| + if (likely(!ret || ret == -EPIPE)) |
| + return; |
| + wg_noise_keypair_put(PACKET_CB(first)->keypair, false); |
| + wg_peer_put(peer); |
| + kfree_skb_list(first); |
| +} |
| + |
| +/* Drop every packet staged for peer, counting each one in the device's |
| + * tx_dropped statistic. The counting and the purge both happen under the |
| + * staged queue's lock. |
| + */ |
| +void wg_packet_purge_staged_packets(struct wg_peer *peer) |
| +{ |
| + spin_lock_bh(&peer->staged_packet_queue.lock); |
| + peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; |
| + __skb_queue_purge(&peer->staged_packet_queue); |
| + spin_unlock_bh(&peer->staged_packet_queue.lock); |
| +} |
| + |
| +/* Try to send everything currently staged for peer. |
| + * |
| + * The staged queue is moved to a local list. If the peer has a current |
| + * keypair whose sending key is valid and not older than REJECT_AFTER_TIME, |
| + * each packet is given a nonce and the list is passed on for encryption. |
| + * Otherwise the packets are orphaned, put back on the staged queue, and a |
| + * new handshake initiation is queued. |
| + */ |
| +void wg_packet_send_staged_packets(struct wg_peer *peer) |
| +{ |
| + struct noise_symmetric_key *key; |
| + struct noise_keypair *keypair; |
| + struct sk_buff_head packets; |
| + struct sk_buff *skb; |
| + |
| + /* Steal the current queue into our local one. */ |
| + __skb_queue_head_init(&packets); |
| + spin_lock_bh(&peer->staged_packet_queue.lock); |
| + skb_queue_splice_init(&peer->staged_packet_queue, &packets); |
| + spin_unlock_bh(&peer->staged_packet_queue.lock); |
| + if (unlikely(skb_queue_empty(&packets))) |
| + return; |
| + |
| + /* First we make sure we have a valid reference to a valid key. */ |
| + rcu_read_lock_bh(); |
| + keypair = wg_noise_keypair_get( |
| + rcu_dereference_bh(peer->keypairs.current_keypair)); |
| + rcu_read_unlock_bh(); |
| + if (unlikely(!keypair)) |
| + goto out_nokey; |
| + key = &keypair->sending; |
| + if (unlikely(!READ_ONCE(key->is_valid))) |
| + goto out_nokey; |
| + if (unlikely(wg_birthdate_has_expired(key->birthdate, |
| + REJECT_AFTER_TIME))) |
| + goto out_invalid; |
| + |
| + /* After we know we have a somewhat valid key, we now try to assign |
| + * nonces to all of the packets in the queue. If we can't assign nonces |
| + * for all of them, we just consider it a failure and wait for the next |
| + * handshake. |
| + */ |
| + skb_queue_walk(&packets, skb) { |
| + /* 0 for no outer TOS: no leak. TODO: at some later point, we |
| + * might consider using flowi->tos as outer instead. |
| + */ |
| + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); |
| + PACKET_CB(skb)->nonce = |
| + atomic64_inc_return(&key->counter.counter) - 1; |
| + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) |
| + goto out_invalid; |
| + } |
| + |
| + /* Terminate the list with NULL so it can be walked as a plain skb |
| + * list; the keypair and peer references travel with the first packet. |
| + */ |
| + packets.prev->next = NULL; |
| + wg_peer_get(keypair->entry.peer); |
| + PACKET_CB(packets.next)->keypair = keypair; |
| + wg_packet_create_data(packets.next); |
| + return; |
| + |
| +out_invalid: |
| + WRITE_ONCE(key->is_valid, false); |
| +out_nokey: |
| + /* NOTE(review): keypair can be NULL on this path; presumably |
| + * wg_noise_keypair_put() tolerates NULL - confirm. |
| + */ |
| + wg_noise_keypair_put(keypair, false); |
| + |
| + /* We orphan the packets if we're waiting on a handshake, so that they |
| + * don't block a socket's pool. |
| + */ |
| + skb_queue_walk(&packets, skb) |
| + skb_orphan(skb); |
| + /* Then we put them back on the top of the queue. We're not too |
| + * concerned about accidentally getting things a little out of order if |
| + * packets are being added really fast, because this queue is for before |
| + * packets can even be sent and it's small anyway. |
| + */ |
| + spin_lock_bh(&peer->staged_packet_queue.lock); |
| + skb_queue_splice(&packets, &peer->staged_packet_queue); |
| + spin_unlock_bh(&peer->staged_packet_queue.lock); |
| + |
| + /* If we're exiting because there's something wrong with the key, it |
| + * means we should initiate a new handshake. |
| + */ |
| + wg_packet_send_queued_handshake_initiation(peer, false); |
| +} |
| diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c |
| new file mode 100644 |
| index 000000000000..c46256d0d81c |
| |
| |
| @@ -0,0 +1,437 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "socket.h" |
| +#include "queueing.h" |
| +#include "messages.h" |
| + |
| +#include <linux/ctype.h> |
| +#include <linux/net.h> |
| +#include <linux/if_vlan.h> |
| +#include <linux/if_ether.h> |
| +#include <linux/inetdevice.h> |
| +#include <net/udp_tunnel.h> |
| +#include <net/ipv6.h> |
| + |
| +/* Transmit skb to endpoint over the device's IPv4 UDP socket. |
| + * |
| + * The skb is consumed on every path: it is either handed to |
| + * udp_tunnel_xmit_skb() or freed on error. cache may be NULL, in which case |
| + * no route caching is done. If the remembered source address or source |
| + * interface turns out to be unusable, it is forgotten (in *endpoint as |
| + * well) and the route is looked up again without it. |
| + * |
| + * Returns 0 on success, -ENONET if the device has no IPv4 socket, -ELOOP if |
| + * the route would lead back into our own device, or the routing error. |
| + */ |
| +static int send4(struct wg_device *wg, struct sk_buff *skb, |
| + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) |
| +{ |
| + struct flowi4 fl = { |
| + .saddr = endpoint->src4.s_addr, |
| + .daddr = endpoint->addr4.sin_addr.s_addr, |
| + .fl4_dport = endpoint->addr4.sin_port, |
| + .flowi4_mark = wg->fwmark, |
| + .flowi4_proto = IPPROTO_UDP |
| + }; |
| + struct rtable *rt = NULL; |
| + struct sock *sock; |
| + int ret = 0; |
| + |
| + skb_mark_not_on_list(skb); |
| + skb->dev = wg->dev; |
| + skb->mark = wg->fwmark; |
| + |
| + rcu_read_lock_bh(); |
| + sock = rcu_dereference_bh(wg->sock4); |
| + |
| + if (unlikely(!sock)) { |
| + ret = -ENONET; |
| + goto err; |
| + } |
| + |
| + fl.fl4_sport = inet_sk(sock)->inet_sport; |
| + |
| + if (cache) |
| + rt = dst_cache_get_ip4(cache, &fl.saddr); |
| + |
| + if (!rt) { |
| + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); |
| + /* Forget a source address that is no longer configured. */ |
| + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, |
| + fl.saddr, RT_SCOPE_HOST))) { |
| + endpoint->src4.s_addr = 0; |
| + /* src_if4 is an interface index, not a big-endian value, |
| + * so it is cleared with a plain assignment. |
| + */ |
| + endpoint->src_if4 = 0; |
| + fl.saddr = 0; |
| + if (cache) |
| + dst_cache_reset(cache); |
| + } |
| + rt = ip_route_output_flow(sock_net(sock), &fl, sock); |
| + /* Retry without the remembered source if routing rejected it or |
| + * chose a different interface than the remembered one. |
| + */ |
| + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && |
| + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && |
| + rt->dst.dev->ifindex != endpoint->src_if4)))) { |
| + endpoint->src4.s_addr = 0; |
| + endpoint->src_if4 = 0; |
| + fl.saddr = 0; |
| + if (cache) |
| + dst_cache_reset(cache); |
| + if (!IS_ERR(rt)) |
| + ip_rt_put(rt); |
| + rt = ip_route_output_flow(sock_net(sock), &fl, sock); |
| + } |
| + if (unlikely(IS_ERR(rt))) { |
| + ret = PTR_ERR(rt); |
| + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", |
| + wg->dev->name, &endpoint->addr, ret); |
| + goto err; |
| + } else if (unlikely(rt->dst.dev == skb->dev)) { |
| + ip_rt_put(rt); |
| + ret = -ELOOP; |
| + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", |
| + wg->dev->name, &endpoint->addr); |
| + goto err; |
| + } |
| + if (cache) |
| + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); |
| + } |
| + |
| + skb->ignore_df = 1; |
| + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, |
| + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, |
| + fl.fl4_dport, false, false); |
| + goto out; |
| + |
| +err: |
| + kfree_skb(skb); |
| +out: |
| + rcu_read_unlock_bh(); |
| + return ret; |
| +} |
| + |
| +/* Transmit skb to endpoint over the device's IPv6 UDP socket. |
| + * |
| + * The skb is consumed on every path, including when IPv6 support is |
| + * compiled out: it is either handed to udp_tunnel6_xmit_skb() or freed. |
| + * cache may be NULL, in which case no route caching is done. |
| + * |
| + * Returns 0 on success, -ENONET if the device has no IPv6 socket, -ELOOP if |
| + * the route would lead back into our own device, the routing error, or |
| + * -EAFNOSUPPORT when built without IPv6. |
| + */ |
| +static int send6(struct wg_device *wg, struct sk_buff *skb, |
| + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) |
| +{ |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + struct flowi6 fl = { |
| + .saddr = endpoint->src6, |
| + .daddr = endpoint->addr6.sin6_addr, |
| + .fl6_dport = endpoint->addr6.sin6_port, |
| + .flowi6_mark = wg->fwmark, |
| + .flowi6_oif = endpoint->addr6.sin6_scope_id, |
| + .flowi6_proto = IPPROTO_UDP |
| + /* TODO: addr->sin6_flowinfo */ |
| + }; |
| + struct dst_entry *dst = NULL; |
| + struct sock *sock; |
| + int ret = 0; |
| + |
| + skb_mark_not_on_list(skb); |
| + skb->dev = wg->dev; |
| + skb->mark = wg->fwmark; |
| + |
| + rcu_read_lock_bh(); |
| + sock = rcu_dereference_bh(wg->sock6); |
| + |
| + if (unlikely(!sock)) { |
| + ret = -ENONET; |
| + goto err; |
| + } |
| + |
| + fl.fl6_sport = inet_sk(sock)->inet_sport; |
| + |
| + if (cache) |
| + dst = dst_cache_get_ip6(cache, &fl.saddr); |
| + |
| + if (!dst) { |
| + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); |
| + /* Forget a source address that is no longer configured. */ |
| + if (unlikely(!ipv6_addr_any(&fl.saddr) && |
| + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { |
| + endpoint->src6 = fl.saddr = in6addr_any; |
| + if (cache) |
| + dst_cache_reset(cache); |
| + } |
| + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, |
| + NULL); |
| + if (unlikely(IS_ERR(dst))) { |
| + ret = PTR_ERR(dst); |
| + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", |
| + wg->dev->name, &endpoint->addr, ret); |
| + goto err; |
| + } else if (unlikely(dst->dev == skb->dev)) { |
| + dst_release(dst); |
| + ret = -ELOOP; |
| + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", |
| + wg->dev->name, &endpoint->addr); |
| + goto err; |
| + } |
| + if (cache) |
| + dst_cache_set_ip6(cache, dst, &fl.saddr); |
| + } |
| + |
| + skb->ignore_df = 1; |
| + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, |
| + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, |
| + fl.fl6_dport, false); |
| + goto out; |
| + |
| +err: |
| + kfree_skb(skb); |
| +out: |
| + rcu_read_unlock_bh(); |
| + return ret; |
| +#else |
| + /* Callers rely on the send functions consuming the skb, so it must be |
| + * freed here too, or it would leak. |
| + */ |
| + kfree_skb(skb); |
| + return -EAFNOSUPPORT; |
| +#endif |
| +} |
| + |
| +/* Send skb to peer's current endpoint, picking the IPv4 or IPv6 path from |
| + * the endpoint's address family. The skb is freed here when the family is |
| + * neither. On success the length the skb had on entry is added to the |
| + * peer's tx_bytes. Returns 0 or a negative errno (-EAFNOSUPPORT for an |
| + * unknown family). |
| + */ |
| +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) |
| +{ |
| + /* Remember the length first: the skb is gone once it has been sent. */ |
| + size_t skb_len = skb->len; |
| + int ret = -EAFNOSUPPORT; |
| + |
| + read_lock_bh(&peer->endpoint_lock); |
| + switch (peer->endpoint.addr.sa_family) { |
| + case AF_INET: |
| + ret = send4(peer->device, skb, &peer->endpoint, ds, |
| + &peer->endpoint_cache); |
| + break; |
| + case AF_INET6: |
| + ret = send6(peer->device, skb, &peer->endpoint, ds, |
| + &peer->endpoint_cache); |
| + break; |
| + default: |
| + dev_kfree_skb(skb); |
| + break; |
| + } |
| + if (likely(ret == 0)) |
| + peer->tx_bytes += skb_len; |
| + read_unlock_bh(&peer->endpoint_lock); |
| + |
| + return ret; |
| +} |
| + |
| +/* Copy len bytes from buffer into a freshly allocated skb and send it to |
| + * peer's current endpoint with the given ds value. |
| + * |
| + * Returns -ENOMEM if the skb cannot be allocated, otherwise the result of |
| + * wg_socket_send_skb_to_peer(). |
| + */ |
| +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, |
| + size_t len, u8 ds) |
| +{ |
| + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); |
| + |
| + if (unlikely(!skb)) |
| + return -ENOMEM; |
| + |
| + /* Leave SKB_HEADER_LEN bytes of headroom in front of the payload. */ |
| + skb_reserve(skb, SKB_HEADER_LEN); |
| + skb_set_inner_network_header(skb, 0); |
| + skb_put_data(skb, buffer, len); |
| + return wg_socket_send_skb_to_peer(peer, skb, ds); |
| +} |
| + |
| +/* Send len bytes from buffer back to wherever in_skb came from. |
| + * |
| + * The destination is a temporary endpoint derived from in_skb's headers; |
| + * no peer is involved, no route cache is used, and ds is 0. |
| + * |
| + * Returns -EINVAL if in_skb is NULL or is neither IPv4 nor IPv6, -ENOMEM if |
| + * the skb cannot be allocated, otherwise the result of send4()/send6(). |
| + */ |
| +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, |
| + struct sk_buff *in_skb, void *buffer, |
| + size_t len) |
| +{ |
| + int ret = 0; |
| + struct sk_buff *skb; |
| + struct endpoint endpoint; |
| + |
| + if (unlikely(!in_skb)) |
| + return -EINVAL; |
| + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); |
| + if (unlikely(ret < 0)) |
| + return ret; |
| + |
| + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); |
| + if (unlikely(!skb)) |
| + return -ENOMEM; |
| + skb_reserve(skb, SKB_HEADER_LEN); |
| + skb_set_inner_network_header(skb, 0); |
| + skb_put_data(skb, buffer, len); |
| + |
| + if (endpoint.addr.sa_family == AF_INET) |
| + ret = send4(wg, skb, &endpoint, 0, NULL); |
| + else if (endpoint.addr.sa_family == AF_INET6) |
| + ret = send6(wg, skb, &endpoint, 0, NULL); |
| + /* No other possibilities if the endpoint is valid, which it is, |
| + * as we checked above. |
| + */ |
| + |
| + return ret; |
| +} |
| + |
| +/* Fill *endpoint from a received packet so that a reply can be sent. |
| + * |
| + * The packet's source address and UDP source port become the endpoint |
| + * address, and the packet's destination address becomes the source to |
| + * reply from. For IPv4 the incoming interface index is stored as well; for |
| + * IPv6 it is used to compute the scope id. *endpoint is zeroed first. |
| + * |
| + * Returns 0, or -EINVAL if skb->protocol is neither IPv4 nor IPv6. |
| + */ |
| +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, |
| + const struct sk_buff *skb) |
| +{ |
| + memset(endpoint, 0, sizeof(*endpoint)); |
| + if (skb->protocol == htons(ETH_P_IP)) { |
| + endpoint->addr4.sin_family = AF_INET; |
| + endpoint->addr4.sin_port = udp_hdr(skb)->source; |
| + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; |
| + endpoint->src4.s_addr = ip_hdr(skb)->daddr; |
| + endpoint->src_if4 = skb->skb_iif; |
| + } else if (skb->protocol == htons(ETH_P_IPV6)) { |
| + endpoint->addr6.sin6_family = AF_INET6; |
| + endpoint->addr6.sin6_port = udp_hdr(skb)->source; |
| + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; |
| + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( |
| + &ipv6_hdr(skb)->saddr, skb->skb_iif); |
| + endpoint->src6 = ipv6_hdr(skb)->daddr; |
| + } else { |
| + return -EINVAL; |
| + } |
| + return 0; |
| +} |
| + |
| +/* Report whether two endpoints are the same: both IPv4 with identical |
| + * port, address, source address and source interface; both IPv6 with |
| + * identical port, address, scope id and source address; or both with no |
| + * address family set at all. |
| + */ |
| +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) |
| +{ |
| + if (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET) |
| + return a->addr4.sin_port == b->addr4.sin_port && |
| + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && |
| + a->src4.s_addr == b->src4.s_addr && |
| + a->src_if4 == b->src_if4; |
| + |
| + if (a->addr.sa_family == AF_INET6 && b->addr.sa_family == AF_INET6) |
| + return a->addr6.sin6_port == b->addr6.sin6_port && |
| + ipv6_addr_equal(&a->addr6.sin6_addr, |
| + &b->addr6.sin6_addr) && |
| + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && |
| + ipv6_addr_equal(&a->src6, &b->src6); |
| + |
| + /* Two endpoints that were never set also count as equal. */ |
| + return unlikely(!a->addr.sa_family && !b->addr.sa_family); |
| +} |
| + |
| +/* Replace peer's endpoint with *endpoint if it differs from the current |
| + * one, and reset the peer's route cache. An endpoint whose family is |
| + * neither IPv4 nor IPv6 leaves the peer unchanged. |
| + */ |
| +void wg_socket_set_peer_endpoint(struct wg_peer *peer, |
| + const struct endpoint *endpoint) |
| +{ |
| + /* First we check unlocked, in order to optimize, since it's pretty rare |
| + * that an endpoint will change. If we happen to be mid-write, and two |
| + * CPUs wind up writing the same thing or something slightly different, |
| + * it doesn't really matter much either. |
| + */ |
| + if (endpoint_eq(endpoint, &peer->endpoint)) |
| + return; |
| + write_lock_bh(&peer->endpoint_lock); |
| + if (endpoint->addr.sa_family == AF_INET) { |
| + peer->endpoint.addr4 = endpoint->addr4; |
| + peer->endpoint.src4 = endpoint->src4; |
| + peer->endpoint.src_if4 = endpoint->src_if4; |
| + } else if (endpoint->addr.sa_family == AF_INET6) { |
| + peer->endpoint.addr6 = endpoint->addr6; |
| + peer->endpoint.src6 = endpoint->src6; |
| + } else { |
| + goto out; |
| + } |
| + /* Cached routes belong to the old endpoint. */ |
| + dst_cache_reset(&peer->endpoint_cache); |
| +out: |
| + write_unlock_bh(&peer->endpoint_lock); |
| +} |
| + |
| +/* Update peer's endpoint to the source of the received packet skb. Nothing |
| + * changes if no endpoint can be derived from the packet. |
| + */ |
| +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, |
| + const struct sk_buff *skb) |
| +{ |
| + struct endpoint source; |
| + |
| + if (wg_socket_endpoint_from_skb(&source, skb) != 0) |
| + return; |
| + wg_socket_set_peer_endpoint(peer, &source); |
| +} |
| + |
| +/* Forget the remembered source address of peer's endpoint and reset the |
| + * peer's route cache, under the endpoint write lock. |
| + * |
| + * NOTE(review): only src6 is zeroed here; presumably src4/src_if4 share |
| + * storage with src6 so the IPv4 source is cleared too - confirm against |
| + * the definition of struct endpoint. |
| + */ |
| +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) |
| +{ |
| + write_lock_bh(&peer->endpoint_lock); |
| + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); |
| + dst_cache_reset(&peer->endpoint_cache); |
| + write_unlock_bh(&peer->endpoint_lock); |
| +} |
| + |
| +/* encap_rcv callback of the tunnel sockets: pass a received packet to the |
| + * device stored in the socket's user data, or free the packet if there is |
| + * no socket or no device. Always returns 0. |
| + */ |
| +static int wg_receive(struct sock *sk, struct sk_buff *skb) |
| +{ |
| + struct wg_device *wg = NULL; |
| + |
| + if (likely(sk)) |
| + wg = sk->sk_user_data; |
| + |
| + if (unlikely(!wg)) { |
| + kfree_skb(skb); |
| + return 0; |
| + } |
| + |
| + wg_packet_receive(wg, skb); |
| + return 0; |
| +} |
| + |
| +/* Release a tunnel socket after clearing its memalloc flag. A NULL sock is |
| + * ignored. |
| + */ |
| +static void sock_free(struct sock *sock) |
| +{ |
| + if (likely(sock)) { |
| + sk_clear_memalloc(sock); |
| + udp_tunnel_sock_release(sock->sk_socket); |
| + } |
| +} |
| + |
| +/* Configure a freshly created tunnel socket: atomic allocations, the |
| + * largest possible send buffer, and the memalloc flag set. |
| + */ |
| +static void set_sock_opts(struct socket *sock) |
| +{ |
| + struct sock *sk = sock->sk; |
| + |
| + sk->sk_allocation = GFP_ATOMIC; |
| + sk->sk_sndbuf = INT_MAX; |
| + sk_set_memalloc(sk); |
| +} |
| + |
| +/* Create the device's UDP tunnel sockets on the given port and install |
| + * them with wg_socket_reinit(). |
| + * |
| + * An IPv4 socket is always created. When IPv6 is built in and enabled, an |
| + * IPv6 socket is created on the same port the IPv4 socket ended up with. If |
| + * port is 0 and that port is already in use for IPv6, both sockets are |
| + * recreated, up to 100 times. |
| + * |
| + * Returns 0 on success or the error from udp_sock_create(). |
| + */ |
| +int wg_socket_init(struct wg_device *wg, u16 port) |
| +{ |
| + int ret; |
| + struct udp_tunnel_sock_cfg cfg = { |
| + .sk_user_data = wg, |
| + .encap_type = 1, |
| + .encap_rcv = wg_receive |
| + }; |
| + struct socket *new4 = NULL, *new6 = NULL; |
| + struct udp_port_cfg port4 = { |
| + .family = AF_INET, |
| + .local_ip.s_addr = htonl(INADDR_ANY), |
| + .local_udp_port = htons(port), |
| + .use_udp_checksums = true |
| + }; |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + int retries = 0; |
| + struct udp_port_cfg port6 = { |
| + .family = AF_INET6, |
| + .local_ip6 = IN6ADDR_ANY_INIT, |
| + .use_udp6_tx_checksums = true, |
| + .use_udp6_rx_checksums = true, |
| + .ipv6_v6only = true |
| + }; |
| +#endif |
| + |
| +#if IS_ENABLED(CONFIG_IPV6) |
| +retry: |
| +#endif |
| + |
| + ret = udp_sock_create(wg->creating_net, &port4, &new4); |
| + if (ret < 0) { |
| + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); |
| + return ret; |
| + } |
| + set_sock_opts(new4); |
| + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); |
| + |
| +#if IS_ENABLED(CONFIG_IPV6) |
| + if (ipv6_mod_enabled()) { |
| + /* Use whatever port the IPv4 socket actually got bound to. */ |
| + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; |
| + ret = udp_sock_create(wg->creating_net, &port6, &new6); |
| + if (ret < 0) { |
| + udp_tunnel_sock_release(new4); |
| + /* Only a port we did not choose ourselves is worth retrying. */ |
| + if (ret == -EADDRINUSE && !port && retries++ < 100) |
| + goto retry; |
| + pr_err("%s: Could not create IPv6 socket\n", |
| + wg->dev->name); |
| + return ret; |
| + } |
| + set_sock_opts(new6); |
| + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); |
| + } |
| +#endif |
| + |
| + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); |
| + return 0; |
| +} |
| + |
| +/* Swap the device's IPv4 and IPv6 sockets for new4 and new6 (either may be |
| + * NULL) and release the previous ones. |
| + * |
| + * The pointers are replaced under socket_update_lock. When new4 is given, |
| + * the device's incoming_port is updated from it. The old sockets are freed |
| + * only after waiting for readers that may still be using them. |
| + */ |
| +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, |
| + struct sock *new6) |
| +{ |
| + struct sock *old4, *old6; |
| + |
| + mutex_lock(&wg->socket_update_lock); |
| + old4 = rcu_dereference_protected(wg->sock4, |
| + lockdep_is_held(&wg->socket_update_lock)); |
| + old6 = rcu_dereference_protected(wg->sock6, |
| + lockdep_is_held(&wg->socket_update_lock)); |
| + rcu_assign_pointer(wg->sock4, new4); |
| + rcu_assign_pointer(wg->sock6, new6); |
| + if (new4) |
| + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); |
| + mutex_unlock(&wg->socket_update_lock); |
| + /* synchronize_net() already waits for an RCU grace period, so a |
| + * separate synchronize_rcu() before it would only wait twice. |
| + */ |
| + synchronize_net(); |
| + sock_free(old4); |
| + sock_free(old6); |
| +} |
| diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h |
| new file mode 100644 |
| index 000000000000..bab5848efbcd |
| |
| |
| @@ -0,0 +1,44 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_SOCKET_H |
| +#define _WG_SOCKET_H |
| + |
| +#include <linux/netdevice.h> |
| +#include <linux/udp.h> |
| +#include <linux/if_vlan.h> |
| +#include <linux/if_ether.h> |
| + |
| +int wg_socket_init(struct wg_device *wg, u16 port); |
| +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, |
| + struct sock *new6); |
| +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, |
| + size_t len, u8 ds); |
| +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, |
| + u8 ds); |
| +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, |
| + struct sk_buff *in_skb, |
| + void *out_buffer, size_t len); |
| + |
| +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, |
| + const struct sk_buff *skb); |
| +void wg_socket_set_peer_endpoint(struct wg_peer *peer, |
| + const struct endpoint *endpoint); |
| +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, |
| + const struct sk_buff *skb); |
| +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); |
| + |
| +/* Rate-limited debug print that includes the source endpoint of skb. |
| + * |
| + * fmt receives dev first, then a pointer to the socket address derived |
| + * from skb, then any further arguments. When debugging is compiled out the |
| + * macro expands to nothing and its arguments are not evaluated. Both |
| + * variants take the same parameter list. |
| + */ |
| +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) |
| +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \ |
| + struct endpoint __endpoint; \ |
| + wg_socket_endpoint_from_skb(&__endpoint, skb); \ |
| + net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ |
| + ##__VA_ARGS__); \ |
| + } while (0) |
| +#else |
| +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) |
| +#endif |
| + |
| +#endif /* _WG_SOCKET_H */ |
| diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c |
| new file mode 100644 |
| index 000000000000..d54d32ac9bc4 |
| |
| |
| @@ -0,0 +1,243 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#include "timers.h" |
| +#include "device.h" |
| +#include "peer.h" |
| +#include "queueing.h" |
| +#include "socket.h" |
| + |
| +/* |
| + * - Timer for retransmitting the handshake if we don't hear back after |
| + * `REKEY_TIMEOUT + jitter` ms. |
| + * |
| + * - Timer for sending empty packet if we have received a packet but after have |
| + * not sent one for `KEEPALIVE_TIMEOUT` ms. |
| + * |
| + * - Timer for initiating new handshake if we have sent a packet but after have |
| + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + |
| + * jitter` ms. |
| + * |
| + * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms |
| + * if no new keys have been received. |
| + * |
| + * - Timer for, if enabled, sending an empty authenticated packet every user- |
| + * specified seconds. |
| + */ |
| + |
| +/* Arm (or re-arm) one of peer's timers to fire at `expires`, but only |
| + * while the device is running and the peer is not marked dead; otherwise |
| + * the timer is left untouched. |
| + */ |
| +static inline void mod_peer_timer(struct wg_peer *peer, |
| + struct timer_list *timer, |
| + unsigned long expires) |
| +{ |
| + rcu_read_lock_bh(); |
| + if (likely(netif_running(peer->device->dev) && |
| + !READ_ONCE(peer->is_dead))) |
| + mod_timer(timer, expires); |
| + rcu_read_unlock_bh(); |
| +} |
| + |
| +/* Timer callback for timer_retransmit_handshake. |
| + * |
| + * While the attempt counter has not exceeded MAX_TIMER_HANDSHAKES, the |
| + * counter is incremented, the remembered source address is cleared and |
| + * another handshake initiation is queued as a retry. Once the limit is |
| + * exceeded we give up: the keepalive timer is stopped, staged packets are |
| + * dropped, and key zeroing is scheduled unless already pending. |
| + */ |
| +static void wg_expired_retransmit_handshake(struct timer_list *timer) |
| +{ |
| + struct wg_peer *peer = from_timer(peer, timer, |
| + timer_retransmit_handshake); |
| + |
| + if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { |
| + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); |
| + |
| + del_timer(&peer->timer_send_keepalive); |
| + /* We drop all packets without a keypair and don't try again, |
| + * if we try unsuccessfully for too long to make a handshake. |
| + */ |
| + wg_packet_purge_staged_packets(peer); |
| + |
| + /* We set a timer for destroying any residue that might be left |
| + * of a partial exchange. |
| + */ |
| + if (!timer_pending(&peer->timer_zero_key_material)) |
| + mod_peer_timer(peer, &peer->timer_zero_key_material, |
| + jiffies + REJECT_AFTER_TIME * 3 * HZ); |
| + } else { |
| + ++peer->timer_handshake_attempts; |
| + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr, REKEY_TIMEOUT, |
| + peer->timer_handshake_attempts + 1); |
| + |
| + /* We clear the endpoint address src address, in case this is |
| + * the cause of trouble. |
| + */ |
| + wg_socket_clear_peer_endpoint_src(peer); |
| + |
| + wg_packet_send_queued_handshake_initiation(peer, true); |
| + } |
| +} |
| + |
| +/* Timer callback for timer_send_keepalive: send a keepalive, and if a |
| + * further keepalive was requested while this timer was pending, clear that |
| + * request and re-arm the timer for another KEEPALIVE_TIMEOUT seconds. |
| + */ |
| +static void wg_expired_send_keepalive(struct timer_list *timer) |
| +{ |
| + struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive); |
| + |
| + wg_packet_send_keepalive(peer); |
| + if (peer->timer_need_another_keepalive) { |
| + peer->timer_need_another_keepalive = false; |
| + mod_peer_timer(peer, &peer->timer_send_keepalive, |
| + jiffies + KEEPALIVE_TIMEOUT * HZ); |
| + } |
| +} |
| + |
| +/* Timer callback for timer_new_handshake: the peer has stopped answering, |
| + * so clear the remembered source address and queue a new handshake |
| + * initiation (not flagged as a retry). |
| + */ |
| +static void wg_expired_new_handshake(struct timer_list *timer) |
| +{ |
| + struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake); |
| + |
| + pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); |
| + /* We clear the endpoint address src address, in case this is the cause |
| + * of trouble. |
| + */ |
| + wg_socket_clear_peer_endpoint_src(peer); |
| + wg_packet_send_queued_handshake_initiation(peer, false); |
| +} |
| + |
| +/* Timer callback for timer_zero_key_material: hand the actual clearing of |
| + * key material to clear_peer_work on the handshake_send_wq workqueue. A |
| + * peer reference is taken on behalf of the work item, which releases it |
| + * when it runs. Nothing is queued for a peer that is marked dead. |
| + */ |
| +static void wg_expired_zero_key_material(struct timer_list *timer) |
| +{ |
| + struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material); |
| + |
| + rcu_read_lock_bh(); |
| + if (!READ_ONCE(peer->is_dead)) { |
| + wg_peer_get(peer); |
| + if (!queue_work(peer->device->handshake_send_wq, |
| + &peer->clear_peer_work)) |
| + /* If the work was already on the queue, we want to drop |
| + * the extra reference. |
| + */ |
| + wg_peer_put(peer); |
| + } |
| + rcu_read_unlock_bh(); |
| +} |
| + |
| +/* Work item behind timer_zero_key_material: clear the peer's handshake |
| + * state and all of its keypairs, then drop the peer reference that was |
| + * taken when this work was queued. |
| + */ |
| +static void wg_queued_expired_zero_key_material(struct work_struct *work) |
| +{ |
| + struct wg_peer *peer = container_of(work, struct wg_peer, |
| + clear_peer_work); |
| + |
| + pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", |
| + peer->device->dev->name, peer->internal_id, |
| + &peer->endpoint.addr, REJECT_AFTER_TIME * 3); |
| + wg_noise_handshake_clear(&peer->handshake); |
| + wg_noise_keypairs_clear(&peer->keypairs); |
| + wg_peer_put(peer); |
| +} |
| + |
| +/* Timer callback for timer_persistent_keepalive: send a keepalive, unless |
| + * the persistent keepalive interval has meanwhile been set to zero. |
| + */ |
| +static void wg_expired_send_persistent_keepalive(struct timer_list *timer) |
| +{ |
| + struct wg_peer *peer = from_timer(peer, timer, |
| + timer_persistent_keepalive); |
| + |
| + if (likely(peer->persistent_keepalive_interval)) |
| + wg_packet_send_keepalive(peer); |
| +} |
| + |
| +/* Should be called after an authenticated data packet is sent. |
| + * |
| + * Unless it is already pending, arms the new-handshake timer to fire after |
| + * KEEPALIVE_TIMEOUT + REKEY_TIMEOUT seconds plus a random jitter of fewer |
| + * than REKEY_TIMEOUT_JITTER_MAX_JIFFIES jiffies. |
| + */ |
| +void wg_timers_data_sent(struct wg_peer *peer) |
| +{ |
| + if (!timer_pending(&peer->timer_new_handshake)) |
| + mod_peer_timer(peer, &peer->timer_new_handshake, |
| + jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + |
| + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); |
| +} |
| + |
| +/* Should be called after an authenticated data packet is received. |
| + * |
| + * Arms the keepalive timer KEEPALIVE_TIMEOUT seconds out when it is idle; |
| + * if it is already pending, records that one more keepalive is needed once |
| + * it fires. Nothing happens while the interface is not running. |
| + */ |
| +void wg_timers_data_received(struct wg_peer *peer) |
| +{ |
| + if (unlikely(!netif_running(peer->device->dev))) |
| + return; |
| + |
| + if (timer_pending(&peer->timer_send_keepalive)) { |
| + peer->timer_need_another_keepalive = true; |
| + return; |
| + } |
| + |
| + mod_peer_timer(peer, &peer->timer_send_keepalive, |
| + jiffies + KEEPALIVE_TIMEOUT * HZ); |
| +} |
| + |
| +/* Should be called after any type of authenticated packet is sent, whether |
| + * keepalive, data, or handshake. |
| + * |
| + * Cancels the pending keepalive timer: having just sent something, there |
| + * is no need for a separate keepalive. |
| + */ |
| +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) |
| +{ |
| + del_timer(&peer->timer_send_keepalive); |
| +} |
| + |
| +/* Should be called after any type of authenticated packet is received, |
| + * whether keepalive, data, or handshake. |
| + * |
| + * Cancels the pending new-handshake timer: the peer is evidently still |
| + * answering. |
| + */ |
| +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) |
| +{ |
| + del_timer(&peer->timer_new_handshake); |
| +} |
| + |
| +/* Should be called after a handshake initiation message is sent. |
| + * |
| + * Arms the retransmit timer to fire after REKEY_TIMEOUT seconds plus a |
| + * random jitter of fewer than REKEY_TIMEOUT_JITTER_MAX_JIFFIES jiffies. |
| + */ |
| +void wg_timers_handshake_initiated(struct wg_peer *peer) |
| +{ |
| + mod_peer_timer(peer, &peer->timer_retransmit_handshake, |
| + jiffies + REKEY_TIMEOUT * HZ + |
| + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); |
| +} |
| + |
| +/* Should be called after a handshake response message is received and processed |
| + * or when getting key confirmation via the first data message. |
| + * |
| + * Stops handshake retransmission, resets the attempt counter and the |
| + * sent_lastminute_handshake flag, and records the current wall-clock time |
| + * as the time of the last handshake. |
| + */ |
| +void wg_timers_handshake_complete(struct wg_peer *peer) |
| +{ |
| + del_timer(&peer->timer_retransmit_handshake); |
| + peer->timer_handshake_attempts = 0; |
| + peer->sent_lastminute_handshake = false; |
| + ktime_get_real_ts64(&peer->walltime_last_handshake); |
| +} |
| + |
| +/* Should be called after an ephemeral key is created, which is before sending a |
| + * handshake response or after receiving a handshake response. |
| + */ |
| +void wg_timers_session_derived(struct wg_peer *peer) |
| +{ |
| + mod_peer_timer(peer, &peer->timer_zero_key_material, |
| + jiffies + REJECT_AFTER_TIME * 3 * HZ); |
| +} |
| + |
| +/* Should be called before a packet with authentication, whether |
| + * keepalive, data, or handshake, is sent, or after one is received. |
| + */ |
| +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) |
| +{ |
| + if (peer->persistent_keepalive_interval) |
| + mod_peer_timer(peer, &peer->timer_persistent_keepalive, |
| + jiffies + peer->persistent_keepalive_interval * HZ); |
| +} |
| + |
| +void wg_timers_init(struct wg_peer *peer) |
| +{ |
| + timer_setup(&peer->timer_retransmit_handshake, |
| + wg_expired_retransmit_handshake, 0); |
| + timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); |
| + timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); |
| + timer_setup(&peer->timer_zero_key_material, |
| + wg_expired_zero_key_material, 0); |
| + timer_setup(&peer->timer_persistent_keepalive, |
| + wg_expired_send_persistent_keepalive, 0); |
| + INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); |
| + peer->timer_handshake_attempts = 0; |
| + peer->sent_lastminute_handshake = false; |
| + peer->timer_need_another_keepalive = false; |
| +} |
| + |
| +void wg_timers_stop(struct wg_peer *peer) |
| +{ |
| + del_timer_sync(&peer->timer_retransmit_handshake); |
| + del_timer_sync(&peer->timer_send_keepalive); |
| + del_timer_sync(&peer->timer_new_handshake); |
| + del_timer_sync(&peer->timer_zero_key_material); |
| + del_timer_sync(&peer->timer_persistent_keepalive); |
| + flush_work(&peer->clear_peer_work); |
| +} |
| diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h |
| new file mode 100644 |
| index 000000000000..f0653dcb1326 |
| |
| |
| @@ -0,0 +1,31 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#ifndef _WG_TIMERS_H |
| +#define _WG_TIMERS_H |
| + |
| +#include <linux/ktime.h> |
| + |
| +struct wg_peer; |
| + |
| +void wg_timers_init(struct wg_peer *peer); |
| +void wg_timers_stop(struct wg_peer *peer); |
| +void wg_timers_data_sent(struct wg_peer *peer); |
| +void wg_timers_data_received(struct wg_peer *peer); |
| +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); |
| +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); |
| +void wg_timers_handshake_initiated(struct wg_peer *peer); |
| +void wg_timers_handshake_complete(struct wg_peer *peer); |
| +void wg_timers_session_derived(struct wg_peer *peer); |
| +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); |
| + |
| +static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, |
| + u64 expiration_seconds) |
| +{ |
| + return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) |
| + <= (s64)ktime_get_coarse_boottime_ns(); |
| +} |
| + |
| +#endif /* _WG_TIMERS_H */ |
| diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h |
| new file mode 100644 |
| index 000000000000..a1a269a11634 |
| |
| |
| @@ -0,0 +1 @@ |
| +#define WIREGUARD_VERSION "1.0.0" |
| diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h |
| new file mode 100644 |
| index 000000000000..dd8a47c4ad11 |
| |
| |
| @@ -0,0 +1,196 @@ |
| +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + * |
| + * Documentation |
| + * ============= |
| + * |
| + * The below enums and macros are for interfacing with WireGuard, using generic |
| + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two |
| + * methods: get and set. Note that while they share many common attributes, |
| + * these two functions actually accept a slightly different set of inputs and |
| + * outputs. |
| + * |
| + * WG_CMD_GET_DEVICE |
| + * ----------------- |
| + * |
| + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain |
| + * one but not both of: |
| + * |
| + * WGDEVICE_A_IFINDEX: NLA_U32 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| + * |
| + * The kernel will then return several messages (NLM_F_MULTI) containing the |
| + * following tree of nested items: |
| + * |
| + * WGDEVICE_A_IFINDEX: NLA_U32 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| + * WGDEVICE_A_LISTEN_PORT: NLA_U16 |
| + * WGDEVICE_A_FWMARK: NLA_U32 |
| + * WGDEVICE_A_PEERS: NLA_NESTED |
| + * 0: NLA_NESTED |
| + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 |
| + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 |
| + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec |
| + * WGPEER_A_RX_BYTES: NLA_U64 |
| + * WGPEER_A_TX_BYTES: NLA_U64 |
| + * WGPEER_A_ALLOWEDIPS: NLA_NESTED |
| + * 0: NLA_NESTED |
| + * WGALLOWEDIP_A_FAMILY: NLA_U16 |
| + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr |
| + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 |
| + * 0: NLA_NESTED |
| + * ... |
| + * 0: NLA_NESTED |
| + * ... |
| + * ... |
| + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 |
| + * 0: NLA_NESTED |
| + * ... |
| + * ... |
| + * |
| + * It is possible that all of the allowed IPs of a single peer will not |
| + * fit within a single netlink message. In that case, the same peer will |
| + * be written in the following message, except it will only contain |
| + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several |
| + * times in a row for the same peer. It is then up to the receiver to |
| + * coalesce adjacent peers. Likewise, it is possible that all peers will |
| + * not fit within a single message. So, subsequent peers will be sent |
| + * in following messages, except those will only contain WGDEVICE_A_IFNAME |
| + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these |
| + * messages to form the complete list of peers. |
| + * |
| + * Since this is an NLM_F_DUMP command, the final message will always be |
| + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message |
| + * contains an integer error code. It is either zero or a negative error |
| + * code corresponding to the errno. |
| + * |
| + * WG_CMD_SET_DEVICE |
| + * ----------------- |
| + * |
| + * May only be called via NLM_F_REQUEST. The command should contain the |
| + * following tree of nested items, containing one but not both of |
| + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: |
| + * |
| + * WGDEVICE_A_IFINDEX: NLA_U32 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current |
| + * peers should be removed prior to adding the list below. |
| + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove |
| + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly |
| + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable |
| + * WGDEVICE_A_PEERS: NLA_NESTED |
| + * 0: NLA_NESTED |
| + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN |
| + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the |
| + * specified peer should not exist at the end of the |
| + * operation, rather than added/updated and/or |
| + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed |
| + * IPs of this peer should be removed prior to adding |
| + * the list below and/or WGPEER_F_UPDATE_ONLY if the |
| + * peer should only be set if it already exists. |
| + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove |
| + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 |
| + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable |
| + * WGPEER_A_ALLOWEDIPS: NLA_NESTED |
| + * 0: NLA_NESTED |
| + * WGALLOWEDIP_A_FAMILY: NLA_U16 |
| + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr |
| + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 |
| + * 0: NLA_NESTED |
| + * ... |
| + * 0: NLA_NESTED |
| + * ... |
| + * ... |
| + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at |
| + * all by most users of this API, as the |
| + * most recent protocol will be used when |
| + * this is unset. Otherwise, must be set |
| + * to 1. |
| + * 0: NLA_NESTED |
| + * ... |
| + * ... |
| + * |
| + * It is possible that the amount of configuration data exceeds that of |
| + * the maximum message length accepted by the kernel. In that case, several |
| + * messages should be sent one after another, with each successive one |
| + * filling in information not contained in the prior. Note that if |
| + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably |
| + * should not be specified in fragments that come after, so that the list |
| + * of peers is only cleared the first time but appended after. Likewise for |
| + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message |
| + * of a peer, it likely should not be specified in subsequent fragments. |
| + * |
| + * If an error occurs, NLMSG_ERROR will reply containing an errno. |
| + */ |
| + |
| +#ifndef _WG_UAPI_WIREGUARD_H |
| +#define _WG_UAPI_WIREGUARD_H |
| + |
| +#define WG_GENL_NAME "wireguard" |
| +#define WG_GENL_VERSION 1 |
| + |
| +#define WG_KEY_LEN 32 |
| + |
| +enum wg_cmd { |
| + WG_CMD_GET_DEVICE, |
| + WG_CMD_SET_DEVICE, |
| + __WG_CMD_MAX |
| +}; |
| +#define WG_CMD_MAX (__WG_CMD_MAX - 1) |
| + |
| +enum wgdevice_flag { |
| + WGDEVICE_F_REPLACE_PEERS = 1U << 0, |
| + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS |
| +}; |
| +enum wgdevice_attribute { |
| + WGDEVICE_A_UNSPEC, |
| + WGDEVICE_A_IFINDEX, |
| + WGDEVICE_A_IFNAME, |
| + WGDEVICE_A_PRIVATE_KEY, |
| + WGDEVICE_A_PUBLIC_KEY, |
| + WGDEVICE_A_FLAGS, |
| + WGDEVICE_A_LISTEN_PORT, |
| + WGDEVICE_A_FWMARK, |
| + WGDEVICE_A_PEERS, |
| + __WGDEVICE_A_LAST |
| +}; |
| +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) |
| + |
| +enum wgpeer_flag { |
| + WGPEER_F_REMOVE_ME = 1U << 0, |
| + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, |
| + WGPEER_F_UPDATE_ONLY = 1U << 2, |
| + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | |
| + WGPEER_F_UPDATE_ONLY |
| +}; |
| +enum wgpeer_attribute { |
| + WGPEER_A_UNSPEC, |
| + WGPEER_A_PUBLIC_KEY, |
| + WGPEER_A_PRESHARED_KEY, |
| + WGPEER_A_FLAGS, |
| + WGPEER_A_ENDPOINT, |
| + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, |
| + WGPEER_A_LAST_HANDSHAKE_TIME, |
| + WGPEER_A_RX_BYTES, |
| + WGPEER_A_TX_BYTES, |
| + WGPEER_A_ALLOWEDIPS, |
| + WGPEER_A_PROTOCOL_VERSION, |
| + __WGPEER_A_LAST |
| +}; |
| +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) |
| + |
| +enum wgallowedip_attribute { |
| + WGALLOWEDIP_A_UNSPEC, |
| + WGALLOWEDIP_A_FAMILY, |
| + WGALLOWEDIP_A_IPADDR, |
| + WGALLOWEDIP_A_CIDR_MASK, |
| + __WGALLOWEDIP_A_LAST |
| +}; |
| +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) |
| + |
| +#endif /* _WG_UAPI_WIREGUARD_H */ |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| new file mode 100755 |
| index 000000000000..e7310d9390f7 |
| |
| |
| @@ -0,0 +1,537 @@ |
| +#!/bin/bash |
| +# SPDX-License-Identifier: GPL-2.0 |
| +# |
| +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| +# |
| +# This script tests the below topology: |
| +# |
| +# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ |
| +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ |
| +# │ │ │ │ │ │ |
| +# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ |
| +# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││ |
| +# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│ |
| +# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ |
| +# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ |
| +# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ |
| +# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ |
| +# └──────────────────────────────────┘ |
| +# |
| +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the |
| +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 |
| +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further |
| +# details on how this is accomplished. |
| +set -e |
| + |
| +exec 3>&1 |
| +export WG_HIDE_KEYS=never |
| +netns0="wg-test-$$-0" |
| +netns1="wg-test-$$-1" |
| +netns2="wg-test-$$-2" |
| +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } |
| +pp() { pretty "" "$*"; "$@"; } |
| +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } |
| +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } |
| +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } |
| +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } |
| +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } |
| +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } |
| +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } |
| +sleep() { read -t "$1" -N 0 || true; } |
| +waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } |
| +waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| +waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } |
| + |
| +cleanup() { |
| + set +e |
| + exec 2>/dev/null |
| + printf "$orig_message_cost" > /proc/sys/net/core/message_cost |
| + ip0 link del dev wg0 |
| + ip1 link del dev wg0 |
| + ip2 link del dev wg0 |
| + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" |
| + [[ -n $to_kill ]] && kill $to_kill |
| + pp ip netns del $netns1 |
| + pp ip netns del $netns2 |
| + pp ip netns del $netns0 |
| + exit |
| +} |
| + |
| +orig_message_cost="$(< /proc/sys/net/core/message_cost)" |
| +trap cleanup EXIT |
| +printf 0 > /proc/sys/net/core/message_cost |
| + |
| +ip netns del $netns0 2>/dev/null || true |
| +ip netns del $netns1 2>/dev/null || true |
| +ip netns del $netns2 2>/dev/null || true |
| +pp ip netns add $netns0 |
| +pp ip netns add $netns1 |
| +pp ip netns add $netns2 |
| +ip0 link set up dev lo |
| + |
| +ip0 link add dev wg0 type wireguard |
| +ip0 link set wg0 netns $netns1 |
| +ip0 link add dev wg0 type wireguard |
| +ip0 link set wg0 netns $netns2 |
| +key1="$(pp wg genkey)" |
| +key2="$(pp wg genkey)" |
| +key3="$(pp wg genkey)" |
| +pub1="$(pp wg pubkey <<<"$key1")" |
| +pub2="$(pp wg pubkey <<<"$key2")" |
| +pub3="$(pp wg pubkey <<<"$key3")" |
| +psk="$(pp wg genpsk)" |
| +[[ -n $key1 && -n $key2 && -n $psk ]] |
| + |
| +configure_peers() { |
| + ip1 addr add 192.168.241.1/24 dev wg0 |
| + ip1 addr add fd00::1/24 dev wg0 |
| + |
| + ip2 addr add 192.168.241.2/24 dev wg0 |
| + ip2 addr add fd00::2/24 dev wg0 |
| + |
| + n1 wg set wg0 \ |
| + private-key <(echo "$key1") \ |
| + listen-port 1 \ |
| + peer "$pub2" \ |
| + preshared-key <(echo "$psk") \ |
| + allowed-ips 192.168.241.2/32,fd00::2/128 |
| + n2 wg set wg0 \ |
| + private-key <(echo "$key2") \ |
| + listen-port 2 \ |
| + peer "$pub1" \ |
| + preshared-key <(echo "$psk") \ |
| + allowed-ips 192.168.241.1/32,fd00::1/128 |
| + |
| + ip1 link set up dev wg0 |
| + ip2 link set up dev wg0 |
| +} |
| +configure_peers |
| + |
| +tests() { |
| + # Ping over IPv4 |
| + n2 ping -c 10 -f -W 1 192.168.241.1 |
| + n1 ping -c 10 -f -W 1 192.168.241.2 |
| + |
| + # Ping over IPv6 |
| + n2 ping6 -c 10 -f -W 1 fd00::1 |
| + n1 ping6 -c 10 -f -W 1 fd00::2 |
| + |
| + # TCP over IPv4 |
| + n2 iperf3 -s -1 -B 192.168.241.2 & |
| + waitiperf $netns2 |
| + n1 iperf3 -Z -t 3 -c 192.168.241.2 |
| + |
| + # TCP over IPv6 |
| + n1 iperf3 -s -1 -B fd00::1 & |
| + waitiperf $netns1 |
| + n2 iperf3 -Z -t 3 -c fd00::1 |
| + |
| + # UDP over IPv4 |
| + n1 iperf3 -s -1 -B 192.168.241.1 & |
| + waitiperf $netns1 |
| + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 |
| + |
| + # UDP over IPv6 |
| + n2 iperf3 -s -1 -B fd00::2 & |
| + waitiperf $netns2 |
| + n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 |
| +} |
| + |
| +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" |
| +big_mtu=$(( 34816 - 1500 + $orig_mtu )) |
| + |
| +# Test using IPv4 as outer transport |
| +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 |
| +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 |
| +# Before calling tests, we first make sure that the stats counters and timestamper are working |
| +n2 ping -c 10 -f -W 1 192.168.241.1 |
| +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) |
| +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) |
| +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) |
| +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) |
| +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) |
| +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) |
| +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) |
| +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) |
| +read _ timestamp < <(n1 wg show wg0 latest-handshakes) |
| +(( timestamp != 0 )) |
| + |
| +tests |
| +ip1 link set wg0 mtu $big_mtu |
| +ip2 link set wg0 mtu $big_mtu |
| +tests |
| + |
| +ip1 link set wg0 mtu $orig_mtu |
| +ip2 link set wg0 mtu $orig_mtu |
| + |
| +# Test using IPv6 as outer transport |
| +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 |
| +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 |
| +tests |
| +ip1 link set wg0 mtu $big_mtu |
| +ip2 link set wg0 mtu $big_mtu |
| +tests |
| + |
| +# Test that route MTUs work with the padding |
| +ip1 link set wg0 mtu 1300 |
| +ip2 link set wg0 mtu 1300 |
| +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 |
| +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 |
| +n0 iptables -A INPUT -m length --length 1360 -j DROP |
| +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 |
| +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 |
| +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 |
| +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 |
| +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 |
| +n0 iptables -F INPUT |
| + |
| +ip1 link set wg0 mtu $orig_mtu |
| +ip2 link set wg0 mtu $orig_mtu |
| + |
| +# Test using IPv4 that roaming works |
| +ip0 -4 addr del 127.0.0.1/8 dev lo |
| +ip0 -4 addr add 127.212.121.99/8 dev lo |
| +n1 wg set wg0 listen-port 9999 |
| +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 |
| +n1 ping6 -W 1 -c 1 fd00::2 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] |
| + |
| +# Test using IPv6 that roaming works |
| +n1 wg set wg0 listen-port 9998 |
| +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] |
| + |
| +# Test that crypto-RP filter works |
| +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 |
| +exec 4< <(n1 ncat -l -u -p 1111) |
| +ncat_pid=$! |
| +waitncatudp $netns1 |
| +n2 ncat -u 192.168.241.1 1111 <<<"X" |
| +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] |
| +kill $ncat_pid |
| +more_specific_key="$(pp wg genkey | pp wg pubkey)" |
| +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 |
| +n2 wg set wg0 listen-port 9997 |
| +exec 4< <(n1 ncat -l -u -p 1111) |
| +ncat_pid=$! |
| +waitncatudp $netns1 |
| +n2 ncat -u 192.168.241.1 1111 <<<"X" |
| +! read -r -N 1 -t 1 out <&4 || false |
| +kill $ncat_pid |
| +n1 wg set wg0 peer "$more_specific_key" remove |
| +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] |
| + |
| +# Test that we can change private keys and immediately handshake |
| +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 |
| +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +n1 wg set wg0 private-key <(echo "$key3") |
| +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| + |
| +ip1 link del wg0 |
| +ip2 link del wg0 |
| + |
| +# Test using NAT. We now change the topology to this: |
| +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ |
| +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ |
| +# │ │ │ │ │ │ |
| +# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ |
| +# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ |
| +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ |
| +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ |
| +# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ |
| +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ |
| +# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ |
| + |
| +ip1 link add dev wg0 type wireguard |
| +ip2 link add dev wg0 type wireguard |
| +configure_peers |
| + |
| +ip0 link add vethrc type veth peer name vethc |
| +ip0 link add vethrs type veth peer name veths |
| +ip0 link set vethc netns $netns1 |
| +ip0 link set veths netns $netns2 |
| +ip0 link set vethrc up |
| +ip0 link set vethrs up |
| +ip0 addr add 192.168.1.1/24 dev vethrc |
| +ip0 addr add 10.0.0.1/24 dev vethrs |
| +ip1 addr add 192.168.1.100/24 dev vethc |
| +ip1 link set vethc up |
| +ip1 route add default via 192.168.1.1 |
| +ip2 addr add 10.0.0.100/24 dev veths |
| +ip2 link set veths up |
| +waitiface $netns0 vethrc |
| +waitiface $netns0 vethrs |
| +waitiface $netns1 vethc |
| +waitiface $netns2 veths |
| + |
| +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' |
| +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' |
| +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' |
| +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 |
| + |
| +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] |
| +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). |
| +pp sleep 3 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 |
| + |
| +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. |
| +ip1 -6 addr add fc00::9/96 dev vethc |
| +ip1 -6 route add default via fc00::1 |
| +ip2 -4 addr add 192.168.99.7/32 dev wg0 |
| +ip2 -6 addr add abab::1111/128 dev wg0 |
| +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 |
| +ip1 -6 route add default dev wg0 table 51820 |
| +ip1 -6 rule add not fwmark 51820 table 51820 |
| +ip1 -6 rule add table main suppress_prefixlength 0 |
| +ip1 -4 route add default dev wg0 table 51820 |
| +ip1 -4 rule add not fwmark 51820 table 51820 |
| +ip1 -4 rule add table main suppress_prefixlength 0 |
| +# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. |
| +if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then |
| + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. |
| + n1 ping -W 1 -c 100 -f 192.168.99.7 |
| + n1 ping -W 1 -c 100 -f abab::1111 |
| +fi |
| + |
| +n0 iptables -t nat -F |
| +ip0 link del vethrc |
| +ip0 link del vethrs |
| +ip1 link del wg0 |
| +ip2 link del wg0 |
| + |
| +# Test that saddr routing is sticky but not too sticky, changing to this topology: |
| +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ |
| +# │ $ns1 namespace │ │ $ns2 namespace │ |
| +# │ │ │ │ |
| +# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ |
| +# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │ |
| +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │ |
| +# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ |
| +# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ |
| +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ |
| +# └────────────────────────────────────────┘ └────────────────────────────────────────┘ |
| + |
| +ip1 link add dev wg0 type wireguard |
| +ip2 link add dev wg0 type wireguard |
| +configure_peers |
| +ip1 link add veth1 type veth peer name veth2 |
| +ip1 link set veth2 netns $netns2 |
| +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' |
| +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' |
| +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' |
| +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' |
| +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' |
| + |
| +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed |
| +ip1 addr add 10.0.0.1/24 dev veth1 |
| +ip1 addr add fd00:aa::1/96 dev veth1 |
| +ip2 addr add 10.0.0.2/24 dev veth2 |
| +ip2 addr add fd00:aa::2/96 dev veth2 |
| +ip1 link set veth1 up |
| +ip2 link set veth2 up |
| +waitiface $netns1 veth1 |
| +waitiface $netns2 veth2 |
| +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +ip1 addr add 10.0.0.10/24 dev veth1 |
| +ip1 addr del 10.0.0.1/24 dev veth1 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +ip1 addr add fd00:aa::10/96 dev veth1 |
| +ip1 addr del fd00:aa::1/96 dev veth1 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| + |
| +# Now we show that we can successfully do reply to sender routing |
| +ip1 link set veth1 down |
| +ip2 link set veth2 down |
| +ip1 addr flush dev veth1 |
| +ip2 addr flush dev veth2 |
| +ip1 addr add 10.0.0.1/24 dev veth1 |
| +ip1 addr add 10.0.0.2/24 dev veth1 |
| +ip1 addr add fd00:aa::1/96 dev veth1 |
| +ip1 addr add fd00:aa::2/96 dev veth1 |
| +ip2 addr add 10.0.0.3/24 dev veth2 |
| +ip2 addr add fd00:aa::3/96 dev veth2 |
| +ip1 link set veth1 up |
| +ip2 link set veth2 up |
| +waitiface $netns1 veth1 |
| +waitiface $netns2 veth2 |
| +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] |
| +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] |
| +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] |
| +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] |
| + |
| +# What happens if the inbound destination address belongs to a different interface than the default route? |
| +ip1 link add dummy0 type dummy |
| +ip1 addr add 10.50.0.1/24 dev dummy0 |
| +ip1 link set dummy0 up |
| +ip2 route add 10.50.0.0/24 dev veth2 |
| +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 |
| +n2 ping -W 1 -c 1 192.168.241.1 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] |
| + |
| +ip1 link del dummy0 |
| +ip1 addr flush dev veth1 |
| +ip2 addr flush dev veth2 |
| +ip1 route flush dev veth1 |
| +ip2 route flush dev veth2 |
| + |
| +# Now we see what happens if another interface route takes precedence over an ongoing one |
| +ip1 link add veth3 type veth peer name veth4 |
| +ip1 link set veth4 netns $netns2 |
| +ip1 addr add 10.0.0.1/24 dev veth1 |
| +ip2 addr add 10.0.0.2/24 dev veth2 |
| +ip1 addr add 10.0.0.3/24 dev veth3 |
| +ip1 link set veth1 up |
| +ip2 link set veth2 up |
| +ip1 link set veth3 up |
| +ip2 link set veth4 up |
| +waitiface $netns1 veth1 |
| +waitiface $netns2 veth2 |
| +waitiface $netns1 veth3 |
| +waitiface $netns2 veth4 |
| +ip1 route flush dev veth1 |
| +ip1 route flush dev veth3 |
| +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 |
| +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] |
| +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 |
| +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' |
| +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' |
| +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' |
| +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' |
| +n1 ping -W 1 -c 1 192.168.241.2 |
| +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] |
| + |
| +ip1 link del veth1 |
| +ip1 link del veth3 |
| +ip1 link del wg0 |
| +ip2 link del wg0 |
| + |
| +# We test that Netlink/IPC is working properly by doing things that usually cause split responses |
| +ip0 link add dev wg0 type wireguard |
| +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) |
| +for a in {1..255}; do |
| + for b in {0..255}; do |
| + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) |
| + done |
| +done |
| +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") |
| +i=0 |
| +for ip in $(n0 wg show wg0 allowed-ips); do |
| + ((++i)) |
| +done |
| +((i == 255*256*2+1)) |
| +ip0 link del wg0 |
| +ip0 link add dev wg0 type wireguard |
| +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) |
| +for a in {1..40}; do |
| + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) |
| + for b in {1..52}; do |
| + config+=( "AllowedIPs=$a.$b.0.0/16" ) |
| + done |
| +done |
| +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") |
| +i=0 |
| +while read -r line; do |
| + j=0 |
| + for ip in $line; do |
| + ((++j)) |
| + done |
| + ((j == 53)) |
| + ((++i)) |
| +done < <(n0 wg show wg0 allowed-ips) |
| +((i == 40)) |
| +ip0 link del wg0 |
| +ip0 link add wg0 type wireguard |
| +config=( ) |
| +for i in {1..29}; do |
| + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) |
| +done |
| +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) |
| +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") |
| +n0 wg showconf wg0 > /dev/null |
| +ip0 link del wg0 |
| + |
| +allowedips=( ) |
| +for i in {1..197}; do |
| + allowedips+=( abcd::$i ) |
| +done |
| +saved_ifs="$IFS" |
| +IFS=, |
| +allowedips="${allowedips[*]}" |
| +IFS="$saved_ifs" |
| +ip0 link add wg0 type wireguard |
| +n0 wg set wg0 peer "$pub1" |
| +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" |
| +{ |
| + read -r pub allowedips |
| + [[ $pub == "$pub1" && $allowedips == "(none)" ]] |
| + read -r pub allowedips |
| + [[ $pub == "$pub2" ]] |
| + i=0 |
| + for _ in $allowedips; do |
| + ((++i)) |
| + done |
| + ((i == 197)) |
| +} < <(n0 wg show wg0 allowed-ips) |
| +ip0 link del wg0 |
| + |
| +! n0 wg show doesnotexist || false |
| + |
| +ip0 link add wg0 type wireguard |
| +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") |
| +[[ $(n0 wg show wg0 private-key) == "$key1" ]] |
| +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] |
| +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null |
| +[[ $(n0 wg show wg0 private-key) == "(none)" ]] |
| +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] |
| +n0 wg set wg0 peer "$pub2" |
| +n0 wg set wg0 private-key <(echo "$key2") |
| +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] |
| +[[ -z $(n0 wg show wg0 peers) ]] |
| +n0 wg set wg0 peer "$pub2" |
| +[[ -z $(n0 wg show wg0 peers) ]] |
| +n0 wg set wg0 private-key <(echo "$key1") |
| +n0 wg set wg0 peer "$pub2" |
| +[[ $(n0 wg show wg0 peers) == "$pub2" ]] |
| +n0 wg set wg0 private-key <(echo "/${key1:1}") |
| +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] |
| +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 |
| +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 |
| +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 |
| +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 |
| +ip0 link del wg0 |
| + |
| +declare -A objects |
| +while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do |
| + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue |
| + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" |
| +done < /dev/kmsg |
| +alldeleted=1 |
| +for object in "${!objects[@]}"; do |
| + if [[ ${objects["$object"]} != *createddestroyed ]]; then |
| + echo "Error: $object: merely ${objects["$object"]}" >&3 |
| + alldeleted=0 |
| + fi |
| +done |
| +[[ $alldeleted -eq 1 ]] |
| +pretty "" "Objects that were created were also destroyed." |
| -- |
| 2.18.2 |
| |
| |
| From 514644d220829b4c1987cd451485d984cd88e6dd Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 15 Dec 2019 22:08:00 +0100 |
| Subject: [PATCH 060/100] wireguard: selftests: import harness makefile for |
| test suite |
| |
| commit 65d88d04114bca7d85faebd5fed61069cb2b632c upstream. |
| |
| WireGuard has been using this on build.wireguard.com for the last |
| several years with considerable success. It allows for very quick and |
| iterative development cycles, and supports several platforms. |
| |
| To run the test suite on your current platform in QEMU: |
| |
| $ make -C tools/testing/selftests/wireguard/qemu -j$(nproc) |
| |
| To run it with KASAN and such turned on: |
| |
| $ DEBUG_KERNEL=yes make -C tools/testing/selftests/wireguard/qemu -j$(nproc) |
| |
| To run it emulated for another platform in QEMU: |
| |
| $ ARCH=arm make -C tools/testing/selftests/wireguard/qemu -j$(nproc) |
| |
| At the moment, we support aarch64_be, aarch64, arm, armeb, i686, m68k, |
| mips64, mips64el, mips, mipsel, powerpc64le, powerpc, and x86_64. |
| |
| The system supports incremental rebuilding, so it should be very fast to |
| change a single file and then test it out and have immediate feedback. |
| |
| This requires the right toolchain and QEMU to be installed beforehand. |
| I've had success with those from musl.cc. |
| |
| This is tailored for WireGuard at the moment, though later projects |
| might generalize it for other network testing. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| .../selftests/wireguard/qemu/.gitignore | 2 + |
| .../testing/selftests/wireguard/qemu/Makefile | 385 ++++++++++++++++++ |
| .../wireguard/qemu/arch/aarch64.config | 5 + |
| .../wireguard/qemu/arch/aarch64_be.config | 6 + |
| .../selftests/wireguard/qemu/arch/arm.config | 9 + |
| .../wireguard/qemu/arch/armeb.config | 10 + |
| .../selftests/wireguard/qemu/arch/i686.config | 5 + |
| .../selftests/wireguard/qemu/arch/m68k.config | 9 + |
| .../selftests/wireguard/qemu/arch/mips.config | 11 + |
| .../wireguard/qemu/arch/mips64.config | 14 + |
| .../wireguard/qemu/arch/mips64el.config | 15 + |
| .../wireguard/qemu/arch/mipsel.config | 12 + |
| .../wireguard/qemu/arch/powerpc.config | 10 + |
| .../wireguard/qemu/arch/powerpc64le.config | 12 + |
| .../wireguard/qemu/arch/x86_64.config | 5 + |
| .../selftests/wireguard/qemu/debug.config | 67 +++ |
| tools/testing/selftests/wireguard/qemu/init.c | 284 +++++++++++++ |
| .../selftests/wireguard/qemu/kernel.config | 86 ++++ |
| 18 files changed, 947 insertions(+) |
| create mode 100644 tools/testing/selftests/wireguard/qemu/.gitignore |
| create mode 100644 tools/testing/selftests/wireguard/qemu/Makefile |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/arm.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/armeb.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/i686.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/m68k.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64el.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mipsel.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/arch/x86_64.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/debug.config |
| create mode 100644 tools/testing/selftests/wireguard/qemu/init.c |
| create mode 100644 tools/testing/selftests/wireguard/qemu/kernel.config |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore |
| new file mode 100644 |
| index 000000000000..415b542a9d59 |
| |
| |
| @@ -0,0 +1,2 @@ |
| +build/ |
| +distfiles/ |
| diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile |
| new file mode 100644 |
| index 000000000000..6d51bf78eeff |
| |
| |
| @@ -0,0 +1,385 @@ |
| +# SPDX-License-Identifier: GPL-2.0 |
| +# |
| +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + |
| +PWD := $(shell pwd) |
| + |
| +CHOST := $(shell gcc -dumpmachine) |
| +ifneq (,$(ARCH)) |
| +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) |
| +ifeq (,$(CBUILD)) |
| +$(error The toolchain for $(ARCH) is not installed) |
| +endif |
| +else |
| +CBUILD := $(CHOST) |
| +ARCH := $(firstword $(subst -, ,$(CBUILD))) |
| +endif |
| + |
| +# Set these from the environment to override |
| +KERNEL_PATH ?= $(PWD)/../../../../.. |
| +BUILD_PATH ?= $(PWD)/build/$(ARCH) |
| +DISTFILES_PATH ?= $(PWD)/distfiles |
| +NR_CPUS ?= 4 |
| + |
| +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ |
| + |
| +default: qemu |
| + |
| +# variable name, tarball project name, version, tarball extension, default URI base, SHA-256 checksum of the tarball |
| +define tar_download = |
| +$(1)_VERSION := $(3) |
| +$(1)_NAME := $(2)-$$($(1)_VERSION) |
| +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4) |
| +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) |
| +$(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) |
| +endef |
| + |
| +define file_download = |
| +$(DISTFILES_PATH)/$(1): |
| + mkdir -p $(DISTFILES_PATH) |
| + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' |
| + if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi |
| +endef |
| + |
| +$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61)) |
| +$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) |
| +$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f)) |
| +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) |
| +$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2)) |
| +$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5)) |
| +$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21)) |
| +$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a)) |
| +$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071)) |
| + |
| +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) |
| +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) |
| +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) |
| + |
| +export CFLAGS ?= -O3 -pipe |
| +export LDFLAGS ?= |
| +export CPPFLAGS := -I$(BUILD_PATH)/include |
| + |
| +ifeq ($(CHOST),$(CBUILD)) |
| +CROSS_COMPILE_FLAG := --host=$(CHOST) |
| +NOPIE_GCC := gcc -fno-PIE |
| +CFLAGS += -march=native |
| +STRIP := strip |
| +else |
| +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) |
| +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) |
| +export CROSS_COMPILE=$(CBUILD)- |
| +NOPIE_GCC := $(CBUILD)-gcc -fno-PIE |
| +STRIP := $(CBUILD)-strip |
| +endif |
| +ifeq ($(ARCH),aarch64) |
| +QEMU_ARCH := aarch64 |
| +KERNEL_ARCH := arm64 |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu cortex-a53 -machine virt |
| +CFLAGS += -march=armv8-a -mtune=cortex-a53 |
| +endif |
| +else ifeq ($(ARCH),aarch64_be) |
| +QEMU_ARCH := aarch64 |
| +KERNEL_ARCH := arm64 |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu cortex-a53 -machine virt |
| +CFLAGS += -march=armv8-a -mtune=cortex-a53 |
| +endif |
| +else ifeq ($(ARCH),arm) |
| +QEMU_ARCH := arm |
| +KERNEL_ARCH := arm |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu cortex-a15 -machine virt |
| +CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux |
| +endif |
| +else ifeq ($(ARCH),armeb) |
| +QEMU_ARCH := arm |
| +KERNEL_ARCH := arm |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu cortex-a15 -machine virt |
| +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. |
| +LDFLAGS += -Wl,--be8 |
| +endif |
| +else ifeq ($(ARCH),x86_64) |
| +QEMU_ARCH := x86_64 |
| +KERNEL_ARCH := x86_64 |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine q35,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu Skylake-Server -machine q35 |
| +CFLAGS += -march=skylake-avx512 |
| +endif |
| +else ifeq ($(ARCH),i686) |
| +QEMU_ARCH := i386 |
| +KERNEL_ARCH := x86 |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage |
| +ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST)) |
| +QEMU_MACHINE := -cpu host -machine q35,accel=kvm |
| +else |
| +QEMU_MACHINE := -cpu coreduo -machine q35 |
| +CFLAGS += -march=prescott |
| +endif |
| +else ifeq ($(ARCH),mips64) |
| +QEMU_ARCH := mips64 |
| +KERNEL_ARCH := mips |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| +CFLAGS += -EB |
| +else |
| +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 |
| +CFLAGS += -march=mips64r2 -EB |
| +endif |
| +else ifeq ($(ARCH),mips64el) |
| +QEMU_ARCH := mips64el |
| +KERNEL_ARCH := mips |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| +CFLAGS += -EL |
| +else |
| +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 |
| +CFLAGS += -march=mips64r2 -EL |
| +endif |
| +else ifeq ($(ARCH),mips) |
| +QEMU_ARCH := mips |
| +KERNEL_ARCH := mips |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| +CFLAGS += -EB |
| +else |
| +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 |
| +CFLAGS += -march=mips32r2 -EB |
| +endif |
| +else ifeq ($(ARCH),mipsel) |
| +QEMU_ARCH := mipsel |
| +KERNEL_ARCH := mips |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| +CFLAGS += -EL |
| +else |
| +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 |
| +CFLAGS += -march=mips32r2 -EL |
| +endif |
| +else ifeq ($(ARCH),powerpc64le) |
| +QEMU_ARCH := ppc64 |
| +KERNEL_ARCH := powerpc |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries |
| +else |
| +QEMU_MACHINE := -machine pseries |
| +endif |
| +CFLAGS += -mcpu=powerpc64le -mlong-double-64 |
| +else ifeq ($(ARCH),powerpc) |
| +QEMU_ARCH := ppc |
| +KERNEL_ARCH := powerpc |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 |
| +else |
| +QEMU_MACHINE := -machine ppce500 |
| +endif |
| +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt |
| +else ifeq ($(ARCH),m68k) |
| +QEMU_ARCH := m68k |
| +KERNEL_ARCH := m68k |
| +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| +ifeq ($(CHOST),$(CBUILD)) |
| +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 |
| +else |
| +QEMU_MACHINE := -machine q800 |
| +endif |
| +else |
| +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) |
| +endif |
| + |
| +REAL_CC := $(CBUILD)-gcc |
| +MUSL_CC := $(BUILD_PATH)/musl-gcc |
| +export CC := $(MUSL_CC) |
| +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed |
| + |
| +build: $(KERNEL_BZIMAGE) |
| +qemu: $(KERNEL_BZIMAGE) |
| + rm -f $(BUILD_PATH)/result |
| + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ |
| + -nodefaults \ |
| + -nographic \ |
| + -smp $(NR_CPUS) \ |
| + $(QEMU_MACHINE) \ |
| + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ |
| + -serial stdio \ |
| + -serial file:$(BUILD_PATH)/result \ |
| + -no-reboot \ |
| + -monitor none \ |
| + -kernel $< |
| + grep -Fq success $(BUILD_PATH)/result |
| + |
| +$(BUILD_PATH)/init-cpio-spec.txt: |
| + mkdir -p $(BUILD_PATH) |
| + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ |
| + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ |
| + echo "dir /dev 755 0 0" >> $@ |
| + echo "nod /dev/console 644 0 0 c 5 1" >> $@ |
| + echo "dir /bin 755 0 0" >> $@ |
| + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ |
| + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@ |
| + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ |
| + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ |
| + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ |
| + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ |
| + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ |
| + echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@ |
| + echo "slink /bin/iptables xtables-multi 777 0 0" >> $@ |
| + echo "slink /bin/ping6 ping 777 0 0" >> $@ |
| + echo "dir /lib 755 0 0" >> $@ |
| + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ |
| + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ |
| + |
| +$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config |
| + mkdir -p $(KERNEL_BUILD_PATH) |
| + cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config |
| + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config |
| + cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig |
| + cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config |
| + $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) |
| + |
| +$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" |
| + |
| +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install |
| + touch $@ |
| + |
| +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) |
| + $(MAKE) -C $(MUSL_PATH) |
| + $(STRIP) -s $@ |
| + |
| +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so |
| + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers |
| + touch $@ |
| + |
| +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so |
| + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs |
| + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc |
| + chmod +x $(BUILD_PATH)/musl-gcc |
| + |
| +$(IPERF_PATH)/.installed: $(IPERF_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h |
| + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* |
| + touch $@ |
| + |
| +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) |
| + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared |
| + $(MAKE) -C $(IPERF_PATH) |
| + $(STRIP) -s $@ |
| + |
| +$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + touch $@ |
| + |
| +$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) |
| + cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared |
| + $(MAKE) -C $(LIBMNL_PATH) |
| + sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc |
| + |
| +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + touch $@ |
| + |
| +$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg |
| + $(STRIP) -s $@ |
| + |
| +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) |
| + mkdir -p $(BUILD_PATH) |
| + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< |
| + $(STRIP) -s $@ |
| + |
| +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + touch $@ |
| + |
| +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) |
| + $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping |
| + $(STRIP) -s $@ |
| + |
| +$(BASH_PATH)/.installed: $(BASH_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + touch $@ |
| + |
| +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) |
| + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble |
| + $(MAKE) -C $(BASH_PATH) |
| + $(STRIP) -s $@ |
| + |
| +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk |
| + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile |
| + touch $@ |
| + |
| +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip |
| + $(STRIP) -s $(IPROUTE2_PATH)/ip/ip |
| + |
| +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss |
| + $(STRIP) -s $(IPROUTE2_PATH)/misc/ss |
| + |
| +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure |
| + touch $@ |
| + |
| +$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include |
| + $(MAKE) -C $(IPTABLES_PATH) |
| + $(STRIP) -s $@ |
| + |
| +$(NMAP_PATH)/.installed: $(NMAP_TAR) |
| + mkdir -p $(BUILD_PATH) |
| + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| + touch $@ |
| + |
| +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) |
| + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux |
| + $(MAKE) -C $(NMAP_PATH) build-ncat |
| + $(STRIP) -s $@ |
| + |
| +clean: |
| + rm -rf $(BUILD_PATH) |
| + |
| +distclean: clean |
| + rm -rf $(DISTFILES_PATH) |
| + |
| +menuconfig: $(KERNEL_BUILD_PATH)/.config |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig |
| + |
| +.PHONY: qemu build clean distclean menuconfig |
| +.DELETE_ON_ERROR: |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config |
| new file mode 100644 |
| index 000000000000..3d063bb247bb |
| |
| |
| @@ -0,0 +1,5 @@ |
| +CONFIG_SERIAL_AMBA_PL011=y |
| +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config |
| new file mode 100644 |
| index 000000000000..dbdc7e406a7b |
| |
| |
| @@ -0,0 +1,6 @@ |
| +CONFIG_CPU_BIG_ENDIAN=y |
| +CONFIG_SERIAL_AMBA_PL011=y |
| +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config |
| new file mode 100644 |
| index 000000000000..148f49905418 |
| |
| |
| @@ -0,0 +1,9 @@ |
| +CONFIG_MMU=y |
| +CONFIG_ARCH_MULTI_V7=y |
| +CONFIG_ARCH_VIRT=y |
| +CONFIG_THUMB2_KERNEL=n |
| +CONFIG_SERIAL_AMBA_PL011=y |
| +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config |
| new file mode 100644 |
| index 000000000000..bd76b07d00a2 |
| |
| |
| @@ -0,0 +1,10 @@ |
| +CONFIG_MMU=y |
| +CONFIG_ARCH_MULTI_V7=y |
| +CONFIG_ARCH_VIRT=y |
| +CONFIG_THUMB2_KERNEL=n |
| +CONFIG_SERIAL_AMBA_PL011=y |
| +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" |
| +CONFIG_CPU_BIG_ENDIAN=y |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config |
| new file mode 100644 |
| index 000000000000..a85025d7206e |
| |
| |
| @@ -0,0 +1,5 @@ |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config |
| new file mode 100644 |
| index 000000000000..5381ea10896c |
| |
| |
| @@ -0,0 +1,9 @@ |
| +CONFIG_MMU=y |
| +CONFIG_M68040=y |
| +CONFIG_MAC=y |
| +CONFIG_SERIAL_PMACZILOG=y |
| +CONFIG_SERIAL_PMACZILOG_TTYS=y |
| +CONFIG_SERIAL_PMACZILOG_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config |
| new file mode 100644 |
| index 000000000000..df71d6b95546 |
| |
| |
| @@ -0,0 +1,11 @@ |
| +CONFIG_CPU_MIPS32_R2=y |
| +CONFIG_MIPS_MALTA=y |
| +CONFIG_MIPS_CPS=y |
| +CONFIG_MIPS_FP_SUPPORT=y |
| +CONFIG_POWER_RESET=y |
| +CONFIG_POWER_RESET_SYSCON=y |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config |
| new file mode 100644 |
| index 000000000000..90c783f725c4 |
| |
| |
| @@ -0,0 +1,14 @@ |
| +CONFIG_64BIT=y |
| +CONFIG_CPU_MIPS64_R2=y |
| +CONFIG_MIPS32_N32=y |
| +CONFIG_CPU_HAS_MSA=y |
| +CONFIG_MIPS_MALTA=y |
| +CONFIG_MIPS_CPS=y |
| +CONFIG_MIPS_FP_SUPPORT=y |
| +CONFIG_POWER_RESET=y |
| +CONFIG_POWER_RESET_SYSCON=y |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config |
| new file mode 100644 |
| index 000000000000..435b0b43e00c |
| |
| |
| @@ -0,0 +1,15 @@ |
| +CONFIG_64BIT=y |
| +CONFIG_CPU_MIPS64_R2=y |
| +CONFIG_MIPS32_N32=y |
| +CONFIG_CPU_HAS_MSA=y |
| +CONFIG_MIPS_MALTA=y |
| +CONFIG_CPU_LITTLE_ENDIAN=y |
| +CONFIG_MIPS_CPS=y |
| +CONFIG_MIPS_FP_SUPPORT=y |
| +CONFIG_POWER_RESET=y |
| +CONFIG_POWER_RESET_SYSCON=y |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config |
| new file mode 100644 |
| index 000000000000..62bb50c4a85f |
| |
| |
| @@ -0,0 +1,12 @@ |
| +CONFIG_CPU_MIPS32_R2=y |
| +CONFIG_MIPS_MALTA=y |
| +CONFIG_CPU_LITTLE_ENDIAN=y |
| +CONFIG_MIPS_CPS=y |
| +CONFIG_MIPS_FP_SUPPORT=y |
| +CONFIG_POWER_RESET=y |
| +CONFIG_POWER_RESET_SYSCON=y |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config |
| new file mode 100644 |
| index 000000000000..57957093b71b |
| |
| |
| @@ -0,0 +1,10 @@ |
| +CONFIG_PPC_QEMU_E500=y |
| +CONFIG_FSL_SOC_BOOKE=y |
| +CONFIG_PPC_85xx=y |
| +CONFIG_PHYS_64BIT=y |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_MATH_EMULATION=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config |
| new file mode 100644 |
| index 000000000000..990c510a9cfa |
| |
| |
| @@ -0,0 +1,12 @@ |
| +CONFIG_PPC64=y |
| +CONFIG_PPC_PSERIES=y |
| +CONFIG_ALTIVEC=y |
| +CONFIG_VSX=y |
| +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y |
| +CONFIG_PPC_RADIX_MMU=y |
| +CONFIG_HVC_CONSOLE=y |
| +CONFIG_CPU_LITTLE_ENDIAN=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" |
| +CONFIG_SECTION_MISMATCH_WARN_ONLY=y |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config |
| new file mode 100644 |
| index 000000000000..00a1ef4869d5 |
| |
| |
| @@ -0,0 +1,5 @@ |
| +CONFIG_SERIAL_8250=y |
| +CONFIG_SERIAL_8250_CONSOLE=y |
| +CONFIG_CMDLINE_BOOL=y |
| +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| +CONFIG_FRAME_WARN=1280 |
| diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config |
| new file mode 100644 |
| index 000000000000..b9c72706fe4d |
| |
| |
| @@ -0,0 +1,67 @@ |
| +CONFIG_LOCALVERSION="-debug" |
| +CONFIG_ENABLE_WARN_DEPRECATED=y |
| +CONFIG_ENABLE_MUST_CHECK=y |
| +CONFIG_FRAME_POINTER=y |
| +CONFIG_STACK_VALIDATION=y |
| +CONFIG_DEBUG_KERNEL=y |
| +CONFIG_DEBUG_INFO=y |
| +CONFIG_DEBUG_INFO_DWARF4=y |
| +CONFIG_PAGE_EXTENSION=y |
| +CONFIG_PAGE_POISONING=y |
| +CONFIG_DEBUG_OBJECTS=y |
| +CONFIG_DEBUG_OBJECTS_FREE=y |
| +CONFIG_DEBUG_OBJECTS_TIMERS=y |
| +CONFIG_DEBUG_OBJECTS_WORK=y |
| +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y |
| +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y |
| +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 |
| +CONFIG_SLUB_DEBUG_ON=y |
| +CONFIG_DEBUG_VM=y |
| +CONFIG_DEBUG_MEMORY_INIT=y |
| +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y |
| +CONFIG_DEBUG_STACKOVERFLOW=y |
| +CONFIG_HAVE_ARCH_KMEMCHECK=y |
| +CONFIG_HAVE_ARCH_KASAN=y |
| +CONFIG_KASAN=y |
| +CONFIG_KASAN_INLINE=y |
| +CONFIG_UBSAN=y |
| +CONFIG_UBSAN_SANITIZE_ALL=y |
| +CONFIG_UBSAN_NO_ALIGNMENT=y |
| +CONFIG_UBSAN_NULL=y |
| +CONFIG_DEBUG_KMEMLEAK=y |
| +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 |
| +CONFIG_DEBUG_STACK_USAGE=y |
| +CONFIG_DEBUG_SHIRQ=y |
| +CONFIG_WQ_WATCHDOG=y |
| +CONFIG_SCHED_DEBUG=y |
| +CONFIG_SCHED_INFO=y |
| +CONFIG_SCHEDSTATS=y |
| +CONFIG_SCHED_STACK_END_CHECK=y |
| +CONFIG_DEBUG_TIMEKEEPING=y |
| +CONFIG_TIMER_STATS=y |
| +CONFIG_DEBUG_PREEMPT=y |
| +CONFIG_DEBUG_RT_MUTEXES=y |
| +CONFIG_DEBUG_SPINLOCK=y |
| +CONFIG_DEBUG_MUTEXES=y |
| +CONFIG_DEBUG_LOCK_ALLOC=y |
| +CONFIG_PROVE_LOCKING=y |
| +CONFIG_LOCKDEP=y |
| +CONFIG_DEBUG_ATOMIC_SLEEP=y |
| +CONFIG_TRACE_IRQFLAGS=y |
| +CONFIG_DEBUG_BUGVERBOSE=y |
| +CONFIG_DEBUG_LIST=y |
| +CONFIG_DEBUG_PI_LIST=y |
| +CONFIG_PROVE_RCU=y |
| +CONFIG_SPARSE_RCU_POINTER=y |
| +CONFIG_RCU_CPU_STALL_TIMEOUT=21 |
| +CONFIG_RCU_TRACE=y |
| +CONFIG_RCU_EQS_DEBUG=y |
| +CONFIG_USER_STACKTRACE_SUPPORT=y |
| +CONFIG_DEBUG_SG=y |
| +CONFIG_DEBUG_NOTIFIERS=y |
| +CONFIG_DOUBLEFAULT=y |
| +CONFIG_X86_DEBUG_FPU=y |
| +CONFIG_DEBUG_SECTION_MISMATCH=y |
| +CONFIG_DEBUG_PAGEALLOC=y |
| +CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y |
| +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y |
| diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c |
| new file mode 100644 |
| index 000000000000..51e5ddedee88 |
| |
| |
| @@ -0,0 +1,284 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| + */ |
| + |
| +#define _GNU_SOURCE |
| +#include <unistd.h> |
| +#include <errno.h> |
| +#include <string.h> |
| +#include <stdio.h> |
| +#include <stdlib.h> |
| +#include <stdbool.h> |
| +#include <fcntl.h> |
| +#include <sys/wait.h> |
| +#include <sys/mount.h> |
| +#include <sys/types.h> |
| +#include <sys/stat.h> |
| +#include <sys/types.h> |
| +#include <sys/io.h> |
| +#include <sys/ioctl.h> |
| +#include <sys/reboot.h> |
| +#include <sys/utsname.h> |
| +#include <sys/sendfile.h> |
| +#include <linux/random.h> |
| +#include <linux/version.h> |
| + |
| +__attribute__((noreturn)) static void poweroff(void) |
| +{ |
| + fflush(stdout); |
| + fflush(stderr); |
| + reboot(RB_AUTOBOOT); |
| + sleep(30); |
| + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); |
| + exit(1); |
| +} |
| + |
| +static void panic(const char *what) |
| +{ |
| + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); |
| + poweroff(); |
| +} |
| + |
| +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") |
| + |
| +static void print_banner(void) |
| +{ |
| + struct utsname utsname; |
| + int len; |
| + |
| + if (uname(&utsname) < 0) |
| + panic("uname"); |
| + |
| + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); |
| + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); |
| +} |
| + |
| +static void seed_rng(void) |
| +{ |
| + int fd; |
| + struct { |
| + int entropy_count; |
| + int buffer_size; |
| + unsigned char buffer[256]; |
| + } entropy = { |
| + .entropy_count = sizeof(entropy.buffer) * 8, |
| + .buffer_size = sizeof(entropy.buffer), |
| + .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" |
| + }; |
| + |
| + if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) |
| + panic("mknod(/dev/urandom)"); |
| + fd = open("/dev/urandom", O_WRONLY); |
| + if (fd < 0) |
| + panic("open(urandom)"); |
| + for (int i = 0; i < 256; ++i) { |
| + if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) |
| + panic("ioctl(urandom)"); |
| + } |
| + close(fd); |
| +} |
| + |
| +static void mount_filesystems(void) |
| +{ |
| + pretty_message("[+] Mounting filesystems..."); |
| + mkdir("/dev", 0755); |
| + mkdir("/proc", 0755); |
| + mkdir("/sys", 0755); |
| + mkdir("/tmp", 0755); |
| + mkdir("/run", 0755); |
| + mkdir("/var", 0755); |
| + if (mount("none", "/dev", "devtmpfs", 0, NULL)) |
| + panic("devtmpfs mount"); |
| + if (mount("none", "/proc", "proc", 0, NULL)) |
| + panic("procfs mount"); |
| + if (mount("none", "/sys", "sysfs", 0, NULL)) |
| + panic("sysfs mount"); |
| + if (mount("none", "/tmp", "tmpfs", 0, NULL)) |
| + panic("tmpfs mount"); |
| + if (mount("none", "/run", "tmpfs", 0, NULL)) |
| + panic("tmpfs mount"); |
| + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) |
| + ; /* Not a problem if it fails.*/ |
| + if (symlink("/run", "/var/run")) |
| + panic("run symlink"); |
| + if (symlink("/proc/self/fd", "/dev/fd")) |
| + panic("fd symlink"); |
| +} |
| + |
| +static void enable_logging(void) |
| +{ |
| + int fd; |
| + pretty_message("[+] Enabling logging..."); |
| + fd = open("/proc/sys/kernel/printk", O_WRONLY); |
| + if (fd >= 0) { |
| + if (write(fd, "9\n", 2) != 2) |
| + panic("write(printk)"); |
| + close(fd); |
| + } |
| + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); |
| + if (fd >= 0) { |
| + if (write(fd, "1\n", 2) != 2) |
| + panic("write(exception-trace)"); |
| + close(fd); |
| + } |
| + fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); |
| + if (fd >= 0) { |
| + if (write(fd, "1\n", 2) != 2) |
| + panic("write(panic_on_warn)"); |
| + close(fd); |
| + } |
| +} |
| + |
| +static void kmod_selftests(void) |
| +{ |
| + FILE *file; |
| + char line[2048], *start, *pass; |
| + bool success = true; |
| + pretty_message("[+] Module self-tests:"); |
| + file = fopen("/proc/kmsg", "r"); |
| + if (!file) |
| + panic("fopen(kmsg)"); |
| + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) |
| + panic("fcntl(kmsg, nonblock)"); |
| + while (fgets(line, sizeof(line), file)) { |
| + start = strstr(line, "wireguard: "); |
| + if (!start) |
| + continue; |
| + start += 11; |
| + *strchrnul(start, '\n') = '\0'; |
| + if (strstr(start, "www.wireguard.com")) |
| + break; |
| + pass = strstr(start, ": pass"); |
| + if (!pass || pass[6] != '\0') { |
| + success = false; |
| + printf(" \x1b[31m* %s\x1b[0m\n", start); |
| + } else |
| + printf(" \x1b[32m* %s\x1b[0m\n", start); |
| + } |
| + fclose(file); |
| + if (!success) { |
| + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); |
| + poweroff(); |
| + } |
| +} |
| + |
| +static void launch_tests(void) |
| +{ |
| + char cmdline[4096] = { 0 }, *success_dev; /* zero-filled so a short read() stays NUL-terminated */ |
| + int status, fd; |
| + pid_t pid; |
| + /* Run /init.sh in a child; if it exits 0, write "success\n" to the device named by wg.success=. */ |
| + pretty_message("[+] Launching tests..."); |
| + pid = fork(); |
| + if (pid == -1) |
| + panic("fork"); |
| + else if (pid == 0) { |
| + execl("/init.sh", "init", NULL); |
| + panic("exec"); |
| + } |
| + if (waitpid(pid, &status, 0) < 0) |
| + panic("waitpid"); |
| + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { |
| + pretty_message("[+] Tests successful! :-)"); |
| + fd = open("/proc/cmdline", O_RDONLY); |
| + if (fd < 0) |
| + panic("open(/proc/cmdline)"); |
| + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) |
| + panic("read(/proc/cmdline)"); |
| + close(fd); /* fd is reused for the success device below */ |
| + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { |
| + if (strncmp(success_dev, "wg.success=", 11)) |
| + continue; |
| + memcpy(success_dev + 11 - 5, "/dev/", 5); |
| + success_dev += 11 - 5; |
| + break; |
| + } |
| + if (!success_dev || !strlen(success_dev)) |
| + panic("Unable to find success device"); |
| + |
| + fd = open(success_dev, O_WRONLY); |
| + if (fd < 0) |
| + panic("open(success_dev)"); |
| + if (write(fd, "success\n", 8) != 8) |
| + panic("write(success_dev)"); |
| + close(fd); |
| + } else { |
| + const char *why = "unknown cause"; |
| + int what = -1; |
| + |
| + if (WIFEXITED(status)) { |
| + why = "exit code"; |
| + what = WEXITSTATUS(status); |
| + } else if (WIFSIGNALED(status)) { |
| + why = "signal"; |
| + what = WTERMSIG(status); |
| + } |
| + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); |
| + } |
| +} |
| + |
| +static void ensure_console(void) |
| +{ |
| + for (unsigned int i = 0; i < 1000; ++i) { |
| + int fd = open("/dev/console", O_RDWR); |
| + if (fd < 0) { |
| + usleep(50000); |
| + continue; |
| + } |
| + dup2(fd, 0); |
| + dup2(fd, 1); |
| + dup2(fd, 2); |
| + close(fd); |
| + if (write(1, "\0\0\0\0\n", 5) == 5) |
| + return; |
| + } |
| + panic("Unable to open console device"); |
| +} |
| + |
| +static void clear_leaks(void) |
| +{ |
| + int fd; |
| + /* Write the "clear" command to kmemleak's debugfs file; skipped if the file cannot be opened. */ |
| + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); |
| + if (fd < 0) |
| + return; |
| + pretty_message("[+] Starting memory leak detection..."); |
| + write(fd, "clear\n", 6); |
| + close(fd); |
| +} |
| + |
| +static void check_leaks(void) |
| +{ |
| + int fd; |
| + |
| + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); |
| + if (fd < 0) |
| + return; |
| + pretty_message("[+] Scanning for memory leaks..."); |
| + sleep(2); /* Wait for any grace periods. */ |
| + write(fd, "scan\n", 5); |
| + close(fd); |
| + |
| + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); |
| + if (fd < 0) |
| + return; |
| + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) |
| + panic("Memory leaks encountered"); |
| + close(fd); |
| +} |
| + |
| +int main(int argc, char *argv[]) |
| +{ |
| + seed_rng(); |
| + ensure_console(); |
| + print_banner(); |
| + mount_filesystems(); |
| + kmod_selftests(); |
| + enable_logging(); |
| + clear_leaks(); |
| + launch_tests(); |
| + check_leaks(); |
| + poweroff(); |
| + return 1; |
| +} |
| diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config |
| new file mode 100644 |
| index 000000000000..9cca30206014 |
| |
| |
| @@ -0,0 +1,86 @@ |
| +CONFIG_LOCALVERSION="" |
| +CONFIG_NET=y |
| +CONFIG_NETDEVICES=y |
| +CONFIG_NET_CORE=y |
| +CONFIG_NET_IPIP=y |
| +CONFIG_DUMMY=y |
| +CONFIG_VETH=y |
| +CONFIG_MULTIUSER=y |
| +CONFIG_NAMESPACES=y |
| +CONFIG_NET_NS=y |
| +CONFIG_UNIX=y |
| +CONFIG_INET=y |
| +CONFIG_IPV6=y |
| +CONFIG_NETFILTER=y |
| +CONFIG_NETFILTER_ADVANCED=y |
| +CONFIG_NF_CONNTRACK=y |
| +CONFIG_NF_NAT=y |
| +CONFIG_NETFILTER_XTABLES=y |
| +CONFIG_NETFILTER_XT_NAT=y |
| +CONFIG_NETFILTER_XT_MATCH_LENGTH=y |
| +CONFIG_NF_CONNTRACK_IPV4=y |
| +CONFIG_NF_NAT_IPV4=y |
| +CONFIG_IP_NF_IPTABLES=y |
| +CONFIG_IP_NF_FILTER=y |
| +CONFIG_IP_NF_NAT=y |
| +CONFIG_IP_ADVANCED_ROUTER=y |
| +CONFIG_IP_MULTIPLE_TABLES=y |
| +CONFIG_IPV6_MULTIPLE_TABLES=y |
| +CONFIG_TTY=y |
| +CONFIG_BINFMT_ELF=y |
| +CONFIG_BINFMT_SCRIPT=y |
| +CONFIG_VDSO=y |
| +CONFIG_VIRTUALIZATION=y |
| +CONFIG_HYPERVISOR_GUEST=y |
| +CONFIG_PARAVIRT=y |
| +CONFIG_KVM_GUEST=y |
| +CONFIG_PARAVIRT_SPINLOCKS=y |
| +CONFIG_PRINTK=y |
| +CONFIG_KALLSYMS=y |
| +CONFIG_BUG=y |
| +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y |
| +CONFIG_EMBEDDED=n |
| +CONFIG_BASE_FULL=y |
| +CONFIG_FUTEX=y |
| +CONFIG_SHMEM=y |
| +CONFIG_SLUB=y |
| +CONFIG_SPARSEMEM_VMEMMAP=y |
| +CONFIG_SMP=y |
| +CONFIG_SCHED_SMT=y |
| +CONFIG_SCHED_MC=y |
| +CONFIG_NUMA=y |
| +CONFIG_PREEMPT=y |
| +CONFIG_NO_HZ=y |
| +CONFIG_NO_HZ_IDLE=y |
| +CONFIG_NO_HZ_FULL=n |
| +CONFIG_HZ_PERIODIC=n |
| +CONFIG_HIGH_RES_TIMERS=y |
| +CONFIG_ARCH_RANDOM=y |
| +CONFIG_FILE_LOCKING=y |
| +CONFIG_POSIX_TIMERS=y |
| +CONFIG_DEVTMPFS=y |
| +CONFIG_PROC_FS=y |
| +CONFIG_PROC_SYSCTL=y |
| +CONFIG_SYSFS=y |
| +CONFIG_TMPFS=y |
| +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 |
| +CONFIG_PRINTK_TIME=y |
| +CONFIG_BLK_DEV_INITRD=y |
| +CONFIG_LEGACY_VSYSCALL_NONE=y |
| +CONFIG_KERNEL_GZIP=y |
| +CONFIG_PANIC_ON_OOPS=y |
| +CONFIG_BUG_ON_DATA_CORRUPTION=y |
| +CONFIG_LOCKUP_DETECTOR=y |
| +CONFIG_SOFTLOCKUP_DETECTOR=y |
| +CONFIG_HARDLOCKUP_DETECTOR=y |
| +CONFIG_WQ_WATCHDOG=y |
| +CONFIG_DETECT_HUNG_TASK=y |
| +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y |
| +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y |
| +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y |
| +CONFIG_PANIC_TIMEOUT=-1 |
| +CONFIG_STACKTRACE=y |
| +CONFIG_EARLY_PRINTK=y |
| +CONFIG_GDB_SCRIPTS=y |
| +CONFIG_WIREGUARD=y |
| +CONFIG_WIREGUARD_DEBUG=y |
| -- |
| 2.18.2 |
| |
| |
| From 044e1033e6c7ee9f63c7bd2686f3a528c085befa Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Sun, 15 Dec 2019 22:08:01 +0100 |
| Subject: [PATCH 061/100] wireguard: Kconfig: select parent dependency for |
| crypto |
| |
| commit d7c68a38bb4f9b7c1a2e4a772872c752ee5c44a6 upstream. |
| |
| This fixes the crypto selection submenu dependencies. Otherwise, we'd |
| wind up issuing warnings in which certain dependencies we also select |
| couldn't be satisfied. This condition was triggered by the addition of |
| the test suite autobuilder in the previous commit. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/Kconfig | 2 ++ |
| 1 file changed, 2 insertions(+) |
| |
| diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig |
| index 16ad145e22c9..57f1ba924f4e 100644 |
| |
| |
| @@ -85,6 +85,8 @@ config WIREGUARD |
| select CRYPTO_POLY1305_X86_64 if X86 && 64BIT |
| select CRYPTO_BLAKE2S_X86 if X86 && 64BIT |
| select CRYPTO_CURVE25519_X86 if X86 && 64BIT |
| + select ARM_CRYPTO if ARM |
| + select ARM64_CRYPTO if ARM64 |
| select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON |
| select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON |
| select CRYPTO_POLY1305_ARM if ARM |
| -- |
| 2.18.2 |
| |
| |
| From 2ff0156ba74c1330a1ae557b898879a45b7aea33 Mon Sep 17 00:00:00 2001 |
| From: Josh Soref <jsoref@gmail.com> |
| Date: Sun, 15 Dec 2019 22:08:02 +0100 |
| Subject: [PATCH 062/100] wireguard: global: fix spelling mistakes in comments |
| |
| commit a2ec8b5706944d228181c8b91d815f41d6dd8e7b upstream. |
| |
| This fixes two spelling errors in source code comments. |
| |
| Signed-off-by: Josh Soref <jsoref@gmail.com> |
| [Jason: rewrote commit message] |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 2 +- |
| include/uapi/linux/wireguard.h | 8 ++++---- |
| 2 files changed, 5 insertions(+), 5 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 7e675f541491..9c6bab9c981f 100644 |
| |
| |
| @@ -380,7 +380,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, |
| /* We've already verified the Poly1305 auth tag, which means this packet |
| * was not modified in transit. We can therefore tell the networking |
| * stack that all checksums of every layer of encapsulation have already |
| - * been checked "by the hardware" and therefore is unneccessary to check |
| + * been checked "by the hardware" and therefore is unnecessary to check |
| * again in software. |
| */ |
| skb->ip_summed = CHECKSUM_UNNECESSARY; |
| diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h |
| index dd8a47c4ad11..ae88be14c947 100644 |
| |
| |
| @@ -18,13 +18,13 @@ |
| * one but not both of: |
| * |
| * WGDEVICE_A_IFINDEX: NLA_U32 |
| - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| * |
| * The kernel will then return several messages (NLM_F_MULTI) containing the |
| * following tree of nested items: |
| * |
| * WGDEVICE_A_IFINDEX: NLA_U32 |
| - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN |
| * WGDEVICE_A_LISTEN_PORT: NLA_U16 |
| @@ -77,7 +77,7 @@ |
| * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: |
| * |
| * WGDEVICE_A_IFINDEX: NLA_U32 |
| - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 |
| + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 |
| * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current |
| * peers should be removed prior to adding the list below. |
| * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove |
| @@ -121,7 +121,7 @@ |
| * filling in information not contained in the prior. Note that if |
| * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably |
| * should not be specified in fragments that come after, so that the list |
| - * of peers is only cleared the first time but appened after. Likewise for |
| + * of peers is only cleared the first time but appended after. Likewise for |
| * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message |
| * of a peer, it likely should not be specified in subsequent fragments. |
| * |
| -- |
| 2.18.2 |
| |
| |
| From 84e89e9fd9f5264db5ca5c6b4601c3762bde0868 Mon Sep 17 00:00:00 2001 |
| From: YueHaibing <yuehaibing@huawei.com> |
| Date: Sun, 15 Dec 2019 22:08:03 +0100 |
| Subject: [PATCH 063/100] wireguard: main: remove unused include |
| <linux/version.h> |
| |
| commit 43967b6ff91e53bcce5ae08c16a0588a475b53a1 upstream. |
| |
| Remove <linux/version.h> from the includes for main.c, which is unused. |
| |
| Signed-off-by: YueHaibing <yuehaibing@huawei.com> |
| [Jason: reworded commit message] |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/main.c | 1 - |
| 1 file changed, 1 deletion(-) |
| |
| diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c |
| index 10c0a40f6a9e..7a7d5f1a80fc 100644 |
| |
| |
| @@ -12,7 +12,6 @@ |
| |
| #include <uapi/linux/wireguard.h> |
| |
| -#include <linux/version.h> |
| #include <linux/init.h> |
| #include <linux/module.h> |
| #include <linux/genetlink.h> |
| -- |
| 2.18.2 |
| |
| |
| From 3d578c333903a03c4431ba9bf7715da6ffbfada5 Mon Sep 17 00:00:00 2001 |
| From: Wei Yongjun <weiyongjun1@huawei.com> |
| Date: Sun, 15 Dec 2019 22:08:04 +0100 |
| Subject: [PATCH 064/100] wireguard: allowedips: use kfree_rcu() instead of |
| call_rcu() |
| |
| commit d89ee7d5c73af15c1c6f12b016cdf469742b5726 upstream. |
| |
| The callback function of call_rcu() just calls a kfree(), so we |
| can use kfree_rcu() instead of call_rcu() + callback function. |
| |
| Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/allowedips.c | 7 +------ |
| 1 file changed, 1 insertion(+), 6 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c |
| index 72667d5399c3..121d9ea0f135 100644 |
| |
| |
| @@ -31,11 +31,6 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, |
| #define CHOOSE_NODE(parent, key) \ |
| parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] |
| |
| -static void node_free_rcu(struct rcu_head *rcu) |
| -{ |
| - kfree(container_of(rcu, struct allowedips_node, rcu)); |
| -} |
| - |
| static void push_rcu(struct allowedips_node **stack, |
| struct allowedips_node __rcu *p, unsigned int *len) |
| { |
| @@ -112,7 +107,7 @@ static void walk_remove_by_peer(struct allowedips_node __rcu **top, |
| if (!node->bit[0] || !node->bit[1]) { |
| rcu_assign_pointer(*nptr, DEREF( |
| &node->bit[!REF(node->bit[0])])); |
| - call_rcu(&node->rcu, node_free_rcu); |
| + kfree_rcu(node, rcu); |
| node = DEREF(nptr); |
| } |
| } |
| -- |
| 2.18.2 |
| |
| |
| From cad9e6e6f5b2b10404d8873d0474a3c770f062be Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 2 Jan 2020 17:47:49 +0100 |
| Subject: [PATCH 065/100] wireguard: selftests: remove ancient kernel |
| compatibility code |
| |
| commit 9a69a4c8802adf642bc4a13d471b5a86b44ed434 upstream. |
| |
| Quite a bit of the test suite was designed to work with ancient kernels. |
| Thankfully we no longer have to deal with this. This commit updates |
| things that we can finally update and removes things that we can finally |
| remove, to avoid the build-up of the last several years as a result of |
| having to support ancient kernels. We can finally rely on |
| suppress_prefixlength being available. On the build side of things, the no-PIE |
| hack is no longer required, and we can bump some of the tools, repair |
| our m68k and i686-kvm support, and get better coverage of the static |
| branches used in the crypto lib and in udp_tunnel. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/netns.sh | 11 +-- |
| .../testing/selftests/wireguard/qemu/Makefile | 82 ++++++++++--------- |
| .../selftests/wireguard/qemu/arch/m68k.config | 2 +- |
| tools/testing/selftests/wireguard/qemu/init.c | 1 + |
| .../selftests/wireguard/qemu/kernel.config | 2 + |
| 5 files changed, 50 insertions(+), 48 deletions(-) |
| |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index e7310d9390f7..d5c85c7494f2 100755 |
| |
| |
| @@ -37,7 +37,7 @@ n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } |
| ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } |
| ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } |
| ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } |
| -sleep() { read -t "$1" -N 0 || true; } |
| +sleep() { read -t "$1" -N 1 || true; } |
| waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } |
| waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| @@ -294,12 +294,9 @@ ip1 -6 rule add table main suppress_prefixlength 0 |
| ip1 -4 route add default dev wg0 table 51820 |
| ip1 -4 rule add not fwmark 51820 table 51820 |
| ip1 -4 rule add table main suppress_prefixlength 0 |
| -# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. |
| -if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then |
| - # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. |
| - n1 ping -W 1 -c 100 -f 192.168.99.7 |
| - n1 ping -W 1 -c 100 -f abab::1111 |
| -fi |
| +# Flood the pings instead of sending just one, to trigger routing table reference counting bugs. |
| +n1 ping -W 1 -c 100 -f 192.168.99.7 |
| +n1 ping -W 1 -c 100 -f abab::1111 |
| |
| n0 iptables -t nat -F |
| ip0 link del vethrc |
| diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile |
| index 6d51bf78eeff..f10aa3590adc 100644 |
| |
| |
| @@ -5,6 +5,7 @@ |
| PWD := $(shell pwd) |
| |
| CHOST := $(shell gcc -dumpmachine) |
| +HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) |
| ifneq (,$(ARCH)) |
| CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) |
| ifeq (,$(CBUILD)) |
| @@ -37,19 +38,19 @@ endef |
| define file_download = |
| $(DISTFILES_PATH)/$(1): |
| mkdir -p $(DISTFILES_PATH) |
| - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' |
| + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' |
| if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi |
| endef |
| |
| -$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61)) |
| +$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) |
| $(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) |
| -$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f)) |
| +$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) |
| $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) |
| -$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2)) |
| -$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5)) |
| -$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21)) |
| -$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a)) |
| -$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071)) |
| +$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) |
| +$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) |
| +$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) |
| +$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) |
| +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) |
| |
| KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) |
| rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) |
| @@ -59,23 +60,21 @@ export CFLAGS ?= -O3 -pipe |
| export LDFLAGS ?= |
| export CPPFLAGS := -I$(BUILD_PATH)/include |
| |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| CROSS_COMPILE_FLAG := --host=$(CHOST) |
| -NOPIE_GCC := gcc -fno-PIE |
| CFLAGS += -march=native |
| STRIP := strip |
| else |
| $(info Cross compilation: building for $(CBUILD) using $(CHOST)) |
| CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) |
| export CROSS_COMPILE=$(CBUILD)- |
| -NOPIE_GCC := $(CBUILD)-gcc -fno-PIE |
| STRIP := $(CBUILD)-strip |
| endif |
| ifeq ($(ARCH),aarch64) |
| QEMU_ARCH := aarch64 |
| KERNEL_ARCH := arm64 |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| else |
| QEMU_MACHINE := -cpu cortex-a53 -machine virt |
| @@ -85,7 +84,7 @@ else ifeq ($(ARCH),aarch64_be) |
| QEMU_ARCH := aarch64 |
| KERNEL_ARCH := arm64 |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| else |
| QEMU_MACHINE := -cpu cortex-a53 -machine virt |
| @@ -95,7 +94,7 @@ else ifeq ($(ARCH),arm) |
| QEMU_ARCH := arm |
| KERNEL_ARCH := arm |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| else |
| QEMU_MACHINE := -cpu cortex-a15 -machine virt |
| @@ -105,7 +104,7 @@ else ifeq ($(ARCH),armeb) |
| QEMU_ARCH := arm |
| KERNEL_ARCH := arm |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm |
| else |
| QEMU_MACHINE := -cpu cortex-a15 -machine virt |
| @@ -116,7 +115,7 @@ else ifeq ($(ARCH),x86_64) |
| QEMU_ARCH := x86_64 |
| KERNEL_ARCH := x86_64 |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine q35,accel=kvm |
| else |
| QEMU_MACHINE := -cpu Skylake-Server -machine q35 |
| @@ -126,7 +125,7 @@ else ifeq ($(ARCH),i686) |
| QEMU_ARCH := i386 |
| KERNEL_ARCH := x86 |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage |
| -ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST)) |
| +ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine q35,accel=kvm |
| else |
| QEMU_MACHINE := -cpu coreduo -machine q35 |
| @@ -136,7 +135,7 @@ else ifeq ($(ARCH),mips64) |
| QEMU_ARCH := mips64 |
| KERNEL_ARCH := mips |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| CFLAGS += -EB |
| else |
| @@ -147,7 +146,7 @@ else ifeq ($(ARCH),mips64el) |
| QEMU_ARCH := mips64el |
| KERNEL_ARCH := mips |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| CFLAGS += -EL |
| else |
| @@ -158,7 +157,7 @@ else ifeq ($(ARCH),mips) |
| QEMU_ARCH := mips |
| KERNEL_ARCH := mips |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| CFLAGS += -EB |
| else |
| @@ -169,7 +168,7 @@ else ifeq ($(ARCH),mipsel) |
| QEMU_ARCH := mipsel |
| KERNEL_ARCH := mips |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host -machine malta,accel=kvm |
| CFLAGS += -EL |
| else |
| @@ -180,7 +179,7 @@ else ifeq ($(ARCH),powerpc64le) |
| QEMU_ARCH := ppc64 |
| KERNEL_ARCH := powerpc |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host,accel=kvm -machine pseries |
| else |
| QEMU_MACHINE := -machine pseries |
| @@ -190,7 +189,7 @@ else ifeq ($(ARCH),powerpc) |
| QEMU_ARCH := ppc |
| KERNEL_ARCH := powerpc |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage |
| -ifeq ($(CHOST),$(CBUILD)) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 |
| else |
| QEMU_MACHINE := -machine ppce500 |
| @@ -200,10 +199,11 @@ else ifeq ($(ARCH),m68k) |
| QEMU_ARCH := m68k |
| KERNEL_ARCH := m68k |
| KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux |
| -ifeq ($(CHOST),$(CBUILD)) |
| -QEMU_MACHINE := -cpu host,accel=kvm -machine q800 |
| +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) |
| +ifeq ($(HOST_ARCH),$(ARCH)) |
| +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) |
| else |
| -QEMU_MACHINE := -machine q800 |
| +QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) |
| endif |
| else |
| $(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) |
| @@ -238,14 +238,14 @@ $(BUILD_PATH)/init-cpio-spec.txt: |
| echo "nod /dev/console 644 0 0 c 5 1" >> $@ |
| echo "dir /bin 755 0 0" >> $@ |
| echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ |
| - echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@ |
| + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ |
| echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ |
| echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ |
| echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ |
| echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ |
| echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ |
| - echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@ |
| - echo "slink /bin/iptables xtables-multi 777 0 0" >> $@ |
| + echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ |
| + echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ |
| echo "slink /bin/ping6 ping 777 0 0" >> $@ |
| echo "dir /lib 755 0 0" >> $@ |
| echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ |
| @@ -260,8 +260,8 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config |
| cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config |
| $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) |
| |
| -$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) |
| - $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" |
| +$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) |
| |
| $(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config |
| $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install |
| @@ -280,7 +280,7 @@ $(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so |
| |
| $(MUSL_CC): $(MUSL_PATH)/lib/libc.so |
| sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs |
| - printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc |
| + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc |
| chmod +x $(BUILD_PATH)/musl-gcc |
| |
| $(IPERF_PATH)/.installed: $(IPERF_TAR) |
| @@ -291,7 +291,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR) |
| touch $@ |
| |
| $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) |
| - cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared |
| + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no |
| $(MAKE) -C $(IPERF_PATH) |
| $(STRIP) -s $@ |
| |
| @@ -308,8 +308,8 @@ $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) |
| flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| touch $@ |
| |
| -$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg |
| +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg |
| $(STRIP) -s $@ |
| |
| $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) |
| @@ -323,7 +323,8 @@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) |
| touch $@ |
| |
| $(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) |
| - $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping |
| + sed -i /atexit/d $(IPUTILS_PATH)/ping.c |
| + cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS) |
| $(STRIP) -s $@ |
| |
| $(BASH_PATH)/.installed: $(BASH_TAR) |
| @@ -357,7 +358,7 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) |
| sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure |
| touch $@ |
| |
| -$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include |
| $(MAKE) -C $(IPTABLES_PATH) |
| $(STRIP) -s $@ |
| @@ -368,8 +369,9 @@ $(NMAP_PATH)/.installed: $(NMAP_TAR) |
| touch $@ |
| |
| $(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) |
| - cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux |
| - $(MAKE) -C $(NMAP_PATH) build-ncat |
| + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh |
| + $(MAKE) -C $(NMAP_PATH)/libpcap |
| + $(MAKE) -C $(NMAP_PATH)/ncat |
| $(STRIP) -s $@ |
| |
| clean: |
| @@ -379,7 +381,7 @@ distclean: clean |
| rm -rf $(DISTFILES_PATH) |
| |
| menuconfig: $(KERNEL_BUILD_PATH)/.config |
| - $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig |
| + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig |
| |
| .PHONY: qemu build clean distclean menuconfig |
| .DELETE_ON_ERROR: |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config |
| index 5381ea10896c..62a15bdb877e 100644 |
| |
| |
| @@ -1,9 +1,9 @@ |
| CONFIG_MMU=y |
| +CONFIG_M68KCLASSIC=y |
| CONFIG_M68040=y |
| CONFIG_MAC=y |
| CONFIG_SERIAL_PMACZILOG=y |
| CONFIG_SERIAL_PMACZILOG_TTYS=y |
| CONFIG_SERIAL_PMACZILOG_CONSOLE=y |
| -CONFIG_CMDLINE_BOOL=y |
| CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" |
| CONFIG_FRAME_WARN=1024 |
| diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c |
| index 51e5ddedee88..90bc9813cadc 100644 |
| |
| |
| @@ -21,6 +21,7 @@ |
| #include <sys/reboot.h> |
| #include <sys/utsname.h> |
| #include <sys/sendfile.h> |
| +#include <sys/sysmacros.h> |
| #include <linux/random.h> |
| #include <linux/version.h> |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config |
| index 9cca30206014..af9323a0b6e0 100644 |
| |
| |
| @@ -39,6 +39,7 @@ CONFIG_PRINTK=y |
| CONFIG_KALLSYMS=y |
| CONFIG_BUG=y |
| CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y |
| +CONFIG_JUMP_LABEL=y |
| CONFIG_EMBEDDED=n |
| CONFIG_BASE_FULL=y |
| CONFIG_FUTEX=y |
| @@ -55,6 +56,7 @@ CONFIG_NO_HZ_IDLE=y |
| CONFIG_NO_HZ_FULL=n |
| CONFIG_HZ_PERIODIC=n |
| CONFIG_HIGH_RES_TIMERS=y |
| +CONFIG_COMPAT_32BIT_TIME=y |
| CONFIG_ARCH_RANDOM=y |
| CONFIG_FILE_LOCKING=y |
| CONFIG_POSIX_TIMERS=y |
| -- |
| 2.18.2 |
| |
| |
| From 9666ac553d46a1d0ee0baa37d5880e41428d1038 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 2 Jan 2020 17:47:50 +0100 |
| Subject: [PATCH 066/100] wireguard: queueing: do not account for pfmemalloc |
| when clearing skb header |
| |
| commit 04d2ea92a18417619182cbb79063f154892b0150 upstream. |
| |
| Before 8b7008620b84 ("net: Don't copy pfmemalloc flag in |
| __copy_skb_header()"), the pfmemalloc flag used to be between headers_start and |
| headers_end, which is a region we clear when preparing the packet for |
| encryption/decryption. This is a parameter we certainly want to |
| preserve, which is why 8b7008620b84 moved it out of there. The code here |
| was written in a world before 8b7008620b84, though, where we had to |
| manually account for it. This commit brings things up to speed. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/queueing.h | 3 --- |
| 1 file changed, 3 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h |
| index 58fdd630b246..e62c714a548e 100644 |
| |
| |
| @@ -83,13 +83,10 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) |
| |
| static inline void wg_reset_packet(struct sk_buff *skb) |
| { |
| - const int pfmemalloc = skb->pfmemalloc; |
| - |
| skb_scrub_packet(skb, true); |
| memset(&skb->headers_start, 0, |
| offsetof(struct sk_buff, headers_end) - |
| offsetof(struct sk_buff, headers_start)); |
| - skb->pfmemalloc = pfmemalloc; |
| skb->queue_mapping = 0; |
| skb->nohdr = 0; |
| skb->peeked = 0; |
| -- |
| 2.18.2 |
| |
| |
| From 1f217f112b8681b030dd198a2c7a5c06102f20c5 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Thu, 2 Jan 2020 17:47:51 +0100 |
| Subject: [PATCH 067/100] wireguard: socket: mark skbs as not on list when |
| receiving via gro |
| |
| commit 736775d06bac60d7a353e405398b48b2bd8b1e54 upstream. |
| |
| Certain drivers will pass gro skbs to udp, at which point the udp driver |
| simply iterates through them and passes them off to encap_rcv, which is |
| where we pick up. At the moment, we're not attempting to coalesce these |
| into bundles, but we also don't want to wind up having cascaded lists of |
| skbs treated separately. The right behavior here, then, is to just mark |
| each incoming one as not on a list. This can be seen in practice, for |
| example, with Qualcomm's rmnet_perf driver. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Tested-by: Yaroslav Furman <yaro330@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/socket.c | 1 + |
| 1 file changed, 1 insertion(+) |
| |
| diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c |
| index c46256d0d81c..262f3b5c819d 100644 |
| |
| |
| @@ -333,6 +333,7 @@ static int wg_receive(struct sock *sk, struct sk_buff *skb) |
| wg = sk->sk_user_data; |
| if (unlikely(!wg)) |
| goto err; |
| + skb_mark_not_on_list(skb); |
| wg_packet_receive(wg, skb); |
| return 0; |
| |
| -- |
| 2.18.2 |
| |
| |
| From bc67e71a23c2e84f1ae7b471a50a62c886d36d7b Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 8 Jan 2020 16:59:02 -0500 |
| Subject: [PATCH 068/100] net: introduce skb_list_walk_safe for skb segment |
| walking |
| |
| commit dcfea72e79b0aa7a057c8f6024169d86a1bbc84b upstream. |
| |
| As part of the continual effort to remove direct usage of skb->next and |
| skb->prev, this patch adds a helper for iterating through the |
| singly-linked variant of skb lists, which are used for lists of GSO |
| packets. The name "skb_list_..." has been chosen to match the existing |
| function, "kfree_skb_list", which also operates on these singly-linked |
| lists, and the "..._walk_safe" part is the same idiom as elsewhere in |
| the kernel. |
| |
| This patch removes the helper from wireguard and puts it into |
| linux/skbuff.h, while making it a bit more robust for general usage. In |
| particular, parentheses are added around the macro argument usage, and it |
| now accounts for trying to iterate through an already-null skb pointer, |
| which will simply run the iteration zero times. This latter enhancement |
| means it can be used to replace both do { ... } while and while (...) |
| open-coded idioms. |
| |
| This should take care of these three possible usages, which match all |
| current methods of iterations. |
| |
| skb_list_walk_safe(segs, skb, next) { ... } |
| skb_list_walk_safe(skb, skb, next) { ... } |
| skb_list_walk_safe(segs, skb, segs) { ... } |
| |
| Gcc appears to generate efficient code for each of these. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.h | 8 -------- |
| include/linux/skbuff.h | 5 +++++ |
| 2 files changed, 5 insertions(+), 8 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h |
| index c91f3051c5c7..b15a8be9d816 100644 |
| |
| |
| @@ -62,12 +62,4 @@ struct wg_device { |
| int wg_device_init(void); |
| void wg_device_uninit(void); |
| |
| -/* Later after the dust settles, this can be moved into include/linux/skbuff.h, |
| - * where virtually all code that deals with GSO segs can benefit, around ~30 |
| - * drivers as of writing. |
| - */ |
| -#define skb_list_walk_safe(first, skb, next) \ |
| - for (skb = first, next = skb->next; skb; \ |
| - skb = next, next = skb ? skb->next : NULL) |
| - |
| #endif /* _WG_DEVICE_H */ |
| diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h |
| index 955e1370f033..5e9fe508977f 100644 |
| |
| |
| @@ -1480,6 +1480,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) |
| skb->next = NULL; |
| } |
| |
| +/* Iterate through singly-linked GSO fragments of an skb. */ |
| +#define skb_list_walk_safe(first, skb, next) \ |
| + for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ |
| + (skb) = (next), (next) = (skb) ? (skb)->next : NULL) |
| + |
| static inline void skb_list_del_init(struct sk_buff *skb) |
| { |
| __list_del_entry(&skb->list); |
| -- |
| 2.18.2 |
| |
| |
| From 8fe9a34f82f28628f6a553d11370f35422346240 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 13 Jan 2020 18:42:26 -0500 |
| Subject: [PATCH 069/100] net: skbuff: disambiguate argument and member for |
| skb_list_walk_safe helper |
| |
| commit 5eee7bd7e245914e4e050c413dfe864e31805207 upstream. |
| |
| This worked before, because we made all callers name their next pointer |
| "next". But in trying to be more "drop-in" ready, the silliness here is |
| revealed. This commit fixes the problem by making the macro argument and |
| the member use different names. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/linux/skbuff.h | 6 +++--- |
| 1 file changed, 3 insertions(+), 3 deletions(-) |
| |
| diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h |
| index 5e9fe508977f..3c7755d29636 100644 |
| |
| |
| @@ -1481,9 +1481,9 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) |
| } |
| |
| /* Iterate through singly-linked GSO fragments of an skb. */ |
| -#define skb_list_walk_safe(first, skb, next) \ |
| - for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ |
| - (skb) = (next), (next) = (skb) ? (skb)->next : NULL) |
| +#define skb_list_walk_safe(first, skb, next_skb) \ |
| + for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ |
| + (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) |
| |
| static inline void skb_list_del_init(struct sk_buff *skb) |
| { |
| -- |
| 2.18.2 |
| |
| |
| From 814bebdf05ee13ea187fce3c52ae9d7914b92aa2 Mon Sep 17 00:00:00 2001 |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Tue, 4 Feb 2020 22:17:25 +0100 |
| Subject: [PATCH 070/100] wireguard: allowedips: fix use-after-free in |
| root_remove_peer_lists |
| |
| commit 9981159fc3b677b357f84e069a11de5a5ec8a2a8 upstream. |
| |
| In the unlikely case a new node could not be allocated, we need to |
| remove @newnode from @peer->allowedips_list before freeing it. |
| |
| syzbot reported: |
| |
| BUG: KASAN: use-after-free in __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54 |
| Read of size 8 at addr ffff88809881a538 by task syz-executor.4/30133 |
| |
| CPU: 0 PID: 30133 Comm: syz-executor.4 Not tainted 5.5.0-syzkaller #0 |
| Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 |
| Call Trace: |
| __dump_stack lib/dump_stack.c:77 [inline] |
| dump_stack+0x197/0x210 lib/dump_stack.c:118 |
| print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 |
| __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506 |
| kasan_report+0x12/0x20 mm/kasan/common.c:639 |
| __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 |
| __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54 |
| __list_del_entry include/linux/list.h:132 [inline] |
| list_del include/linux/list.h:146 [inline] |
| root_remove_peer_lists+0x24f/0x4b0 drivers/net/wireguard/allowedips.c:65 |
| wg_allowedips_free+0x232/0x390 drivers/net/wireguard/allowedips.c:300 |
| wg_peer_remove_all+0xd5/0x620 drivers/net/wireguard/peer.c:187 |
| wg_set_device+0xd01/0x1350 drivers/net/wireguard/netlink.c:542 |
| genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] |
| genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] |
| genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 |
| netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 |
| genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 |
| netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] |
| netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 |
| netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 |
| sock_sendmsg_nosec net/socket.c:652 [inline] |
| sock_sendmsg+0xd7/0x130 net/socket.c:672 |
| ____sys_sendmsg+0x753/0x880 net/socket.c:2343 |
| ___sys_sendmsg+0x100/0x170 net/socket.c:2397 |
| __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 |
| __do_sys_sendmsg net/socket.c:2439 [inline] |
| __se_sys_sendmsg net/socket.c:2437 [inline] |
| __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 |
| do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 |
| entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| RIP: 0033:0x45b399 |
| Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 |
| RSP: 002b:00007f99a9bcdc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e |
| RAX: ffffffffffffffda RBX: 00007f99a9bce6d4 RCX: 000000000045b399 |
| RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003 |
| RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 |
| R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 |
| R13: 00000000000009ba R14: 00000000004cb2b8 R15: 0000000000000009 |
| |
| Allocated by task 30103: |
| save_stack+0x23/0x90 mm/kasan/common.c:72 |
| set_track mm/kasan/common.c:80 [inline] |
| __kasan_kmalloc mm/kasan/common.c:513 [inline] |
| __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 |
| kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527 |
| kmem_cache_alloc_trace+0x158/0x790 mm/slab.c:3551 |
| kmalloc include/linux/slab.h:556 [inline] |
| kzalloc include/linux/slab.h:670 [inline] |
| add+0x70a/0x1970 drivers/net/wireguard/allowedips.c:236 |
| wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320 |
| set_allowedip drivers/net/wireguard/netlink.c:343 [inline] |
| set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468 |
| wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591 |
| genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] |
| genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] |
| genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 |
| netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 |
| genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 |
| netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] |
| netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 |
| netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 |
| sock_sendmsg_nosec net/socket.c:652 [inline] |
| sock_sendmsg+0xd7/0x130 net/socket.c:672 |
| ____sys_sendmsg+0x753/0x880 net/socket.c:2343 |
| ___sys_sendmsg+0x100/0x170 net/socket.c:2397 |
| __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 |
| __do_sys_sendmsg net/socket.c:2439 [inline] |
| __se_sys_sendmsg net/socket.c:2437 [inline] |
| __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 |
| do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 |
| entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| |
| Freed by task 30103: |
| save_stack+0x23/0x90 mm/kasan/common.c:72 |
| set_track mm/kasan/common.c:80 [inline] |
| kasan_set_free_info mm/kasan/common.c:335 [inline] |
| __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 |
| kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 |
| __cache_free mm/slab.c:3426 [inline] |
| kfree+0x10a/0x2c0 mm/slab.c:3757 |
| add+0x12d2/0x1970 drivers/net/wireguard/allowedips.c:266 |
| wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320 |
| set_allowedip drivers/net/wireguard/netlink.c:343 [inline] |
| set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468 |
| wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591 |
| genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] |
| genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] |
| genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 |
| netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 |
| genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 |
| netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] |
| netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 |
| netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 |
| sock_sendmsg_nosec net/socket.c:652 [inline] |
| sock_sendmsg+0xd7/0x130 net/socket.c:672 |
| ____sys_sendmsg+0x753/0x880 net/socket.c:2343 |
| ___sys_sendmsg+0x100/0x170 net/socket.c:2397 |
| __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 |
| __do_sys_sendmsg net/socket.c:2439 [inline] |
| __se_sys_sendmsg net/socket.c:2437 [inline] |
| __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 |
| do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 |
| entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| |
| The buggy address belongs to the object at ffff88809881a500 |
| which belongs to the cache kmalloc-64 of size 64 |
| The buggy address is located 56 bytes inside of |
| 64-byte region [ffff88809881a500, ffff88809881a540) |
| The buggy address belongs to the page: |
| page:ffffea0002620680 refcount:1 mapcount:0 mapping:ffff8880aa400380 index:0x0 |
| raw: 00fffe0000000200 ffffea000250b748 ffffea000254bac8 ffff8880aa400380 |
| raw: 0000000000000000 ffff88809881a000 0000000100000020 0000000000000000 |
| page dumped because: kasan: bad access detected |
| |
| Memory state around the buggy address: |
| ffff88809881a400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc |
| ffff88809881a480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc |
| >ffff88809881a500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc |
| ^ |
| ffff88809881a580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc |
| ffff88809881a600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Reported-by: syzbot <syzkaller@googlegroups.com> |
| Cc: Jason A. Donenfeld <Jason@zx2c4.com> |
| Cc: wireguard@lists.zx2c4.com |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/allowedips.c | 1 + |
| 1 file changed, 1 insertion(+) |
| |
| diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c |
| index 121d9ea0f135..3725e9cd85f4 100644 |
| |
| |
| @@ -263,6 +263,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, |
| } else { |
| node = kzalloc(sizeof(*node), GFP_KERNEL); |
| if (unlikely(!node)) { |
| + list_del(&newnode->peer_list); |
| kfree(newnode); |
| return -ENOMEM; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 865cb6d968679b607bb1a83b4d243eec411fc1c8 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 4 Feb 2020 22:17:26 +0100 |
| Subject: [PATCH 071/100] wireguard: noise: reject peers with low order public |
| keys |
| |
| commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream. |
| |
| Our static-static calculation returns a failure if the public key is of |
| low order. We check for this when peers are added, and don't allow them |
| to be added if they're low order, except in the case where we haven't |
| yet been given a private key. In that case, we would defer the removal |
| of the peer until we're given a private key, since at that point we're |
| doing new static-static calculations which incur failures we can act on. |
| This meant, however, that we wound up removing peers rather late in the |
| configuration flow. |
| |
| Syzkaller points out that peer_remove calls flush_workqueue, which in |
| turn might then wait for sending a handshake initiation to complete. |
| Since handshake initiation needs the static identity lock, holding the |
| static identity lock while calling peer_remove can result in a rare |
| deadlock. We have precisely this case in this situation of late-stage |
| peer removal based on an invalid public key. We can't drop the lock when |
| removing, because then incoming handshakes might interact with a bogus |
| static-static calculation. |
| |
| While the band-aid patch for this would involve breaking up the peer |
| removal into two steps like wg_peer_remove_all does, in order to solve |
| the locking issue, there's actually a much more elegant way of fixing |
| this: |
| |
| If the static-static calculation succeeds with one private key, it |
| *must* succeed with all others, because all 32-byte strings map to valid |
| private keys, thanks to clamping. That means we can get rid of this |
| silly dance and locking headaches of removing peers late in the |
| configuration flow, and instead just reject them early on, regardless of |
| whether the device has yet been assigned a private key. For the case |
| where the device doesn't yet have a private key, we safely use zeros |
| just for the purposes of checking for low order points by way of |
| checking the output of the calculation. |
| |
| The following PoC will trigger the deadlock: |
| |
| ip link add wg0 type wireguard |
| ip addr add 10.0.0.1/24 dev wg0 |
| ip link set wg0 up |
| ping -f 10.0.0.2 & |
| while true; do |
| wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234 |
| wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=) |
| done |
| |
| [ 0.949105] |
| [ 0.949550] WARNING: possible circular locking dependency detected |
| [ 0.950143] 5.5.0-debug+ #18 Not tainted |
| [ 0.950431] ------------------------------------------------------ |
| [ 0.950959] wg/89 is trying to acquire lock: |
| [ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0 |
| [ 0.951865] |
| [ 0.951865] but task is already holding lock: |
| [ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 |
| [ 0.953011] |
| [ 0.953011] which lock already depends on the new lock. |
| [ 0.953011] |
| [ 0.953651] |
| [ 0.953651] the existing dependency chain (in reverse order) is: |
| [ 0.954292] |
| [ 0.954292] -> #2 (&wg->static_identity.lock){++++}: |
| [ 0.954804] lock_acquire+0x127/0x350 |
| [ 0.955133] down_read+0x83/0x410 |
| [ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700 |
| [ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280 |
| [ 0.956401] wg_packet_handshake_send_worker+0x10/0x20 |
| [ 0.956841] process_one_work+0x806/0x1500 |
| [ 0.957167] worker_thread+0x8c/0xcb0 |
| [ 0.957549] kthread+0x2ee/0x3b0 |
| [ 0.957792] ret_from_fork+0x24/0x30 |
| [ 0.958234] |
| [ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}: |
| [ 0.958808] lock_acquire+0x127/0x350 |
| [ 0.959075] process_one_work+0x7ab/0x1500 |
| [ 0.959369] worker_thread+0x8c/0xcb0 |
| [ 0.959639] kthread+0x2ee/0x3b0 |
| [ 0.959896] ret_from_fork+0x24/0x30 |
| [ 0.960346] |
| [ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}: |
| [ 0.960945] check_prev_add+0x167/0x1e20 |
| [ 0.961351] __lock_acquire+0x2012/0x3170 |
| [ 0.961725] lock_acquire+0x127/0x350 |
| [ 0.961990] flush_workqueue+0x106/0x12f0 |
| [ 0.962280] peer_remove_after_dead+0x160/0x220 |
| [ 0.962600] wg_set_device+0xa24/0xcc0 |
| [ 0.962994] genl_rcv_msg+0x52f/0xe90 |
| [ 0.963298] netlink_rcv_skb+0x111/0x320 |
| [ 0.963618] genl_rcv+0x1f/0x30 |
| [ 0.963853] netlink_unicast+0x3f6/0x610 |
| [ 0.964245] netlink_sendmsg+0x700/0xb80 |
| [ 0.964586] __sys_sendto+0x1dd/0x2c0 |
| [ 0.964854] __x64_sys_sendto+0xd8/0x1b0 |
| [ 0.965141] do_syscall_64+0x90/0xd9a |
| [ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| [ 0.965769] |
| [ 0.965769] other info that might help us debug this: |
| [ 0.965769] |
| [ 0.966337] Chain exists of: |
| [ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock |
| [ 0.966337] |
| [ 0.967417] Possible unsafe locking scenario: |
| [ 0.967417] |
| [ 0.967836] CPU0 CPU1 |
| [ 0.968155] ---- ---- |
| [ 0.968497] lock(&wg->static_identity.lock); |
| [ 0.968779] lock((work_completion)(&peer->transmit_handshake_work)); |
| [ 0.969345] lock(&wg->static_identity.lock); |
| [ 0.969809] lock((wq_completion)wg-kex-wg0); |
| [ 0.970146] |
| [ 0.970146] *** DEADLOCK *** |
| [ 0.970146] |
| [ 0.970531] 5 locks held by wg/89: |
| [ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30 |
| [ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90 |
| [ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0 |
| [ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0 |
| [ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 |
| [ 0.973653] |
| [ 0.973653] stack backtrace: |
| [ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18 |
| [ 0.974476] Call Trace: |
| [ 0.974638] dump_stack+0x97/0xe0 |
| [ 0.974869] check_noncircular+0x312/0x3e0 |
| [ 0.975132] ? print_circular_bug+0x1f0/0x1f0 |
| [ 0.975410] ? __kernel_text_address+0x9/0x30 |
| [ 0.975727] ? unwind_get_return_address+0x51/0x90 |
| [ 0.976024] check_prev_add+0x167/0x1e20 |
| [ 0.976367] ? graph_lock+0x70/0x160 |
| [ 0.976682] __lock_acquire+0x2012/0x3170 |
| [ 0.976998] ? register_lock_class+0x1140/0x1140 |
| [ 0.977323] lock_acquire+0x127/0x350 |
| [ 0.977627] ? flush_workqueue+0xe3/0x12f0 |
| [ 0.977890] flush_workqueue+0x106/0x12f0 |
| [ 0.978147] ? flush_workqueue+0xe3/0x12f0 |
| [ 0.978410] ? find_held_lock+0x2c/0x110 |
| [ 0.978662] ? lock_downgrade+0x6e0/0x6e0 |
| [ 0.978919] ? queue_rcu_work+0x60/0x60 |
| [ 0.979166] ? netif_napi_del+0x151/0x3b0 |
| [ 0.979501] ? peer_remove_after_dead+0x160/0x220 |
| [ 0.979871] peer_remove_after_dead+0x160/0x220 |
| [ 0.980232] wg_set_device+0xa24/0xcc0 |
| [ 0.980516] ? deref_stack_reg+0x8e/0xc0 |
| [ 0.980801] ? set_peer+0xe10/0xe10 |
| [ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150 |
| [ 0.981430] ? __nla_validate_parse+0x163/0x270 |
| [ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310 |
| [ 0.982078] genl_rcv_msg+0x52f/0xe90 |
| [ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 |
| [ 0.982690] ? register_lock_class+0x1140/0x1140 |
| [ 0.983049] netlink_rcv_skb+0x111/0x320 |
| [ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 |
| [ 0.983645] ? netlink_ack+0x880/0x880 |
| [ 0.983888] genl_rcv+0x1f/0x30 |
| [ 0.984168] netlink_unicast+0x3f6/0x610 |
| [ 0.984443] ? netlink_detachskb+0x60/0x60 |
| [ 0.984729] ? find_held_lock+0x2c/0x110 |
| [ 0.984976] netlink_sendmsg+0x700/0xb80 |
| [ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60 |
| [ 0.985533] __sys_sendto+0x1dd/0x2c0 |
| [ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0 |
| [ 0.986039] ? sockfd_lookup_light+0x17/0x160 |
| [ 0.986397] ? __sys_recvmsg+0x8c/0xf0 |
| [ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0 |
| [ 0.987018] __x64_sys_sendto+0xd8/0x1b0 |
| [ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0 |
| [ 0.987666] do_syscall_64+0x90/0xd9a |
| [ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| [ 0.988223] RIP: 0033:0x7fe77c12003e |
| [ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4 |
| [ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c |
| [ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e |
| [ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004 |
| [ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c |
| [ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c |
| [ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001 |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Reported-by: syzbot <syzkaller@googlegroups.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/netlink.c | 6 ++---- |
| drivers/net/wireguard/noise.c | 10 +++++++--- |
| 2 files changed, 9 insertions(+), 7 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c |
| index 0739a2cd1920..45a631e79d7a 100644 |
| |
| |
| @@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) |
| private_key); |
| list_for_each_entry_safe(peer, temp, &wg->peer_list, |
| peer_list) { |
| - if (wg_noise_precompute_static_static(peer)) |
| - wg_noise_expire_current_peer_keypairs(peer); |
| - else |
| - wg_peer_remove(peer); |
| + BUG_ON(!wg_noise_precompute_static_static(peer)); |
| + wg_noise_expire_current_peer_keypairs(peer); |
| } |
| wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); |
| up_write(&wg->static_identity.lock); |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| index d71c8db68a8c..919d9d866446 100644 |
| |
| |
| @@ -46,17 +46,21 @@ void __init wg_noise_init(void) |
| /* Must hold peer->handshake.static_identity->lock */ |
| bool wg_noise_precompute_static_static(struct wg_peer *peer) |
| { |
| - bool ret = true; |
| + bool ret; |
| |
| down_write(&peer->handshake.lock); |
| - if (peer->handshake.static_identity->has_identity) |
| + if (peer->handshake.static_identity->has_identity) { |
| ret = curve25519( |
| peer->handshake.precomputed_static_static, |
| peer->handshake.static_identity->static_private, |
| peer->handshake.remote_static); |
| - else |
| + } else { |
| + u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; |
| + |
| + ret = curve25519(empty, empty, peer->handshake.remote_static); |
| memset(peer->handshake.precomputed_static_static, 0, |
| NOISE_PUBLIC_KEY_LEN); |
| + } |
| up_write(&peer->handshake.lock); |
| return ret; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From dcfbac9ca10b50be17c38740df7f6c81c4840270 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 4 Feb 2020 22:17:27 +0100 |
| Subject: [PATCH 072/100] wireguard: selftests: ensure non-addition of peers |
| with failed precomputation |
| |
| commit f9398acba6a4ae9cb98bfe4d56414d376eff8d57 upstream. |
| |
| Ensure that peers with low order points are ignored, both in the case |
| where we already have a device private key and in the case where we do |
| not. This adds points that naturally give a zero output. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/netns.sh | 6 ++++++ |
| 1 file changed, 6 insertions(+) |
| |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index d5c85c7494f2..b03647d1bbf6 100755 |
| |
| |
| @@ -516,6 +516,12 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16. |
| n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 |
| n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 |
| n0 wg set wg0 peer "$pub2" allowed-ips ::/0 |
| +n0 wg set wg0 peer "$pub2" remove |
| +low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) |
| +n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } |
| +[[ -z $(n0 wg show wg0 peers) ]] |
| +n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } |
| +[[ -z $(n0 wg show wg0 peers) ]] |
| ip0 link del wg0 |
| |
| declare -A objects |
| -- |
| 2.18.2 |
| |
| |
| From 428b6098ac8141b68397f7aaa85eb0f615ff237b Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 4 Feb 2020 22:17:29 +0100 |
| Subject: [PATCH 073/100] wireguard: selftests: tie socket waiting to target |
| pid |
| |
| commit 88f404a9b1d75388225b1c67b6dd327cb2182777 upstream. |
| |
| Without this, we wind up proceeding too early sometimes when the |
| previous process has just used the same listening port. So, we tie the |
| listening socket query to the specific pid we're interested in. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/netns.sh | 17 ++++++++--------- |
| 1 file changed, 8 insertions(+), 9 deletions(-) |
| |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index b03647d1bbf6..f5ab1cda8bb5 100755 |
| |
| |
| @@ -38,9 +38,8 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } |
| ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } |
| ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } |
| sleep() { read -t "$1" -N 1 || true; } |
| -waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } |
| -waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| -waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } |
| +waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } |
| +waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } |
| waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } |
| |
| cleanup() { |
| @@ -119,22 +118,22 @@ tests() { |
| |
| # TCP over IPv4 |
| n2 iperf3 -s -1 -B 192.168.241.2 & |
| - waitiperf $netns2 |
| + waitiperf $netns2 $! |
| n1 iperf3 -Z -t 3 -c 192.168.241.2 |
| |
| # TCP over IPv6 |
| n1 iperf3 -s -1 -B fd00::1 & |
| - waitiperf $netns1 |
| + waitiperf $netns1 $! |
| n2 iperf3 -Z -t 3 -c fd00::1 |
| |
| # UDP over IPv4 |
| n1 iperf3 -s -1 -B 192.168.241.1 & |
| - waitiperf $netns1 |
| + waitiperf $netns1 $! |
| n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 |
| |
| # UDP over IPv6 |
| n2 iperf3 -s -1 -B fd00::2 & |
| - waitiperf $netns2 |
| + waitiperf $netns2 $! |
| n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 |
| } |
| |
| @@ -207,7 +206,7 @@ n1 ping -W 1 -c 1 192.168.241.2 |
| n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 |
| exec 4< <(n1 ncat -l -u -p 1111) |
| ncat_pid=$! |
| -waitncatudp $netns1 |
| +waitncatudp $netns1 $ncat_pid |
| n2 ncat -u 192.168.241.1 1111 <<<"X" |
| read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] |
| kill $ncat_pid |
| @@ -216,7 +215,7 @@ n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 |
| n2 wg set wg0 listen-port 9997 |
| exec 4< <(n1 ncat -l -u -p 1111) |
| ncat_pid=$! |
| -waitncatudp $netns1 |
| +waitncatudp $netns1 $ncat_pid |
| n2 ncat -u 192.168.241.1 1111 <<<"X" |
| ! read -r -N 1 -t 1 out <&4 || false |
| kill $ncat_pid |
| -- |
| 2.18.2 |
| |
| |
| From 02e7bebbc099bcec52dd85181009d61881627d77 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 11 Feb 2020 20:47:08 +0100 |
| Subject: [PATCH 074/100] wireguard: device: use icmp_ndo_send helper |
| |
| commit a12d7f3cbdc72c7625881c8dc2660fc2c979fdf2 upstream. |
| |
| Because wireguard is calling icmp from network device context, it should |
| use the ndo helper so that the rate limiting applies correctly. This |
| commit adds a small test to the wireguard test suite to ensure that the |
| new functions continue doing the right thing in the context of |
| wireguard. It does this by setting up a condition that will definitely |
| evoke an icmp error message from the driver, but along a nat'd path. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.c | 4 ++-- |
| tools/testing/selftests/wireguard/netns.sh | 11 +++++++++++ |
| 2 files changed, 13 insertions(+), 2 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| index 16b19824b9ad..43db442b1373 100644 |
| |
| |
| @@ -203,9 +203,9 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) |
| err: |
| ++dev->stats.tx_errors; |
| if (skb->protocol == htons(ETH_P_IP)) |
| - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
| + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
| else if (skb->protocol == htons(ETH_P_IPV6)) |
| - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); |
| + icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); |
| kfree_skb(skb); |
| return ret; |
| } |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index f5ab1cda8bb5..138d46b3f330 100755 |
| |
| |
| @@ -24,6 +24,7 @@ |
| set -e |
| |
| exec 3>&1 |
| +export LANG=C |
| export WG_HIDE_KEYS=never |
| netns0="wg-test-$$-0" |
| netns1="wg-test-$$-1" |
| @@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_prefixlength 0 |
| n1 ping -W 1 -c 100 -f 192.168.99.7 |
| n1 ping -W 1 -c 100 -f abab::1111 |
| |
| +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. |
| +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 |
| +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. |
| +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' |
| +ip0 -4 route add 192.168.241.1 via 10.0.0.100 |
| +n2 wg set wg0 peer "$pub1" remove |
| +[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] |
| + |
| n0 iptables -t nat -F |
| +n0 iptables -t filter -F |
| +n2 iptables -t nat -F |
| ip0 link del vethrc |
| ip0 link del vethrs |
| ip1 link del wg0 |
| -- |
| 2.18.2 |
| |
| |
| From 7750b809baf591ea2729e2d7e3c3c021e93ad8f0 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 14 Feb 2020 23:57:20 +0100 |
| Subject: [PATCH 075/100] wireguard: selftests: reduce complexity and fix make |
| races |
| |
| commit 04ddf1208f03e1dbc39a4619c40eba640051b950 upstream. |
| |
| This gives us fewer dependencies and shortens build time, fixes up some |
| hash checking race conditions, and also fixes missing directory creation |
| that caused issues on massively parallel builds. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| .../testing/selftests/wireguard/qemu/Makefile | 38 +++++++------------ |
| 1 file changed, 14 insertions(+), 24 deletions(-) |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile |
| index f10aa3590adc..28d477683e8a 100644 |
| |
| |
| @@ -38,19 +38,17 @@ endef |
| define file_download = |
| $(DISTFILES_PATH)/$(1): |
| mkdir -p $(DISTFILES_PATH) |
| - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' |
| - if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi |
| + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' |
| endef |
| |
| $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) |
| -$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) |
| $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) |
| $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) |
| $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) |
| $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) |
| $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) |
| $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) |
| -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) |
| +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) |
| |
| KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) |
| rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) |
| @@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) |
| $(MAKE) -C $(IPERF_PATH) |
| $(STRIP) -s $@ |
| |
| -$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) |
| - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| - touch $@ |
| - |
| -$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) |
| - cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared |
| - $(MAKE) -C $(LIBMNL_PATH) |
| - sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc |
| - |
| $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) |
| + mkdir -p $(BUILD_PATH) |
| flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| touch $@ |
| |
| -$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg |
| +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) |
| + $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg |
| $(STRIP) -s $@ |
| |
| $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) |
| @@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) |
| $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) |
| mkdir -p $(BUILD_PATH) |
| flock -s $<.lock tar -C $(BUILD_PATH) -xf $< |
| - printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk |
| + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk |
| printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile |
| touch $@ |
| |
| -$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip |
| - $(STRIP) -s $(IPROUTE2_PATH)/ip/ip |
| +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) |
| + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip |
| + $(STRIP) -s $@ |
| |
| -$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss |
| - $(STRIP) -s $(IPROUTE2_PATH)/misc/ss |
| +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) |
| + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss |
| + $(STRIP) -s $@ |
| |
| $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) |
| mkdir -p $(BUILD_PATH) |
| @@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) |
| sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure |
| touch $@ |
| |
| -$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) |
| - cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include |
| +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) |
| + cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include |
| $(MAKE) -C $(IPTABLES_PATH) |
| $(STRIP) -s $@ |
| |
| -- |
| 2.18.2 |
| |
| |
| From 1296b940ccd927bbaa66765149ccb094c1b44fe2 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 14 Feb 2020 23:57:21 +0100 |
| Subject: [PATCH 076/100] wireguard: receive: reset last_under_load to zero |
| |
| commit 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 upstream. |
| |
| This is a small optimization that prevents more expensive comparisons |
| from happening when they are no longer necessary, by clearing the |
| last_under_load variable whenever we wind up in a state where we were |
| under load but we no longer are. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Suggested-by: Matt Dunwoodie <ncon@noconroy.net> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 7 +++++-- |
| 1 file changed, 5 insertions(+), 2 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 9c6bab9c981f..4a153894cee2 100644 |
| |
| |
| @@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg, |
| |
| under_load = skb_queue_len(&wg->incoming_handshakes) >= |
| MAX_QUEUED_INCOMING_HANDSHAKES / 8; |
| - if (under_load) |
| + if (under_load) { |
| last_under_load = ktime_get_coarse_boottime_ns(); |
| - else if (last_under_load) |
| + } else if (last_under_load) { |
| under_load = !wg_birthdate_has_expired(last_under_load, 1); |
| + if (!under_load) |
| + last_under_load = 0; |
| + } |
| mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, |
| under_load); |
| if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || |
| -- |
| 2.18.2 |
| |
| |
| From b2ce88b315ce1aeba00deecefe751925d8add692 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 14 Feb 2020 23:57:22 +0100 |
| Subject: [PATCH 077/100] wireguard: send: account for mtu=0 devices |
| |
| commit 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e upstream. |
| |
| It turns out there's an easy way to get packets queued up while still |
| having an MTU of zero, and that's via persistent keep alive. This commit |
| makes sure that in whatever condition, we don't wind up dividing by |
| zero. Note that an MTU of zero for a wireguard interface is something |
| quasi-valid, so I don't think the correct fix is to limit it via |
| min_mtu. This can be reproduced easily with: |
| |
| ip link add wg0 type wireguard |
| ip link add wg1 type wireguard |
| ip link set wg0 up mtu 0 |
| ip link set wg1 up |
| wg set wg0 private-key <(wg genkey) |
| wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key) |
| wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1 |
| |
| However, while min_mtu=0 seems fine, it makes sense to restrict the |
| max_mtu. This commit also restricts the maximum MTU to the greatest |
| number for which rounding up to the padding multiple won't overflow a |
| signed integer. Packets this large were always rejected anyway |
| eventually, due to checks deeper in, but it seems more sound not to even |
| let the administrator configure something that won't work anyway. |
| |
| We use this opportunity to clean up this function a bit so that it's |
| clear which paths we're expecting. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Cc: Eric Dumazet <eric.dumazet@gmail.com> |
| Reviewed-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.c | 7 ++++--- |
| drivers/net/wireguard/send.c | 16 +++++++++++----- |
| 2 files changed, 15 insertions(+), 8 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| index 43db442b1373..cdc96968b0f4 100644 |
| |
| |
| @@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev) |
| enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | |
| NETIF_F_SG | NETIF_F_GSO | |
| NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; |
| + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + |
| + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); |
| |
| dev->netdev_ops = &netdev_ops; |
| dev->hard_header_len = 0; |
| @@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev) |
| dev->features |= WG_NETDEV_FEATURES; |
| dev->hw_features |= WG_NETDEV_FEATURES; |
| dev->hw_enc_features |= WG_NETDEV_FEATURES; |
| - dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - |
| - sizeof(struct udphdr) - |
| - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); |
| + dev->mtu = ETH_DATA_LEN - overhead; |
| + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; |
| |
| SET_NETDEV_DEVTYPE(dev, &device_type); |
| |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index c13260563446..7348c10cbae3 100644 |
| |
| |
| @@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer) |
| |
| static unsigned int calculate_skb_padding(struct sk_buff *skb) |
| { |
| + unsigned int padded_size, last_unit = skb->len; |
| + |
| + if (unlikely(!PACKET_CB(skb)->mtu)) |
| + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; |
| + |
| /* We do this modulo business with the MTU, just in case the networking |
| * layer gives us a packet that's bigger than the MTU. In that case, we |
| * wouldn't want the final subtraction to overflow in the case of the |
| - * padded_size being clamped. |
| + * padded_size being clamped. Fortunately, that's very rarely the case, |
| + * so we optimize for that not happening. |
| */ |
| - unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; |
| - unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); |
| + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) |
| + last_unit %= PACKET_CB(skb)->mtu; |
| |
| - if (padded_size > PACKET_CB(skb)->mtu) |
| - padded_size = PACKET_CB(skb)->mtu; |
| + padded_size = min(PACKET_CB(skb)->mtu, |
| + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); |
| return padded_size - last_unit; |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From 766b55937688166afcd08d168abbfa3cc675c3ef Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Fri, 14 Feb 2020 23:57:23 +0100 |
| Subject: [PATCH 078/100] wireguard: socket: remove extra call to |
| synchronize_net |
| |
| commit 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e upstream. |
| |
| synchronize_net() is a wrapper around synchronize_rcu(), so there's no |
| point in having synchronize_net and synchronize_rcu back to back, |
| despite the documentation comment suggesting maybe it's somewhat useful, |
| "Wait for packets currently being received to be done." This commit |
| removes the extra call. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> |
| Reviewed-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/socket.c | 1 - |
| 1 file changed, 1 deletion(-) |
| |
| diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c |
| index 262f3b5c819d..b0d6541582d3 100644 |
| |
| |
| @@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4, |
| wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); |
| mutex_unlock(&wg->socket_update_lock); |
| synchronize_rcu(); |
| - synchronize_net(); |
| sock_free(old4); |
| sock_free(old6); |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 2f4ee5e5f2dad3d3ccfea4d16c2c54d9346f7cd1 Mon Sep 17 00:00:00 2001 |
| From: YueHaibing <yuehaibing@huawei.com> |
| Date: Wed, 18 Mar 2020 18:30:43 -0600 |
| Subject: [PATCH 079/100] wireguard: selftests: remove duplicated include |
| <sys/types.h> |
| |
| commit 166391159c5deb84795d2ff46e95f276177fa5fb upstream. |
| |
| This commit removes a duplicated include. |
| |
| Signed-off-by: YueHaibing <yuehaibing@huawei.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/qemu/init.c | 1 - |
| 1 file changed, 1 deletion(-) |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c |
| index 90bc9813cadc..c9698120ac9d 100644 |
| |
| |
| @@ -13,7 +13,6 @@ |
| #include <fcntl.h> |
| #include <sys/wait.h> |
| #include <sys/mount.h> |
| -#include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <sys/io.h> |
| -- |
| 2.18.2 |
| |
| |
| From c7092cc3c5f766315eb5f1bd1717a2fe6348e977 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 18 Mar 2020 18:30:45 -0600 |
| Subject: [PATCH 080/100] wireguard: queueing: account for skb->protocol==0 |
| |
| commit a5588604af448664e796daf3c1d5a4523c60667b upstream. |
| |
| We carry out checks to the effect of: |
| |
| if (skb->protocol != wg_examine_packet_protocol(skb)) |
| goto err; |
| |
| By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this |
| means that the check above still passes in the case where skb->protocol |
| is zero, which is possible to hit with AF_PACKET: |
| |
| struct sockaddr_pkt saddr = { .spkt_device = "wg0" }; |
| unsigned char buffer[5] = { 0 }; |
| sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0), |
| buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); |
| |
| Additional checks mean that this isn't actually a problem in the code |
| base, but I could imagine it becoming a problem later if the function is |
| used more liberally. |
| |
| I would prefer to fix this by having wg_examine_packet_protocol return a |
| 32-bit ~0 value on failure, which will never match any value of |
| skb->protocol, which would simply change the generated code from a mov |
| to a movzx. However, sparse complains, and adding __force casts doesn't |
| seem like a good idea, so instead we just add a simple helper function |
| to check for the zero return value. Since wg_examine_packet_protocol |
| itself gets inlined, this winds up not adding an additional branch to |
| the generated code, since the 0 return value already happens in a |
| mergeable branch. |
| |
| Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.c | 2 +- |
| drivers/net/wireguard/queueing.h | 8 +++++++- |
| drivers/net/wireguard/receive.c | 4 ++-- |
| 3 files changed, 10 insertions(+), 4 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| index cdc96968b0f4..3ac3f8570ca1 100644 |
| |
| |
| @@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) |
| u32 mtu; |
| int ret; |
| |
| - if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { |
| + if (unlikely(!wg_check_packet_protocol(skb))) { |
| ret = -EPROTONOSUPPORT; |
| net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); |
| goto err; |
| diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h |
| index e62c714a548e..3432232afe06 100644 |
| |
| |
| @@ -66,7 +66,7 @@ struct packet_cb { |
| #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) |
| |
| /* Returns either the correct skb->protocol value, or 0 if invalid. */ |
| -static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) |
| +static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) |
| { |
| if (skb_network_header(skb) >= skb->head && |
| (skb_network_header(skb) + sizeof(struct iphdr)) <= |
| @@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) |
| return 0; |
| } |
| |
| +static inline bool wg_check_packet_protocol(struct sk_buff *skb) |
| +{ |
| + __be16 real_protocol = wg_examine_packet_protocol(skb); |
| + return real_protocol && skb->protocol == real_protocol; |
| +} |
| + |
| static inline void wg_reset_packet(struct sk_buff *skb) |
| { |
| skb_scrub_packet(skb, true); |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 4a153894cee2..243ed7172dd2 100644 |
| |
| |
| @@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) |
| size_t data_offset, data_len, header_len; |
| struct udphdr *udp; |
| |
| - if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || |
| + if (unlikely(!wg_check_packet_protocol(skb) || |
| skb_transport_header(skb) < skb->head || |
| (skb_transport_header(skb) + sizeof(struct udphdr)) > |
| skb_tail_pointer(skb))) |
| @@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, |
| */ |
| skb->ip_summed = CHECKSUM_UNNECESSARY; |
| skb->csum_level = ~0; /* All levels */ |
| - skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); |
| + skb->protocol = wg_examine_packet_protocol(skb); |
| if (skb->protocol == htons(ETH_P_IP)) { |
| len = ntohs(ip_hdr(skb)->tot_len); |
| if (unlikely(len < sizeof(struct iphdr))) |
| -- |
| 2.18.2 |
| |
| |
| From 9c31aa57f2861ba637fb5513444ad5b3139511c8 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 18 Mar 2020 18:30:46 -0600 |
| Subject: [PATCH 081/100] wireguard: receive: remove dead code from default |
| packet type case |
| |
| commit 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 upstream. |
| |
| The situation in which we wind up hitting the default case here |
| indicates a major bug in earlier parsing code. It is not a usual thing |
| that should ever happen, which means a "friendly" message for it doesn't |
| make sense. Rather, replace this with a WARN_ON, just like we do earlier |
| in the file for a similar situation, so that somebody sends us a bug |
| report and we can fix it. |
| |
| Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 3 +-- |
| 1 file changed, 1 insertion(+), 2 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 243ed7172dd2..da3b782ab7d3 100644 |
| |
| |
| @@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) |
| wg_packet_consume_data(wg, skb); |
| break; |
| default: |
| - net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", |
| - wg->dev->name, skb); |
| + WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n"); |
| goto err; |
| } |
| return; |
| -- |
| 2.18.2 |
| |
| |
| From 72e3696fde1ec043a8e0bd2f193f9d56d3b46b6b Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 18 Mar 2020 18:30:47 -0600 |
| Subject: [PATCH 082/100] wireguard: noise: error out precomputed DH during |
| handshake rather than config |
| |
| commit 11a7686aa99c7fe4b3f80f6dcccd54129817984d upstream. |
| |
| We precompute the static-static ECDH during configuration time, in order |
| to save an expensive computation later when receiving network packets. |
| However, not all ECDH computations yield a contributory result. Prior, |
| we were just not letting those peers be added to the interface. However, |
| this creates a strange inconsistency, since it was still possible to add |
| other weird points, like a valid public key plus a low-order point, and, |
| like points that result in zeros, a handshake would not complete. In |
| order to make the behavior more uniform and less surprising, simply |
| allow all peers to be added. Then, we'll error out later when doing the |
| crypto if there's an issue. This also adds more separation between the |
| crypto layer and the configuration layer. |
| |
| Discussed-with: Mathias Hall-Andersen <mathias@hall-andersen.dk> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/netlink.c | 8 +--- |
| drivers/net/wireguard/noise.c | 55 ++++++++++++---------- |
| drivers/net/wireguard/noise.h | 12 ++--- |
| drivers/net/wireguard/peer.c | 7 +-- |
| tools/testing/selftests/wireguard/netns.sh | 15 ++++-- |
| 5 files changed, 49 insertions(+), 48 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c |
| index 45a631e79d7a..ab6cbe95a652 100644 |
| |
| |
| @@ -417,11 +417,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) |
| |
| peer = wg_peer_create(wg, public_key, preshared_key); |
| if (IS_ERR(peer)) { |
| - /* Similar to the above, if the key is invalid, we skip |
| - * it without fanfare, so that services don't need to |
| - * worry about doing key validation themselves. |
| - */ |
| - ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); |
| + ret = PTR_ERR(peer); |
| peer = NULL; |
| goto out; |
| } |
| @@ -575,7 +571,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) |
| private_key); |
| list_for_each_entry_safe(peer, temp, &wg->peer_list, |
| peer_list) { |
| - BUG_ON(!wg_noise_precompute_static_static(peer)); |
| + wg_noise_precompute_static_static(peer); |
| wg_noise_expire_current_peer_keypairs(peer); |
| } |
| wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| index 919d9d866446..708dc61c974f 100644 |
| |
| |
| @@ -44,32 +44,23 @@ void __init wg_noise_init(void) |
| } |
| |
| /* Must hold peer->handshake.static_identity->lock */ |
| -bool wg_noise_precompute_static_static(struct wg_peer *peer) |
| +void wg_noise_precompute_static_static(struct wg_peer *peer) |
| { |
| - bool ret; |
| - |
| down_write(&peer->handshake.lock); |
| - if (peer->handshake.static_identity->has_identity) { |
| - ret = curve25519( |
| - peer->handshake.precomputed_static_static, |
| + if (!peer->handshake.static_identity->has_identity || |
| + !curve25519(peer->handshake.precomputed_static_static, |
| peer->handshake.static_identity->static_private, |
| - peer->handshake.remote_static); |
| - } else { |
| - u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; |
| - |
| - ret = curve25519(empty, empty, peer->handshake.remote_static); |
| + peer->handshake.remote_static)) |
| memset(peer->handshake.precomputed_static_static, 0, |
| NOISE_PUBLIC_KEY_LEN); |
| - } |
| up_write(&peer->handshake.lock); |
| - return ret; |
| } |
| |
| -bool wg_noise_handshake_init(struct noise_handshake *handshake, |
| - struct noise_static_identity *static_identity, |
| - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| - struct wg_peer *peer) |
| +void wg_noise_handshake_init(struct noise_handshake *handshake, |
| + struct noise_static_identity *static_identity, |
| + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| + struct wg_peer *peer) |
| { |
| memset(handshake, 0, sizeof(*handshake)); |
| init_rwsem(&handshake->lock); |
| @@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake, |
| NOISE_SYMMETRIC_KEY_LEN); |
| handshake->static_identity = static_identity; |
| handshake->state = HANDSHAKE_ZEROED; |
| - return wg_noise_precompute_static_static(peer); |
| + wg_noise_precompute_static_static(peer); |
| } |
| |
| static void handshake_zero(struct noise_handshake *handshake) |
| @@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], |
| return true; |
| } |
| |
| +static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], |
| + u8 key[NOISE_SYMMETRIC_KEY_LEN], |
| + const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) |
| +{ |
| + static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; |
| + if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) |
| + return false; |
| + kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, |
| + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, |
| + chaining_key); |
| + return true; |
| +} |
| + |
| static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) |
| { |
| struct blake2s_state blake; |
| @@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, |
| NOISE_PUBLIC_KEY_LEN, key, handshake->hash); |
| |
| /* ss */ |
| - kdf(handshake->chaining_key, key, NULL, |
| - handshake->precomputed_static_static, NOISE_HASH_LEN, |
| - NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, |
| - handshake->chaining_key); |
| + if (!mix_precomputed_dh(handshake->chaining_key, key, |
| + handshake->precomputed_static_static)) |
| + goto out; |
| |
| /* {t} */ |
| tai64n_now(timestamp); |
| @@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, |
| handshake = &peer->handshake; |
| |
| /* ss */ |
| - kdf(chaining_key, key, NULL, handshake->precomputed_static_static, |
| - NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, |
| - chaining_key); |
| + if (!mix_precomputed_dh(chaining_key, key, |
| + handshake->precomputed_static_static)) |
| + goto out; |
| |
| /* {t} */ |
| if (!message_decrypt(t, src->encrypted_timestamp, |
| diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h |
| index 138a07bb817c..f532d59d3f19 100644 |
| |
| |
| @@ -94,11 +94,11 @@ struct noise_handshake { |
| struct wg_device; |
| |
| void wg_noise_init(void); |
| -bool wg_noise_handshake_init(struct noise_handshake *handshake, |
| - struct noise_static_identity *static_identity, |
| - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| - struct wg_peer *peer); |
| +void wg_noise_handshake_init(struct noise_handshake *handshake, |
| + struct noise_static_identity *static_identity, |
| + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], |
| + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], |
| + struct wg_peer *peer); |
| void wg_noise_handshake_clear(struct noise_handshake *handshake); |
| static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) |
| { |
| @@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); |
| void wg_noise_set_static_identity_private_key( |
| struct noise_static_identity *static_identity, |
| const u8 private_key[NOISE_PUBLIC_KEY_LEN]); |
| -bool wg_noise_precompute_static_static(struct wg_peer *peer); |
| +void wg_noise_precompute_static_static(struct wg_peer *peer); |
| |
| bool |
| wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, |
| diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c |
| index 071eedf33f5a..1d634bd3038f 100644 |
| |
| |
| @@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, |
| return ERR_PTR(ret); |
| peer->device = wg; |
| |
| - if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, |
| - public_key, preshared_key, peer)) { |
| - ret = -EKEYREJECTED; |
| - goto err_1; |
| - } |
| + wg_noise_handshake_init(&peer->handshake, &wg->static_identity, |
| + public_key, preshared_key, peer); |
| if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) |
| goto err_1; |
| if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index 138d46b3f330..936e1ca9410e 100755 |
| |
| |
| @@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 |
| n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 |
| n0 wg set wg0 peer "$pub2" allowed-ips ::/0 |
| n0 wg set wg0 peer "$pub2" remove |
| -low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) |
| -n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } |
| -[[ -z $(n0 wg show wg0 peers) ]] |
| -n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } |
| -[[ -z $(n0 wg show wg0 peers) ]] |
| +for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do |
| + n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111 |
| +done |
| +[[ -n $(n0 wg show wg0 peers) ]] |
| +exec 4< <(n0 ncat -l -u -p 1111) |
| +ncat_pid=$! |
| +waitncatudp $netns0 $ncat_pid |
| +ip0 link set wg0 up |
| +! read -r -n 1 -t 2 <&4 || false |
| +kill $ncat_pid |
| ip0 link del wg0 |
| |
| declare -A objects |
| -- |
| 2.18.2 |
| |
| |
| From d1ae8043584a1d3a3474c9e212463bc8876996e2 Mon Sep 17 00:00:00 2001 |
| From: Sultan Alsawaf <sultan@kerneltoast.com> |
| Date: Wed, 29 Apr 2020 14:59:20 -0600 |
| Subject: [PATCH 083/100] wireguard: send: remove errant newline from |
| packet_encrypt_worker |
| |
| commit d6833e42786e050e7522d6a91a9361e54085897d upstream. |
| |
| This commit removes a useless newline at the end of a scope, which |
| doesn't add anything in the way of organization or readability. |
| |
| Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/send.c | 1 - |
| 1 file changed, 1 deletion(-) |
| |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index 7348c10cbae3..3e030d614df5 100644 |
| |
| |
| @@ -304,7 +304,6 @@ void wg_packet_encrypt_worker(struct work_struct *work) |
| } |
| wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, |
| state); |
| - |
| } |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From 1e92895e72c5c9354900200fbcfb6b2ab63d9908 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 29 Apr 2020 14:59:21 -0600 |
| Subject: [PATCH 084/100] wireguard: queueing: cleanup ptr_ring in error path |
| of packet_queue_init |
| |
| commit 130c58606171326c81841a49cc913cd354113dd9 upstream. |
| |
| Prior, if the alloc_percpu of packet_percpu_multicore_worker_alloc |
| failed, the previously allocated ptr_ring wouldn't be freed. This commit |
| adds the missing call to ptr_ring_cleanup in the error case. |
| |
| Reported-by: Sultan Alsawaf <sultan@kerneltoast.com> |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/queueing.c | 4 +++- |
| 1 file changed, 3 insertions(+), 1 deletion(-) |
| |
| diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c |
| index 5c964fcb994e..71b8e80b58e1 100644 |
| |
| |
| @@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, |
| if (multicore) { |
| queue->worker = wg_packet_percpu_multicore_worker_alloc( |
| function, queue); |
| - if (!queue->worker) |
| + if (!queue->worker) { |
| + ptr_ring_cleanup(&queue->ring, NULL); |
| return -ENOMEM; |
| + } |
| } else { |
| INIT_WORK(&queue->work, function); |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 4c5e81cd3378da4d3045474fcc5514c4b2542e5d Mon Sep 17 00:00:00 2001 |
| From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com> |
| Date: Wed, 29 Apr 2020 14:59:22 -0600 |
| Subject: [PATCH 085/100] wireguard: receive: use tunnel helpers for |
| decapsulating ECN markings |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit eebabcb26ea1e3295704477c6cd4e772c96a9559 upstream. |
| |
| WireGuard currently only propagates ECN markings on tunnel decap according |
| to the old RFC3168 specification. However, the spec has since been updated |
| in RFC6040 to recommend slightly different decapsulation semantics. This |
| was implemented in the kernel as a set of common helpers for ECN |
| decapsulation, so let's just switch over WireGuard to using those, so it |
| can benefit from this enhancement and any future tweaks. We do not drop |
| packets with invalid ECN marking combinations, because WireGuard is |
| frequently used to work around broken ISPs, which could be doing that. |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Reported-by: Olivier Tilmans <olivier.tilmans@nokia-bell-labs.com> |
| Cc: Dave Taht <dave.taht@gmail.com> |
| Cc: Rodney W. Grimes <ietf@gndrsh.dnsmgr.net> |
| Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 6 ++---- |
| 1 file changed, 2 insertions(+), 4 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index da3b782ab7d3..267f202f1931 100644 |
| |
| |
| @@ -393,13 +393,11 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, |
| len = ntohs(ip_hdr(skb)->tot_len); |
| if (unlikely(len < sizeof(struct iphdr))) |
| goto dishonest_packet_size; |
| - if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) |
| - IP_ECN_set_ce(ip_hdr(skb)); |
| + INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos); |
| } else if (skb->protocol == htons(ETH_P_IPV6)) { |
| len = ntohs(ipv6_hdr(skb)->payload_len) + |
| sizeof(struct ipv6hdr); |
| - if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) |
| - IP6_ECN_set_ce(skb, ipv6_hdr(skb)); |
| + INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb))); |
| } else { |
| goto dishonest_packet_type; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From dea57992e63578e9e696da9ef0c99366f521e4a1 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 6 May 2020 15:33:02 -0600 |
| Subject: [PATCH 086/100] wireguard: selftests: use normal kernel stack size on |
| ppc64 |
| |
| commit a0fd7cc87a018df1a17f9d3f0bd994c1f22c6b34 upstream. |
| |
| While at some point it might have made sense to be running these tests |
| on ppc64 with 4k stacks, the kernel hasn't actually used 4k stacks on |
| 64-bit powerpc in a long time, and more interesting things that we test |
| don't really work when we deviate from the default (16k). So, we stop |
| pushing our luck in this commit, and return to the default instead of |
| the minimum. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 1 + |
| 1 file changed, 1 insertion(+) |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config |
| index 990c510a9cfa..f52f1e2bc7f6 100644 |
| |
| |
| @@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y |
| CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" |
| CONFIG_SECTION_MISMATCH_WARN_ONLY=y |
| CONFIG_FRAME_WARN=1280 |
| +CONFIG_THREAD_SHIFT=14 |
| -- |
| 2.18.2 |
| |
| |
| From 09699317fac248b183c945291dffa1a105a1836e Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 6 May 2020 15:33:03 -0600 |
| Subject: [PATCH 087/100] wireguard: socket: remove errant restriction on |
| looping to self |
| |
| commit b673e24aad36981f327a6570412ffa7754de8911 upstream. |
| |
| It's already possible to create two different interfaces and loop |
| packets between them. This has always been possible with tunnels in the |
| kernel, and isn't specific to wireguard. Therefore, the networking stack |
| already needs to deal with that. At the very least, the packet winds up |
| exceeding the MTU and is discarded at that point. So, since this is |
| already something that happens, there's no need to forbid the not very |
| exceptional case of routing a packet back to the same interface; this |
| loop is no different than others, and we shouldn't special case it, but |
| rather rely on generic handling of loops in general. This also makes it |
| easier to do interesting things with wireguard such as onion routing. |
| |
| At the same time, we add a selftest for this, ensuring that both onion |
| routing works and infinite routing loops do not crash the kernel. We |
| also add a test case for wireguard interfaces nesting packets and |
| sending traffic between each other, as well as the loop in this case |
| too. We make sure to send some throughput-heavy traffic for this use |
| case, to stress out any possible recursion issues with the locks around |
| workqueues. |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/socket.c | 12 ----- |
| tools/testing/selftests/wireguard/netns.sh | 54 ++++++++++++++++++++-- |
| 2 files changed, 51 insertions(+), 15 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c |
| index b0d6541582d3..f9018027fc13 100644 |
| |
| |
| @@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, |
| net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", |
| wg->dev->name, &endpoint->addr, ret); |
| goto err; |
| - } else if (unlikely(rt->dst.dev == skb->dev)) { |
| - ip_rt_put(rt); |
| - ret = -ELOOP; |
| - net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", |
| - wg->dev->name, &endpoint->addr); |
| - goto err; |
| } |
| if (cache) |
| dst_cache_set_ip4(cache, &rt->dst, fl.saddr); |
| @@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, |
| net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", |
| wg->dev->name, &endpoint->addr, ret); |
| goto err; |
| - } else if (unlikely(dst->dev == skb->dev)) { |
| - dst_release(dst); |
| - ret = -ELOOP; |
| - net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", |
| - wg->dev->name, &endpoint->addr); |
| - goto err; |
| } |
| if (cache) |
| dst_cache_set_ip6(cache, dst, &fl.saddr); |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index 936e1ca9410e..17a1f53ceba0 100755 |
| |
| |
| @@ -48,8 +48,11 @@ cleanup() { |
| exec 2>/dev/null |
| printf "$orig_message_cost" > /proc/sys/net/core/message_cost |
| ip0 link del dev wg0 |
| + ip0 link del dev wg1 |
| ip1 link del dev wg0 |
| + ip1 link del dev wg1 |
| ip2 link del dev wg0 |
| + ip2 link del dev wg1 |
| local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" |
| [[ -n $to_kill ]] && kill $to_kill |
| pp ip netns del $netns1 |
| @@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2 |
| key1="$(pp wg genkey)" |
| key2="$(pp wg genkey)" |
| key3="$(pp wg genkey)" |
| +key4="$(pp wg genkey)" |
| pub1="$(pp wg pubkey <<<"$key1")" |
| pub2="$(pp wg pubkey <<<"$key2")" |
| pub3="$(pp wg pubkey <<<"$key3")" |
| +pub4="$(pp wg pubkey <<<"$key4")" |
| psk="$(pp wg genpsk)" |
| [[ -n $key1 && -n $key2 && -n $psk ]] |
| |
| configure_peers() { |
| ip1 addr add 192.168.241.1/24 dev wg0 |
| - ip1 addr add fd00::1/24 dev wg0 |
| + ip1 addr add fd00::1/112 dev wg0 |
| |
| ip2 addr add 192.168.241.2/24 dev wg0 |
| - ip2 addr add fd00::2/24 dev wg0 |
| + ip2 addr add fd00::2/112 dev wg0 |
| |
| n1 wg set wg0 \ |
| private-key <(echo "$key1") \ |
| @@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2 |
| n1 wg set wg0 private-key <(echo "$key3") |
| n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove |
| n1 ping -W 1 -c 1 192.168.241.2 |
| +n2 wg set wg0 peer "$pub3" remove |
| + |
| +# Test that we can route wg through wg |
| +ip1 addr flush dev wg0 |
| +ip2 addr flush dev wg0 |
| +ip1 addr add fd00::5:1/112 dev wg0 |
| +ip2 addr add fd00::5:2/112 dev wg0 |
| +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2 |
| +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998 |
| +ip1 link add wg1 type wireguard |
| +ip2 link add wg1 type wireguard |
| +ip1 addr add 192.168.241.1/24 dev wg1 |
| +ip1 addr add fd00::1/112 dev wg1 |
| +ip2 addr add 192.168.241.2/24 dev wg1 |
| +ip2 addr add fd00::2/112 dev wg1 |
| +ip1 link set mtu 1340 up dev wg1 |
| +ip2 link set mtu 1340 up dev wg1 |
| +n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5 |
| +n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5 |
| +tests |
| +# Try to set up a routing loop between the two namespaces |
| +ip1 link set netns $netns0 dev wg1 |
| +ip0 addr add 192.168.241.1/24 dev wg1 |
| +ip0 link set up dev wg1 |
| +n0 ping -W 1 -c 1 192.168.241.2 |
| +n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 |
| +ip2 link del wg0 |
| +ip2 link del wg1 |
| +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel |
| |
| +ip0 link del wg1 |
| ip1 link del wg0 |
| -ip2 link del wg0 |
| |
| # Test using NAT. We now change the topology to this: |
| # ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ |
| @@ -282,6 +316,20 @@ pp sleep 3 |
| n2 ping -W 1 -c 1 192.168.241.1 |
| n1 wg set wg0 peer "$pub2" persistent-keepalive 0 |
| |
| +# Test that onion routing works, even when it loops |
| +n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 |
| +ip1 addr add 192.168.242.1/24 dev wg0 |
| +ip2 link add wg1 type wireguard |
| +ip2 addr add 192.168.242.2/24 dev wg1 |
| +n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32 |
| +ip2 link set wg1 up |
| +n1 ping -W 1 -c 1 192.168.242.2 |
| +ip2 link del wg1 |
| +n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5 |
| +! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel |
| +n1 wg set wg0 peer "$pub3" remove |
| +ip1 addr del 192.168.242.1/24 dev wg0 |
| + |
| # Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. |
| ip1 -6 addr add fc00::9/96 dev vethc |
| ip1 -6 route add default via fc00::1 |
| -- |
| 2.18.2 |
| |
| |
| From 16a4e963577da1e2b33b4f99fe84e99ae73ed4e1 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 6 May 2020 15:33:04 -0600 |
| Subject: [PATCH 088/100] wireguard: send/receive: cond_resched() when |
| processing worker ringbuffers |
| |
| commit 4005f5c3c9d006157ba716594e0d70c88a235c5e upstream. |
| |
| Users with pathological hardware reported CPU stalls on |
| CONFIG_PREEMPT_VOLUNTARY=y, because the ringbuffers would stay full, meaning |
| these workers would never terminate. That turned out not to be okay on |
| systems without forced preemption, which Sultan observed. This commit |
| adds a cond_resched() to the bottom of each loop iteration, so that |
| these workers don't hog the core. Note that we don't need this on the |
| napi poll worker, since that terminates after its budget is expended. |
| |
| Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com> |
| Reported-by: Wang Jian <larkwang@gmail.com> |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 2 ++ |
| drivers/net/wireguard/send.c | 4 ++++ |
| 2 files changed, 6 insertions(+) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 267f202f1931..2566e13a292d 100644 |
| |
| |
| @@ -516,6 +516,8 @@ void wg_packet_decrypt_worker(struct work_struct *work) |
| &PACKET_CB(skb)->keypair->receiving)) ? |
| PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; |
| wg_queue_enqueue_per_peer_napi(skb, state); |
| + if (need_resched()) |
| + cond_resched(); |
| } |
| } |
| |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index 3e030d614df5..dc3079e17c7f 100644 |
| |
| |
| @@ -281,6 +281,8 @@ void wg_packet_tx_worker(struct work_struct *work) |
| |
| wg_noise_keypair_put(keypair, false); |
| wg_peer_put(peer); |
| + if (need_resched()) |
| + cond_resched(); |
| } |
| } |
| |
| @@ -304,6 +306,8 @@ void wg_packet_encrypt_worker(struct work_struct *work) |
| } |
| wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, |
| state); |
| + if (need_resched()) |
| + cond_resched(); |
| } |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From a2045eeb3476532960810da93c17f282e5360573 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 6 May 2020 15:33:05 -0600 |
| Subject: [PATCH 089/100] wireguard: selftests: initalize ipv6 members to NULL |
| to squelch clang warning |
| |
| commit 4fed818ef54b08d4b29200e416cce65546ad5312 upstream. |
| |
| Without setting these to NULL, clang complains in certain |
| configurations that have CONFIG_IPV6=n: |
| |
| In file included from drivers/net/wireguard/ratelimiter.c:223: |
| drivers/net/wireguard/selftest/ratelimiter.c:173:34: error: variable 'skb6' is uninitialized when used here [-Werror,-Wuninitialized] |
| ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); |
| ^~~~ |
| drivers/net/wireguard/selftest/ratelimiter.c:123:29: note: initialize the variable 'skb6' to silence this warning |
| struct sk_buff *skb4, *skb6; |
| ^ |
| = NULL |
| drivers/net/wireguard/selftest/ratelimiter.c:173:40: error: variable 'hdr6' is uninitialized when used here [-Werror,-Wuninitialized] |
| ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); |
| ^~~~ |
| drivers/net/wireguard/selftest/ratelimiter.c:125:22: note: initialize the variable 'hdr6' to silence this warning |
| struct ipv6hdr *hdr6; |
| ^ |
| |
| We silence this warning by setting the variables to NULL as the warning |
| suggests. |
| |
| Reported-by: Arnd Bergmann <arnd@arndb.de> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/selftest/ratelimiter.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c |
| index bcd6462e4540..007cd4457c5f 100644 |
| |
| |
| @@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void) |
| enum { TRIALS_BEFORE_GIVING_UP = 5000 }; |
| bool success = false; |
| int test = 0, trials; |
| - struct sk_buff *skb4, *skb6; |
| + struct sk_buff *skb4, *skb6 = NULL; |
| struct iphdr *hdr4; |
| - struct ipv6hdr *hdr6; |
| + struct ipv6hdr *hdr6 = NULL; |
| |
| if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) |
| return true; |
| -- |
| 2.18.2 |
| |
| |
| From fe7815fa6ff6e3deb4a1b1e74e16759c0616b427 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 6 May 2020 15:33:06 -0600 |
| Subject: [PATCH 090/100] wireguard: send/receive: use explicit unlikely branch |
| instead of implicit coalescing |
| |
| commit 243f2148937adc72bcaaa590d482d599c936efde upstream. |
| |
| It's very unlikely that send will become true. It's nearly always false |
| between 0 and 120 seconds of a session, and in most cases becomes true |
| only between 120 and 121 seconds before becoming false again. So, |
| unlikely(send) is clearly the right option here. |
| |
| What happened before was that we had this complex boolean expression |
| with multiple likely and unlikely clauses nested. Since this is |
| evaluated left-to-right anyway, the whole thing got converted to |
| unlikely. So, we can clean this up to better represent what's going on. |
| |
| The generated code is the same. |
| |
| Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 13 ++++++------- |
| drivers/net/wireguard/send.c | 15 ++++++--------- |
| 2 files changed, 12 insertions(+), 16 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 2566e13a292d..3bb5b9ae7cd1 100644 |
| |
| |
| @@ -226,21 +226,20 @@ void wg_packet_handshake_receive_worker(struct work_struct *work) |
| static void keep_key_fresh(struct wg_peer *peer) |
| { |
| struct noise_keypair *keypair; |
| - bool send = false; |
| + bool send; |
| |
| if (peer->sent_lastminute_handshake) |
| return; |
| |
| rcu_read_lock_bh(); |
| keypair = rcu_dereference_bh(peer->keypairs.current_keypair); |
| - if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && |
| - keypair->i_am_the_initiator && |
| - unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, |
| - REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) |
| - send = true; |
| + send = keypair && READ_ONCE(keypair->sending.is_valid) && |
| + keypair->i_am_the_initiator && |
| + wg_birthdate_has_expired(keypair->sending.birthdate, |
| + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT); |
| rcu_read_unlock_bh(); |
| |
| - if (send) { |
| + if (unlikely(send)) { |
| peer->sent_lastminute_handshake = true; |
| wg_packet_send_queued_handshake_initiation(peer, false); |
| } |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index dc3079e17c7f..6687db699803 100644 |
| |
| |
| @@ -124,20 +124,17 @@ void wg_packet_send_handshake_cookie(struct wg_device *wg, |
| static void keep_key_fresh(struct wg_peer *peer) |
| { |
| struct noise_keypair *keypair; |
| - bool send = false; |
| + bool send; |
| |
| rcu_read_lock_bh(); |
| keypair = rcu_dereference_bh(peer->keypairs.current_keypair); |
| - if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && |
| - (unlikely(atomic64_read(&keypair->sending.counter.counter) > |
| - REKEY_AFTER_MESSAGES) || |
| - (keypair->i_am_the_initiator && |
| - unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, |
| - REKEY_AFTER_TIME))))) |
| - send = true; |
| + send = keypair && READ_ONCE(keypair->sending.is_valid) && |
| + (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || |
| + (keypair->i_am_the_initiator && |
| + wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); |
| rcu_read_unlock_bh(); |
| |
| - if (send) |
| + if (unlikely(send)) |
| wg_packet_send_queued_handshake_initiation(peer, false); |
| } |
| |
| -- |
| 2.18.2 |
| |
| |
| From 9cfdaa1253b4ccb2b8a0fe2a2cabad4ae93ad0d0 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 19 May 2020 22:49:27 -0600 |
| Subject: [PATCH 091/100] wireguard: selftests: use newer iproute2 for gcc-10 |
| |
| commit ee3c1aa3f34b7842c1557cfe5d8c3f7b8c692de8 upstream. |
| |
| gcc-10 switched to defaulting to -fno-common, which broke iproute2-5.4. |
| This was fixed in iproute2-5.6, so switch to that. Because we're after a |
| stable testing surface, we generally don't like to bump these |
| unnecessarily, but in this case, being able to actually build is a basic |
| necessity. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| tools/testing/selftests/wireguard/qemu/Makefile | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile |
| index 28d477683e8a..2dab4f57516d 100644 |
| |
| |
| @@ -44,7 +44,7 @@ endef |
| $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) |
| $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) |
| $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) |
| -$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) |
| +$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) |
| $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) |
| $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) |
| $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) |
| -- |
| 2.18.2 |
| |
| |
| From 6c01e455945ce254bab6ea2a62ccfd0881e2ea27 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 19 May 2020 22:49:28 -0600 |
| Subject: [PATCH 092/100] wireguard: noise: read preshared key while taking |
| lock |
| |
| commit bc67d371256f5c47d824e2eec51e46c8d62d022e upstream. |
| |
| Previously, we read the preshared key after dropping the handshake lock, which |
| isn't an actual crypto issue if it races, but it's still not quite |
| correct. So copy that part of the state into a temporary like we do with |
| the rest of the handshake state variables. Then we can release the lock, |
| operate on the temporary, and zero it out at the end of the function. In |
| performance tests, the impact of this was entirely unnoticeable, probably |
| because those bytes are coming from the same cacheline as other things |
| that are being copied out in the same manner. |
| |
| Reported-by: Matt Dunwoodie <ncon@noconroy.net> |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/noise.c | 6 +++++- |
| 1 file changed, 5 insertions(+), 1 deletion(-) |
| |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| index 708dc61c974f..07eb438a6dee 100644 |
| |
| |
| @@ -715,6 +715,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| u8 e[NOISE_PUBLIC_KEY_LEN]; |
| u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; |
| u8 static_private[NOISE_PUBLIC_KEY_LEN]; |
| + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; |
| |
| down_read(&wg->static_identity.lock); |
| |
| @@ -733,6 +734,8 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); |
| memcpy(ephemeral_private, handshake->ephemeral_private, |
| NOISE_PUBLIC_KEY_LEN); |
| + memcpy(preshared_key, handshake->preshared_key, |
| + NOISE_SYMMETRIC_KEY_LEN); |
| up_read(&handshake->lock); |
| |
| if (state != HANDSHAKE_CREATED_INITIATION) |
| @@ -750,7 +753,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| goto fail; |
| |
| /* psk */ |
| - mix_psk(chaining_key, hash, key, handshake->preshared_key); |
| + mix_psk(chaining_key, hash, key, preshared_key); |
| |
| /* {} */ |
| if (!message_decrypt(NULL, src->encrypted_nothing, |
| @@ -783,6 +786,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, |
| memzero_explicit(chaining_key, NOISE_HASH_LEN); |
| memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); |
| memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); |
| + memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); |
| up_read(&wg->static_identity.lock); |
| return ret_peer; |
| } |
| -- |
| 2.18.2 |
| |
| |
| From 1d53452dea64c2136269ed52b70a21ad6fe351c9 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 19 May 2020 22:49:29 -0600 |
| Subject: [PATCH 093/100] wireguard: queueing: preserve flow hash across packet |
| scrubbing |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit c78a0b4a78839d572d8a80f6a62221c0d7843135 upstream. |
| |
| It's important that we clear most header fields during encapsulation and |
| decapsulation, because the packet is substantially changed, and we don't |
| want any info leak or logic bug due to an accidental correlation. But, |
| for encapsulation, it's wrong to clear skb->hash, since it's used by |
| fq_codel and flow dissection in general. Without it, classification does |
| not proceed as usual. This change might make it easier to estimate the |
| number of inner flows by examining clustering of out of order packets, |
| but this shouldn't open up anything that can't already be inferred |
| otherwise (e.g. syn packet size inference), and fq_codel can be disabled |
| anyway. |
| |
| Furthermore, it might be the case that the hash isn't used or queried at |
| all until after wireguard transmits the encrypted UDP packet, which |
| means skb->hash might still be zero at this point, and thus no hash |
| taken over the inner packet data. In order to address this situation, we |
| force a calculation of skb->hash before encrypting packet data. |
| |
| Of course this means that fq_codel might transmit packets slightly more |
| out of order than usual. Toke did some testing on beefy machines with |
| high quantities of parallel flows and found that increasing the |
| replay-attack counter to 8192 takes care of the most pathological cases |
| pretty well. |
| |
| Reported-by: Dave Taht <dave.taht@gmail.com> |
| Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@toke.dk> |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/messages.h | 2 +- |
| drivers/net/wireguard/queueing.h | 10 +++++++++- |
| drivers/net/wireguard/receive.c | 2 +- |
| drivers/net/wireguard/send.c | 7 ++++++- |
| 4 files changed, 17 insertions(+), 4 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h |
| index b8a7b9ce32ba..208da72673fc 100644 |
| |
| |
| @@ -32,7 +32,7 @@ enum cookie_values { |
| }; |
| |
| enum counter_values { |
| - COUNTER_BITS_TOTAL = 2048, |
| + COUNTER_BITS_TOTAL = 8192, |
| COUNTER_REDUNDANT_BITS = BITS_PER_LONG, |
| COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS |
| }; |
| diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h |
| index 3432232afe06..c58df439dbbe 100644 |
| |
| |
| @@ -87,12 +87,20 @@ static inline bool wg_check_packet_protocol(struct sk_buff *skb) |
| return real_protocol && skb->protocol == real_protocol; |
| } |
| |
| -static inline void wg_reset_packet(struct sk_buff *skb) |
| +static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating) |
| { |
| + u8 l4_hash = skb->l4_hash; |
| + u8 sw_hash = skb->sw_hash; |
| + u32 hash = skb->hash; |
| skb_scrub_packet(skb, true); |
| memset(&skb->headers_start, 0, |
| offsetof(struct sk_buff, headers_end) - |
| offsetof(struct sk_buff, headers_start)); |
| + if (encapsulating) { |
| + skb->l4_hash = l4_hash; |
| + skb->sw_hash = sw_hash; |
| + skb->hash = hash; |
| + } |
| skb->queue_mapping = 0; |
| skb->nohdr = 0; |
| skb->peeked = 0; |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 3bb5b9ae7cd1..d0eebd90c9d5 100644 |
| |
| |
| @@ -484,7 +484,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) |
| if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) |
| goto next; |
| |
| - wg_reset_packet(skb); |
| + wg_reset_packet(skb, false); |
| wg_packet_consume_data_done(peer, skb, &endpoint); |
| free = false; |
| |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index 6687db699803..2f5119ff93d8 100644 |
| |
| |
| @@ -167,6 +167,11 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) |
| struct sk_buff *trailer; |
| int num_frags; |
| |
| + /* Force hash calculation before encryption so that flow analysis is |
| + * consistent over the inner packet. |
| + */ |
| + skb_get_hash(skb); |
| + |
| /* Calculate lengths. */ |
| padding_len = calculate_skb_padding(skb); |
| trailer_len = padding_len + noise_encrypted_len(0); |
| @@ -295,7 +300,7 @@ void wg_packet_encrypt_worker(struct work_struct *work) |
| skb_list_walk_safe(first, skb, next) { |
| if (likely(encrypt_packet(skb, |
| PACKET_CB(first)->keypair))) { |
| - wg_reset_packet(skb); |
| + wg_reset_packet(skb, true); |
| } else { |
| state = PACKET_STATE_DEAD; |
| break; |
| -- |
| 2.18.2 |
| |
| |
| From 0271d514595e74b57a787e6eff78edbfb037037d Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 19 May 2020 22:49:30 -0600 |
| Subject: [PATCH 094/100] wireguard: noise: separate receive counter from send |
| counter |
| |
| commit a9e90d9931f3a474f04bab782ccd9d77904941e9 upstream. |
| |
| In "wireguard: queueing: preserve flow hash across packet scrubbing", we |
| were required to slightly increase the size of the receive replay |
| counter to something still fairly small, but an increase nonetheless. |
| It turns out that we can recoup some of the additional memory overhead |
| by splitting up the prior union type into two distinct types. Before, we |
| used the same "noise_counter" union for both sending and receiving, with |
| sending just using a simple atomic64_t, while receiving used the full |
| replay counter checker. This meant that most of the memory being |
| allocated for the sending counter was being wasted. Since the old |
| "noise_counter" type increased in size in the prior commit, now is a |
| good time to split up that union type into a distinct |
| "noise_replay_counter" for receiving and a boring atomic64_t for sending, |
| each using neither more nor less memory than required. |
| |
| Also, since sometimes the replay counter is accessed without |
| necessitating additional accesses to the bitmap, we can reduce cache |
| misses by hoisting the always-necessary lock above the bitmap in the |
| struct layout. We also change a "noise_replay_counter" stack allocation |
| to kmalloc in a -DDEBUG selftest so that KASAN doesn't trigger a stack |
| frame warning. |
| |
| All in all, removing a bit of abstraction in this commit makes the code |
| simpler and smaller, in addition to the motivating memory usage |
| recuperation. For example, passing around raw "noise_symmetric_key" |
| structs is something that really only makes sense within noise.c, in the |
| one place where the sending and receiving keys can safely be thought of |
| as the same type of object; subsequent to that, it's important that we |
| uniformly access these through keypair->{sending,receiving}, where their |
| distinct roles are always made explicit. So this patch allows us to draw |
| that distinction clearly as well. |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/noise.c | 16 +++------ |
| drivers/net/wireguard/noise.h | 14 ++++---- |
| drivers/net/wireguard/receive.c | 42 ++++++++++++------------ |
| drivers/net/wireguard/selftest/counter.c | 17 +++++++--- |
| drivers/net/wireguard/send.c | 12 +++---- |
| 5 files changed, 48 insertions(+), 53 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| index 07eb438a6dee..626433690abb 100644 |
| |
| |
| @@ -104,6 +104,7 @@ static struct noise_keypair *keypair_create(struct wg_peer *peer) |
| |
| if (unlikely(!keypair)) |
| return NULL; |
| + spin_lock_init(&keypair->receiving_counter.lock); |
| keypair->internal_id = atomic64_inc_return(&keypair_counter); |
| keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; |
| keypair->entry.peer = peer; |
| @@ -358,25 +359,16 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, |
| memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); |
| } |
| |
| -static void symmetric_key_init(struct noise_symmetric_key *key) |
| -{ |
| - spin_lock_init(&key->counter.receive.lock); |
| - atomic64_set(&key->counter.counter, 0); |
| - memset(key->counter.receive.backtrack, 0, |
| - sizeof(key->counter.receive.backtrack)); |
| - key->birthdate = ktime_get_coarse_boottime_ns(); |
| - key->is_valid = true; |
| -} |
| - |
| static void derive_keys(struct noise_symmetric_key *first_dst, |
| struct noise_symmetric_key *second_dst, |
| const u8 chaining_key[NOISE_HASH_LEN]) |
| { |
| + u64 birthdate = ktime_get_coarse_boottime_ns(); |
| kdf(first_dst->key, second_dst->key, NULL, NULL, |
| NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, |
| chaining_key); |
| - symmetric_key_init(first_dst); |
| - symmetric_key_init(second_dst); |
| + first_dst->birthdate = second_dst->birthdate = birthdate; |
| + first_dst->is_valid = second_dst->is_valid = true; |
| } |
| |
| static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], |
| diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h |
| index f532d59d3f19..c527253dba80 100644 |
| |
| |
| @@ -15,18 +15,14 @@ |
| #include <linux/mutex.h> |
| #include <linux/kref.h> |
| |
| -union noise_counter { |
| - struct { |
| - u64 counter; |
| - unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; |
| - spinlock_t lock; |
| - } receive; |
| - atomic64_t counter; |
| +struct noise_replay_counter { |
| + u64 counter; |
| + spinlock_t lock; |
| + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; |
| }; |
| |
| struct noise_symmetric_key { |
| u8 key[NOISE_SYMMETRIC_KEY_LEN]; |
| - union noise_counter counter; |
| u64 birthdate; |
| bool is_valid; |
| }; |
| @@ -34,7 +30,9 @@ struct noise_symmetric_key { |
| struct noise_keypair { |
| struct index_hashtable_entry entry; |
| struct noise_symmetric_key sending; |
| + atomic64_t sending_counter; |
| struct noise_symmetric_key receiving; |
| + struct noise_replay_counter receiving_counter; |
| __le32 remote_index; |
| bool i_am_the_initiator; |
| struct kref refcount; |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index d0eebd90c9d5..91438144e4f7 100644 |
| |
| |
| @@ -245,20 +245,20 @@ static void keep_key_fresh(struct wg_peer *peer) |
| } |
| } |
| |
| -static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) |
| +static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) |
| { |
| struct scatterlist sg[MAX_SKB_FRAGS + 8]; |
| struct sk_buff *trailer; |
| unsigned int offset; |
| int num_frags; |
| |
| - if (unlikely(!key)) |
| + if (unlikely(!keypair)) |
| return false; |
| |
| - if (unlikely(!READ_ONCE(key->is_valid) || |
| - wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || |
| - key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { |
| - WRITE_ONCE(key->is_valid, false); |
| + if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || |
| + wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || |
| + keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { |
| + WRITE_ONCE(keypair->receiving.is_valid, false); |
| return false; |
| } |
| |
| @@ -283,7 +283,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) |
| |
| if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, |
| PACKET_CB(skb)->nonce, |
| - key->key)) |
| + keypair->receiving.key)) |
| return false; |
| |
| /* Another ugly situation of pushing and pulling the header so as to |
| @@ -298,41 +298,41 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) |
| } |
| |
| /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ |
| -static bool counter_validate(union noise_counter *counter, u64 their_counter) |
| +static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter) |
| { |
| unsigned long index, index_current, top, i; |
| bool ret = false; |
| |
| - spin_lock_bh(&counter->receive.lock); |
| + spin_lock_bh(&counter->lock); |
| |
| - if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || |
| + if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 || |
| their_counter >= REJECT_AFTER_MESSAGES)) |
| goto out; |
| |
| ++their_counter; |
| |
| if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < |
| - counter->receive.counter)) |
| + counter->counter)) |
| goto out; |
| |
| index = their_counter >> ilog2(BITS_PER_LONG); |
| |
| - if (likely(their_counter > counter->receive.counter)) { |
| - index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); |
| + if (likely(their_counter > counter->counter)) { |
| + index_current = counter->counter >> ilog2(BITS_PER_LONG); |
| top = min_t(unsigned long, index - index_current, |
| COUNTER_BITS_TOTAL / BITS_PER_LONG); |
| for (i = 1; i <= top; ++i) |
| - counter->receive.backtrack[(i + index_current) & |
| + counter->backtrack[(i + index_current) & |
| ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; |
| - counter->receive.counter = their_counter; |
| + counter->counter = their_counter; |
| } |
| |
| index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; |
| ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), |
| - &counter->receive.backtrack[index]); |
| + &counter->backtrack[index]); |
| |
| out: |
| - spin_unlock_bh(&counter->receive.lock); |
| + spin_unlock_bh(&counter->lock); |
| return ret; |
| } |
| |
| @@ -472,12 +472,12 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) |
| if (unlikely(state != PACKET_STATE_CRYPTED)) |
| goto next; |
| |
| - if (unlikely(!counter_validate(&keypair->receiving.counter, |
| + if (unlikely(!counter_validate(&keypair->receiving_counter, |
| PACKET_CB(skb)->nonce))) { |
| net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", |
| peer->device->dev->name, |
| PACKET_CB(skb)->nonce, |
| - keypair->receiving.counter.receive.counter); |
| + keypair->receiving_counter.counter); |
| goto next; |
| } |
| |
| @@ -511,8 +511,8 @@ void wg_packet_decrypt_worker(struct work_struct *work) |
| struct sk_buff *skb; |
| |
| while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { |
| - enum packet_state state = likely(decrypt_packet(skb, |
| - &PACKET_CB(skb)->keypair->receiving)) ? |
| + enum packet_state state = |
| + likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? |
| PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; |
| wg_queue_enqueue_per_peer_napi(skb, state); |
| if (need_resched()) |
| diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c |
| index f4fbb9072ed7..ec3c156bf91b 100644 |
| |
| |
| @@ -6,18 +6,24 @@ |
| #ifdef DEBUG |
| bool __init wg_packet_counter_selftest(void) |
| { |
| + struct noise_replay_counter *counter; |
| unsigned int test_num = 0, i; |
| - union noise_counter counter; |
| bool success = true; |
| |
| -#define T_INIT do { \ |
| - memset(&counter, 0, sizeof(union noise_counter)); \ |
| - spin_lock_init(&counter.receive.lock); \ |
| + counter = kmalloc(sizeof(*counter), GFP_KERNEL); |
| + if (unlikely(!counter)) { |
| + pr_err("nonce counter self-test malloc: FAIL\n"); |
| + return false; |
| + } |
| + |
| +#define T_INIT do { \ |
| + memset(counter, 0, sizeof(*counter)); \ |
| + spin_lock_init(&counter->lock); \ |
| } while (0) |
| #define T_LIM (COUNTER_WINDOW_SIZE + 1) |
| #define T(n, v) do { \ |
| ++test_num; \ |
| - if (counter_validate(&counter, n) != (v)) { \ |
| + if (counter_validate(counter, n) != (v)) { \ |
| pr_err("nonce counter self-test %u: FAIL\n", \ |
| test_num); \ |
| success = false; \ |
| @@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(void) |
| |
| if (success) |
| pr_info("nonce counter self-tests: pass\n"); |
| + kfree(counter); |
| return success; |
| } |
| #endif |
| diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c |
| index 2f5119ff93d8..f74b9341ab0f 100644 |
| |
| |
| @@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_peer *peer) |
| rcu_read_lock_bh(); |
| keypair = rcu_dereference_bh(peer->keypairs.current_keypair); |
| send = keypair && READ_ONCE(keypair->sending.is_valid) && |
| - (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || |
| + (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES || |
| (keypair->i_am_the_initiator && |
| wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); |
| rcu_read_unlock_bh(); |
| @@ -349,7 +349,6 @@ void wg_packet_purge_staged_packets(struct wg_peer *peer) |
| |
| void wg_packet_send_staged_packets(struct wg_peer *peer) |
| { |
| - struct noise_symmetric_key *key; |
| struct noise_keypair *keypair; |
| struct sk_buff_head packets; |
| struct sk_buff *skb; |
| @@ -369,10 +368,9 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) |
| rcu_read_unlock_bh(); |
| if (unlikely(!keypair)) |
| goto out_nokey; |
| - key = &keypair->sending; |
| - if (unlikely(!READ_ONCE(key->is_valid))) |
| + if (unlikely(!READ_ONCE(keypair->sending.is_valid))) |
| goto out_nokey; |
| - if (unlikely(wg_birthdate_has_expired(key->birthdate, |
| + if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, |
| REJECT_AFTER_TIME))) |
| goto out_invalid; |
| |
| @@ -387,7 +385,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) |
| */ |
| PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); |
| PACKET_CB(skb)->nonce = |
| - atomic64_inc_return(&key->counter.counter) - 1; |
| + atomic64_inc_return(&keypair->sending_counter) - 1; |
| if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) |
| goto out_invalid; |
| } |
| @@ -399,7 +397,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) |
| return; |
| |
| out_invalid: |
| - WRITE_ONCE(key->is_valid, false); |
| + WRITE_ONCE(keypair->sending.is_valid, false); |
| out_nokey: |
| wg_noise_keypair_put(keypair, false); |
| |
| -- |
| 2.18.2 |
| |
| |
| From cab5c845e26d1d60febea5920bd88bda8f2a7ed4 Mon Sep 17 00:00:00 2001 |
| From: Frank Werner-Krippendorf <mail@hb9fxq.ch> |
| Date: Tue, 23 Jun 2020 03:59:44 -0600 |
| Subject: [PATCH 095/100] wireguard: noise: do not assign initiation time in if |
| condition |
| |
| commit 558b353c9c2a717509f291c066c6bd8f5f5e21be upstream. |
| |
| Fixes an error condition reported by checkpatch.pl, which was caused by |
| assigning a variable in an if condition in |
| wg_noise_handshake_consume_initiation(). |
| |
| Signed-off-by: Frank Werner-Krippendorf <mail@hb9fxq.ch> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/noise.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c |
| index 626433690abb..201a22681945 100644 |
| |
| |
| @@ -617,8 +617,8 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, |
| memcpy(handshake->hash, hash, NOISE_HASH_LEN); |
| memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); |
| handshake->remote_index = src->sender_index; |
| - if ((s64)(handshake->last_initiation_consumption - |
| - (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) |
| + initiation_consumption = ktime_get_coarse_boottime_ns(); |
| + if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0) |
| handshake->last_initiation_consumption = initiation_consumption; |
| handshake->state = HANDSHAKE_CONSUMED_INITIATION; |
| up_write(&handshake->lock); |
| -- |
| 2.18.2 |
| |
| |
| From 86f957cb5a047391d80b789f971a8f1ebd70b216 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Tue, 23 Jun 2020 03:59:45 -0600 |
| Subject: [PATCH 096/100] wireguard: device: avoid circular netns references |
| |
| commit 900575aa33a3eaaef802b31de187a85c4a4b4bd0 upstream. |
| |
| Before, we took a reference to the creating netns if the new netns was |
| different. This caused issues with circular references, with two |
| wireguard interfaces swapping namespaces. The solution is to rather not |
| take any extra references at all, but instead simply invalidate the |
| creating netns pointer when that netns is deleted. |
| |
| In order to prevent this from happening again, this commit improves the |
| rough object leak tracking by allowing it to account for created and |
| destroyed interfaces, aside from just peers and keys. That then makes it |
| possible to check for the object leak when having two interfaces take a |
| reference to each other's namespaces. |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.c | 58 ++++++++++------------ |
| drivers/net/wireguard/device.h | 3 +- |
| drivers/net/wireguard/netlink.c | 14 ++++-- |
| drivers/net/wireguard/socket.c | 25 +++++++--- |
| tools/testing/selftests/wireguard/netns.sh | 13 ++++- |
| 5 files changed, 67 insertions(+), 46 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| index 3ac3f8570ca1..a8f151b1b5fa 100644 |
| |
| |
| @@ -45,17 +45,18 @@ static int wg_open(struct net_device *dev) |
| if (dev_v6) |
| dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; |
| |
| + mutex_lock(&wg->device_update_lock); |
| ret = wg_socket_init(wg, wg->incoming_port); |
| if (ret < 0) |
| - return ret; |
| - mutex_lock(&wg->device_update_lock); |
| + goto out; |
| list_for_each_entry(peer, &wg->peer_list, peer_list) { |
| wg_packet_send_staged_packets(peer); |
| if (peer->persistent_keepalive_interval) |
| wg_packet_send_keepalive(peer); |
| } |
| +out: |
| mutex_unlock(&wg->device_update_lock); |
| - return 0; |
| + return ret; |
| } |
| |
| #ifdef CONFIG_PM_SLEEP |
| @@ -225,6 +226,7 @@ static void wg_destruct(struct net_device *dev) |
| list_del(&wg->device_list); |
| rtnl_unlock(); |
| mutex_lock(&wg->device_update_lock); |
| + rcu_assign_pointer(wg->creating_net, NULL); |
| wg->incoming_port = 0; |
| wg_socket_reinit(wg, NULL, NULL); |
| /* The final references are cleared in the below calls to destroy_workqueue. */ |
| @@ -240,13 +242,11 @@ static void wg_destruct(struct net_device *dev) |
| skb_queue_purge(&wg->incoming_handshakes); |
| free_percpu(dev->tstats); |
| free_percpu(wg->incoming_handshakes_worker); |
| - if (wg->have_creating_net_ref) |
| - put_net(wg->creating_net); |
| kvfree(wg->index_hashtable); |
| kvfree(wg->peer_hashtable); |
| mutex_unlock(&wg->device_update_lock); |
| |
| - pr_debug("%s: Interface deleted\n", dev->name); |
| + pr_debug("%s: Interface destroyed\n", dev->name); |
| free_netdev(dev); |
| } |
| |
| @@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, |
| struct wg_device *wg = netdev_priv(dev); |
| int ret = -ENOMEM; |
| |
| - wg->creating_net = src_net; |
| + rcu_assign_pointer(wg->creating_net, src_net); |
| init_rwsem(&wg->static_identity.lock); |
| mutex_init(&wg->socket_update_lock); |
| mutex_init(&wg->device_update_lock); |
| @@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __read_mostly = { |
| .newlink = wg_newlink, |
| }; |
| |
| -static int wg_netdevice_notification(struct notifier_block *nb, |
| - unsigned long action, void *data) |
| +static void wg_netns_pre_exit(struct net *net) |
| { |
| - struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; |
| - struct wg_device *wg = netdev_priv(dev); |
| - |
| - ASSERT_RTNL(); |
| - |
| - if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) |
| - return 0; |
| + struct wg_device *wg; |
| |
| - if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { |
| - put_net(wg->creating_net); |
| - wg->have_creating_net_ref = false; |
| - } else if (dev_net(dev) != wg->creating_net && |
| - !wg->have_creating_net_ref) { |
| - wg->have_creating_net_ref = true; |
| - get_net(wg->creating_net); |
| + rtnl_lock(); |
| + list_for_each_entry(wg, &device_list, device_list) { |
| + if (rcu_access_pointer(wg->creating_net) == net) { |
| + pr_debug("%s: Creating namespace exiting\n", wg->dev->name); |
| + netif_carrier_off(wg->dev); |
| + mutex_lock(&wg->device_update_lock); |
| + rcu_assign_pointer(wg->creating_net, NULL); |
| + wg_socket_reinit(wg, NULL, NULL); |
| + mutex_unlock(&wg->device_update_lock); |
| + } |
| } |
| - return 0; |
| + rtnl_unlock(); |
| } |
| |
| -static struct notifier_block netdevice_notifier = { |
| - .notifier_call = wg_netdevice_notification |
| +static struct pernet_operations pernet_ops = { |
| + .pre_exit = wg_netns_pre_exit |
| }; |
| |
| int __init wg_device_init(void) |
| @@ -429,18 +425,18 @@ int __init wg_device_init(void) |
| return ret; |
| #endif |
| |
| - ret = register_netdevice_notifier(&netdevice_notifier); |
| + ret = register_pernet_device(&pernet_ops); |
| if (ret) |
| goto error_pm; |
| |
| ret = rtnl_link_register(&link_ops); |
| if (ret) |
| - goto error_netdevice; |
| + goto error_pernet; |
| |
| return 0; |
| |
| -error_netdevice: |
| - unregister_netdevice_notifier(&netdevice_notifier); |
| +error_pernet: |
| + unregister_pernet_device(&pernet_ops); |
| error_pm: |
| #ifdef CONFIG_PM_SLEEP |
| unregister_pm_notifier(&pm_notifier); |
| @@ -451,7 +447,7 @@ int __init wg_device_init(void) |
| void wg_device_uninit(void) |
| { |
| rtnl_link_unregister(&link_ops); |
| - unregister_netdevice_notifier(&netdevice_notifier); |
| + unregister_pernet_device(&pernet_ops); |
| #ifdef CONFIG_PM_SLEEP |
| unregister_pm_notifier(&pm_notifier); |
| #endif |
| diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h |
| index b15a8be9d816..4d0144e16947 100644 |
| |
| |
| @@ -40,7 +40,7 @@ struct wg_device { |
| struct net_device *dev; |
| struct crypt_queue encrypt_queue, decrypt_queue; |
| struct sock __rcu *sock4, *sock6; |
| - struct net *creating_net; |
| + struct net __rcu *creating_net; |
| struct noise_static_identity static_identity; |
| struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; |
| struct workqueue_struct *packet_crypt_wq; |
| @@ -56,7 +56,6 @@ struct wg_device { |
| unsigned int num_peers, device_update_gen; |
| u32 fwmark; |
| u16 incoming_port; |
| - bool have_creating_net_ref; |
| }; |
| |
| int wg_device_init(void); |
| diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c |
| index ab6cbe95a652..9756239416fd 100644 |
| |
| |
| @@ -517,11 +517,15 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) |
| if (flags & ~__WGDEVICE_F_ALL) |
| goto out; |
| |
| - ret = -EPERM; |
| - if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || |
| - info->attrs[WGDEVICE_A_FWMARK]) && |
| - !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) |
| - goto out; |
| + if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { |
| + struct net *net; |
| + rcu_read_lock(); |
| + net = rcu_dereference(wg->creating_net); |
| + ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; |
| + rcu_read_unlock(); |
| + if (ret) |
| + goto out; |
| + } |
| |
| ++wg->device_update_gen; |
| |
| diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c |
| index f9018027fc13..c33e2c81635f 100644 |
| |
| |
| @@ -347,6 +347,7 @@ static void set_sock_opts(struct socket *sock) |
| |
| int wg_socket_init(struct wg_device *wg, u16 port) |
| { |
| + struct net *net; |
| int ret; |
| struct udp_tunnel_sock_cfg cfg = { |
| .sk_user_data = wg, |
| @@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, u16 port) |
| }; |
| #endif |
| |
| + rcu_read_lock(); |
| + net = rcu_dereference(wg->creating_net); |
| + net = net ? maybe_get_net(net) : NULL; |
| + rcu_read_unlock(); |
| + if (unlikely(!net)) |
| + return -ENONET; |
| + |
| #if IS_ENABLED(CONFIG_IPV6) |
| retry: |
| #endif |
| |
| - ret = udp_sock_create(wg->creating_net, &port4, &new4); |
| + ret = udp_sock_create(net, &port4, &new4); |
| if (ret < 0) { |
| pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); |
| - return ret; |
| + goto out; |
| } |
| set_sock_opts(new4); |
| - setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); |
| + setup_udp_tunnel_sock(net, new4, &cfg); |
| |
| #if IS_ENABLED(CONFIG_IPV6) |
| if (ipv6_mod_enabled()) { |
| port6.local_udp_port = inet_sk(new4->sk)->inet_sport; |
| - ret = udp_sock_create(wg->creating_net, &port6, &new6); |
| + ret = udp_sock_create(net, &port6, &new6); |
| if (ret < 0) { |
| udp_tunnel_sock_release(new4); |
| if (ret == -EADDRINUSE && !port && retries++ < 100) |
| goto retry; |
| pr_err("%s: Could not create IPv6 socket\n", |
| wg->dev->name); |
| - return ret; |
| + goto out; |
| } |
| set_sock_opts(new6); |
| - setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); |
| + setup_udp_tunnel_sock(net, new6, &cfg); |
| } |
| #endif |
| |
| wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); |
| - return 0; |
| + ret = 0; |
| +out: |
| + put_net(net); |
| + return ret; |
| } |
| |
| void wg_socket_reinit(struct wg_device *wg, struct sock *new4, |
| diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh |
| index 17a1f53ceba0..d77f4829f1e0 100755 |
| |
| |
| @@ -587,9 +587,20 @@ ip0 link set wg0 up |
| kill $ncat_pid |
| ip0 link del wg0 |
| |
| +# Ensure there aren't circular reference loops |
| +ip1 link add wg1 type wireguard |
| +ip2 link add wg2 type wireguard |
| +ip1 link set wg1 netns $netns2 |
| +ip2 link set wg2 netns $netns1 |
| +pp ip netns delete $netns1 |
| +pp ip netns delete $netns2 |
| +pp ip netns add $netns1 |
| +pp ip netns add $netns2 |
| + |
| +sleep 2 # Wait for cleanup and grace periods |
| declare -A objects |
| while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do |
| - [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue |
| + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue |
| objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" |
| done < /dev/kmsg |
| alldeleted=1 |
| -- |
| 2.18.2 |
| |
| |
| From b5ad616118347eb41cdf4723a47efc820eb3de72 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Wed, 24 Jun 2020 16:06:03 -0600 |
| Subject: [PATCH 097/100] wireguard: receive: account for napi_gro_receive |
| never returning GRO_DROP |
| |
| commit df08126e3833e9dca19e2407db5f5860a7c194fb upstream. |
| |
| The napi_gro_receive function no longer returns GRO_DROP ever, making |
| handling GRO_DROP dead code. This commit removes that dead code. |
| Further, it's not even clear that device drivers have any business in |
| taking action after passing off received packets; that's arguably out of |
| their hands. |
| |
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") |
| Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/receive.c | 10 ++-------- |
| 1 file changed, 2 insertions(+), 8 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 91438144e4f7..9b2ab6fc91cd 100644 |
| |
| |
| @@ -414,14 +414,8 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, |
| if (unlikely(routed_peer != peer)) |
| goto dishonest_packet_peer; |
| |
| - if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { |
| - ++dev->stats.rx_dropped; |
| - net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", |
| - dev->name, peer->internal_id, |
| - &peer->endpoint.addr); |
| - } else { |
| - update_rx_stats(peer, message_data_len(len_before_trim)); |
| - } |
| + napi_gro_receive(&peer->napi, skb); |
| + update_rx_stats(peer, message_data_len(len_before_trim)); |
| return; |
| |
| dishonest_packet_peer: |
| -- |
| 2.18.2 |
| |
| |
| From 65e2cc153a28545822075c4615807ffb848c634d Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 29 Jun 2020 19:06:18 -0600 |
| Subject: [PATCH 098/100] net: ip_tunnel: add header_ops for layer 3 devices |
| |
| commit 2606aff916854b61234bf85001be9777bab2d5f8 upstream. |
| |
| Some devices that take straight up layer 3 packets benefit from having a |
| shared header_ops so that AF_PACKET sockets can inject packets that are |
| recognized. This shared infrastructure will be used by other drivers |
| that currently can't inject packets using AF_PACKET. It also exposes the |
| parser function, as it is useful in standalone form too. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Acked-by: Willem de Bruijn <willemb@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| include/net/ip_tunnels.h | 3 +++ |
| net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++ |
| 2 files changed, 21 insertions(+) |
| |
| diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h |
| index af645604f328..b0b03a9f7af9 100644 |
| |
| |
| @@ -289,6 +289,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], |
| struct ip_tunnel_parm *p, __u32 fwmark); |
| void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); |
| |
| +extern const struct header_ops ip_tunnel_header_ops; |
| +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); |
| + |
| struct ip_tunnel_encap_ops { |
| size_t (*encap_hlen)(struct ip_tunnel_encap *e); |
| int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, |
| diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c |
| index 1452a97914a0..cfe21c3ddfc2 100644 |
| |
| |
| @@ -446,3 +446,21 @@ void ip_tunnel_unneed_metadata(void) |
| static_branch_dec(&ip_tunnel_metadata_cnt); |
| } |
| EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); |
| + |
| +/* Returns either the correct skb->protocol value, or 0 if invalid. */ |
| +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) |
| +{ |
| + if (skb_network_header(skb) >= skb->head && |
| + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && |
| + ip_hdr(skb)->version == 4) |
| + return htons(ETH_P_IP); |
| + if (skb_network_header(skb) >= skb->head && |
| + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && |
| + ipv6_hdr(skb)->version == 6) |
| + return htons(ETH_P_IPV6); |
| + return 0; |
| +} |
| +EXPORT_SYMBOL(ip_tunnel_parse_protocol); |
| + |
| +const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; |
| +EXPORT_SYMBOL(ip_tunnel_header_ops); |
| -- |
| 2.18.2 |
| |
| |
| From 7aa05817924e993a55069d9b6304cd432f8800c3 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 29 Jun 2020 19:06:20 -0600 |
| Subject: [PATCH 099/100] wireguard: implement header_ops->parse_protocol for |
| AF_PACKET |
| |
| commit 01a4967c71c004f8ecad4ab57021348636502fa9 upstream. |
| |
| WireGuard uses skb->protocol to determine packet type, and bails out if |
| it's not set or set to something it's not expecting. For AF_PACKET |
| injection, we need to support its call chain of: |
| |
| packet_sendmsg -> packet_snd -> packet_parse_headers -> |
| dev_parse_header_protocol -> parse_protocol |
| |
| Without a valid parse_protocol, this returns zero, and wireguard then |
| rejects the skb. So, this wires up the ip_tunnel handler for layer 3 |
| packets for that case. |
| |
| Reported-by: Hans Wippel <ndev@hwipl.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/device.c | 1 + |
| 1 file changed, 1 insertion(+) |
| |
| diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c |
| index a8f151b1b5fa..c9f65e96ccb0 100644 |
| |
| |
| @@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev) |
| max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); |
| |
| dev->netdev_ops = &netdev_ops; |
| + dev->header_ops = &ip_tunnel_header_ops; |
| dev->hard_header_len = 0; |
| dev->addr_len = 0; |
| dev->needed_headroom = DATA_PACKET_HEAD_ROOM; |
| -- |
| 2.18.2 |
| |
| |
| From c1445fa819ec6f562548bdd9cb2d3c24cd654f81 Mon Sep 17 00:00:00 2001 |
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| Date: Mon, 29 Jun 2020 19:06:21 -0600 |
| Subject: [PATCH 100/100] wireguard: queueing: make use of |
| ip_tunnel_parse_protocol |
| |
| commit 1a574074ae7d1d745c16f7710655f38a53174c27 upstream. |
| |
| Now that wg_examine_packet_protocol has been added for general |
| consumption as ip_tunnel_parse_protocol, it's possible to remove |
| wg_examine_packet_protocol and simply use the new |
| ip_tunnel_parse_protocol function directly. |
| |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| |
| drivers/net/wireguard/queueing.h | 19 ++----------------- |
| drivers/net/wireguard/receive.c | 2 +- |
| 2 files changed, 3 insertions(+), 18 deletions(-) |
| |
| diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h |
| index c58df439dbbe..dfb674e03076 100644 |
| |
| |
| @@ -11,6 +11,7 @@ |
| #include <linux/skbuff.h> |
| #include <linux/ip.h> |
| #include <linux/ipv6.h> |
| +#include <net/ip_tunnels.h> |
| |
| struct wg_device; |
| struct wg_peer; |
| @@ -65,25 +66,9 @@ struct packet_cb { |
| #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) |
| #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) |
| |
| -/* Returns either the correct skb->protocol value, or 0 if invalid. */ |
| -static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) |
| -{ |
| - if (skb_network_header(skb) >= skb->head && |
| - (skb_network_header(skb) + sizeof(struct iphdr)) <= |
| - skb_tail_pointer(skb) && |
| - ip_hdr(skb)->version == 4) |
| - return htons(ETH_P_IP); |
| - if (skb_network_header(skb) >= skb->head && |
| - (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= |
| - skb_tail_pointer(skb) && |
| - ipv6_hdr(skb)->version == 6) |
| - return htons(ETH_P_IPV6); |
| - return 0; |
| -} |
| - |
| static inline bool wg_check_packet_protocol(struct sk_buff *skb) |
| { |
| - __be16 real_protocol = wg_examine_packet_protocol(skb); |
| + __be16 real_protocol = ip_tunnel_parse_protocol(skb); |
| return real_protocol && skb->protocol == real_protocol; |
| } |
| |
| diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c |
| index 9b2ab6fc91cd..2c9551ea6dc7 100644 |
| |
| |
| @@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, |
| */ |
| skb->ip_summed = CHECKSUM_UNNECESSARY; |
| skb->csum_level = ~0; /* All levels */ |
| - skb->protocol = wg_examine_packet_protocol(skb); |
| + skb->protocol = ip_tunnel_parse_protocol(skb); |
| if (skb->protocol == htons(ETH_P_IP)) { |
| len = ntohs(ip_hdr(skb)->tot_len); |
| if (unlikely(len < sizeof(struct iphdr))) |
| -- |
| 2.18.2 |
| |