adenilson / rpms / zlib

Forked from rpms/zlib 8 months ago
Clone
65fa5e
From d1155b9ab9a2ef643ec82285d1fb767dcfd00d16 Mon Sep 17 00:00:00 2001
65fa5e
From: Ondrej Dubaj <odubaj@redhat.com>
65fa5e
Date: Thu, 1 Aug 2019 12:17:06 +0200
65fa5e
Subject: [PATCH] Optimized CRC32 for POWER 8+ architectures.
65fa5e
65fa5e
---
65fa5e
 Makefile.in                           |    8 +
65fa5e
 configure                             |   77 ++
65fa5e
 contrib/power8-crc/clang_workaround.h |   82 ++
65fa5e
 contrib/power8-crc/crc32_constants.h  | 1206 +++++++++++++++++++++++++
65fa5e
 contrib/power8-crc/vec_crc32.c        |  674 ++++++++++++++
65fa5e
 crc32.c                               |  100 +-
65fa5e
 6 files changed, 2135 insertions(+), 12 deletions(-)
65fa5e
 create mode 100644 contrib/power8-crc/clang_workaround.h
65fa5e
 create mode 100644 contrib/power8-crc/crc32_constants.h
65fa5e
 create mode 100644 contrib/power8-crc/vec_crc32.c
65fa5e
65fa5e
diff --git a/Makefile.in b/Makefile.in
65fa5e
index b7bdbf2..55f6489 100644
65fa5e
--- a/Makefile.in
65fa5e
+++ b/Makefile.in
65fa5e
@@ -167,6 +167,9 @@ minigzip64.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
65fa5e
 adler32.o: $(SRCDIR)adler32.c
65fa5e
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c
65fa5e
 
65fa5e
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
65fa5e
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
65fa5e
+
65fa5e
 crc32.o: $(SRCDIR)crc32.c
65fa5e
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
65fa5e
 
65fa5e
@@ -215,6 +218,11 @@ adler32.lo: $(SRCDIR)adler32.c
65fa5e
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c
65fa5e
 	-@mv objs/adler32.o $@
65fa5e
 
65fa5e
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
65fa5e
+	-@mkdir objs 2>/dev/null || test -d objs
65fa5e
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
65fa5e
+	-@mv objs/crc32_power8.o $@
65fa5e
+
65fa5e
 crc32.lo: $(SRCDIR)crc32.c
65fa5e
 	-@mkdir objs 2>/dev/null || test -d objs
65fa5e
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
65fa5e
diff --git a/configure b/configure
65fa5e
index cd9eeef..e93ff99 100755
65fa5e
--- a/configure
65fa5e
+++ b/configure
65fa5e
@@ -839,6 +839,83 @@ else
65fa5e
     echo "Checking for sys/sdt.h ... No." | tee -a configure.log
65fa5e
 fi
65fa5e
 
65fa5e
+# test to see if Power8+ implementation is compile time possible
65fa5e
+echo >> configure.log
65fa5e
+cat > $test.c <<EOF
65fa5e
+#if _ARCH_PWR8==1
65fa5e
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
65fa5e
+#error "Clang vector instructions aren't big endian compatible"
65fa5e
+#endif
65fa5e
+#if defined(__BUILTIN_CPU_SUPPORTS__)
65fa5e
+/* good and easy */
65fa5e
+#else
65fa5e
+#include <sys/auxv.h>
65fa5e
+#include <bits/hwcap.h>
65fa5e
+int main()
65fa5e
+{
65fa5e
+  return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
65fa5e
+}
65fa5e
+#endif
65fa5e
+#else
65fa5e
+#error No Power 8 or newer architecture, may need -mcpu=power8
65fa5e
+#endif
65fa5e
+EOF
65fa5e
+
65fa5e
+if tryboth $CC -c $CFLAGS $test.c; then
65fa5e
+  OBJC="$OBJC crc32_power8.o"
65fa5e
+  PIC_OBJC="$PIC_OBJC crc32_power8.lo"
65fa5e
+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
65fa5e
+else
65fa5e
+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
65fa5e
+fi
65fa5e
+
65fa5e
+# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
65fa5e
+echo >> configure.log
65fa5e
+cat > $test.c <<EOF
65fa5e
+static unsigned int crc32_real_optimized(unsigned int crc, unsigned char *p,
65fa5e
+                          unsigned long len)
65fa5e
+{
65fa5e
+        return 1;
65fa5e
+}
65fa5e
+static unsigned int (*(crc32_ifunc(void)))(unsigned int, unsigned char *,unsigned long)
65fa5e
+{
65fa5e
+  return crc32_real_optimized;
65fa5e
+}
65fa5e
+unsigned int crc32(unsigned int, unsigned char *,unsigned long)
65fa5e
+                    __attribute__ ((ifunc ("crc32_ifunc")));
65fa5e
+EOF
65fa5e
+
65fa5e
+if tryboth $CC -c $CFLAGS $test.c; then
65fa5e
+  SFLAGS="${SFLAGS} -DZ_IFUNC_NATIVE"
65fa5e
+  echo "Checking for attribute(ifunc) support... Yes." | tee -a configure.log
65fa5e
+else
65fa5e
+  echo "Checking for attribute(ifunc) support... No." | tee -a configure.log
65fa5e
+
65fa5e
+  # alternatively, can we use a gnu indirection using __asm__ attributes to detect and load optimized code at runtime
65fa5e
+  echo >> configure.log
65fa5e
+  cat > $test.c <<EOF
65fa5e
+static unsigned int crc32_real_optimized(unsigned int crc, unsigned char *p,
65fa5e
+                          unsigned long len)
65fa5e
+{
65fa5e
+        return 1;
65fa5e
+}
65fa5e
+static unsigned int (*(crc32_ifunc(void)))(unsigned int, unsigned char *,unsigned long)
65fa5e
+       __asm__ ("crc32");
65fa5e
+static unsigned int (*(crc32_ifunc(void)))(unsigned int, unsigned char *,unsigned long)
65fa5e
+{
65fa5e
+  return crc32_real_optimized;
65fa5e
+}
65fa5e
+__asm__(".type crc32, %gnu_indirect_function");
65fa5e
+EOF
65fa5e
+
65fa5e
+  if tryboth $CC -c $CFLAGS $test.c; then
65fa5e
+    SFLAGS="${SFLAGS} -DZ_IFUNC_ASM"
65fa5e
+    echo "Checking for asm .type %gnu_indirect_function support... Yes." | tee -a configure.log
65fa5e
+  else
65fa5e
+    echo "Checking for asm .type %gnu_indirect_function support... No." | tee -a configure.log
65fa5e
+  fi
65fa5e
+fi
65fa5e
+
65fa5e
 # show the results in the log
65fa5e
 echo >> configure.log
65fa5e
 echo ALL = $ALL >> configure.log
65fa5e
diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
65fa5e
new file mode 100644
65fa5e
index 0000000..09c411b
65fa5e
--- /dev/null
65fa5e
+++ b/contrib/power8-crc/clang_workaround.h
65fa5e
@@ -0,0 +1,82 @@
65fa5e
+#ifndef CLANG_WORKAROUNDS_H
65fa5e
+#define CLANG_WORKAROUNDS_H
65fa5e
+
65fa5e
+/*
65fa5e
+ * These stubs fix clang incompatibilities with GCC builtins.
65fa5e
+ */
65fa5e
+
65fa5e
+#ifndef __builtin_crypto_vpmsumw
65fa5e
+#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
65fa5e
+#endif
65fa5e
+#ifndef __builtin_crypto_vpmsumd
65fa5e
+#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb
65fa5e
+#endif
65fa5e
+
65fa5e
+static inline
65fa5e
+__vector unsigned long long __attribute__((overloadable))
65fa5e
+vec_ld(int __a, const __vector unsigned long long* __b)
65fa5e
+{
65fa5e
+	return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b);
65fa5e
+}
65fa5e
+
65fa5e
+/*
65fa5e
+ * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang
65fa5e
+ * does not recognize this type. On GCC this builtin is translated to a
65fa5e
+ * xxpermdi instruction that only moves the registers __a, __b instead generates
65fa5e
+ * a load.
65fa5e
+ *
65fa5e
+ * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0.
65fa5e
+ */
65fa5e
+static inline
65fa5e
+__vector unsigned long long  __builtin_pack_vector (unsigned long __a,
65fa5e
+						    unsigned long __b)
65fa5e
+{
65fa5e
+	#if defined(__BIG_ENDIAN__)
65fa5e
+	__vector unsigned long long __v = {__a, __b};
65fa5e
+	#else
65fa5e
+	__vector unsigned long long __v = {__b, __a};
65fa5e
+	#endif
65fa5e
+	return __v;
65fa5e
+}
65fa5e
+
65fa5e
+#ifndef vec_xxpermdi
65fa5e
+
65fa5e
+static inline
65fa5e
+unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
65fa5e
+				       int __o)
65fa5e
+{
65fa5e
+	return __v[__o];
65fa5e
+}
65fa5e
+
65fa5e
+#if defined(__BIG_ENDIAN__)
65fa5e
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
65fa5e
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
65fa5e
+#else
65fa5e
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
65fa5e
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
65fa5e
+#endif
65fa5e
+
65fa5e
+#else
65fa5e
+
65fa5e
+static inline
65fa5e
+unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
65fa5e
+{
65fa5e
+	#if defined(__BIG_ENDIAN__)
65fa5e
+	return vec_xxpermdi(__v, __v, 0x0)[1];
65fa5e
+	#else
65fa5e
+	return vec_xxpermdi(__v, __v, 0x0)[0];
65fa5e
+	#endif
65fa5e
+}
65fa5e
+
65fa5e
+static inline
65fa5e
+unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
65fa5e
+{
65fa5e
+	#if defined(__BIG_ENDIAN__)
65fa5e
+	return vec_xxpermdi(__v, __v, 0x3)[1];
65fa5e
+	#else
65fa5e
+	return vec_xxpermdi(__v, __v, 0x3)[0];
65fa5e
+	#endif
65fa5e
+}
65fa5e
+#endif /* vec_xxpermdi */
65fa5e
+
65fa5e
+#endif
65fa5e
\ No newline at end of file
65fa5e
diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h
65fa5e
new file mode 100644
65fa5e
index 0000000..58088dc
65fa5e
--- /dev/null
65fa5e
+++ b/contrib/power8-crc/crc32_constants.h
65fa5e
@@ -0,0 +1,1206 @@
65fa5e
+/*
65fa5e
+*
65fa5e
+* THIS FILE IS GENERATED WITH
65fa5e
+./crc32_constants -c -r -x 0x04C11DB7 
65fa5e
+
65fa5e
+* This is from https://github.com/antonblanchard/crc32-vpmsum/
65fa5e
+* DO NOT MODIFY IT MANUALLY!
65fa5e
+*
65fa5e
+*/
65fa5e
+
65fa5e
+#define CRC 0x4c11db7
65fa5e
+#define CRC_XOR
65fa5e
+#define REFLECT
65fa5e
+#define MAX_SIZE    32768
65fa5e
+
65fa5e
+#ifndef __ASSEMBLER__
65fa5e
+#ifdef CRC_TABLE
65fa5e
+static const unsigned int crc_table[] = {
65fa5e
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
65fa5e
+	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
65fa5e
+	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
65fa5e
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
65fa5e
+	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
65fa5e
+	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
65fa5e
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
65fa5e
+	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
65fa5e
+	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
65fa5e
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
65fa5e
+	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
65fa5e
+	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
65fa5e
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
65fa5e
+	0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
65fa5e
+	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
65fa5e
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
65fa5e
+	0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
65fa5e
+	0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
65fa5e
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
65fa5e
+	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
65fa5e
+	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
65fa5e
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
65fa5e
+	0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
65fa5e
+	0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
65fa5e
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
65fa5e
+	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
65fa5e
+	0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
65fa5e
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
65fa5e
+	0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
65fa5e
+	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
65fa5e
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
65fa5e
+	0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
65fa5e
+	0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
65fa5e
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
65fa5e
+	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
65fa5e
+	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
65fa5e
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
65fa5e
+	0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
65fa5e
+	0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
65fa5e
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
65fa5e
+	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
65fa5e
+	0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
65fa5e
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
65fa5e
+	0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
65fa5e
+	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
65fa5e
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
65fa5e
+	0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
65fa5e
+	0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
65fa5e
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
65fa5e
+	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
65fa5e
+	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
65fa5e
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
65fa5e
+	0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
65fa5e
+	0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
65fa5e
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
65fa5e
+	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
65fa5e
+	0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
65fa5e
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
65fa5e
+	0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
65fa5e
+	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
65fa5e
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
65fa5e
+	0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
65fa5e
+	0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
65fa5e
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,};
65fa5e
+
65fa5e
+#endif /* CRC_TABLE */
65fa5e
+#ifdef POWER8_INTRINSICS
65fa5e
+
65fa5e
+/* Constants */
65fa5e
+
65fa5e
+/* Reduce 262144 bits to 1024 bits */
65fa5e
+static const __vector unsigned long long vcrc_const[255]
65fa5e
+	__attribute__((aligned (16))) = {
65fa5e
+#ifdef __LITTLE_ENDIAN__
65fa5e
+		/* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000099ea94a8, 0x00000001651797d2 },
65fa5e
+		/* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000945a8420, 0x0000000021e0d56c },
65fa5e
+		/* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000030762706, 0x000000000f95ecaa },
65fa5e
+		/* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a52fc582, 0x00000001ebd224ac },
65fa5e
+		/* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a4a7167a, 0x000000000ccb97ca },
65fa5e
+		/* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000c18249a, 0x00000001006ec8a8 },
65fa5e
+		/* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a924ae7c, 0x000000014f58f196 },
65fa5e
+		/* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e12ccc12, 0x00000001a7192ca6 },
65fa5e
+		/* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a0b9d4ac, 0x000000019a64bab2 },
65fa5e
+		/* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000095e8ddfe, 0x0000000014f4ed2e },
65fa5e
+		/* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000233fddc4, 0x000000011092b6a2 },
65fa5e
+		/* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b4529b62, 0x00000000c8a1629c },
65fa5e
+		/* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a7fa0e64, 0x000000017bf32e8e },
65fa5e
+		/* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b5334592, 0x00000001f8cc6582 },
65fa5e
+		/* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011f8ee1b4, 0x000000008631ddf0 },
65fa5e
+		/* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006252e632, 0x000000007e5a76d0 },
65fa5e
+		/* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ab973e84, 0x000000002b09b31c },
65fa5e
+		/* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007734f5ec, 0x00000001b2df1f84 },
65fa5e
+		/* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007c547798, 0x00000001d6f56afc },
65fa5e
+		/* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007ec40210, 0x00000001b9b5e70c },
65fa5e
+		/* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ab1695a8, 0x0000000034b626d2 },
65fa5e
+		/* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000090494bba, 0x000000014c53479a },
65fa5e
+		/* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001123fb816, 0x00000001a6d179a4 },
65fa5e
+		/* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e188c74c, 0x000000015abd16b4 },
65fa5e
+		/* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c2d3451c, 0x00000000018f9852 },
65fa5e
+		/* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f55cf1ca, 0x000000001fb3084a },
65fa5e
+		/* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a0531540, 0x00000000c53dfb04 },
65fa5e
+		/* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000132cd7ebc, 0x00000000e10c9ad6 },
65fa5e
+		/* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000073ab7f36, 0x0000000025aa994a },
65fa5e
+		/* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000041aed1c2, 0x00000000fa3a74c4 },
65fa5e
+		/* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000136c53800, 0x0000000033eb3f40 },
65fa5e
+		/* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000126835a30, 0x000000017193f296 },
65fa5e
+		/* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006241b502, 0x0000000043f6c86a },
65fa5e
+		/* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d5196ad4, 0x000000016b513ec6 },
65fa5e
+		/* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009cfa769a, 0x00000000c8f25b4e },
65fa5e
+		/* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000920e5df4, 0x00000001a45048ec },
65fa5e
+		/* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000169dc310e, 0x000000000c441004 },
65fa5e
+		/* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000009fc331c, 0x000000000e17cad6 },
65fa5e
+		/* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010d94a81e, 0x00000001253ae964 },
65fa5e
+		/* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000027a20ab2, 0x00000001d7c88ebc },
65fa5e
+		/* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000114f87504, 0x00000001e7ca913a },
65fa5e
+		/* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004b076d96, 0x0000000033ed078a },
65fa5e
+		/* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000da4d1e74, 0x00000000e1839c78 },
65fa5e
+		/* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001b81f672, 0x00000001322b267e },
65fa5e
+		/* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009367c988, 0x00000000638231b6 },
65fa5e
+		/* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001717214ca, 0x00000001ee7f16f4 },
65fa5e
+		/* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009f47d820, 0x0000000117d9924a },
65fa5e
+		/* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010d9a47d2, 0x00000000e1a9e0c4 },
65fa5e
+		/* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a696c58c, 0x00000001403731dc },
65fa5e
+		/* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002aa28ec6, 0x00000001a5ea9682 },
65fa5e
+		/* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001fe18fd9a, 0x0000000101c5c578 },
65fa5e
+		/* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019d4fc1ae, 0x00000000dddf6494 },
65fa5e
+		/* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ba0e3dea, 0x00000000f1c3db28 },
65fa5e
+		/* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000074b59a5e, 0x000000013112fb9c },
65fa5e
+		/* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f2b5ea98, 0x00000000b680b906 },
65fa5e
+		/* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000187132676, 0x000000001a282932 },
65fa5e
+		/* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010a8c6ad4, 0x0000000089406e7e },
65fa5e
+		/* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e21dfe70, 0x00000001def6be8c },
65fa5e
+		/* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001da0050e4, 0x0000000075258728 },
65fa5e
+		/* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000772172ae, 0x000000019536090a },
65fa5e
+		/* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e47724aa, 0x00000000f2455bfc },
65fa5e
+		/* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
65fa5e
+		{ 0x000000003cd63ac4, 0x000000018c40baf4 },
65fa5e
+		/* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001bf47d352, 0x000000004cd390d4 },
65fa5e
+		/* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018dc1d708, 0x00000001e4ece95a },
65fa5e
+		/* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002d4620a4, 0x000000001a3ee918 },
65fa5e
+		/* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000058fd1740, 0x000000007c652fb8 },
65fa5e
+		/* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000dadd9bfc, 0x000000011c67842c },
65fa5e
+		/* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ea2140be, 0x00000000254f759c },
65fa5e
+		/* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009de128ba, 0x000000007ece94ca },
65fa5e
+		/* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013ac3aa8e, 0x0000000038f258c2 },
65fa5e
+		/* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000099980562, 0x00000001cdf17b00 },
65fa5e
+		/* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c1579c86, 0x000000011f882c16 },
65fa5e
+		/* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000068dbbf94, 0x0000000100093fc8 },
65fa5e
+		/* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004509fb04, 0x00000001cd684f16 },
65fa5e
+		/* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001202f6398, 0x000000004bc6a70a },
65fa5e
+		/* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013aea243e, 0x000000004fc7e8e4 },
65fa5e
+		/* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b4052ae6, 0x0000000130103f1c },
65fa5e
+		/* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001cd2a0ae8, 0x0000000111b0024c },
65fa5e
+		/* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001fe4aa8b4, 0x000000010b3079da },
65fa5e
+		/* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d1559a42, 0x000000010192bcc2 },
65fa5e
+		/* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f3e05ecc, 0x0000000074838d50 },
65fa5e
+		/* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000104ddd2cc, 0x000000001b20f520 },
65fa5e
+		/* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015393153c, 0x0000000050c3590a },
65fa5e
+		/* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000057e942c6, 0x00000000b41cac8e },
65fa5e
+		/* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
65fa5e
+		{ 0x000000012c633850, 0x000000000c72cc78 },
65fa5e
+		/* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ebcaae4c, 0x0000000030cdb032 },
65fa5e
+		/* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013ee532a6, 0x000000013e09fc32 },
65fa5e
+		/* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001bf0cbc7e, 0x000000001ed624d2 },
65fa5e
+		/* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d50b7a5a, 0x00000000781aee1a },
65fa5e
+		/* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000002fca6e8, 0x00000001c4d8348c },
65fa5e
+		/* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007af40044, 0x0000000057a40336 },
65fa5e
+		/* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000016178744, 0x0000000085544940 },
65fa5e
+		/* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014c177458, 0x000000019cd21e80 },
65fa5e
+		/* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011b6ddf04, 0x000000013eb95bc0 },
65fa5e
+		/* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f3e29ccc, 0x00000001dfc9fdfc },
65fa5e
+		/* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000135ae7562, 0x00000000cd028bc2 },
65fa5e
+		/* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000190ef812c, 0x0000000090db8c44 },
65fa5e
+		/* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000067a2c786, 0x000000010010a4ce },
65fa5e
+		/* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000048b9496c, 0x00000001c8f4c72c },
65fa5e
+		/* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015a422de6, 0x000000001c26170c },
65fa5e
+		/* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ef0e3640, 0x00000000e3fccf68 },
65fa5e
+		/* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001006d2d26, 0x00000000d513ed24 },
65fa5e
+		/* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001170d56d6, 0x00000000141beada },
65fa5e
+		/* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a5fb613c, 0x000000011071aea0 },
65fa5e
+		/* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000040bbf7fc, 0x000000012e19080a },
65fa5e
+		/* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016ac3a5b2, 0x0000000100ecf826 },
65fa5e
+		/* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000abf16230, 0x0000000069b09412 },
65fa5e
+		/* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ebe23fac, 0x0000000122297bac },
65fa5e
+		/* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
65fa5e
+		{ 0x000000008b6a0894, 0x00000000e9e4b068 },
65fa5e
+		/* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001288ea478, 0x000000004b38651a },
65fa5e
+		/* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016619c442, 0x00000001468360e2 },
65fa5e
+		/* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000086230038, 0x00000000121c2408 },
65fa5e
+		/* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017746a756, 0x00000000da7e7d08 },
65fa5e
+		/* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000191b8f8f8, 0x00000001058d7652 },
65fa5e
+		/* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
65fa5e
+		{ 0x000000008e167708, 0x000000014a098a90 },
65fa5e
+		/* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000148b22d54, 0x0000000020dbe72e },
65fa5e
+		/* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000044ba2c3c, 0x000000011e7323e8 },
65fa5e
+		/* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b54d2b52, 0x00000000d5d4bf94 },
65fa5e
+		/* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000005a4fd8a, 0x0000000199d8746c },
65fa5e
+		/* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000139f9fc46, 0x00000000ce9ca8a0 },
65fa5e
+		/* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015a1fa824, 0x00000000136edece },
65fa5e
+		/* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000a61ae4c, 0x000000019b92a068 },
65fa5e
+		/* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000145e9113e, 0x0000000071d62206 },
65fa5e
+		/* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006a348448, 0x00000000dfc50158 },
65fa5e
+		/* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004d80a08c, 0x00000001517626bc },
65fa5e
+		/* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014b6837a0, 0x0000000148d1e4fa },
65fa5e
+		/* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016896a7fc, 0x0000000094d8266e },
65fa5e
+		/* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014f187140, 0x00000000606c5e34 },
65fa5e
+		/* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019581b9da, 0x000000019766beaa },
65fa5e
+		/* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001091bc984, 0x00000001d80c506c },
65fa5e
+		/* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001067223c, 0x000000001e73837c },
65fa5e
+		/* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ab16ea02, 0x0000000064d587de },
65fa5e
+		/* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013c4598a8, 0x00000000f4a507b0 },
65fa5e
+		/* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b3735430, 0x0000000040e342fc },
65fa5e
+		/* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001bb3fc0c0, 0x00000001d5ad9c3a },
65fa5e
+		/* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001570ae19c, 0x0000000094a691a4 },
65fa5e
+		/* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ea910712, 0x00000001271ecdfa },
65fa5e
+		/* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000167127128, 0x000000009e54475a },
65fa5e
+		/* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000019e790a2, 0x00000000c9c099ee },
65fa5e
+		/* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
65fa5e
+		{ 0x000000003788f710, 0x000000009a2f736c },
65fa5e
+		/* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001682a160e, 0x00000000bb9f4996 },
65fa5e
+		/* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007f0ebd2e, 0x00000001db688050 },
65fa5e
+		/* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002b032080, 0x00000000e9b10af4 },
65fa5e
+		/* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000cfd1664a, 0x000000012d4545e4 },
65fa5e
+		/* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000aa1181c2, 0x000000000361139c },
65fa5e
+		/* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ddd08002, 0x00000001a5a1a3a8 },
65fa5e
+		/* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e8dd0446, 0x000000006844e0b0 },
65fa5e
+		/* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001bbd94a00, 0x00000000c3762f28 },
65fa5e
+		/* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ab6cd180, 0x00000001d26287a2 },
65fa5e
+		/* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000031803ce2, 0x00000001f6f0bba8 },
65fa5e
+		/* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000024f40b0c, 0x000000002ffabd62 },
65fa5e
+		/* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ba1d9834, 0x00000000fb4516b8 },
65fa5e
+		/* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000104de61aa, 0x000000018cfa961c },
65fa5e
+		/* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000113e40d46, 0x000000019e588d52 },
65fa5e
+		/* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001415598a0, 0x00000001180f0bbc },
65fa5e
+		/* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000bf6c8c90, 0x00000000e1d9177a },
65fa5e
+		/* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001788b0504, 0x0000000105abc27c },
65fa5e
+		/* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000038385d02, 0x00000000972e4a58 },
65fa5e
+		/* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b6c83844, 0x0000000183499a5e },
65fa5e
+		/* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000051061a8a, 0x00000001c96a8cca },
65fa5e
+		/* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017351388a, 0x00000001a1a5b60c },
65fa5e
+		/* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000132928f92, 0x00000000e4b6ac9c },
65fa5e
+		/* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e6b4f48a, 0x00000001807e7f5a },
65fa5e
+		/* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000039d15e90, 0x000000017a7e3bc8 },
65fa5e
+		/* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000312d6074, 0x00000000d73975da },
65fa5e
+		/* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017bbb2cc4, 0x000000017375d038 },
65fa5e
+		/* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016ded3e18, 0x00000000193680bc },
65fa5e
+		/* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f1638b16, 0x00000000999b06f6 },
65fa5e
+		/* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d38b9ecc, 0x00000001f685d2b8 },
65fa5e
+		/* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018b8d09dc, 0x00000001f4ecbed2 },
65fa5e
+		/* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e7bc27d2, 0x00000000ba16f1a0 },
65fa5e
+		/* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000275e1e96, 0x0000000115aceac4 },
65fa5e
+		/* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e2e3031e, 0x00000001aeff6292 },
65fa5e
+		/* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001041c84d8, 0x000000009640124c },
65fa5e
+		/* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000706ce672, 0x0000000114f41f02 },
65fa5e
+		/* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015d5070da, 0x000000009c5f3586 },
65fa5e
+		/* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000038f9493a, 0x00000001878275fa },
65fa5e
+		/* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a3348a76, 0x00000000ddc42ce8 },
65fa5e
+		/* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ad0aab92, 0x0000000181d2c73a },
65fa5e
+		/* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019e85f712, 0x0000000141c9320a },
65fa5e
+		/* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
65fa5e
+		{ 0x000000005a871e76, 0x000000015235719a },
65fa5e
+		/* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017249c662, 0x00000000be27d804 },
65fa5e
+		/* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
65fa5e
+		{ 0x000000003a084712, 0x000000006242d45a },
65fa5e
+		/* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ed438478, 0x000000009a53638e },
65fa5e
+		/* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000abac34cc, 0x00000001001ecfb6 },
65fa5e
+		/* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
65fa5e
+		{ 0x000000005f35ef3e, 0x000000016d7c2d64 },
65fa5e
+		/* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000047d6608c, 0x00000001d0ce46c0 },
65fa5e
+		/* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002d01470e, 0x0000000124c907b4 },
65fa5e
+		/* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000158bbc7b0, 0x0000000018a555ca },
65fa5e
+		/* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c0a23e8e, 0x000000006b0980bc },
65fa5e
+		/* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ebd85c88, 0x000000008bbba964 },
65fa5e
+		/* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019ee20bb2, 0x00000001070a5a1e },
65fa5e
+		/* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001acabf2d6, 0x000000002204322a },
65fa5e
+		/* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b7963d56, 0x00000000a27524d0 },
65fa5e
+		/* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017bffa1fe, 0x0000000020b1e4ba },
65fa5e
+		/* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001f15333e, 0x0000000032cc27fc },
65fa5e
+		/* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018593129e, 0x0000000044dd22b8 },
65fa5e
+		/* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019cb32602, 0x00000000dffc9e0a },
65fa5e
+		/* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000142b05cc8, 0x00000001b7a0ed14 },
65fa5e
+		/* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001be49e7a4, 0x00000000c7842488 },
65fa5e
+		/* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000108f69d6c, 0x00000001c02a4fee },
65fa5e
+		/* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006c0971f0, 0x000000003c273778 },
65fa5e
+		/* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
65fa5e
+		{ 0x000000005b16467a, 0x00000001d63f8894 },
65fa5e
+		/* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001551a628e, 0x000000006be557d6 },
65fa5e
+		/* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019e42ea92, 0x000000006a7806ea },
65fa5e
+		/* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
65fa5e
+		{ 0x000000012fa83ff2, 0x000000016155aa0c },
65fa5e
+		/* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011ca9cde0, 0x00000000908650ac },
65fa5e
+		/* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c8e5cd74, 0x00000000aa5a8084 },
65fa5e
+		/* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000096c27f0c, 0x0000000191bb500a },
65fa5e
+		/* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002baed926, 0x0000000064e9bed0 },
65fa5e
+		/* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017c8de8d2, 0x000000009444f302 },
65fa5e
+		/* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d43d6068, 0x000000019db07d3c },
65fa5e
+		/* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000cb2c4b26, 0x00000001359e3e6e },
65fa5e
+		/* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000145b8da26, 0x00000001e4f10dd2 },
65fa5e
+		/* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018fff4b08, 0x0000000124f5735e },
65fa5e
+		/* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000150b58ed0, 0x0000000124760a4c },
65fa5e
+		/* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001549f39bc, 0x000000000f1fc186 },
65fa5e
+		/* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ef4d2f42, 0x00000000150e4cc4 },
65fa5e
+		/* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b1468572, 0x000000002a6204e8 },
65fa5e
+		/* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013d7403b2, 0x00000000beb1d432 },
65fa5e
+		/* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a4681842, 0x0000000135f3f1f0 },
65fa5e
+		/* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000167714492, 0x0000000074fe2232 },
65fa5e
+		/* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e599099a, 0x000000001ac6e2ba },
65fa5e
+		/* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fe128194, 0x0000000013fca91e },
65fa5e
+		/* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000077e8b990, 0x0000000183f4931e },
65fa5e
+		/* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a267f63a, 0x00000000b6d9b4e4 },
65fa5e
+		/* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001945c245a, 0x00000000b5188656 },
65fa5e
+		/* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000149002e76, 0x0000000027a81a84 },
65fa5e
+		/* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001bb8310a4, 0x0000000125699258 },
65fa5e
+		/* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019ec60bcc, 0x00000001b23de796 },
65fa5e
+		/* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
65fa5e
+		{ 0x000000012d8590ae, 0x00000000fe4365dc },
65fa5e
+		/* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000065b00684, 0x00000000c68f497a },
65fa5e
+		/* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015e5aeadc, 0x00000000fbf521ee },
65fa5e
+		/* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b77ff2b0, 0x000000015eac3378 },
65fa5e
+		/* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000188da2ff6, 0x0000000134914b90 },
65fa5e
+		/* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000063da929a, 0x0000000016335cfe },
65fa5e
+		/* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001389caa80, 0x000000010372d10c },
65fa5e
+		/* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013db599d2, 0x000000015097b908 },
65fa5e
+		/* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000122505a86, 0x00000001227a7572 },
65fa5e
+		/* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016bd72746, 0x000000009a8f75c0 },
65fa5e
+		/* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c3faf1d4, 0x00000000682c77a2 },
65fa5e
+		/* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001111c826c, 0x00000000231f091c },
65fa5e
+		/* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000153e9fb2, 0x000000007d4439f2 },
65fa5e
+		/* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002b1f7b60, 0x000000017e221efc },
65fa5e
+		/* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b1dba570, 0x0000000167457c38 },
65fa5e
+		/* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f6397b76, 0x00000000bdf081c4 },
65fa5e
+		/* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000156335214, 0x000000016286d6b0 },
65fa5e
+		/* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d70e3986, 0x00000000c84f001c },
65fa5e
+		/* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
65fa5e
+		{ 0x000000003701a774, 0x0000000064efe7c0 },
65fa5e
+		/* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ac81ef72, 0x000000000ac2d904 },
65fa5e
+		/* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000133212464, 0x00000000fd226d14 },
65fa5e
+		/* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e4e45610, 0x000000011cfd42e0 },
65fa5e
+		/* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000c1bd370, 0x000000016e5a5678 },
65fa5e
+		/* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a7b9e7a6, 0x00000001d888fe22 },
65fa5e
+		/* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007d657a10, 0x00000001af77fcd4 }
65fa5e
+#else /* __LITTLE_ENDIAN__ */
65fa5e
+		/* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001651797d2, 0x0000000099ea94a8 },
65fa5e
+		/* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000021e0d56c, 0x00000000945a8420 },
65fa5e
+		/* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000f95ecaa, 0x0000000030762706 },
65fa5e
+		/* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ebd224ac, 0x00000001a52fc582 },
65fa5e
+		/* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000ccb97ca, 0x00000001a4a7167a },
65fa5e
+		/* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001006ec8a8, 0x000000000c18249a },
65fa5e
+		/* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014f58f196, 0x00000000a924ae7c },
65fa5e
+		/* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a7192ca6, 0x00000001e12ccc12 },
65fa5e
+		/* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019a64bab2, 0x00000000a0b9d4ac },
65fa5e
+		/* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000014f4ed2e, 0x0000000095e8ddfe },
65fa5e
+		/* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011092b6a2, 0x00000000233fddc4 },
65fa5e
+		/* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c8a1629c, 0x00000001b4529b62 },
65fa5e
+		/* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017bf32e8e, 0x00000001a7fa0e64 },
65fa5e
+		/* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f8cc6582, 0x00000001b5334592 },
65fa5e
+		/* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
65fa5e
+		{ 0x000000008631ddf0, 0x000000011f8ee1b4 },
65fa5e
+		/* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007e5a76d0, 0x000000006252e632 },
65fa5e
+		/* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002b09b31c, 0x00000000ab973e84 },
65fa5e
+		/* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b2df1f84, 0x000000007734f5ec },
65fa5e
+		/* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d6f56afc, 0x000000007c547798 },
65fa5e
+		/* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b9b5e70c, 0x000000007ec40210 },
65fa5e
+		/* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000034b626d2, 0x00000001ab1695a8 },
65fa5e
+		/* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014c53479a, 0x0000000090494bba },
65fa5e
+		/* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a6d179a4, 0x00000001123fb816 },
65fa5e
+		/* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015abd16b4, 0x00000001e188c74c },
65fa5e
+		/* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000018f9852, 0x00000001c2d3451c },
65fa5e
+		/* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001fb3084a, 0x00000000f55cf1ca },
65fa5e
+		/* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c53dfb04, 0x00000001a0531540 },
65fa5e
+		/* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e10c9ad6, 0x0000000132cd7ebc },
65fa5e
+		/* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000025aa994a, 0x0000000073ab7f36 },
65fa5e
+		/* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fa3a74c4, 0x0000000041aed1c2 },
65fa5e
+		/* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000033eb3f40, 0x0000000136c53800 },
65fa5e
+		/* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017193f296, 0x0000000126835a30 },
65fa5e
+		/* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000043f6c86a, 0x000000006241b502 },
65fa5e
+		/* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016b513ec6, 0x00000000d5196ad4 },
65fa5e
+		/* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c8f25b4e, 0x000000009cfa769a },
65fa5e
+		/* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a45048ec, 0x00000000920e5df4 },
65fa5e
+		/* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000c441004, 0x0000000169dc310e },
65fa5e
+		/* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000e17cad6, 0x0000000009fc331c },
65fa5e
+		/* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001253ae964, 0x000000010d94a81e },
65fa5e
+		/* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d7c88ebc, 0x0000000027a20ab2 },
65fa5e
+		/* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e7ca913a, 0x0000000114f87504 },
65fa5e
+		/* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000033ed078a, 0x000000004b076d96 },
65fa5e
+		/* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e1839c78, 0x00000000da4d1e74 },
65fa5e
+		/* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001322b267e, 0x000000001b81f672 },
65fa5e
+		/* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000638231b6, 0x000000009367c988 },
65fa5e
+		/* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001ee7f16f4, 0x00000001717214ca },
65fa5e
+		/* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000117d9924a, 0x000000009f47d820 },
65fa5e
+		/* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e1a9e0c4, 0x000000010d9a47d2 },
65fa5e
+		/* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001403731dc, 0x00000000a696c58c },
65fa5e
+		/* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a5ea9682, 0x000000002aa28ec6 },
65fa5e
+		/* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000101c5c578, 0x00000001fe18fd9a },
65fa5e
+		/* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000dddf6494, 0x000000019d4fc1ae },
65fa5e
+		/* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f1c3db28, 0x00000001ba0e3dea },
65fa5e
+		/* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013112fb9c, 0x0000000074b59a5e },
65fa5e
+		/* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b680b906, 0x00000000f2b5ea98 },
65fa5e
+		/* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001a282932, 0x0000000187132676 },
65fa5e
+		/* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000089406e7e, 0x000000010a8c6ad4 },
65fa5e
+		/* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001def6be8c, 0x00000001e21dfe70 },
65fa5e
+		/* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000075258728, 0x00000001da0050e4 },
65fa5e
+		/* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019536090a, 0x00000000772172ae },
65fa5e
+		/* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f2455bfc, 0x00000000e47724aa },
65fa5e
+		/* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018c40baf4, 0x000000003cd63ac4 },
65fa5e
+		/* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004cd390d4, 0x00000001bf47d352 },
65fa5e
+		/* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e4ece95a, 0x000000018dc1d708 },
65fa5e
+		/* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001a3ee918, 0x000000002d4620a4 },
65fa5e
+		/* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007c652fb8, 0x0000000058fd1740 },
65fa5e
+		/* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011c67842c, 0x00000000dadd9bfc },
65fa5e
+		/* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000254f759c, 0x00000001ea2140be },
65fa5e
+		/* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007ece94ca, 0x000000009de128ba },
65fa5e
+		/* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000038f258c2, 0x000000013ac3aa8e },
65fa5e
+		/* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001cdf17b00, 0x0000000099980562 },
65fa5e
+		/* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011f882c16, 0x00000001c1579c86 },
65fa5e
+		/* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000100093fc8, 0x0000000068dbbf94 },
65fa5e
+		/* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001cd684f16, 0x000000004509fb04 },
65fa5e
+		/* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004bc6a70a, 0x00000001202f6398 },
65fa5e
+		/* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004fc7e8e4, 0x000000013aea243e },
65fa5e
+		/* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000130103f1c, 0x00000001b4052ae6 },
65fa5e
+		/* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000111b0024c, 0x00000001cd2a0ae8 },
65fa5e
+		/* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010b3079da, 0x00000001fe4aa8b4 },
65fa5e
+		/* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010192bcc2, 0x00000001d1559a42 },
65fa5e
+		/* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000074838d50, 0x00000001f3e05ecc },
65fa5e
+		/* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001b20f520, 0x0000000104ddd2cc },
65fa5e
+		/* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000050c3590a, 0x000000015393153c },
65fa5e
+		/* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b41cac8e, 0x0000000057e942c6 },
65fa5e
+		/* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000c72cc78, 0x000000012c633850 },
65fa5e
+		/* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000030cdb032, 0x00000000ebcaae4c },
65fa5e
+		/* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013e09fc32, 0x000000013ee532a6 },
65fa5e
+		/* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001ed624d2, 0x00000001bf0cbc7e },
65fa5e
+		/* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000781aee1a, 0x00000000d50b7a5a },
65fa5e
+		/* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c4d8348c, 0x0000000002fca6e8 },
65fa5e
+		/* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000057a40336, 0x000000007af40044 },
65fa5e
+		/* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000085544940, 0x0000000016178744 },
65fa5e
+		/* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019cd21e80, 0x000000014c177458 },
65fa5e
+		/* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
65fa5e
+		{ 0x000000013eb95bc0, 0x000000011b6ddf04 },
65fa5e
+		/* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001dfc9fdfc, 0x00000001f3e29ccc },
65fa5e
+		/* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000cd028bc2, 0x0000000135ae7562 },
65fa5e
+		/* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000090db8c44, 0x0000000190ef812c },
65fa5e
+		/* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010010a4ce, 0x0000000067a2c786 },
65fa5e
+		/* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c8f4c72c, 0x0000000048b9496c },
65fa5e
+		/* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001c26170c, 0x000000015a422de6 },
65fa5e
+		/* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e3fccf68, 0x00000001ef0e3640 },
65fa5e
+		/* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d513ed24, 0x00000001006d2d26 },
65fa5e
+		/* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000141beada, 0x00000001170d56d6 },
65fa5e
+		/* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011071aea0, 0x00000000a5fb613c },
65fa5e
+		/* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
65fa5e
+		{ 0x000000012e19080a, 0x0000000040bbf7fc },
65fa5e
+		/* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000100ecf826, 0x000000016ac3a5b2 },
65fa5e
+		/* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000069b09412, 0x00000000abf16230 },
65fa5e
+		/* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000122297bac, 0x00000001ebe23fac },
65fa5e
+		/* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e9e4b068, 0x000000008b6a0894 },
65fa5e
+		/* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
65fa5e
+		{ 0x000000004b38651a, 0x00000001288ea478 },
65fa5e
+		/* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001468360e2, 0x000000016619c442 },
65fa5e
+		/* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000121c2408, 0x0000000086230038 },
65fa5e
+		/* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000da7e7d08, 0x000000017746a756 },
65fa5e
+		/* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001058d7652, 0x0000000191b8f8f8 },
65fa5e
+		/* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
65fa5e
+		{ 0x000000014a098a90, 0x000000008e167708 },
65fa5e
+		/* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000020dbe72e, 0x0000000148b22d54 },
65fa5e
+		/* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011e7323e8, 0x0000000044ba2c3c },
65fa5e
+		/* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d5d4bf94, 0x00000000b54d2b52 },
65fa5e
+		/* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000199d8746c, 0x0000000005a4fd8a },
65fa5e
+		/* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ce9ca8a0, 0x0000000139f9fc46 },
65fa5e
+		/* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000136edece, 0x000000015a1fa824 },
65fa5e
+		/* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019b92a068, 0x000000000a61ae4c },
65fa5e
+		/* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000071d62206, 0x0000000145e9113e },
65fa5e
+		/* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000dfc50158, 0x000000006a348448 },
65fa5e
+		/* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001517626bc, 0x000000004d80a08c },
65fa5e
+		/* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000148d1e4fa, 0x000000014b6837a0 },
65fa5e
+		/* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000094d8266e, 0x000000016896a7fc },
65fa5e
+		/* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000606c5e34, 0x000000014f187140 },
65fa5e
+		/* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019766beaa, 0x000000019581b9da },
65fa5e
+		/* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d80c506c, 0x00000001091bc984 },
65fa5e
+		/* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001e73837c, 0x000000001067223c },
65fa5e
+		/* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000064d587de, 0x00000001ab16ea02 },
65fa5e
+		/* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000f4a507b0, 0x000000013c4598a8 },
65fa5e
+		/* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000040e342fc, 0x00000000b3735430 },
65fa5e
+		/* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 },
65fa5e
+		/* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000094a691a4, 0x00000001570ae19c },
65fa5e
+		/* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001271ecdfa, 0x00000001ea910712 },
65fa5e
+		/* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009e54475a, 0x0000000167127128 },
65fa5e
+		/* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c9c099ee, 0x0000000019e790a2 },
65fa5e
+		/* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009a2f736c, 0x000000003788f710 },
65fa5e
+		/* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000bb9f4996, 0x00000001682a160e },
65fa5e
+		/* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001db688050, 0x000000007f0ebd2e },
65fa5e
+		/* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e9b10af4, 0x000000002b032080 },
65fa5e
+		/* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
65fa5e
+		{ 0x000000012d4545e4, 0x00000000cfd1664a },
65fa5e
+		/* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000361139c, 0x00000000aa1181c2 },
65fa5e
+		/* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a5a1a3a8, 0x00000000ddd08002 },
65fa5e
+		/* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006844e0b0, 0x00000000e8dd0446 },
65fa5e
+		/* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c3762f28, 0x00000001bbd94a00 },
65fa5e
+		/* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d26287a2, 0x00000000ab6cd180 },
65fa5e
+		/* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f6f0bba8, 0x0000000031803ce2 },
65fa5e
+		/* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002ffabd62, 0x0000000024f40b0c },
65fa5e
+		/* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fb4516b8, 0x00000001ba1d9834 },
65fa5e
+		/* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
65fa5e
+		{ 0x000000018cfa961c, 0x0000000104de61aa },
65fa5e
+		/* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019e588d52, 0x0000000113e40d46 },
65fa5e
+		/* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001180f0bbc, 0x00000001415598a0 },
65fa5e
+		/* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e1d9177a, 0x00000000bf6c8c90 },
65fa5e
+		/* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000105abc27c, 0x00000001788b0504 },
65fa5e
+		/* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000972e4a58, 0x0000000038385d02 },
65fa5e
+		/* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000183499a5e, 0x00000001b6c83844 },
65fa5e
+		/* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c96a8cca, 0x0000000051061a8a },
65fa5e
+		/* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001a1a5b60c, 0x000000017351388a },
65fa5e
+		/* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000e4b6ac9c, 0x0000000132928f92 },
65fa5e
+		/* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001807e7f5a, 0x00000000e6b4f48a },
65fa5e
+		/* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017a7e3bc8, 0x0000000039d15e90 },
65fa5e
+		/* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000d73975da, 0x00000000312d6074 },
65fa5e
+		/* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017375d038, 0x000000017bbb2cc4 },
65fa5e
+		/* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000193680bc, 0x000000016ded3e18 },
65fa5e
+		/* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000999b06f6, 0x00000000f1638b16 },
65fa5e
+		/* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f685d2b8, 0x00000001d38b9ecc },
65fa5e
+		/* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001f4ecbed2, 0x000000018b8d09dc },
65fa5e
+		/* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ba16f1a0, 0x00000000e7bc27d2 },
65fa5e
+		/* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000115aceac4, 0x00000000275e1e96 },
65fa5e
+		/* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001aeff6292, 0x00000000e2e3031e },
65fa5e
+		/* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009640124c, 0x00000001041c84d8 },
65fa5e
+		/* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000114f41f02, 0x00000000706ce672 },
65fa5e
+		/* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009c5f3586, 0x000000015d5070da },
65fa5e
+		/* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001878275fa, 0x0000000038f9493a },
65fa5e
+		/* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000ddc42ce8, 0x00000000a3348a76 },
65fa5e
+		/* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000181d2c73a, 0x00000001ad0aab92 },
65fa5e
+		/* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000141c9320a, 0x000000019e85f712 },
65fa5e
+		/* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015235719a, 0x000000005a871e76 },
65fa5e
+		/* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000be27d804, 0x000000017249c662 },
65fa5e
+		/* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006242d45a, 0x000000003a084712 },
65fa5e
+		/* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009a53638e, 0x00000000ed438478 },
65fa5e
+		/* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001001ecfb6, 0x00000000abac34cc },
65fa5e
+		/* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016d7c2d64, 0x000000005f35ef3e },
65fa5e
+		/* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d0ce46c0, 0x0000000047d6608c },
65fa5e
+		/* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000124c907b4, 0x000000002d01470e },
65fa5e
+		/* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000018a555ca, 0x0000000158bbc7b0 },
65fa5e
+		/* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006b0980bc, 0x00000000c0a23e8e },
65fa5e
+		/* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
65fa5e
+		{ 0x000000008bbba964, 0x00000001ebd85c88 },
65fa5e
+		/* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001070a5a1e, 0x000000019ee20bb2 },
65fa5e
+		/* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002204322a, 0x00000001acabf2d6 },
65fa5e
+		/* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000a27524d0, 0x00000001b7963d56 },
65fa5e
+		/* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000020b1e4ba, 0x000000017bffa1fe },
65fa5e
+		/* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000032cc27fc, 0x000000001f15333e },
65fa5e
+		/* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000044dd22b8, 0x000000018593129e },
65fa5e
+		/* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000dffc9e0a, 0x000000019cb32602 },
65fa5e
+		/* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b7a0ed14, 0x0000000142b05cc8 },
65fa5e
+		/* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c7842488, 0x00000001be49e7a4 },
65fa5e
+		/* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001c02a4fee, 0x0000000108f69d6c },
65fa5e
+		/* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
65fa5e
+		{ 0x000000003c273778, 0x000000006c0971f0 },
65fa5e
+		/* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d63f8894, 0x000000005b16467a },
65fa5e
+		/* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006be557d6, 0x00000001551a628e },
65fa5e
+		/* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
65fa5e
+		{ 0x000000006a7806ea, 0x000000019e42ea92 },
65fa5e
+		/* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016155aa0c, 0x000000012fa83ff2 },
65fa5e
+		/* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000908650ac, 0x000000011ca9cde0 },
65fa5e
+		/* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000aa5a8084, 0x00000000c8e5cd74 },
65fa5e
+		/* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000191bb500a, 0x0000000096c27f0c },
65fa5e
+		/* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000064e9bed0, 0x000000002baed926 },
65fa5e
+		/* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009444f302, 0x000000017c8de8d2 },
65fa5e
+		/* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
65fa5e
+		{ 0x000000019db07d3c, 0x00000000d43d6068 },
65fa5e
+		/* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001359e3e6e, 0x00000000cb2c4b26 },
65fa5e
+		/* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001e4f10dd2, 0x0000000145b8da26 },
65fa5e
+		/* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000124f5735e, 0x000000018fff4b08 },
65fa5e
+		/* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000124760a4c, 0x0000000150b58ed0 },
65fa5e
+		/* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000f1fc186, 0x00000001549f39bc },
65fa5e
+		/* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000150e4cc4, 0x00000000ef4d2f42 },
65fa5e
+		/* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
65fa5e
+		{ 0x000000002a6204e8, 0x00000001b1468572 },
65fa5e
+		/* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000beb1d432, 0x000000013d7403b2 },
65fa5e
+		/* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000135f3f1f0, 0x00000001a4681842 },
65fa5e
+		/* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000074fe2232, 0x0000000167714492 },
65fa5e
+		/* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
65fa5e
+		{ 0x000000001ac6e2ba, 0x00000001e599099a },
65fa5e
+		/* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000013fca91e, 0x00000000fe128194 },
65fa5e
+		/* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000183f4931e, 0x0000000077e8b990 },
65fa5e
+		/* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b6d9b4e4, 0x00000001a267f63a },
65fa5e
+		/* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000b5188656, 0x00000001945c245a },
65fa5e
+		/* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000027a81a84, 0x0000000149002e76 },
65fa5e
+		/* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000125699258, 0x00000001bb8310a4 },
65fa5e
+		/* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001b23de796, 0x000000019ec60bcc },
65fa5e
+		/* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fe4365dc, 0x000000012d8590ae },
65fa5e
+		/* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c68f497a, 0x0000000065b00684 },
65fa5e
+		/* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fbf521ee, 0x000000015e5aeadc },
65fa5e
+		/* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015eac3378, 0x00000000b77ff2b0 },
65fa5e
+		/* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000134914b90, 0x0000000188da2ff6 },
65fa5e
+		/* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000016335cfe, 0x0000000063da929a },
65fa5e
+		/* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
65fa5e
+		{ 0x000000010372d10c, 0x00000001389caa80 },
65fa5e
+		/* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
65fa5e
+		{ 0x000000015097b908, 0x000000013db599d2 },
65fa5e
+		/* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001227a7572, 0x0000000122505a86 },
65fa5e
+		/* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
65fa5e
+		{ 0x000000009a8f75c0, 0x000000016bd72746 },
65fa5e
+		/* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000682c77a2, 0x00000001c3faf1d4 },
65fa5e
+		/* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000231f091c, 0x00000001111c826c },
65fa5e
+		/* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
65fa5e
+		{ 0x000000007d4439f2, 0x00000000153e9fb2 },
65fa5e
+		/* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
65fa5e
+		{ 0x000000017e221efc, 0x000000002b1f7b60 },
65fa5e
+		/* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000167457c38, 0x00000000b1dba570 },
65fa5e
+		/* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000bdf081c4, 0x00000001f6397b76 },
65fa5e
+		/* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016286d6b0, 0x0000000156335214 },
65fa5e
+		/* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000c84f001c, 0x00000001d70e3986 },
65fa5e
+		/* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
65fa5e
+		{ 0x0000000064efe7c0, 0x000000003701a774 },
65fa5e
+		/* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
65fa5e
+		{ 0x000000000ac2d904, 0x00000000ac81ef72 },
65fa5e
+		/* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
65fa5e
+		{ 0x00000000fd226d14, 0x0000000133212464 },
65fa5e
+		/* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
65fa5e
+		{ 0x000000011cfd42e0, 0x00000000e4e45610 },
65fa5e
+		/* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
65fa5e
+		{ 0x000000016e5a5678, 0x000000000c1bd370 },
65fa5e
+		/* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001d888fe22, 0x00000001a7b9e7a6 },
65fa5e
+		/* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
65fa5e
+		{ 0x00000001af77fcd4, 0x000000007d657a10 }
65fa5e
+#endif /* __LITTLE_ENDIAN__ */
65fa5e
+	};
65fa5e
+
65fa5e
+/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
65fa5e
+
65fa5e
+static const __vector unsigned long long vcrc_short_const[16]
65fa5e
+	__attribute__((aligned (16))) = { /* 16 entries x 16 bytes; the short path starts reading at byte offset 256 - len */
65fa5e
+#ifdef __LITTLE_ENDIAN__
65fa5e
+		/* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x)  */
65fa5e
+		{ 0x99168a18ec447f11, 0xed837b2613e8221e },
65fa5e
+		/* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x)  */
65fa5e
+		{ 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a },
65fa5e
+		/* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x)  */
65fa5e
+		{ 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 },
65fa5e
+		/* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x)  */
65fa5e
+		{ 0xf38a3556291ea462, 0xc10ec5e033fbca3b },
65fa5e
+		/* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x)  */
65fa5e
+		{ 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f },
65fa5e
+		/* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x)  */
65fa5e
+		{ 0x855712b3784d2a56, 0x71aa1df0e172334d },
65fa5e
+		/* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x)  */
65fa5e
+		{ 0xa5abe9f80eaee722, 0xfee3053e3969324d },
65fa5e
+		/* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x)  */
65fa5e
+		{ 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 },
65fa5e
+		/* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x)  */
65fa5e
+		{ 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 },
65fa5e
+		/* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x)  */
65fa5e
+		{ 0xebe7e3566325605c, 0x6f8346e1d777606e },
65fa5e
+		/* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x)  */
65fa5e
+		{ 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 },
65fa5e
+		/* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x)  */
65fa5e
+		{ 0x5705a9ca4721589f, 0xaa2215ea329ecc11 },
65fa5e
+		/* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x)  */
65fa5e
+		{ 0xe3720acb88d14467, 0x1ed8f66ed95efd26 },
65fa5e
+		/* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x)  */
65fa5e
+		{ 0xba1aca0315141c31, 0x78ed02d5a700e96a },
65fa5e
+		/* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x)  */
65fa5e
+		{ 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 },
65fa5e
+		/* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x)  */
65fa5e
+		{ 0x6655004fa06a2517, 0xedb88320b1e6b092 }
65fa5e
+#else /* !__LITTLE_ENDIAN__: same constants with the two 64-bit lanes of each entry swapped */
65fa5e
+		/* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x)  */
65fa5e
+		{ 0xed837b2613e8221e, 0x99168a18ec447f11 },
65fa5e
+		/* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x)  */
65fa5e
+		{ 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c },
65fa5e
+		/* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x)  */
65fa5e
+		{ 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 },
65fa5e
+		/* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x)  */
65fa5e
+		{ 0xc10ec5e033fbca3b, 0xf38a3556291ea462 },
65fa5e
+		/* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x)  */
65fa5e
+		{ 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b },
65fa5e
+		/* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x)  */
65fa5e
+		{ 0x71aa1df0e172334d, 0x855712b3784d2a56 },
65fa5e
+		/* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x)  */
65fa5e
+		{ 0xfee3053e3969324d, 0xa5abe9f80eaee722 },
65fa5e
+		/* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x)  */
65fa5e
+		{ 0xf44779b93eb2bd08, 0x1fa0943ddb54814c },
65fa5e
+		/* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x)  */
65fa5e
+		{ 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a },
65fa5e
+		/* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x)  */
65fa5e
+		{ 0x6f8346e1d777606e, 0xebe7e3566325605c },
65fa5e
+		/* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x)  */
65fa5e
+		{ 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 },
65fa5e
+		/* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x)  */
65fa5e
+		{ 0xaa2215ea329ecc11, 0x5705a9ca4721589f },
65fa5e
+		/* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x)  */
65fa5e
+		{ 0x1ed8f66ed95efd26, 0xe3720acb88d14467 },
65fa5e
+		/* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x)  */
65fa5e
+		{ 0x78ed02d5a700e96a, 0xba1aca0315141c31 },
65fa5e
+		/* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x)  */
65fa5e
+		{ 0xba8ccbe832b39da3, 0xad2a31b3ed627dae },
65fa5e
+		/* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x)  */
65fa5e
+		{ 0xedb88320b1e6b092, 0x6655004fa06a2517 }
65fa5e
+#endif /* __LITTLE_ENDIAN__ */
65fa5e
+	};
65fa5e
+
65fa5e
+/* Barrett constants */
65fa5e
+/* 33 bit reflected Barrett constant m = (4^32)/n, i.e. x^64 div p(x) */
65fa5e
+
65fa5e
+static const __vector unsigned long long v_Barrett_const[2]
65fa5e
+	__attribute__((aligned (16))) = {
65fa5e
+		/* x^64 div p(x)  */
65fa5e
+#ifdef __LITTLE_ENDIAN__
65fa5e
+		{ 0x00000001f7011641, 0x0000000000000000 }, /* [0] m: the quotient x^64 div p(x) */
65fa5e
+		{ 0x00000001db710641, 0x0000000000000000 } /* [1] n: p(x) as a 33-bit reflected value, (0xedb88320 << 1) | 1 */
65fa5e
+#else /* !__LITTLE_ENDIAN__: same two constants, held in the other 64-bit lane */
65fa5e
+		{ 0x0000000000000000, 0x00000001f7011641 },
65fa5e
+		{ 0x0000000000000000, 0x00000001db710641 }
65fa5e
+#endif /* __LITTLE_ENDIAN__ */
65fa5e
+	};
65fa5e
+#endif /* POWER8_INTRINSICS */
65fa5e
+
65fa5e
+#endif /* __ASSEMBLER__ */
65fa5e
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
65fa5e
new file mode 100644
65fa5e
index 0000000..bb2204b
65fa5e
--- /dev/null
65fa5e
+++ b/contrib/power8-crc/vec_crc32.c
65fa5e
@@ -0,0 +1,674 @@
65fa5e
+/*
65fa5e
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
65fa5e
+ * 16 bytes.
65fa5e
+ *
65fa5e
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
65fa5e
+ * chunks in order to mask the latency of the vpmsum instructions. If we
65fa5e
+ * have more than 32 kB of data to checksum we repeat this step multiple
65fa5e
+ * times, passing in the previous 1024 bits.
65fa5e
+ *
65fa5e
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
65fa5e
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
65fa5e
+ * calculate constants that land the data in this 32 bits.
65fa5e
+ *
65fa5e
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
65fa5e
+ * for n = CRC using POWER8 instructions. We use x = 32.
65fa5e
+ *
65fa5e
+ * http://en.wikipedia.org/wiki/Barrett_reduction
65fa5e
+ *
65fa5e
+ * This code uses gcc vector builtins instead of using assembly directly.
65fa5e
+ *
65fa5e
+ * Copyright (C) 2017 Rogerio Alves <rogealve@br.ibm.com>, IBM
65fa5e
+ *
65fa5e
+ * This program is free software; you can redistribute it and/or
65fa5e
+ * modify it under the terms of either:
65fa5e
+ *
65fa5e
+ *  a) the GNU General Public License as published by the Free Software
65fa5e
+ *     Foundation; either version 2 of the License, or (at your option)
65fa5e
+ *     any later version, or
65fa5e
+ *  b) the Apache License, Version 2.0
65fa5e
+ */
65fa5e
+
65fa5e
+#include <altivec.h>
65fa5e
+
65fa5e
+#define POWER8_INTRINSICS
65fa5e
+#define CRC_TABLE
65fa5e
+
65fa5e
+#ifdef CRC32_CONSTANTS_HEADER
65fa5e
+#include CRC32_CONSTANTS_HEADER
65fa5e
+#else
65fa5e
+#include "crc32_constants.h"
65fa5e
+#endif
65fa5e
+
65fa5e
+#define VMX_ALIGN	16
65fa5e
+#define VMX_ALIGN_MASK	(VMX_ALIGN-1)
65fa5e
+
65fa5e
+#ifdef REFLECT /* bit-reflected CRC: index the table by the low byte, shift the state right */
65fa5e
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
65fa5e
+			       unsigned long len)
65fa5e
+{ /* Byte-at-a-time table-driven CRC update over len bytes at p; returns the updated crc. */
65fa5e
+	while (len--) /* one table lookup per input byte; len == 0 returns crc unchanged */
65fa5e
+		crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
65fa5e
+	return crc;
65fa5e
+}
65fa5e
+#else /* non-reflected CRC: index the table by the top byte, shift the state left */
65fa5e
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
65fa5e
+				unsigned long len)
65fa5e
+{ /* Same contract as the REFLECT variant. NOTE(review): crc_table is presumably supplied by crc32_constants.h under CRC_TABLE - confirm. */
65fa5e
+	while (len--)
65fa5e
+		crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
65fa5e
+	return crc;
65fa5e
+}
65fa5e
+#endif
65fa5e
+
65fa5e
+static unsigned int __attribute__ ((aligned (32)))
65fa5e
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); /* vector kernel, defined later in this file */
65fa5e
+
65fa5e
+#ifndef CRC32_FUNCTION /* the build may pre-define the exported function name */
65fa5e
+#define CRC32_FUNCTION  crc32_vpmsum
65fa5e
+#endif
65fa5e
+/* Entry point: CRC of len bytes at p, continuing from crc. Scalar loop for head and tail, vector kernel for the aligned middle. */
65fa5e
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
65fa5e
+			    unsigned long len)
65fa5e
+{
65fa5e
+	unsigned int prealign; /* bytes to consume before p reaches a 16-byte boundary */
65fa5e
+	unsigned int tail; /* bytes (0..15) left after the last whole 16-byte block */
65fa5e
+
65fa5e
+#ifdef CRC_XOR /* build-time option: invert the CRC on entry and again on exit */
65fa5e
+	crc ^= 0xffffffff;
65fa5e
+#endif
65fa5e
+
65fa5e
+	if (len < VMX_ALIGN + VMX_ALIGN_MASK) { /* < 31 bytes: too short to guarantee one aligned 16-byte block */
65fa5e
+		crc = crc32_align(crc, p, len);
65fa5e
+		goto out;
65fa5e
+	}
65fa5e
+
65fa5e
+	if ((unsigned long)p & VMX_ALIGN_MASK) { /* p is not 16-byte aligned: do the leading bytes with the scalar loop */
65fa5e
+		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
65fa5e
+		crc = crc32_align(crc, p, prealign);
65fa5e
+		len -= prealign;
65fa5e
+		p += prealign;
65fa5e
+	}
65fa5e
+
65fa5e
+	crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); /* kernel receives whole 16-byte blocks only */
65fa5e
+
65fa5e
+	tail = len & VMX_ALIGN_MASK;
65fa5e
+	if (tail) {
65fa5e
+		p += len & ~VMX_ALIGN_MASK; /* step past the part the kernel already covered */
65fa5e
+		crc = crc32_align(crc, p, tail);
65fa5e
+	}
65fa5e
+
65fa5e
+out:
65fa5e
+#ifdef CRC_XOR /* matching final inversion; also reached from the short-input path */
65fa5e
+	crc ^= 0xffffffff;
65fa5e
+#endif
65fa5e
+
65fa5e
+	return crc;
65fa5e
+}
65fa5e
+
65fa5e
+#if defined (__clang__) /* NOTE(review): clang_workaround.h presumably provides the same three helper names as the #else branch - confirm */
65fa5e
+#include "clang_workaround.h"
65fa5e
+#else /* other compilers: map the helpers onto the __int128 vector pack/unpack builtins */
65fa5e
+#define __builtin_pack_vector(a, b)  __builtin_pack_vector_int128 ((a), (b))
65fa5e
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0) /* second argument selects which half is extracted */
65fa5e
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1)
65fa5e
+#endif
65fa5e
+
65fa5e
+/* When we have a load-store in a single-dispatch group and address overlap
65fa5e
+ * such that forwarding is not allowed (load-hit-store) the group must be flushed.
65fa5e
+ * A group ending NOP prevents the flush. The "memory" clobber additionally
65fa5e
+ * keeps the compiler from moving memory accesses across the NOP. */
65fa5e
+#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory")
65fa5e
+
65fa5e
+#if defined(__BIG_ENDIAN__) && defined (REFLECT) /* input is byte-swapped for big-endian + reflected ... */
65fa5e
+#define BYTESWAP_DATA
65fa5e
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) /* ... and for little-endian + non-reflected builds */
65fa5e
+#define BYTESWAP_DATA
65fa5e
+#endif
65fa5e
+/* VEC_PERM(vr, va, vb, vc): assign to vr the bytes of va/vb selected by vc; expands to nothing when no swap is needed. */
65fa5e
+#ifdef BYTESWAP_DATA
65fa5e
+#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\
65fa5e
+			(__vector unsigned char) vc)
65fa5e
+#if defined(__LITTLE_ENDIAN__)
65fa5e
+/* Byte reverse permute constant LE. */
65fa5e
+static const __vector unsigned long long vperm_const
65fa5e
+	__attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL,
65fa5e
+			0x0001020304050607UL };
65fa5e
+#else /* Byte reverse permute constant BE. */
65fa5e
+static const __vector unsigned long long vperm_const
65fa5e
+	__attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL,
65fa5e
+			0X0706050403020100UL };
65fa5e
+#endif
65fa5e
+#else /* !BYTESWAP_DATA: data is used as loaded, vperm_const is not defined */
65fa5e
+#define VEC_PERM(vr, va, vb, vc)
65fa5e
+#endif
65fa5e
+
65fa5e
+static unsigned int __attribute__ ((aligned (32)))
65fa5e
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
65fa5e
+
65fa5e
+	const __vector unsigned long long vzero = {0,0};
65fa5e
+	const __vector unsigned long long vones = {0xffffffffffffffffUL,
65fa5e
+		0xffffffffffffffffUL};
65fa5e
+
65fa5e
+#ifdef REFLECT
65fa5e
+	const __vector unsigned long long vmask_32bit =
65fa5e
+		(__vector unsigned long long)vec_sld((__vector unsigned char)vzero,
65fa5e
+			(__vector unsigned char)vones, 4);
65fa5e
+#endif
65fa5e
+
65fa5e
+	const __vector unsigned long long vmask_64bit =
65fa5e
+		(__vector unsigned long long)vec_sld((__vector unsigned char)vzero,
65fa5e
+			(__vector unsigned char)vones, 8);
65fa5e
+
65fa5e
+	__vector unsigned long long vcrc;
65fa5e
+
65fa5e
+	__vector unsigned long long vconst1, vconst2;
65fa5e
+
65fa5e
+	/* vdata0-vdata7 will contain our data (p). */
65fa5e
+	__vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4,
65fa5e
+		vdata5, vdata6, vdata7;
65fa5e
+
65fa5e
+	/* v0-v7 will contain our checksums */
65fa5e
+	__vector unsigned long long v0 = {0,0};
65fa5e
+	__vector unsigned long long v1 = {0,0};
65fa5e
+	__vector unsigned long long v2 = {0,0};
65fa5e
+	__vector unsigned long long v3 = {0,0};
65fa5e
+	__vector unsigned long long v4 = {0,0};
65fa5e
+	__vector unsigned long long v5 = {0,0};
65fa5e
+	__vector unsigned long long v6 = {0,0};
65fa5e
+	__vector unsigned long long v7 = {0,0};
65fa5e
+
65fa5e
+
65fa5e
+	/* Vector auxiliary variables. */
65fa5e
+	__vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7;
65fa5e
+
65fa5e
+	unsigned int result = 0;
65fa5e
+	unsigned int offset; /* Constant table offset. */
65fa5e
+
65fa5e
+	unsigned long i; /* Counter. */
65fa5e
+	unsigned long chunks;
65fa5e
+
65fa5e
+	unsigned long block_size;
65fa5e
+	int next_block = 0;
65fa5e
+
65fa5e
+	/* Align by 128 bytes. The last 128 bit block will be processed at end. */
65fa5e
+	unsigned long length = len & 0xFFFFFFFFFFFFFF80UL;
65fa5e
+
65fa5e
+#ifdef REFLECT
65fa5e
+	vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc);
65fa5e
+#else
65fa5e
+	vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL);
65fa5e
+
65fa5e
+	/* Shift into top 32 bits */
65fa5e
+	vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc,
65fa5e
+        (__vector unsigned char)vzero, 4);
65fa5e
+#endif
65fa5e
+
65fa5e
+	/* Short version. */
65fa5e
+	if (len < 256) {
65fa5e
+		/* Calculate where in the constant table we need to start. */
65fa5e
+		offset = 256 - len;
65fa5e
+
65fa5e
+		vconst1 = vec_ld(offset, vcrc_short_const);
65fa5e
+		vdata0 = vec_ld(0, (__vector unsigned long long*) p);
65fa5e
+		VEC_PERM(vdata0, vdata0, vconst1, vperm_const);
65fa5e
+
65fa5e
+		/* xor initial value*/
65fa5e
+		vdata0 = vec_xor(vdata0, vcrc);
65fa5e
+
65fa5e
+		vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw
65fa5e
+				((__vector unsigned int)vdata0, (__vector unsigned int)vconst1);
65fa5e
+		v0 = vec_xor(v0, vdata0);
65fa5e
+
65fa5e
+		for (i = 16; i < len; i += 16) {
65fa5e
+			vconst1 = vec_ld(offset + i, vcrc_short_const);
65fa5e
+			vdata0 = vec_ld(i, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(vdata0, vdata0, vconst1, vperm_const);
65fa5e
+			vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw
65fa5e
+				((__vector unsigned int)vdata0, (__vector unsigned int)vconst1);
65fa5e
+			v0 = vec_xor(v0, vdata0);
65fa5e
+		}
65fa5e
+	} else {
65fa5e
+
65fa5e
+		/* Load initial values. */
65fa5e
+		vdata0 = vec_ld(0, (__vector unsigned long long*) p);
65fa5e
+		vdata1 = vec_ld(16, (__vector unsigned long long*) p);
65fa5e
+
65fa5e
+		VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
65fa5e
+		VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
65fa5e
+
65fa5e
+		vdata2 = vec_ld(32, (__vector unsigned long long*) p);
65fa5e
+		vdata3 = vec_ld(48, (__vector unsigned long long*) p);
65fa5e
+
65fa5e
+		VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
65fa5e
+		VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
65fa5e
+
65fa5e
+		vdata4 = vec_ld(64, (__vector unsigned long long*) p);
65fa5e
+		vdata5 = vec_ld(80, (__vector unsigned long long*) p);
65fa5e
+
65fa5e
+		VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
65fa5e
+		VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
65fa5e
+
65fa5e
+		vdata6 = vec_ld(96, (__vector unsigned long long*) p);
65fa5e
+		vdata7 = vec_ld(112, (__vector unsigned long long*) p);
65fa5e
+
65fa5e
+		VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
65fa5e
+		VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
65fa5e
+
65fa5e
+		/* xor in initial value */
65fa5e
+		vdata0 = vec_xor(vdata0, vcrc);
65fa5e
+
65fa5e
+		p = (char *)p + 128;
65fa5e
+
65fa5e
+		do {
65fa5e
+			/* Checksum in blocks of MAX_SIZE. */
65fa5e
+			block_size = length;
65fa5e
+			if (block_size > MAX_SIZE) {
65fa5e
+				block_size = MAX_SIZE;
65fa5e
+			}
65fa5e
+
65fa5e
+			length = length - block_size;
65fa5e
+
65fa5e
+			/*
65fa5e
+			* Work out the offset into the constants table to start at. Each
65fa5e
+			* constant is 16 bytes, and it is used against 128 bytes of input
65fa5e
+			* data - 128 / 16 = 8
65fa5e
+			*/
65fa5e
+			offset = (MAX_SIZE/8) - (block_size/8);
65fa5e
+			/* We reduce our final 128 bytes in a separate step */
65fa5e
+			chunks = (block_size/128)-1;
65fa5e
+
65fa5e
+		    vconst1 = vec_ld(offset, vcrc_const);
65fa5e
+
65fa5e
+			va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+			va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7,
65fa5e
+						(__vector unsigned long long)vconst1);
65fa5e
+
65fa5e
+			if (chunks > 1) {
65fa5e
+				offset += 16;
65fa5e
+				vconst2 = vec_ld(offset, vcrc_const);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				vdata0 = vec_ld(0, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
65fa5e
+
65fa5e
+				vdata1 = vec_ld(16, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
65fa5e
+
65fa5e
+				vdata2 = vec_ld(32, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
65fa5e
+
65fa5e
+				vdata3 = vec_ld(48, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
65fa5e
+
65fa5e
+				vdata4 = vec_ld(64, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
65fa5e
+
65fa5e
+				vdata5 = vec_ld(80, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
65fa5e
+
65fa5e
+				vdata6 = vec_ld(96, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
65fa5e
+
65fa5e
+				vdata7 = vec_ld(112, (__vector unsigned long long*) p);
65fa5e
+				VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
65fa5e
+
65fa5e
+				p = (char *)p + 128;
65fa5e
+
65fa5e
+				/*
65fa5e
+				 * main loop. We modulo schedule it such that it takes three
65fa5e
+				 * iterations to complete - first iteration load, second
65fa5e
+				 * iteration vpmsum, third iteration xor.
65fa5e
+				 */
65fa5e
+				for (i = 0; i < chunks-2; i++) {
65fa5e
+					vconst1 = vec_ld(offset, vcrc_const);
65fa5e
+					offset += 16;
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v0 = vec_xor(v0, va0);
65fa5e
+					va0 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata0, (__vector unsigned long long)vconst2);
65fa5e
+					vdata0 = vec_ld(0, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v1 = vec_xor(v1, va1);
65fa5e
+					va1 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata1, (__vector unsigned long long)vconst2);
65fa5e
+					vdata1 = vec_ld(16, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v2 = vec_xor(v2, va2);
65fa5e
+					va2 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata2, (__vector unsigned long long)vconst2);
65fa5e
+					vdata2 = vec_ld(32, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v3 = vec_xor(v3, va3);
65fa5e
+					va3 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata3, (__vector unsigned long long)vconst2);
65fa5e
+					vdata3 = vec_ld(48, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
65fa5e
+
65fa5e
+					vconst2 = vec_ld(offset, vcrc_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v4 = vec_xor(v4, va4);
65fa5e
+					va4 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata4, (__vector unsigned long long)vconst1);
65fa5e
+					vdata4 = vec_ld(64, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v5 = vec_xor(v5, va5);
65fa5e
+					va5 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata5, (__vector unsigned long long)vconst1);
65fa5e
+					vdata5 = vec_ld(80, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v6 = vec_xor(v6, va6);
65fa5e
+					va6 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata6, (__vector unsigned long long)vconst1);
65fa5e
+					vdata6 = vec_ld(96, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
65fa5e
+					GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+					v7 = vec_xor(v7, va7);
65fa5e
+					va7 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata7, (__vector unsigned long long)vconst1);
65fa5e
+					vdata7 = vec_ld(112, (__vector unsigned long long*) p);
65fa5e
+					VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
65fa5e
+
65fa5e
+					p = (char *)p + 128;
65fa5e
+				}
65fa5e
+
65fa5e
+				/* First cool down*/
65fa5e
+				vconst1 = vec_ld(offset, vcrc_const);
65fa5e
+				offset += 16;
65fa5e
+
65fa5e
+				v0 = vec_xor(v0, va0);
65fa5e
+				va0 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata0, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v1 = vec_xor(v1, va1);
65fa5e
+				va1 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata1, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v2 = vec_xor(v2, va2);
65fa5e
+				va2 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata2, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v3 = vec_xor(v3, va3);
65fa5e
+				va3 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata3, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v4 = vec_xor(v4, va4);
65fa5e
+				va4 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata4, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v5 = vec_xor(v5, va5);
65fa5e
+				va5 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata5, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v6 = vec_xor(v6, va6);
65fa5e
+				va6 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata6, (__vector unsigned long long)vconst1);
65fa5e
+				GROUP_ENDING_NOP;
65fa5e
+
65fa5e
+				v7 = vec_xor(v7, va7);
65fa5e
+				va7 = __builtin_crypto_vpmsumd ((__vector unsigned long
65fa5e
+							long)vdata7, (__vector unsigned long long)vconst1);
65fa5e
+			}/* else */
65fa5e
+
65fa5e
+			/* Second cool down. */
65fa5e
+			v0 = vec_xor(v0, va0);
65fa5e
+			v1 = vec_xor(v1, va1);
65fa5e
+			v2 = vec_xor(v2, va2);
65fa5e
+			v3 = vec_xor(v3, va3);
65fa5e
+			v4 = vec_xor(v4, va4);
65fa5e
+			v5 = vec_xor(v5, va5);
65fa5e
+			v6 = vec_xor(v6, va6);
65fa5e
+			v7 = vec_xor(v7, va7);
65fa5e
+
65fa5e
+#ifdef REFLECT
65fa5e
+			/*
65fa5e
+			 * vpmsumd produces a 96 bit result in the least significant bits
65fa5e
+			 * of the register. Since we are bit reflected we have to shift it
65fa5e
+			 * left 32 bits so it occupies the least significant bits in the
65fa5e
+			 * bit reflected domain.
65fa5e
+			 */
65fa5e
+			v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+			v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7,
65fa5e
+					(__vector unsigned char)vzero, 4);
65fa5e
+#endif
65fa5e
+
65fa5e
+			/* xor with the last 1024 bits. */
65fa5e
+			va0 = vec_ld(0, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va0, va0, va0, vperm_const);
65fa5e
+
65fa5e
+			va1 = vec_ld(16, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va1, va1, va1, vperm_const);
65fa5e
+
65fa5e
+			va2 = vec_ld(32, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va2, va2, va2, vperm_const);
65fa5e
+
65fa5e
+			va3 = vec_ld(48, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va3, va3, va3, vperm_const);
65fa5e
+
65fa5e
+			va4 = vec_ld(64, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va4, va4, va4, vperm_const);
65fa5e
+
65fa5e
+			va5 = vec_ld(80, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va5, va5, va5, vperm_const);
65fa5e
+
65fa5e
+			va6 = vec_ld(96, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va6, va6, va6, vperm_const);
65fa5e
+
65fa5e
+			va7 = vec_ld(112, (__vector unsigned long long*) p);
65fa5e
+			VEC_PERM(va7, va7, va7, vperm_const);
65fa5e
+
65fa5e
+			p = (char *)p + 128;
65fa5e
+
65fa5e
+			vdata0 = vec_xor(v0, va0);
65fa5e
+			vdata1 = vec_xor(v1, va1);
65fa5e
+			vdata2 = vec_xor(v2, va2);
65fa5e
+			vdata3 = vec_xor(v3, va3);
65fa5e
+			vdata4 = vec_xor(v4, va4);
65fa5e
+			vdata5 = vec_xor(v5, va5);
65fa5e
+			vdata6 = vec_xor(v6, va6);
65fa5e
+			vdata7 = vec_xor(v7, va7);
65fa5e
+
65fa5e
+			/* Check if we have more blocks to process */
65fa5e
+			next_block = 0;
65fa5e
+			if (length != 0) {
65fa5e
+				next_block = 1;
65fa5e
+
65fa5e
+			    /* zero v0-v7 */
65fa5e
+				v0 = vec_xor(v0, v0);
65fa5e
+				v1 = vec_xor(v1, v1);
65fa5e
+				v2 = vec_xor(v2, v2);
65fa5e
+				v3 = vec_xor(v3, v3);
65fa5e
+				v4 = vec_xor(v4, v4);
65fa5e
+				v5 = vec_xor(v5, v5);
65fa5e
+				v6 = vec_xor(v6, v6);
65fa5e
+				v7 = vec_xor(v7, v7);
65fa5e
+			}
65fa5e
+			length = length + 128;
65fa5e
+
65fa5e
+		} while (next_block);
65fa5e
+
65fa5e
+		/* Calculate how many bytes we have left. */
65fa5e
+		length = (len & 127);
65fa5e
+
65fa5e
+		/* Calculate where in (short) constant table we need to start. */
65fa5e
+		offset = 128 - length;
65fa5e
+
65fa5e
+		v0 = vec_ld(offset, vcrc_short_const);
65fa5e
+		v1 = vec_ld(offset + 16, vcrc_short_const);
65fa5e
+		v2 = vec_ld(offset + 32, vcrc_short_const);
65fa5e
+		v3 = vec_ld(offset + 48, vcrc_short_const);
65fa5e
+		v4 = vec_ld(offset + 64, vcrc_short_const);
65fa5e
+		v5 = vec_ld(offset + 80, vcrc_short_const);
65fa5e
+		v6 = vec_ld(offset + 96, vcrc_short_const);
65fa5e
+		v7 = vec_ld(offset + 112, vcrc_short_const);
65fa5e
+
65fa5e
+		offset += 128;
65fa5e
+
65fa5e
+		v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata0,(__vector unsigned int)v0);
65fa5e
+		v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata1,(__vector unsigned int)v1);
65fa5e
+		v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata2,(__vector unsigned int)v2);
65fa5e
+		v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata3,(__vector unsigned int)v3);
65fa5e
+		v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata4,(__vector unsigned int)v4);
65fa5e
+		v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata5,(__vector unsigned int)v5);
65fa5e
+		v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata6,(__vector unsigned int)v6);
65fa5e
+		v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata7,(__vector unsigned int)v7);
65fa5e
+
65fa5e
+		/* Now reduce the tail (0-112 bytes). */
65fa5e
+		for (i = 0; i < length; i+=16) {
65fa5e
+			vdata0 = vec_ld(i,(__vector unsigned long long*)p);
65fa5e
+			VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
65fa5e
+			va0 = vec_ld(offset + i,vcrc_short_const);
65fa5e
+			va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
65fa5e
+			(__vector unsigned int)vdata0,(__vector unsigned int)va0);
65fa5e
+			v0 = vec_xor(v0, va0);
65fa5e
+		}
65fa5e
+
65fa5e
+		/* xor all parallel chunks together. */
65fa5e
+		v0 = vec_xor(v0, v1);
65fa5e
+		v2 = vec_xor(v2, v3);
65fa5e
+		v4 = vec_xor(v4, v5);
65fa5e
+		v6 = vec_xor(v6, v7);
65fa5e
+
65fa5e
+		v0 = vec_xor(v0, v2);
65fa5e
+		v4 = vec_xor(v4, v6);
65fa5e
+
65fa5e
+		v0 = vec_xor(v0, v4);
65fa5e
+	}
65fa5e
+
65fa5e
+	/* Barrett Reduction */
65fa5e
+	vconst1 = vec_ld(0, v_Barrett_const);
65fa5e
+	vconst2 = vec_ld(16, v_Barrett_const);
65fa5e
+
65fa5e
+	v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
65fa5e
+			(__vector unsigned char)v0, 8);
65fa5e
+	v0 = vec_xor(v1,v0);
65fa5e
+
65fa5e
+#ifdef REFLECT
65fa5e
+	/* shift left one bit */
65fa5e
+	__vector unsigned char vsht_splat = vec_splat_u8 (1);
65fa5e
+	v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0,
65fa5e
+			vsht_splat);
65fa5e
+#endif
65fa5e
+
65fa5e
+	v0 = vec_and(v0, vmask_64bit);
65fa5e
+
65fa5e
+#ifndef REFLECT
65fa5e
+
65fa5e
+	/*
65fa5e
+	 * Now for the actual algorithm. The idea is to calculate q,
65fa5e
+	 * the multiple of our polynomial that we need to subtract. By
65fa5e
+	 * doing the computation 2x bits higher (ie 64 bits) and shifting the
65fa5e
+	 * result back down 2x bits, we round down to the nearest multiple.
65fa5e
+	 */
65fa5e
+
65fa5e
+	/* ma */
65fa5e
+	v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0,
65fa5e
+			(__vector unsigned long long)vconst1);
65fa5e
+	/* q = floor(ma/(2^64)) */
65fa5e
+	v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero,
65fa5e
+			(__vector unsigned char)v1, 8);
65fa5e
+	/* qn */
65fa5e
+	v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
65fa5e
+			(__vector unsigned long long)vconst2);
65fa5e
+	/* a - qn, subtraction is xor in GF(2) */
65fa5e
+	v0 = vec_xor (v0, v1);
65fa5e
+	/*
65fa5e
+	 * Get the result into r3. We need to shift it left 8 bytes:
65fa5e
+	 * V0 [ 0 1 2 X ]
65fa5e
+	 * V0 [ 0 X 2 3 ]
65fa5e
+	 */
65fa5e
+	result = __builtin_unpack_vector_1 (v0);
65fa5e
+#else
65fa5e
+
65fa5e
+	/*
65fa5e
+	 * The reflected version of Barrett reduction. Instead of bit
65fa5e
+	 * reflecting our data (which is expensive to do), we bit reflect our
65fa5e
+	 * constants and our algorithm, which means the intermediate data in
65fa5e
+	 * our vector registers goes from 0-63 instead of 63-0. We can reflect
65fa5e
+	 * the algorithm because we don't carry in mod 2 arithmetic.
65fa5e
+	 */
65fa5e
+
65fa5e
+	/* bottom 32 bits of a */
65fa5e
+	v1 = vec_and(v0, vmask_32bit);
65fa5e
+
65fa5e
+	/* ma */
65fa5e
+	v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
65fa5e
+			(__vector unsigned long long)vconst1);
65fa5e
+
65fa5e
+	/* bottom 32bits of ma */
65fa5e
+	v1 = vec_and(v1, vmask_32bit);
65fa5e
+	/* qn */
65fa5e
+	v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
65fa5e
+			(__vector unsigned long long)vconst2);
65fa5e
+	/* a - qn, subtraction is xor in GF(2) */
65fa5e
+	v0 = vec_xor (v0, v1);
65fa5e
+
65fa5e
+	/*
65fa5e
+	 * Since we are bit reflected, the result (ie the low 32 bits) is in
65fa5e
+	 * the high 32 bits. We just need to shift it left 4 bytes
65fa5e
+	 * V0 [ 0 1 X 3 ]
65fa5e
+	 * V0 [ 0 X 2 3 ]
65fa5e
+	 */
65fa5e
+
65fa5e
+	/* shift result into top 64 bits of v0 */
65fa5e
+	v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
65fa5e
+        (__vector unsigned char)vzero, 4);
65fa5e
+
65fa5e
+	result = __builtin_unpack_vector_0 (v0);
65fa5e
+#endif
65fa5e
+
65fa5e
+	return result;
65fa5e
+}
65fa5e
diff --git a/crc32.c b/crc32.c
65fa5e
index 9580440..406d350 100644
65fa5e
--- a/crc32.c
65fa5e
+++ b/crc32.c
65fa5e
@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
65fa5e
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
65fa5e
 
65fa5e
 /* ========================================================================= */
65fa5e
-unsigned long ZEXPORT crc32_z(crc, buf, len)
65fa5e
+local
65fa5e
+unsigned long ZEXPORT crc32_table_lookup(crc, buf, len)
65fa5e
     unsigned long crc;
65fa5e
     const unsigned char FAR *buf;
65fa5e
     z_size_t len;
65fa5e
 {
65fa5e
     if (buf == Z_NULL) return 0UL;
65fa5e
 
65fa5e
+    crc = crc ^ 0xffffffffUL;
65fa5e
+    while (len >= 8) {
65fa5e
+        DO8;
65fa5e
+        len -= 8;
65fa5e
+    }
65fa5e
+    if (len) do {
65fa5e
+        DO1;
65fa5e
+    } while (--len);
65fa5e
+    return crc ^ 0xffffffffUL;
65fa5e
+}
65fa5e
+
65fa5e
+/* Small helper macro to compare optfn against the reference table lookup;
65fa5e
+ * to use it, return test_ref_comparision_##optfn in crc32_z_ifunc.
65fa5e
+#include <assert.h>
65fa5e
+#define TEST_COMPARE(optfn) \
65fa5e
+   static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
65fa5e
+   { \
65fa5e
+     unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
65fa5e
+     unsigned long optcrc = optfn(crc, p, len); \
65fa5e
+     assert( optcrc == crc_tbl_lookup ); \
65fa5e
+     return optcrc; \
65fa5e
+   }
65fa5e
+*/
65fa5e
+
65fa5e
+#ifdef Z_IFUNC_ASM
65fa5e
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
65fa5e
+    __asm__ ("crc32_z");
65fa5e
+__asm__(".type crc32_z, %gnu_indirect_function");
65fa5e
+#elif defined(Z_IFUNC_NATIVE)
65fa5e
+unsigned long ZEXPORT crc32_z(
65fa5e
+    unsigned long crc,
65fa5e
+    const unsigned char FAR *buf,
65fa5e
+    z_size_t len)
65fa5e
+  __attribute__ ((ifunc ("crc32_z_ifunc")));
65fa5e
+#endif
65fa5e
+
65fa5e
+#if _ARCH_PWR8==1
65fa5e
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
65fa5e
+/* for testing TEST_COMPARE(crc32_vpmsum) */
65fa5e
+#ifndef __BUILTIN_CPU_SUPPORTS__
65fa5e
+#include <sys/auxv.h>
65fa5e
+#include <bits/hwcap.h>
65fa5e
+#endif
65fa5e
+#endif
65fa5e
+
65fa5e
+/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
65fa5e
+ * crc32_z which is not desired. crc32_z_ifunc is implicitly "local" */
65fa5e
+#ifndef Z_IFUNC_ASM
65fa5e
+local
65fa5e
+#endif
65fa5e
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
65fa5e
+{
65fa5e
+#if _ARCH_PWR8==1
65fa5e
+#if defined(__BUILTIN_CPU_SUPPORTS__)
65fa5e
+    if (__builtin_cpu_supports("arch_2_07"))
65fa5e
+        return crc32_vpmsum;
65fa5e
+#else
65fa5e
+    if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
65fa5e
+        return crc32_vpmsum;
65fa5e
+#endif
65fa5e
+#endif /* _ARCH_PWR8 */
65fa5e
+
65fa5e
+/* return a function pointer for optimized arches here */
65fa5e
+
65fa5e
 #ifdef DYNAMIC_CRC_TABLE
65fa5e
     if (crc_table_empty)
65fa5e
         make_crc_table();
65fa5e
@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
65fa5e
 
65fa5e
         endian = 1;
65fa5e
         if (*((unsigned char *)(&endian)))
65fa5e
-            return crc32_little(crc, buf, len);
65fa5e
+            return crc32_little;
65fa5e
         else
65fa5e
-            return crc32_big(crc, buf, len);
65fa5e
+            return crc32_big;
65fa5e
     }
65fa5e
 #endif /* BYFOUR */
65fa5e
-    crc = crc ^ 0xffffffffUL;
65fa5e
-    while (len >= 8) {
65fa5e
-        DO8;
65fa5e
-        len -= 8;
65fa5e
-    }
65fa5e
-    if (len) do {
65fa5e
-        DO1;
65fa5e
-    } while (--len);
65fa5e
-    return crc ^ 0xffffffffUL;
65fa5e
+    
65fa5e
+    return crc32_table_lookup;
65fa5e
 }
65fa5e
 
65fa5e
+#if !defined(Z_IFUNC_ASM) && !defined(Z_IFUNC_NATIVE)
65fa5e
+
65fa5e
+unsigned long ZEXPORT crc32_z(crc, buf, len)
65fa5e
+    unsigned long crc;
65fa5e
+    const unsigned char FAR *buf;
65fa5e
+    z_size_t len;
65fa5e
+{
65fa5e
+    static unsigned long ZEXPORT (*crc32_func)(unsigned long, const unsigned char FAR *, z_size_t) = NULL;
65fa5e
+    /* Non-ifunc build: resolve the CRC routine via crc32_z_ifunc() on first use, then dispatch to the cached pointer. */
65fa5e
+    if (!crc32_func)
65fa5e
+        crc32_func = crc32_z_ifunc();
65fa5e
+    return (*crc32_func)(crc, buf, len);
65fa5e
+}
65fa5e
+
65fa5e
+#endif /* !defined(Z_IFUNC_ASM) && !defined(Z_IFUNC_NATIVE) */
65fa5e
+
65fa5e
 /* ========================================================================= */
65fa5e
 unsigned long ZEXPORT crc32(crc, buf, len)
65fa5e
     unsigned long crc;
65fa5e
@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, buf, len)
65fa5e
     register z_crc_t c;
65fa5e
     register const z_crc_t FAR *buf4;
65fa5e
 
65fa5e
+    if (buf == Z_NULL) return 0UL;
65fa5e
     c = (z_crc_t)crc;
65fa5e
     c = ~c;
65fa5e
     while (len && ((ptrdiff_t)buf & 3)) {
65fa5e
@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf, len)
65fa5e
     register z_crc_t c;
65fa5e
     register const z_crc_t FAR *buf4;
65fa5e
 
65fa5e
+    if (buf == Z_NULL) return 0UL;
65fa5e
     c = ZSWAP32((z_crc_t)crc);
65fa5e
     c = ~c;
65fa5e
     while (len && ((ptrdiff_t)buf & 3)) {
65fa5e
-- 
65fa5e
2.19.1
65fa5e