c55b40
From cfbf97cb54a6d06a80e86c85869331e4e2871129 Mon Sep 17 00:00:00 2001
c55b40
From: Ilya Leoshkevich <iii@linux.ibm.com>
c55b40
Date: Thu, 19 Mar 2020 11:52:03 +0100
c55b40
Subject: [PATCH] s390x: vectorize crc32
c55b40
c55b40
Use vector extensions when compiling for s390x and binutils knows
c55b40
about them. At runtime, check whether the kernel supports vector
c55b40
extensions (CPU support alone is not enough; the kernel must enable
c55b40
them too) and choose between the regular and vectorized implementations.
c55b40
---
c55b40
 Makefile.in                     |   9 ++
c55b40
 configure                       |  28 +++++
c55b40
 contrib/gcc/zifunc.h            |  21 +++-
c55b40
 contrib/s390/crc32-vx.c         | 195 ++++++++++++++++++++++++++++++++
c55b40
 contrib/s390/crc32_z_resolver.c |  41 +++++++
c55b40
 crc32.c                         |  11 +-
c55b40
 6 files changed, 301 insertions(+), 4 deletions(-)
c55b40
 create mode 100644 contrib/s390/crc32-vx.c
c55b40
 create mode 100644 contrib/s390/crc32_z_resolver.c
c55b40
c55b40
diff --git a/Makefile.in b/Makefile.in
c55b40
index d392616..63f76da 100644
c55b40
--- a/Makefile.in
c55b40
+++ b/Makefile.in
c55b40
@@ -29,6 +29,7 @@ LDFLAGS=
c55b40
 TEST_LDFLAGS=-L. libz.a
c55b40
 LDSHARED=$(CC)
c55b40
 CPP=$(CC) -E
c55b40
+VGFMAFLAG=
c55b40
 
c55b40
 STATICLIB=libz.a
c55b40
 SHAREDLIB=libz.so
c55b40
@@ -179,6 +180,9 @@ crc32.o: $(SRCDIR)crc32.c
c55b40
 crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
c55b40
 	$(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c
c55b40
 
c55b40
+crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c
c55b40
+	$(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c
c55b40
+
c55b40
 deflate.o: $(SRCDIR)deflate.c
c55b40
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
c55b40
 
c55b40
@@ -229,6 +233,11 @@ crc32.lo: $(SRCDIR)crc32.c
c55b40
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
c55b40
 	-@mv objs/crc32.o $@
c55b40
 
c55b40
+crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c
c55b40
+	-@mkdir objs 2>/dev/null || test -d objs
c55b40
+	$(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c
c55b40
+	-@mv objs/crc32-vx.o $@
c55b40
+
c55b40
 crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
c55b40
 	-@mkdir objs 2>/dev/null || test -d objs
c55b40
 	$(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
c55b40
diff --git a/configure b/configure
c55b40
index e37dac8..a4606b8 100755
c55b40
--- a/configure
c55b40
+++ b/configure
c55b40
@@ -915,6 +915,32 @@ else
c55b40
   echo "Checking for Power optimizations support... No." | tee -a configure.log
c55b40
 fi
c55b40
 
c55b40
+# check if we are compiling for s390 and binutils support vector extensions
c55b40
+VGFMAFLAG=-march=z13
c55b40
+cat > $test.c <<EOF
c55b40
+#ifndef __s390__
c55b40
+#error
c55b40
+#endif
c55b40
+EOF
c55b40
+if try $CC -c $CFLAGS $VGFMAFLAG $test.c; then
c55b40
+  CFLAGS="$CFLAGS -DHAVE_S390X_VX"
c55b40
+  SFLAGS="$SFLAGS -DHAVE_S390X_VX"
c55b40
+  OBJC="$OBJC crc32-vx.o"
c55b40
+  PIC_OBJC="$PIC_OBJC crc32-vx.lo"
c55b40
+  echo "Checking for s390 vector extensions... Yes." | tee -a configure.log
c55b40
+
c55b40
+  for flag in -mzarch -fzvector; do
c55b40
+    if try $CC -c $CFLAGS $VGFMAFLAG $flag $test.c; then
c55b40
+      VGFMAFLAG="$VGFMAFLAG $flag"
c55b40
+      echo "Checking for $flag... Yes." | tee -a configure.log
c55b40
+    else
c55b40
+      echo "Checking for $flag... No." | tee -a configure.log
c55b40
+    fi
c55b40
+  done
c55b40
+else
c55b40
+  echo "Checking for s390 vector extensions... No." | tee -a configure.log
c55b40
+fi
c55b40
+
c55b40
 # show the results in the log
c55b40
 echo >> configure.log
c55b40
 echo ALL = $ALL >> configure.log
c55b40
@@ -947,6 +973,7 @@ echo mandir = $mandir >> configure.log
c55b40
 echo prefix = $prefix >> configure.log
c55b40
 echo sharedlibdir = $sharedlibdir >> configure.log
c55b40
 echo uname = $uname >> configure.log
c55b40
+echo VGFMAFLAG = $VGFMAFLAG >> configure.log
c55b40
 
c55b40
 # udpate Makefile with the configure results
c55b40
 sed < ${SRCDIR}Makefile.in "
c55b40
@@ -956,6 +983,7 @@ sed < ${SRCDIR}Makefile.in "
c55b40
 /^LDFLAGS *=/s#=.*#=$LDFLAGS#
c55b40
 /^LDSHARED *=/s#=.*#=$LDSHARED#
c55b40
 /^CPP *=/s#=.*#=$CPP#
c55b40
+/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG#
c55b40
 /^STATICLIB *=/s#=.*#=$STATICLIB#
c55b40
 /^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
c55b40
 /^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
c55b40
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
c55b40
index daf4fe4..b62379e 100644
c55b40
--- a/contrib/gcc/zifunc.h
c55b40
+++ b/contrib/gcc/zifunc.h
c55b40
@@ -8,9 +8,28 @@
c55b40
 
c55b40
 /* Helpers for arch optimizations */
c55b40
 
c55b40
+#if defined(__clang__)
c55b40
+#if __has_feature(coverage_sanitizer)
c55b40
+#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage")))
c55b40
+#else /* __has_feature(coverage_sanitizer) */
c55b40
+#define Z_IFUNC_NO_SANCOV
c55b40
+#endif /* __has_feature(coverage_sanitizer) */
c55b40
+#else /* __clang__ */
c55b40
+#define Z_IFUNC_NO_SANCOV
c55b40
+#endif /* __clang__ */
c55b40
+
c55b40
+#ifdef __s390__
c55b40
+#define Z_IFUNC_PARAMS unsigned long hwcap
c55b40
+#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV
c55b40
+#else /* __s390__ */
c55b40
+#define Z_IFUNC_PARAMS void
c55b40
+#define Z_IFUNC_ATTRS
c55b40
+#endif /* __s390__ */
c55b40
+
c55b40
 #define Z_IFUNC(fname) \
c55b40
     typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
c55b40
-    local typeof(fname) *fname##_resolver(void)
c55b40
+    Z_IFUNC_ATTRS \
c55b40
+    local typeof(fname) *fname##_resolver(Z_IFUNC_PARAMS)
c55b40
 /* This is a helper macro to declare a resolver for an indirect function
c55b40
  * (ifunc). Let's say you have function
c55b40
  *
c55b40
diff --git a/contrib/s390/crc32-vx.c b/contrib/s390/crc32-vx.c
c55b40
new file mode 100644
c55b40
index 0000000..fa5387c
c55b40
--- /dev/null
c55b40
+++ b/contrib/s390/crc32-vx.c
c55b40
@@ -0,0 +1,195 @@
c55b40
+/*
c55b40
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
c55b40
+ *
c55b40
+ * Use the z/Architecture Vector Extension Facility to accelerate the
c55b40
+ * computing of bitreflected CRC-32 checksums.
c55b40
+ *
c55b40
+ * This CRC-32 implementation algorithm is bitreflected and processes
c55b40
+ * the least-significant bit first (Little-Endian).
c55b40
+ *
c55b40
+ * This code was originally written by Hendrik Brueckner
c55b40
+ * <brueckner@linux.vnet.ibm.com> for use in the Linux kernel and has been
c55b40
+ * relicensed under the zlib license.
c55b40
+ */
c55b40
+
c55b40
+#include "../../zutil.h"
c55b40
+
c55b40
+#include <stdint.h>
c55b40
+#include <vecintrin.h>
c55b40
+
c55b40
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
c55b40
+typedef unsigned int uv4si __attribute__((vector_size(16)));
c55b40
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
c55b40
+
c55b40
+uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) {
c55b40
+    /*
c55b40
+     * The CRC-32 constant block contains reduction constants to fold and
c55b40
+     * process particular chunks of the input data stream in parallel.
c55b40
+     *
c55b40
+     * For the CRC-32 variants, the constants are precomputed according to
c55b40
+     * these definitions:
c55b40
+     *
c55b40
+     *      R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1
c55b40
+     *      R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1
c55b40
+     *      R3 = [(x^(128+32) mod P'(x) << 32)]'   << 1
c55b40
+     *      R4 = [(x^(128-32) mod P'(x) << 32)]'   << 1
c55b40
+     *      R5 = [(x^64 mod P'(x) << 32)]'         << 1
c55b40
+     *      R6 = [(x^32 mod P'(x) << 32)]'         << 1
c55b40
+     *
c55b40
+     *      The bitreflected Barrett reduction constant, u', is defined as
c55b40
+     *      the bit reversal of floor(x^64 / P(x)).
c55b40
+     *
c55b40
+     *      where P(x) is the polynomial in the normal domain and the P'(x) is the
c55b40
+     *      polynomial in the reversed (bitreflected) domain.
c55b40
+     *
c55b40
+     * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
c55b40
+     *
c55b40
+     *      P(x)  = 0x04C11DB7
c55b40
+     *      P'(x) = 0xEDB88320
c55b40
+     */
c55b40
+    const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};  /* BE->LE mask */
c55b40
+    const uv2di r2r1 = {0x1C6E41596, 0x154442BD4};                                     /* R2, R1 */
c55b40
+    const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0};                                     /* R4, R3 */
c55b40
+    const uv2di r5 = {0, 0x163CD6124};                                                 /* R5 */
c55b40
+    const uv2di ru_poly = {0, 0x1F7011641};                                            /* u' */
c55b40
+    const uv2di crc_poly = {0, 0x1DB710641};                                           /* P'(x) << 1 */
c55b40
+
c55b40
+    /*
c55b40
+     * Load the initial CRC value.
c55b40
+     *
c55b40
+     * The CRC value is loaded into the rightmost word of the
c55b40
+     * vector register and is later XORed with the LSB portion
c55b40
+     * of the loaded input data.
c55b40
+     */
c55b40
+    uv2di v0 = {0, 0};
c55b40
+    v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3);
c55b40
+
c55b40
+    /* Load a 64-byte data chunk and XOR with CRC */
c55b40
+    uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be);
c55b40
+    uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be);
c55b40
+    uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be);
c55b40
+    uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be);
c55b40
+
c55b40
+    v1 ^= v0;
c55b40
+    buf += 64;
c55b40
+    len -= 64;
c55b40
+
c55b40
+    while (len >= 64) {
c55b40
+        /* Load the next 64-byte data chunk */
c55b40
+        uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be);
c55b40
+        uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be);
c55b40
+        uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be);
c55b40
+        uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be);
c55b40
+
c55b40
+        /*
c55b40
+         * Perform a GF(2) multiplication of the doublewords in V1 with
c55b40
+         * the R1 and R2 reduction constants in V0.  The intermediate result
c55b40
+         * is then folded (accumulated) with the next data chunk in PART1 and
c55b40
+         * stored in V1. Repeat this step for the register contents
c55b40
+         * in V2, V3, and V4 respectively.
c55b40
+         */
c55b40
+        v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1);
c55b40
+        v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2);
c55b40
+        v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3);
c55b40
+        v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4);
c55b40
+
c55b40
+        buf += 64;
c55b40
+        len -= 64;
c55b40
+    }
c55b40
+
c55b40
+    /*
c55b40
+     * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
c55b40
+     * and R4 and accumulate the next 128-bit chunk until a single 128-bit
c55b40
+     * value remains.
c55b40
+     */
c55b40
+    v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
c55b40
+    v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3);
c55b40
+    v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4);
c55b40
+
c55b40
+    while (len >= 16) {
c55b40
+        /* Load next data chunk */
c55b40
+        v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be);
c55b40
+
c55b40
+        /* Fold next data chunk */
c55b40
+        v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
c55b40
+
c55b40
+        buf += 16;
c55b40
+        len -= 16;
c55b40
+    }
c55b40
+
c55b40
+    /*
c55b40
+     * Set up a vector register for byte shifts.  The shift value must
c55b40
+     * be loaded in bits 1-4 in byte element 7 of a vector register.
c55b40
+     * Shift by 8 bytes: 0x40
c55b40
+     * Shift by 4 bytes: 0x20
c55b40
+     */
c55b40
+    uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
c55b40
+    v9 = vec_insert((unsigned char)0x40, v9, 7);
c55b40
+
c55b40
+    /*
c55b40
+     * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
c55b40
+     * to move R4 into the rightmost doubleword and set the leftmost
c55b40
+     * doubleword to 0x1.
c55b40
+     */
c55b40
+    v0 = vec_srb(r4r3, (uv2di)v9);
c55b40
+    v0[0] = 1;
c55b40
+
c55b40
+    /*
c55b40
+     * Compute GF(2) product of V1 and V0.  The rightmost doubleword
c55b40
+     * of V1 is multiplied with R4.  The leftmost doubleword of V1 is
c55b40
+     * multiplied by 0x1 and is then XORed with rightmost product.
c55b40
+     * Implicitly, the intermediate leftmost product becomes padded
c55b40
+     */
c55b40
+    v1 = (uv2di)vec_gfmsum_128(v0, v1);
c55b40
+
c55b40
+    /*
c55b40
+     * Now do the final 32-bit fold by multiplying the rightmost word
c55b40
+     * in V1 with R5 and XOR the result with the remaining bits in V1.
c55b40
+     *
c55b40
+     * To achieve this by a single VGFMAG, right shift V1 by a word
c55b40
+     * and store the result in V2 which is then accumulated.  Use the
c55b40
+     * vector unpack instruction to load the rightmost half of the
c55b40
+     * doubleword into the rightmost doubleword element of V1; the other
c55b40
+     * half is loaded in the leftmost doubleword.
c55b40
+     * The vector register with CONST_R5 contains the R5 constant in the
c55b40
+     * rightmost doubleword and the leftmost doubleword is zero to ignore
c55b40
+     * the leftmost product of V1.
c55b40
+     */
c55b40
+    v9 = vec_insert((unsigned char)0x20, v9, 7);
c55b40
+    v2 = vec_srb(v1, (uv2di)v9);
c55b40
+    v1 = vec_unpackl((uv4si)v1);  /* Split rightmost doubleword */
c55b40
+    v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2);
c55b40
+
c55b40
+    /*
c55b40
+     * Apply a Barrett reduction to compute the final 32-bit CRC value.
c55b40
+     *
c55b40
+     * The input values to the Barrett reduction are the degree-63 polynomial
c55b40
+     * in V1 (R(x)), degree-32 generator polynomial, and the reduction
c55b40
+     * constant u.  The Barrett reduction result is the CRC value of R(x) mod
c55b40
+     * P(x).
c55b40
+     *
c55b40
+     * The Barrett reduction algorithm is defined as:
c55b40
+     *
c55b40
+     *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
c55b40
+     *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
c55b40
+     *    3. C(x)  = R(x) XOR T2(x) mod x^32
c55b40
+     *
c55b40
+     *  Note: The leftmost doubleword of vector register containing
c55b40
+     *  CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
c55b40
+     *  is zero and does not contribute to the final result.
c55b40
+     */
c55b40
+
c55b40
+    /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
c55b40
+    v2 = vec_unpackl((uv4si)v1);
c55b40
+    v2 = (uv2di)vec_gfmsum_128(ru_poly, v2);
c55b40
+
c55b40
+    /*
c55b40
+     * Compute the GF(2) product of the CRC polynomial with T1(x) in
c55b40
+     * V2 and XOR the intermediate result, T2(x), with the value in V1.
c55b40
+     * The final result is stored in word element 2 of V2.
c55b40
+     */
c55b40
+    v2 = vec_unpackl((uv4si)v2);
c55b40
+    v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1);
c55b40
+
c55b40
+    return ((uv4si)v2)[2];
c55b40
+}
c55b40
diff --git a/contrib/s390/crc32_z_resolver.c b/contrib/s390/crc32_z_resolver.c
c55b40
new file mode 100644
c55b40
index 0000000..9749cab
c55b40
--- /dev/null
c55b40
+++ b/contrib/s390/crc32_z_resolver.c
c55b40
@@ -0,0 +1,41 @@
c55b40
+#include <sys/auxv.h>
c55b40
+#include "../gcc/zifunc.h"
c55b40
+
c55b40
+#define VX_MIN_LEN 64
c55b40
+#define VX_ALIGNMENT 16L
c55b40
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
c55b40
+
c55b40
+unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len);
c55b40
+
c55b40
+local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len)
c55b40
+{
c55b40
+    uintptr_t prealign, aligned, remaining;
c55b40
+
c55b40
+    if (buf == Z_NULL) return 0UL;
c55b40
+
c55b40
+    if (len < VX_MIN_LEN + VX_ALIGN_MASK)
c55b40
+        return crc32_z_default(crc, buf, len);
c55b40
+
c55b40
+    if ((uintptr_t)buf & VX_ALIGN_MASK) {
c55b40
+        prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
c55b40
+        len -= prealign;
c55b40
+        crc = crc32_z_default(crc, buf, prealign);
c55b40
+        buf += prealign;
c55b40
+    }
c55b40
+    aligned = len & ~VX_ALIGN_MASK;
c55b40
+    remaining = len & VX_ALIGN_MASK;
c55b40
+
c55b40
+    crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff;
c55b40
+
c55b40
+    if (remaining)
c55b40
+        crc = crc32_z_default(crc, buf + aligned, remaining);
c55b40
+
c55b40
+    return crc;
c55b40
+}
c55b40
+
c55b40
+Z_IFUNC(crc32_z)
c55b40
+{
c55b40
+    if (hwcap & HWCAP_S390_VX)
c55b40
+        return s390_crc32_vx;
c55b40
+    return crc32_z_default;
c55b40
+}
c55b40
diff --git a/crc32.c b/crc32.c
c55b40
index b0cda20..379fac3 100644
c55b40
--- a/crc32.c
c55b40
+++ b/crc32.c
c55b40
@@ -199,12 +199,12 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
c55b40
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
c55b40
 
c55b40
 /* ========================================================================= */
c55b40
-#ifdef Z_POWER_OPT
c55b40
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
c55b40
 /* Rename function so resolver can use its symbol. The default version will be
c55b40
  * returned by the resolver if the host has no support for an optimized version.
c55b40
  */
c55b40
 #define crc32_z crc32_z_default
c55b40
-#endif /* Z_POWER_OPT */
c55b40
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
c55b40
 
c55b40
 unsigned long ZEXPORT crc32_z(crc, buf, len)
c55b40
     unsigned long crc;
c55b40
@@ -240,10 +240,15 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
c55b40
     return crc ^ 0xffffffffUL;
c55b40
 }
c55b40
 
c55b40
-#ifdef Z_POWER_OPT
c55b40
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
c55b40
 #undef crc32_z
c55b40
+#ifdef Z_POWER_OPT
c55b40
 #include "contrib/power/crc32_z_resolver.c"
c55b40
 #endif /* Z_POWER_OPT */
c55b40
+#ifdef HAVE_S390X_VX
c55b40
+#include "contrib/s390/crc32_z_resolver.c"
c55b40
+#endif /* HAVE_S390X_VX */
c55b40
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
c55b40
 
c55b40
 /* ========================================================================= */
c55b40
 unsigned long ZEXPORT crc32(crc, buf, len)
c55b40
-- 
c55b40
2.39.1
c55b40