From 4e3973d3cf048d489f44be4b806eec05f2751480 Mon Sep 17 00:00:00 2001 From: aekoroglu Date: Jan 19 2023 17:15:42 +0000 Subject: hyperscale c9s intel branch --- diff --git a/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch b/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch index 27454ab..3db5676 100644 --- a/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch +++ b/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch @@ -1,6 +1,3 @@ -Source from https://gitlab.com/redhat/centos-stream/rpms/zlib/-/merge_requests/9 -Author: Ilya Leoshkevich - --- a/compress.c +++ b/compress.c @@ -5,9 +5,15 @@ diff --git a/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch b/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch new file mode 100644 index 0000000..b39d839 --- /dev/null +++ b/SOURCES/zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch @@ -0,0 +1,299 @@ +--- a/contrib/s390/dfltcc.c ++++ b/contrib/s390/dfltcc.c +@@ -539,10 +539,6 @@ int ZLIB_INTERNAL dfltcc_can_inflate(strm) + struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; + struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); + +- /* Unsupported compression settings */ +- if (state->wbits != HB_BITS) +- return 0; +- + /* Unsupported hardware */ + return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && + is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); +@@ -606,8 +602,6 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret) + /* Translate stream to parameter block */ + param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; + param->sbb = state->bits; +- param->hl = state->whave; /* Software and hardware history formats match */ +- param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); + if (param->hl) + param->nt = 0; /* Honor history for the first block */ + param->cv = state->flags ? 
ZSWAP32(state->check) : state->check; +@@ -621,8 +615,6 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret) + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->last = cc == DFLTCC_CC_OK; + state->bits = param->sbb; +- state->whave = param->hl; +- state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); + strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ +@@ -644,11 +636,52 @@ int ZLIB_INTERNAL dfltcc_was_inflate_used(strm) + return !param->nt; + } + ++/* ++ Rotates a circular buffer. ++ The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ ++ */ ++local void rotate OF((Bytef *start, Bytef *pivot, Bytef *end)); ++local void rotate(start, pivot, end) ++ Bytef *start; ++ Bytef *pivot; ++ Bytef *end; ++{ ++ Bytef *p = pivot; ++ Bytef tmp; ++ ++ while (p != start) { ++ tmp = *start; ++ *start = *p; ++ *p = tmp; ++ ++ start++; ++ p++; ++ ++ if (p == end) ++ p = pivot; ++ else if (start == pivot) ++ pivot = p; ++ } ++} ++ ++#define MIN(x, y) ({ \ ++ typeof(x) _x = (x); \ ++ typeof(y) _y = (y); \ ++ _x < _y ? _x : _y; \ ++}) ++ ++#define MAX(x, y) ({ \ ++ typeof(x) _x = (x); \ ++ typeof(y) _y = (y); \ ++ _x > _y ? 
_x : _y; \ ++}) ++ + int ZLIB_INTERNAL dfltcc_inflate_disable(strm) + z_streamp strm; + { + struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; + struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (!dfltcc_can_inflate(strm)) + return 0; +@@ -660,6 +693,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(strm) + return 1; + /* DFLTCC was not used yet - decompress in software */ + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ /* Convert the window from the hardware to the software format */ ++ rotate(state->window, state->window + param->ho, state->window + HB_SIZE); ++ state->whave = state->wnext = MIN(param->hl, state->wsize); + return 0; + } + +@@ -830,9 +866,9 @@ voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size) + voidpf p, w; + + /* To simplify freeing, we store the pointer to the allocated buffer right +- * before the window. ++ * before the window. Note that DFLTCC always uses HB_SIZE bytes. 
+ */ +- p = ZALLOC(strm, sizeof(voidpf) + items * size + PAGE_ALIGN, ++ p = ZALLOC(strm, sizeof(voidpf) + MAX(items * size, HB_SIZE) + PAGE_ALIGN, + sizeof(unsigned char)); + if (p == NULL) + return NULL; +@@ -841,6 +877,14 @@ voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size) + return w; + } + ++void ZLIB_INTERNAL dfltcc_copy_window(dest, src, n) ++ void *dest; ++ const void *src; ++ size_t n; ++{ ++ memcpy(dest, src, MAX(n, HB_SIZE)); ++} ++ + void ZLIB_INTERNAL dfltcc_free_window(strm, w) + z_streamp strm; + voidpf w; +@@ -951,6 +995,24 @@ local void append_history(param, history, buf, count) + } + } + ++local void get_history OF((struct dfltcc_param_v0 FAR *param, ++ const Bytef *history, ++ Bytef *buf)); ++local void get_history(param, history, buf) ++ struct dfltcc_param_v0 FAR *param; ++ const Bytef *history; ++ Bytef *buf; ++{ ++ if (param->ho + param->hl <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ memcpy(buf, history + param->ho, param->hl); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ memcpy(buf, history + param->ho, HB_SIZE - param->ho); ++ memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); ++ } ++} ++ + int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length) + z_streamp strm; + const Bytef *dictionary; +@@ -975,20 +1037,43 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length) + struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; + +- if (dictionary) { +- if (param->ho + param->hl <= HB_SIZE) +- /* Circular history buffer does not wrap - copy one chunk */ +- zmemcpy(dictionary, state->window + param->ho, param->hl); +- else { +- /* Circular history buffer wraps - copy two chunks */ +- zmemcpy(dictionary, +- state->window + param->ho, +- HB_SIZE - param->ho); +- zmemcpy(dictionary + HB_SIZE - param->ho, +- state->window, +- param->ho + 
param->hl - HB_SIZE); +- } ++ if (dictionary) ++ get_history(param, state->window, dictionary); ++ if (dict_length) ++ *dict_length = param->hl; ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_inflate_set_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ const Bytef *dictionary; ++ uInt dict_length; ++{ ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (inflate_ensure_window(state)) { ++ state->mode = MEM; ++ return Z_MEM_ERROR; + } ++ ++ append_history(param, state->window, dictionary, dict_length); ++ state->havedict = 1; ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ Bytef *dictionary; ++ uInt *dict_length; ++{ ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (dictionary && state->window) ++ get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +--- a/contrib/s390/dfltcc.h ++++ b/contrib/s390/dfltcc.h +@@ -11,6 +11,8 @@ void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src, + void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size)); + voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items, + uInt size)); ++void ZLIB_INTERNAL dfltcc_copy_window OF((void *dest, const void *src, ++ size_t n)); + void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w)); + #define DFLTCC_BLOCK_HEADER_BITS 3 + #define DFLTCC_HLITS_COUNT_BITS 5 +@@ -44,11 +46,18 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm, + int flush, int *ret)); + int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm)); + int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm)); 
++int ZLIB_INTERNAL dfltcc_inflate_set_dictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dict_length)); ++int ZLIB_INTERNAL dfltcc_inflate_get_dictionary OF((z_streamp strm, ++ Bytef *dictionary, ++ uInt* dict_length)); + + #define ZALLOC_STATE dfltcc_alloc_state + #define ZFREE_STATE ZFREE + #define ZCOPY_STATE dfltcc_copy_state + #define ZALLOC_WINDOW dfltcc_alloc_window ++#define ZCOPY_WINDOW dfltcc_copy_window + #define ZFREE_WINDOW dfltcc_free_window + #define TRY_FREE_WINDOW dfltcc_free_window + #define INFLATE_RESET_KEEP_HOOK(strm) \ +@@ -77,5 +86,15 @@ int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm)); + do { \ + if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ + } while (0) ++#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_inflate(strm)) \ ++ return dfltcc_inflate_set_dictionary(strm, dict, dict_len); \ ++ } while (0) ++#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_inflate(strm)) \ ++ return dfltcc_inflate_get_dictionary(strm, dict, dict_len); \ ++ } while (0) + + #endif +\ No newline at end of file +diff --git a/inflate.c b/inflate.c +index 3750152..a0e2169 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -93,6 +93,7 @@ + #define ZFREE_STATE ZFREE + #define ZCOPY_STATE zmemcpy + #define ZALLOC_WINDOW ZALLOC ++#define ZCOPY_WINDOW zmemcpy + #define ZFREE_WINDOW ZFREE + #define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) + #define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) +@@ -101,6 +102,8 @@ + #define INFLATE_NEED_UPDATEWINDOW(strm) 1 + #define INFLATE_MARK_HOOK(strm) do {} while (0) + #define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) ++#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) + #endif + + #ifdef MAKEFIXED +@@ -1330,6 +1333,8 @@ uInt *dictLength; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = 
(struct inflate_state FAR *)strm->state; + ++ INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); ++ + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, +@@ -1365,6 +1370,8 @@ uInt dictLength; + return Z_DATA_ERROR; + } + ++ INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); ++ + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); +@@ -1529,8 +1536,7 @@ z_streamp source; + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { +- wsize = 1U << state->wbits; +- zmemcpy(window, state->window, wsize); ++ ZCOPY_WINDOW(window, state->window, 1U << state->wbits); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; diff --git a/SOURCES/zlib-1.2.11-covscan-issues-rhel9.patch b/SOURCES/zlib-1.2.11-covscan-issues-rhel9.patch new file mode 100644 index 0000000..d692589 --- /dev/null +++ b/SOURCES/zlib-1.2.11-covscan-issues-rhel9.patch @@ -0,0 +1,65 @@ +From 1e56dd1e7285d3026092ba794078edb290b4c1b1 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Mon, 15 Mar 2021 13:06:35 +0100 +Subject: [PATCH] fixed isues found by covscan + +--- + contrib/minizip/mztools.c | 8 ++++++++ + contrib/minizip/zip.c | 4 ++-- + deflate.c | 5 +++-- + 3 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/contrib/minizip/mztools.c b/contrib/minizip/mztools.c +index 96891c2..1197928 100644 +--- a/contrib/minizip/mztools.c ++++ b/contrib/minizip/mztools.c +@@ -286,6 +286,14 @@ uLong* bytesRecovered; + } + } else { + err = Z_STREAM_ERROR; ++ if(fpZip != NULL) ++ fclose(fpZip); ++ ++ if(fpOut != NULL) ++ fclose(fpOut); ++ ++ if(fpOutCD != NULL) ++ fclose(fpOutCD); + } + return err; + } +diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c +index 44e88a9..a753c17 100644 +--- a/contrib/minizip/zip.c ++++ 
b/contrib/minizip/zip.c +@@ -526,8 +526,8 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f + break; + } + +- if (uPosFound!=0) +- break; ++ if (uPosFound!=0) ++ break; + } + TRYFREE(buf); + return uPosFound; +diff --git a/deflate.c b/deflate.c +index 085abbe..3963e79 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -203,9 +203,10 @@ local const config configuration_table[10] = { + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +-#define CLEAR_HASH(s) \ ++#define CLEAR_HASH(s) do { \ + s->head[s->hash_size-1] = NIL; \ +- zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); ++ zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); \ ++} while (0) + + /* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 +-- +2.26.0 + diff --git a/SOURCES/zlib-1.2.11-s390x-vectorize-crc32.patch b/SOURCES/zlib-1.2.11-s390x-vectorize-crc32.patch new file mode 100644 index 0000000..71af7ad --- /dev/null +++ b/SOURCES/zlib-1.2.11-s390x-vectorize-crc32.patch @@ -0,0 +1,393 @@ +From 2dfdc5b7d6943c0ac60eef63e361e2a50f9da610 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Thu, 19 Mar 2020 11:52:03 +0100 +Subject: [PATCH] s390x: vectorize crc32 + +Use vector extensions when compiling for s390x and binutils knows +about them. At runtime, check whether kernel supports vector +extensions (it has to be not just the CPU, but also the kernel) and +choose between the regular and the vectorized implementations. 
+--- + Makefile.in | 9 ++ + configure | 28 ++++++ + contrib/s390/crc32-vx.c | 195 ++++++++++++++++++++++++++++++++++++++++ + crc32.c | 55 +++++++++++- + 4 files changed, 285 insertions(+), 2 deletions(-) + create mode 100644 contrib/s390/crc32-vx.c + +diff --git a/Makefile.in b/Makefile.in +index 6070dcc..9e9743b 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -29,6 +29,7 @@ LDFLAGS= + TEST_LDFLAGS=-L. libz.a + LDSHARED=$(CC) + CPP=$(CC) -E ++VGFMAFLAG= + + STATICLIB=libz.a + SHAREDLIB=libz.so +@@ -179,6 +180,9 @@ crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c + crc32.o: $(SRCDIR)crc32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c + ++crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c ++ $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c ++ + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + +@@ -234,6 +238,11 @@ crc32.lo: $(SRCDIR)crc32.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c + -@mv objs/crc32.o $@ + ++crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mv objs/crc32-vx.o $@ ++ + deflate.lo: $(SRCDIR)deflate.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c +diff --git a/configure b/configure +index 70ed86b..7941f75 100755 +--- a/configure ++++ b/configure +@@ -923,6 +923,32 @@ EOF + fi + fi + ++# check if we are compiling for s390 and binutils support vector extensions ++VGFMAFLAG=-march=z13 ++cat > $test.c <> configure.log + echo ALL = $ALL >> configure.log +@@ -955,6 +981,7 @@ echo mandir = $mandir >> configure.log + echo prefix = $prefix >> configure.log + echo sharedlibdir = $sharedlibdir >> configure.log + echo uname = $uname >> configure.log ++echo VGFMAFLAG = $VGFMAFLAG >> configure.log + + # udpate Makefile with the configure results + sed < 
${SRCDIR}Makefile.in " +@@ -964,6 +991,7 @@ sed < ${SRCDIR}Makefile.in " + /^LDFLAGS *=/s#=.*#=$LDFLAGS# + /^LDSHARED *=/s#=.*#=$LDSHARED# + /^CPP *=/s#=.*#=$CPP# ++/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG# + /^STATICLIB *=/s#=.*#=$STATICLIB# + /^SHAREDLIB *=/s#=.*#=$SHAREDLIB# + /^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV# +diff --git a/contrib/s390/crc32-vx.c b/contrib/s390/crc32-vx.c +new file mode 100644 +index 0000000..fa5387c +--- /dev/null ++++ b/contrib/s390/crc32-vx.c +@@ -0,0 +1,195 @@ ++/* ++ * Hardware-accelerated CRC-32 variants for Linux on z Systems ++ * ++ * Use the z/Architecture Vector Extension Facility to accelerate the ++ * computing of bitreflected CRC-32 checksums. ++ * ++ * This CRC-32 implementation algorithm is bitreflected and processes ++ * the least-significant bit first (Little-Endian). ++ * ++ * This code was originally written by Hendrik Brueckner ++ * for use in the Linux kernel and has been ++ * relicensed under the zlib license. ++ */ ++ ++#include "../../zutil.h" ++ ++#include ++#include ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) { ++ /* ++ * The CRC-32 constant block contains reduction constants to fold and ++ * process particular chunks of the input data stream in parallel. ++ * ++ * For the CRC-32 variants, the constants are precomputed according to ++ * these definitions: ++ * ++ * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 ++ * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 ++ * R3 = [(x128+32 mod P'(x) << 32)]' << 1 ++ * R4 = [(x128-32 mod P'(x) << 32)]' << 1 ++ * R5 = [(x64 mod P'(x) << 32)]' << 1 ++ * R6 = [(x32 mod P'(x) << 32)]' << 1 ++ * ++ * The bitreflected Barret reduction constant, u', is defined as ++ * the bit reversal of floor(x**64 / P(x)). 
++ * ++ * where P(x) is the polynomial in the normal domain and the P'(x) is the ++ * polynomial in the reversed (bitreflected) domain. ++ * ++ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: ++ * ++ * P(x) = 0x04C11DB7 ++ * P'(x) = 0xEDB88320 ++ */ ++ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ ++ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ ++ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ ++ const uv2di r5 = {0, 0x163CD6124}; /* R5 */ ++ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ ++ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ ++ ++ /* ++ * Load the initial CRC value. ++ * ++ * The CRC value is loaded into the rightmost word of the ++ * vector register and is later XORed with the LSB portion ++ * of the loaded input data. ++ */ ++ uv2di v0 = {0, 0}; ++ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); ++ ++ /* Load a 64-byte data chunk and XOR with CRC */ ++ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); ++ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); ++ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); ++ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); ++ ++ v1 ^= v0; ++ buf += 64; ++ len -= 64; ++ ++ while (len >= 64) { ++ /* Load the next 64-byte data chunk */ ++ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); ++ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); ++ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); ++ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); ++ ++ /* ++ * Perform a GF(2) multiplication of the doublewords in V1 with ++ * the R1 and R2 reduction constants in V0. The intermediate result ++ * is then folded (accumulated) with the next data chunk in PART1 and ++ * stored in V1. 
Repeat this step for the register contents ++ * in V2, V3, and V4 respectively. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); ++ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); ++ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); ++ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); ++ ++ buf += 64; ++ len -= 64; ++ } ++ ++ /* ++ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 ++ * and R4 and accumulating the next 128-bit chunk until a single 128-bit ++ * value remains. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); ++ ++ while (len >= 16) { ++ /* Load next data chunk */ ++ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); ++ ++ /* Fold next data chunk */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ ++ buf += 16; ++ len -= 16; ++ } ++ ++ /* ++ * Set up a vector register for byte shifts. The shift value must ++ * be loaded in bits 1-4 in byte element 7 of a vector register. ++ * Shift by 8 bytes: 0x40 ++ * Shift by 4 bytes: 0x20 ++ */ ++ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ v9 = vec_insert((unsigned char)0x40, v9, 7); ++ ++ /* ++ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes ++ * to move R4 into the rightmost doubleword and set the leftmost ++ * doubleword to 0x1. ++ */ ++ v0 = vec_srb(r4r3, (uv2di)v9); ++ v0[0] = 1; ++ ++ /* ++ * Compute GF(2) product of V1 and V0. The rightmost doubleword ++ * of V1 is multiplied with R4. The leftmost doubleword of V1 is ++ * multiplied by 0x1 and is then XORed with rightmost product. ++ * Implicitly, the intermediate leftmost product becomes padded ++ */ ++ v1 = (uv2di)vec_gfmsum_128(v0, v1); ++ ++ /* ++ * Now do the final 32-bit fold by multiplying the rightmost word ++ * in V1 with R5 and XOR the result with the remaining bits in V1. 
++ * ++ * To achieve this by a single VGFMAG, right shift V1 by a word ++ * and store the result in V2 which is then accumulated. Use the ++ * vector unpack instruction to load the rightmost half of the ++ * doubleword into the rightmost doubleword element of V1; the other ++ * half is loaded in the leftmost doubleword. ++ * The vector register with CONST_R5 contains the R5 constant in the ++ * rightmost doubleword and the leftmost doubleword is zero to ignore ++ * the leftmost product of V1. ++ */ ++ v9 = vec_insert((unsigned char)0x20, v9, 7); ++ v2 = vec_srb(v1, (uv2di)v9); ++ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); ++ ++ /* ++ * Apply a Barret reduction to compute the final 32-bit CRC value. ++ * ++ * The input values to the Barret reduction are the degree-63 polynomial ++ * in V1 (R(x)), degree-32 generator polynomial, and the reduction ++ * constant u. The Barret reduction result is the CRC value of R(x) mod ++ * P(x). ++ * ++ * The Barret reduction algorithm is defined as: ++ * ++ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u ++ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) ++ * 3. C(x) = R(x) XOR T2(x) mod x^32 ++ * ++ * Note: The leftmost doubleword of vector register containing ++ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product ++ * is zero and does not contribute to the final result. ++ */ ++ ++ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ ++ v2 = vec_unpackl((uv4si)v1); ++ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); ++ ++ /* ++ * Compute the GF(2) product of the CRC polynomial with T1(x) in ++ * V2 and XOR the intermediate result, T2(x), with the value in V1. ++ * The final result is stored in word element 2 of V2. 
++ */ ++ v2 = vec_unpackl((uv4si)v2); ++ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); ++ ++ return ((uv4si)v2)[2]; ++} +diff --git a/crc32.c b/crc32.c +index 34132ea..dfa33ef 100644 +--- a/crc32.c ++++ b/crc32.c +@@ -252,12 +252,54 @@ unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t); + #endif + #endif + ++#ifdef HAVE_S390X_VX ++#include ++ ++#define VX_MIN_LEN 64 ++#define VX_ALIGNMENT 16L ++#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) ++ ++unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len); ++ ++local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len) ++{ ++ uint64_t prealign, aligned, remaining; ++ ++ if (buf == Z_NULL) return 0UL; ++ ++ if (len < VX_MIN_LEN + VX_ALIGN_MASK) ++ return crc32_big(crc, buf, len); ++ ++ if ((uintptr_t)buf & VX_ALIGN_MASK) { ++ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); ++ len -= prealign; ++ crc = crc32_big(crc, buf, prealign); ++ buf += prealign; ++ } ++ aligned = len & ~VX_ALIGN_MASK; ++ remaining = len & VX_ALIGN_MASK; ++ ++ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff; ++ ++ if (remaining) ++ crc = crc32_big(crc, buf + aligned, remaining); ++ ++ return crc; ++} ++#endif ++ + /* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to + * crc32_z which is not desired. 
crc32_z_ifunc is implictly "local" */ + #ifndef Z_IFUNC_ASM + local + #endif +-unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) ++unsigned long (*(crc32_z_ifunc( ++#ifdef __s390__ ++unsigned long hwcap ++#else ++void ++#endif ++)))(unsigned long, const unsigned char FAR *, z_size_t) + { + #if _ARCH_PWR8==1 + #if defined(__BUILTIN_CPU_SUPPORTS__) +@@ -269,6 +311,11 @@ unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, + #endif + #endif /* _ARCH_PWR8 */ + ++#ifdef HAVE_S390X_VX ++ if (hwcap & HWCAP_S390_VX) ++ return s390_crc32_vx; ++#endif ++ + /* return a function pointer for optimized arches here */ + + #ifdef DYNAMIC_CRC_TABLE +@@ -301,7 +348,11 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) + static unsigned long ZEXPORT (*crc32_func)(unsigned long, const unsigned char FAR *, z_size_t) = NULL; + + if (!crc32_func) +- crc32_func = crc32_z_ifunc(); ++ crc32_func = crc32_z_ifunc( ++#ifdef __s390__ ++ getauxval(AT_HWCAP) ++#endif ++ ); + return (*crc32_func)(crc, buf, len); + } + +-- +2.25.1 + diff --git a/SPECS/zlib.spec b/SPECS/zlib.spec index 436767a..629d49f 100644 --- a/SPECS/zlib.spec +++ b/SPECS/zlib.spec @@ -1,52 +1,62 @@ -# disabled, per rhbz#1609830 and rhbz#1602742 -%bcond_with minizip +%bcond_without minizip Name: zlib Version: 1.2.11 -Release: 22%{?dist} -Summary: The compression and decompression library +Release: 37%{?dist} +Summary: Compression and decompression library # /contrib/dotzlib/ have Boost license License: zlib and Boost -URL: http://www.zlib.net/ +URL: https://www.zlib.net/ -Source: https://www.zlib.net/fossils/zlib-%{version}.tar.gz +Source: https://www.zlib.net/zlib-%{version}.tar.xz # https://github.com/madler/zlib/pull/210 Patch0: zlib-1.2.5-minizip-fixuncrypt.patch # resolves: #805113 Patch1: zlib-1.2.11-optimized-s390.patch # IBM Z optimalizations -Patch2: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch +Patch7: 
zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch # IBM CRC32 optimalization for POWER archs -Patch3: zlib-1.2.11-optimized-CRC32-framework.patch +Patch8: zlib-1.2.11-optimized-CRC32-framework.patch # fixed firefox crash + added test case -Patch4: zlib-1.2.11-firefox-crash-fix.patch +Patch9: zlib-1.2.11-firefox-crash-fix.patch # fixed covscan issues -Patch5: zlib-1.2.11-covscan-issues.patch +Patch10: zlib-1.2.11-covscan-issues.patch # fix for IBM Z optimalizations -Patch6: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch +Patch11: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch # permit a deflateParams() parameter change -Patch7: zlib-1.2.11-permit-deflateParams-change.patch +Patch12: zlib-1.2.11-permit-deflateParams-change.patch # fixed DFLTCC compression level switching issues # enabled HW compression for compression levels 1 through 6 -Patch8: zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch +Patch13: zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch # fixed inflateSyncPoint() bad return value on z15 -Patch9: zlib-1.2.11-inflateSyncPoint-return-value-fix.patch -Patch10: zlib-1.2.11-CVE-2018-25032.patch -# Fix the compressBound() on z15 -Patch11: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch +Patch14: zlib-1.2.11-inflateSyncPoint-return-value-fix.patch +# fixed issues found by covscan for rhel-9 +# ref: https://github.com/madler/zlib/pull/554 +Patch15: zlib-1.2.11-covscan-issues-rhel9.patch +# Fix for s390x vectorize CRC32 +Patch16: zlib-1.2.11-s390x-vectorize-crc32.patch +# fix for IBM Z optimalizations +Patch17: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch +Patch18: zlib-1.2.11-CVE-2018-25032.patch -# Fix CVE-2022-37434 -Patch12: zlib-1.2.11-cve-2022-37434.patch -Patch13: zlib-1.2.11-cve-2022-37434_2.patch +# Fix for CVE-2022-37434 +Patch19: zlib-1.2.11-cve-2022-37434.patch +Patch20: zlib-1.2.11-cve-2022-37434_2.patch # Fix setting strm.adler on z15 -Patch14: 
zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch +Patch21: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch + +# Optimization for z15 +Patch22: zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch # Intel slide hash optimization for x86_64 arch Patch100: zlib-1.2.11-x86_64-accelrated-slide-hash.patch +BuildRequires: make BuildRequires: automake, autoconf, libtool +%global __provides_exclude_from ^%{_libdir}/pkgconfig/minizip\\.pc$ + %description Zlib is a general-purpose, patent-free, lossless data compression library which is used by many different programs. @@ -73,20 +83,21 @@ decompression library. %if %{with minizip} -%package -n minizip +%package -n minizip-compat Summary: Library for manipulation with .zip archives Requires: %{name}%{?_isa} = %{version}-%{release} -%description -n minizip +%description -n minizip-compat Minizip is a library for manipulation with files from .zip archives. -%package -n minizip-devel +%package -n minizip-compat-devel Summary: Development files for the minizip library -Requires: minizip%{?_isa} = %{version}-%{release} +Requires: minizip-compat%{?_isa} = %{version}-%{release} Requires: %{name}-devel%{?_isa} = %{version}-%{release} +Conflicts: minizip-devel -%description -n minizip-devel +%description -n minizip-compat-devel This package contains the libraries and header files needed for developing applications which use minizip. %endif @@ -98,11 +109,6 @@ developing applications which use minizip. %ifarch s390 s390x %patch1 -p1 -b .optimized-deflate %endif -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 %patch7 -p1 %patch8 -p1 %patch9 -p1 @@ -111,28 +117,30 @@ developing applications which use minizip. 
%patch12 -p1 %patch13 -p1 %patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 %patch100 -p1 + iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp mv ChangeLog.tmp ChangeLog %build export CFLAGS="$RPM_OPT_FLAGS" -%ifarch ppc64 -CFLAGS+=" -O3" -%endif export LDFLAGS="$LDFLAGS -Wl,-z,relro -Wl,-z,now" # no-autotools, %%configure is not compatible %ifarch s390 s390x -./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --dfltcc -%else -%ifarch x86_64 -./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --enable-sse_slide + ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --dfltcc %else -./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} -%endif + ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} %endif %make_build @@ -181,12 +189,12 @@ find $RPM_BUILD_ROOT -name '*.la' -delete %if %{with minizip} -%files -n minizip +%files -n minizip-compat %doc contrib/minizip/MiniZip64_info.txt contrib/minizip/MiniZip64_Changes.txt %{_libdir}/libminizip.so.* -%files -n minizip-devel +%files -n minizip-compat-devel %dir %{_includedir}/minizip %{_includedir}/minizip/*.h %{_libdir}/libminizip.so @@ -195,58 +203,102 @@ find $RPM_BUILD_ROOT -name '*.la' -delete %changelog -* Wed Oct 12 2022 Ilya Leoshkevich - 1.2.11-22 +* Thu Jan 19 2023 Ali Erdinc Koroglu 1.2.11-37 +- Intel SSE slide hash optimization for x86_64 + +* Mon Dec 19 2022 Ilya Leoshkevich - 1.2.11-36 +- Inflate small window optimization for IBM z15 rhbz#2154775 + +* Wed Oct 12 2022 Ilya Leoshkevich - 1.2.11-35 - Fix for IBM strm.adler rhbz#2134074 -* Tue Aug 09 2022 Matej Mužila - 1.2.11-21 +* Wed Aug 10 2022 Matej Mužila - 1.2.11-34 - Fix heap-based buffer over-read or buffer overflow in inflate in inflate.c - Resolves: CVE-2022-37434 -* Mon May 16 2022 Lukas Javorsky - 1.2.11-20 -- Apply IBM patch for 
compressBound() function -- Source from https://github.com/madler/zlib/issues/410#issuecomment-947212824 -- Resolves: #2056900 +* Mon Apr 25 2022 Matej Mužila - 1.2.11-33 +- Fix CVE-2018-25032 + Resolves: CVE-2018-25032 + +* Tue Mar 01 2022 Ilya Leoshkevich - 1.2.11-32 +- Fix for IBM compressBound() rhbz#2056899 + +* Tue Aug 10 2021 Mohan Boddu - 1.2.11-31 +- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags + Related: rhbz#1991688 -* Tue May 03 2022 Ali Erdinc Koroglu 1.2.11-19 -- Intel SSE2 optimized slide_hash +* Tue Jul 20 2021 Ondrej Dubaj - 1.2.11-30 +- Fix for IBM CRC32 optimalization rhbz#1959423 -* Tue Mar 29 2022 Matej Mužila - 1.2.11-18 -- Resolves: CVE-2018-25032 +* Thu Jul 15 2021 Ondrej Dubaj - 1.2.11-29 +- Missing RPM_OPT_FLAGS in CFLAGS (#1972057) -* Mon Jun 15 2020 Ondrej Dubaj - 1.2.11-17 -- Fixed DFLTCC compression level switching issues (#1875492) -- Enabled HW compression for compression levels 1 through 6 (#1847438) -- Fixed inflateSyncPoint() bad return value on z15 (#1888930) +* Thu Jun 03 2021 Patrik Novotný - 1.2.11-28 +- IBM CRC32 optimalization rhbz#1959423 +- Enabled Z hardware-accelerated deflate for compression levels 1 through 6 (#1972057) -* Mon Jun 15 2020 Ondrej Dubaj - 1.2.11-16 -- Permit a deflateParams() parameter change -- Another fix for Z hardware-accelerated deflate for s390x architectures -- according to previous change by upstream +* Fri Apr 16 2021 Mohan Boddu - 1.2.11-27 +- Rebuilt for RHEL 9 BETA on Apr 15th 2021. 
Related: rhbz#1947937 -* Mon Jun 15 2020 Ondrej Dubaj - 1.2.11-15 -- Another fix for Z hardware-accelerated deflate for s390x architectures +* Wed Mar 31 2021 Ondrej Dubaj - 1.2.11-26 +- fixed covscan issues for rhel-9 -* Mon May 25 2020 Ondrej Dubaj - 1.2.11-14 -- Fix for Z hardware-accelerated deflate for s390x architectures +* Fri Feb 12 2021 Michal Schorm - 1.2.11-25 +- Remove ancient PPC64 hack +- Remove aarch64 optimalizations (#1936823) -* Tue Oct 29 2019 Ondrej Dubaj - 1.2.11-13 +* Thu Jan 28 2021 Fedora Release Engineering - 1.2.11-24 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild + +* Wed Nov 18 2020 Ondrej Dubaj - 1.2.11-23 +- backport IBM Z updates to fedora + +* Wed Jul 29 2020 Fedora Release Engineering - 1.2.11-22 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild + +* Fri Jan 31 2020 Fedora Release Engineering - 1.2.11-21 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild + +* Tue Oct 29 2019 Ondrej Dubaj - 1.2.11-20 - Added -DDFLTCC parameter to configure to enable - Z hardware-accelerated deflate for s390x architectures (#1659433) -* Tue Oct 15 2019 Ondrej Dubaj - 1.2.11-12 -- fixed covscan issues +* Thu Sep 05 2019 Ondrej Dubaj - 1.2.11-19 +- IBM CRC32 optimalization for POWER 8+ architectures re-add +- fixed firefox crash due to zlib (#1741266) +- added test for crc32 + +* Thu Aug 15 2019 Ondrej Dubaj - 1.2.11-18 +- IBM CRC32 optimalization for POWER 8+ architectures revert -* Mon Oct 14 2019 Ondrej Dubaj - 1.2.11-11 +* Thu Aug 01 2019 Ondrej Dubaj - 1.2.11-17 - IBM Z hardware-accelerated deflate for s390x architectures - IBM CRC32 optimalization for POWER 8+ architectures -- fixed firefox crash due to zlib (#1741266) -- added test for crc32 + +* Sat Jul 27 2019 Fedora Release Engineering - 1.2.11-16 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild + +* Sun Feb 03 2019 Fedora Release Engineering - 1.2.11-15 +- Rebuilt for
https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild + +* Tue Oct 2 2018 Peter Robinson 1.2.11-14 +- Bump build + +* Tue Sep 18 2018 Peter Robinson 1.2.11-13 +- Revert aarch64 neon inflate optimisation + +* Wed Aug 29 2018 Patrik Novotný - 1.2.11-12 +- Rename minizip and minizip-devel to minizip-compat and minizip-compat-devel respectively + +* Thu Aug 23 2018 Patrik Novotný - 1.2.11-11 +- Provides minizip-compat and minizip-compat-devel * Fri Aug 03 2018 Pavel Raiskup - 1.2.11-10 -- sync with fedora rawhide +- add %%bcond for minizip +- use %%make_* macros -* Fri May 25 2018 Pavel Raiskup - 1.2.11-9 -- revert previous aarch64 changes (rhbz#1582444, rhbz#1578798) +* Sat Jul 14 2018 Fedora Release Engineering - 1.2.11-9 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild * Mon Apr 30 2018 Peter Robinson 1.2.11-8 - Optimisations for aarch64