From e6944ddbd27e591f57bb38be2c0b587390d7097f Mon Sep 17 00:00:00 2001 From: aekoroglu Date: Apr 25 2022 18:52:48 +0000 Subject: hyperscale intel branch --- diff --git a/SOURCES/zlib-1.2.11-optimize-fill_window.patch b/SOURCES/zlib-1.2.11-optimize-fill_window.patch new file mode 100644 index 0000000..c9c362a --- /dev/null +++ b/SOURCES/zlib-1.2.11-optimize-fill_window.patch @@ -0,0 +1,134 @@ +--- a/configure 2022-04-19 17:46:39.589212290 +0300 ++++ b/configure 2022-04-19 17:48:26.737818784 +0300 +@@ -115,6 +115,7 @@ + echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log + echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log + echo ' [--dfltcc]' | tee -a configure.log ++ echo ' [--enable-sse_slide]' | tee -a configure.log + exit 0 ;; + -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; + -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; +@@ -144,6 +145,12 @@ + PIC_OBJC="$PIC_OBJC dfltcc.lo" + shift + ;; ++ --enable-sse_slide) ++ CFLAGS="$CFLAGS -DUSE_SSE_SLIDE" ++ OBJC="$OBJC slide_sse.o" ++ PIC_OBJC="$PIC_OBJC slide_sse.lo" ++ shift ++ ;; + *) + echo "unknown option: $1" | tee -a configure.log + echo "$0 --help for help" | tee -a configure.log +--- a/Makefile.in 2022-04-11 18:00:47.184530801 +0300 ++++ b/Makefile.in 2022-04-11 18:02:47.815927655 +0300 +@@ -151,6 +151,14 @@ + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c + -@mv objs/dfltcc.o $@ + ++slide_sse.o: $(SRCDIR)slide_sse.c ++ $(CC) $(CFLAGS) $(ZINC) -msse2 -c -o $@ $(SRCDIR)slide_sse.c ++ ++slide_sse.lo: $(SRCDIR)slide_sse.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -msse2 -c -o objs/slide_sse.o $(SRCDIR)slide_sse.c ++ -@mv objs/slide_sse.o $@ ++ + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c + +--- a/deflate.c 2022-04-19 11:43:42.333320519 +0300 ++++ 
b/deflate.c 2022-04-19 15:55:30.636531139 +0300 +@@ -90,6 +90,8 @@ + + local int deflateStateCheck OF((z_streamp strm)); + local void slide_hash OF((deflate_state *s)); ++local void slide_hash_c OF((deflate_state *s)); ++extern void slide_hash_sse (deflate_state *s); + local void fill_window OF((deflate_state *s)); + local block_state deflate_stored OF((deflate_state *s, int flush)); + local block_state deflate_fast OF((deflate_state *s, int flush)); +@@ -212,7 +214,7 @@ + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +-local void slide_hash(s) ++local void slide_hash_c(s) + deflate_state *s; + { + unsigned n, m; +@@ -238,6 +240,15 @@ + #endif + } + ++local void slide_hash(deflate_state *s) ++{ ++#ifdef USE_SSE_SLIDE ++ slide_hash_sse(s); ++#else ++ slide_hash_c(s); ++#endif ++} ++ + /* ========================================================================= */ + int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; +diff --git a/slide_sse.c b/slide_sse.c +new file mode 100644 +index 0000000..2ef2669 +--- /dev/null ++++ b/slide_sse.c +@@ -0,0 +1,49 @@ ++/* ++ * SSE optimized hash slide ++ * ++ * Copyright (C) 2017 Intel Corporation ++ * Authors: ++ * Arjan van de Ven ++ * Jim Kukunas ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include "deflate.h" ++#include <immintrin.h> ++ ++void slide_hash_sse(deflate_state *s) ++{ ++ unsigned n; ++ Posf *p; ++ uInt wsize = s->w_size; ++ z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); ++ ++ n = s->hash_size; ++ p = &s->head[n] - 8; ++ do { ++ __m128i value, result; ++ ++ value = _mm_loadu_si128((__m128i *)p); ++ result= _mm_subs_epu16(value, xmm_wsize); ++ _mm_storeu_si128((__m128i *)p, result); ++ p -= 8; ++ n -= 8; ++ } while (n > 0); ++ ++#ifndef FASTEST ++ n = wsize; ++ p = &s->prev[n] - 8; ++ do { ++ __m128i value, result; ++ ++ value = 
_mm_loadu_si128((__m128i *)p); ++ result= _mm_subs_epu16(value, xmm_wsize); ++ _mm_storeu_si128((__m128i *)p, result); ++ ++ p -= 8; ++ n -= 8; ++ } while (n > 0); ++#endif ++} ++ ++ +-- +2.27.0 + diff --git a/SPECS/zlib.spec b/SPECS/zlib.spec index d04aeea..b4d1869 100644 --- a/SPECS/zlib.spec +++ b/SPECS/zlib.spec @@ -3,13 +3,13 @@ Name: zlib Version: 1.2.11 -Release: 17%{?dist} +Release: 18%{?dist} Summary: The compression and decompression library # /contrib/dotzlib/ have Boost license License: zlib and Boost URL: http://www.zlib.net/ -Source: http://www.zlib.net/zlib-%{version}.tar.xz +Source: https://www.zlib.net/fossils/zlib-%{version}.tar.gz # https://github.com/madler/zlib/pull/210 Patch0: zlib-1.2.5-minizip-fixuncrypt.patch # resolves: #805113 @@ -31,7 +31,8 @@ Patch7: zlib-1.2.11-permit-deflateParams-change.patch Patch8: zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch # fixed inflateSyncPoint() bad return value on z15 Patch9: zlib-1.2.11-inflateSyncPoint-return-value-fix.patch - +# optimize fill window +Patch10: zlib-1.2.11-optimize-fill_window.patch BuildRequires: automake, autoconf, libtool %description @@ -93,6 +94,7 @@ developing applications which use minizip. 
%patch7 -p1 %patch8 -p1 %patch9 -p1 +%patch10 -p1 iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp mv ChangeLog.tmp ChangeLog @@ -109,8 +111,12 @@ export LDFLAGS="$LDFLAGS -Wl,-z,relro -Wl,-z,now" %ifarch s390 s390x ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --dfltcc %else +%ifarch x86_64 +./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --enable-sse_slide +%else ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} %endif +%endif %make_build %if %{with minizip} @@ -172,6 +178,9 @@ find $RPM_BUILD_ROOT -name '*.la' -delete %changelog +* Mon Apr 25 2022 Ali Erdinc Koroglu 1.2.11-18 +- Intel sse_slide optimization patch added + * Mon Jun 15 2020 Ondrej Dubaj - 1.2.11-17 - Fixed DFLTCC compression level switching issues (#1875492) - Enabled HW compression for compression levels 1 through 6 (#1847438)