diff --git a/SOURCES/zlib-1.2.11-optimize-fill_window.patch b/SOURCES/zlib-1.2.11-optimize-fill_window.patch deleted file mode 100644 index c9c362a..0000000 --- a/SOURCES/zlib-1.2.11-optimize-fill_window.patch +++ /dev/null @@ -1,134 +0,0 @@ ---- a/configure 2022-04-19 17:46:39.589212290 +0300 -+++ b/configure 2022-04-19 17:48:26.737818784 +0300 -@@ -115,6 +115,7 @@ - echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log - echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log - echo ' [--dfltcc]' | tee -a configure.log -+ echo ' [--enable-sse_slide]' | tee -a configure.log - exit 0 ;; - -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; - -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; -@@ -144,6 +145,12 @@ - PIC_OBJC="$PIC_OBJC dfltcc.lo" - shift - ;; -+ --enable-sse_slide) -+ CFLAGS="$CFLAGS -DUSE_SSE_SLIDE" -+ OBJC="$OBJC slide_sse.o" -+ PIC_OBJC="$PIC_OBJC slide_sse.lo" -+ shift -+ ;; - *) - echo "unknown option: $1" | tee -a configure.log - echo "$0 --help for help" | tee -a configure.log ---- a/Makefile.in 2022-04-11 18:00:47.184530801 +0300 -+++ b/Makefile.in 2022-04-11 18:02:47.815927655 +0300 -@@ -151,6 +151,14 @@ - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c - -@mv objs/dfltcc.o $@ - -+slide_sse.o: $(SRCDIR)slide_sse.c -+ $(CC) $(CFLAGS) $(ZINC) -msse2 -c -o $@ $(SRCDIR)slide_sse.c -+ -+slide_sse.lo: $(SRCDIR)slide_sse.c -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -DPIC -msse2 -c -o objs/slide_sse.o $(SRCDIR)slide_sse.c -+ -@mv objs/slide_sse.o $@ -+ - example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h - $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c - ---- a/deflate.c 2022-04-19 11:43:42.333320519 +0300 -+++ b/deflate.c 2022-04-19 15:55:30.636531139 +0300 -@@ -90,6 +90,8 @@ - - local int deflateStateCheck OF((z_streamp strm)); - local void slide_hash OF((deflate_state *s)); -+local void slide_hash_c OF((deflate_state *s)); -+extern void slide_hash_sse (deflate_state *s); - local void fill_window OF((deflate_state *s)); - local block_state deflate_stored OF((deflate_state *s, int flush)); - local block_state deflate_fast OF((deflate_state *s, int flush)); -@@ -212,7 +214,7 @@ - * bit values at the expense of memory usage). We slide even when level == 0 to - * keep the hash table consistent if we switch back to level > 0 later. - */ --local void slide_hash(s) -+local void slide_hash_c(s) - deflate_state *s; - { - unsigned n, m; -@@ -238,6 +240,15 @@ - #endif - } - -+local void slide_hash(deflate_state *s) -+{ -+#ifdef USE_SSE_SLIDE -+ slide_hash_sse(s); -+#else -+ slide_hash_c(s); -+#endif -+} -+ - /* ========================================================================= */ - int ZEXPORT deflateInit_(strm, level, version, stream_size) - z_streamp strm; -diff --git a/slide_sse.c b/slide_sse.c -new file mode 100644 -index 0000000..2ef2669 ---- /dev/null -+++ b/slide_sse.c -@@ -0,0 +1,49 @@ -+/* -+ * SSE optimized hash slide -+ * -+ * Copyright (C) 2017 Intel Corporation -+ * Authors: -+ * Arjan van de Ven -+ * Jim Kukunas -+ * -+ * For conditions of distribution and use, see copyright notice in zlib.h -+ */ -+#include "deflate.h" -+#include -+ -+void slide_hash_sse(deflate_state *s) -+{ -+ unsigned n; -+ Posf *p; -+ uInt wsize = s->w_size; -+ z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); -+ -+ n = s->hash_size; -+ p = &s->head[n] - 8; -+ do { -+ __m128i value, result; -+ -+ value = _mm_loadu_si128((__m128i *)p); -+ result= _mm_subs_epu16(value, xmm_wsize); -+ _mm_storeu_si128((__m128i *)p, result); -+ p -= 8; -+ n -= 8; -+ } while (n > 0); -+ -+#ifndef FASTEST -+ n = wsize; -+ p = &s->prev[n] - 8; -+ do { -+ __m128i value, result; -+ -+ value = _mm_loadu_si128((__m128i *)p); -+ result= _mm_subs_epu16(value, xmm_wsize); -+ _mm_storeu_si128((__m128i *)p, result); -+ -+ p -= 8; -+ n -= 8; -+ } while (n > 0); -+#endif -+} -+ -+ --- -2.27.0 - diff --git a/SOURCES/zlib-1.2.11-x86_64-accelrated-slide-hash.patch b/SOURCES/zlib-1.2.11-x86_64-accelrated-slide-hash.patch new file mode 100644 index 0000000..c9c362a --- /dev/null +++ b/SOURCES/zlib-1.2.11-x86_64-accelrated-slide-hash.patch @@ -0,0 +1,134 @@ +--- a/configure 2022-04-19 17:46:39.589212290 +0300 ++++ b/configure 2022-04-19 17:48:26.737818784 +0300 +@@ -115,6 +115,7 @@ + echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log + echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log + echo ' [--dfltcc]' | tee -a configure.log ++ echo ' [--enable-sse_slide]' | tee -a configure.log + exit 0 ;; + -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; + -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; +@@ -144,6 +145,12 @@ + PIC_OBJC="$PIC_OBJC dfltcc.lo" + shift + ;; ++ --enable-sse_slide) ++ CFLAGS="$CFLAGS -DUSE_SSE_SLIDE" ++ OBJC="$OBJC slide_sse.o" ++ PIC_OBJC="$PIC_OBJC slide_sse.lo" ++ shift ++ ;; + *) + echo "unknown option: $1" | tee -a configure.log + echo "$0 --help for help" | tee -a configure.log +--- a/Makefile.in 2022-04-11 18:00:47.184530801 +0300 ++++ b/Makefile.in 2022-04-11 18:02:47.815927655 +0300 +@@ -151,6 +151,14 @@ + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c + -@mv objs/dfltcc.o $@ + ++slide_sse.o: $(SRCDIR)slide_sse.c ++ $(CC) $(CFLAGS) $(ZINC) -msse2 -c -o $@ $(SRCDIR)slide_sse.c ++ ++slide_sse.lo: $(SRCDIR)slide_sse.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -msse2 -c -o objs/slide_sse.o $(SRCDIR)slide_sse.c ++ -@mv objs/slide_sse.o $@ ++ + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c + +--- a/deflate.c 2022-04-19 11:43:42.333320519 +0300 ++++ b/deflate.c 2022-04-19 15:55:30.636531139 +0300 +@@ -90,6 +90,8 @@ + + local int deflateStateCheck OF((z_streamp strm)); + local void slide_hash OF((deflate_state *s)); ++local void slide_hash_c OF((deflate_state *s)); ++extern void slide_hash_sse (deflate_state *s); + local void fill_window OF((deflate_state *s)); + local block_state deflate_stored OF((deflate_state *s, int flush)); + local block_state deflate_fast OF((deflate_state *s, int flush)); +@@ -212,7 +214,7 @@ + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +-local void slide_hash(s) ++local void slide_hash_c(s) + deflate_state *s; + { + unsigned n, m; +@@ -238,6 +240,15 @@ + #endif + } + ++local void slide_hash(deflate_state *s) ++{ ++#ifdef USE_SSE_SLIDE ++ slide_hash_sse(s); ++#else ++ slide_hash_c(s); ++#endif ++} ++ + /* ========================================================================= */ + int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; +diff --git a/slide_sse.c b/slide_sse.c +new file mode 100644 +index 0000000..2ef2669 +--- /dev/null ++++ b/slide_sse.c +@@ -0,0 +1,49 @@ ++/* ++ * SSE optimized hash slide ++ * ++ * Copyright (C) 2017 Intel Corporation ++ * Authors: ++ * Arjan van de Ven ++ * Jim Kukunas ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include "deflate.h" ++#include ++ ++void slide_hash_sse(deflate_state *s) ++{ ++ unsigned n; ++ Posf *p; ++ uInt wsize = s->w_size; ++ z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); ++ ++ n = s->hash_size; ++ p = &s->head[n] - 8; ++ do { ++ __m128i value, result; ++ ++ value = _mm_loadu_si128((__m128i *)p); ++ result= _mm_subs_epu16(value, xmm_wsize); ++ _mm_storeu_si128((__m128i *)p, result); ++ p -= 8; ++ n -= 8; ++ } while (n > 0); ++ ++#ifndef FASTEST ++ n = wsize; ++ p = &s->prev[n] - 8; ++ do { ++ __m128i value, result; ++ ++ value = _mm_loadu_si128((__m128i *)p); ++ result= _mm_subs_epu16(value, xmm_wsize); ++ _mm_storeu_si128((__m128i *)p, result); ++ ++ p -= 8; ++ n -= 8; ++ } while (n > 0); ++#endif ++} ++ ++ +-- +2.27.0 + diff --git a/SPECS/zlib.spec b/SPECS/zlib.spec index 63e9a7f..436767a 100644 --- a/SPECS/zlib.spec +++ b/SPECS/zlib.spec @@ -42,8 +42,8 @@ Patch13: zlib-1.2.11-cve-2022-37434_2.patch # Fix setting strm.adler on z15 Patch14: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch -# Intel SSE2 optimization -Patch100: zlib-1.2.11-optimize-fill_window.patch +# Intel slide hash optimization for x86_64 arch +Patch100: zlib-1.2.11-x86_64-accelrated-slide-hash.patch BuildRequires: automake, autoconf, libtool