#6 Backport multi-threaded zstd to 4.14.x as it is the latest version available on centos 8
Merged a year ago by dcavalca. Opened a year ago by alexk.

@@ -0,0 +1,178 @@ 

+ From a5803faa083690526b96484c7e6a4cc915ca3921 Mon Nov 28 16:35:09 2022

+ From: Aleksandr Kazakov <alexkazakov@meta.com>

+ Date: Mon, 28 Nov 2022 16:35:09 +0000

+ Subject: [PATCH] Backport multi-threaded zstd to 4.14.x to support

+  multi-threaded zstd compression on centos 8

+ 

+ Signed-off-by: Aleksandr Kazakov <alexkazakov@meta.com>

+ ---

+  configure.ac  |  2 +-

+  macros.in     |  1 +

+  rpmio/rpmio.c | 82 +++++++++++++++++++++++++++++++++++----------------

+  3 files changed, 58 insertions(+), 27 deletions(-)

+ 

+ diff --git a/configure.ac b/configure.ac

+ index 47327bd..b1213ae 100644

+ --- a/configure.ac

+ +++ b/configure.ac

+ @@ -214,7 +214,7 @@ AC_ARG_ENABLE([zstd],

+                [enable_zstd=auto])

+  

+  AS_IF([test "x$enable_zstd" != "xno"], [

+ -  PKG_CHECK_MODULES([ZSTD], [libzstd], [have_zstd=yes], [have_zstd=no])

+ +  PKG_CHECK_MODULES([ZSTD], [libzstd >= 1.3.8], [have_zstd=yes], [have_zstd=no])
alexk commented a year ago

new api to use multi-threaded compression since this version

+    AS_IF([test "$enable_zstd" = "yes"], [

+      if test "$have_zstd" = "no"; then

+        AC_MSG_ERROR([--enable-zstd specified, but not available])

+ diff --git a/macros.in b/macros.in

+ index 9b9fe23..832b60a 100644

+ --- a/macros.in

+ +++ b/macros.in

+ @@ -394,6 +394,7 @@ package or when debugging this package.\

+  #		"w9.bzdio"	bzip2 level 9.

+  #		"w6.xzdio"	xz level 6, xz's default.

+  #		"w7T16.xzdio"	xz level 7 using 16 thread (xz only)

+ +#		"w19T8.zstdio"	zstd level 19 using 8 threads

+  #		"w6.lzdio"	lzma-alone level 6, lzma's default

+  #

+  #%_source_payload	w9.gzdio

+ diff --git a/rpmio/rpmio.c b/rpmio/rpmio.c

+ index 09b5d02..d030a9c 100644

+ --- a/rpmio/rpmio.c

+ +++ b/rpmio/rpmio.c

+ @@ -1070,6 +1070,7 @@ static rpmzstd rpmzstdNew(int fdno, const char *fmode)

+      char *t = stdio;

+      char *te = t + sizeof(stdio) - 2;

+      int c;

+ +    int threads = 0;

+  

+      switch ((c = *s++)) {

+      case 'a':

+ @@ -1098,7 +1099,14 @@ static rpmzstd rpmzstdNew(int fdno, const char *fmode)

+  	    flags &= ~O_ACCMODE;

+  	    flags |= O_RDWR;

+  	    continue;

+ -	    break;

+ +	case 'T':

+ +	    if (*s >= '0' && *s <= '9') {

+ +		threads = strtol(s, (char **)&s, 10);

+ +		/* T0 means automatic detection */

+ +		if (threads == 0)

+ +		    threads = sysconf(_SC_NPROCESSORS_ONLN);
alexk commented a year ago

same as in xz. Not sure if this is the right thing to do. getcpunumber macro (used in the original PR) is not available in 4.14.x

+ +	    }

+ +	    continue;

+  	default:

+  	    if (c >= (int)'0' && c <= (int)'9') {

+  		level = strtol(s-1, (char **)&s, 10);

+ @@ -1132,10 +1140,16 @@ static rpmzstd rpmzstdNew(int fdno, const char *fmode)

+  	}

+  	nb = ZSTD_DStreamInSize();

+      } else {					/* compressing */

+ -	if ((_stream = (void *) ZSTD_createCStream()) == NULL

+ -	 || ZSTD_isError(ZSTD_initCStream(_stream, level))) {

+ +	if ((_stream = (void *) ZSTD_createCCtx()) == NULL

+ +	 || ZSTD_isError(ZSTD_CCtx_setParameter(_stream, ZSTD_c_compressionLevel, level))) {

+  	    goto err;

+  	}

+ +

+ +	rpmlog(RPMLOG_DEBUG, "using %i threads in zstd compression\n", threads);

+ +	if (threads > 0) {

+ +	    if (ZSTD_isError (ZSTD_CCtx_setParameter(_stream, ZSTD_c_nbWorkers, threads)))

+ +		rpmlog(RPMLOG_WARNING, "zstd library does not support multi-threading\n");
alexk commented a year ago

change debug message (in upstream) to warning. It's a bit annoying when rpmbuild runs single threaded compression quietly by default without any message

+ +	}

+  	nb = ZSTD_CStreamOutSize();

+      }

+  

+ @@ -1155,7 +1169,7 @@ err:

+      if ((flags & O_ACCMODE) == O_RDONLY)

+  	ZSTD_freeDStream(_stream);

+      else

+ -	ZSTD_freeCStream(_stream);

+ +	ZSTD_freeCCtx(_stream);

+      return NULL;

+  }

+  

+ @@ -1181,16 +1195,24 @@ assert(zstd);

+  	rc = 0;

+      } else {					/* compressing */

+  	/* close frame */

+ -	zstd->zob.dst  = zstd->b;

+ -	zstd->zob.size = zstd->nb;

+ -	zstd->zob.pos  = 0;

+ -	int xx = ZSTD_flushStream(zstd->_stream, &zstd->zob);

+ -	if (ZSTD_isError(xx))

+ -	    fps->errcookie = ZSTD_getErrorName(xx);

+ -	else if (zstd->zob.pos != fwrite(zstd->b, 1, zstd->zob.pos, zstd->fp))

+ -	    fps->errcookie = "zstdFlush fwrite failed.";

+ -	else

+ -	    rc = 0;

+ +	int xx;

+ +	do {

+ +	  ZSTD_inBuffer zib = { NULL, 0, 0 };

+ +	  zstd->zob.dst  = zstd->b;

+ +	  zstd->zob.size = zstd->nb;

+ +	  zstd->zob.pos  = 0;

+ +	  xx = ZSTD_compressStream2(zstd->_stream, &zstd->zob, &zib, ZSTD_e_flush);

+ +	  if (ZSTD_isError(xx)) {

+ +	      fps->errcookie = ZSTD_getErrorName(xx);

+ +	      break;

+ +	  }

+ +	  else if (zstd->zob.pos != fwrite(zstd->b, 1, zstd->zob.pos, zstd->fp)) {

+ +	      fps->errcookie = "zstdClose fwrite failed.";

+ +	      break;

+ +	  }

+ +	  else

+ +	      rc = 0;

+ +	} while (xx != 0);

+      }

+      return rc;

+  }

+ @@ -1235,7 +1257,7 @@ assert(zstd);

+  	zstd->zob.pos  = 0;

+  

+  	/* Compress next chunk. */

+ -        int xx = ZSTD_compressStream(zstd->_stream, &zstd->zob, &zib);

+ +        int xx = ZSTD_compressStream2(zstd->_stream, &zstd->zob, &zib, ZSTD_e_continue);

+          if (ZSTD_isError(xx)) {

+  	    fps->errcookie = ZSTD_getErrorName(xx);

+  	    return -1;

+ @@ -1264,17 +1286,25 @@ assert(zstd);

+  	ZSTD_freeDStream(zstd->_stream);

+      } else {					/* compressing */

+  	/* close frame */

+ -	zstd->zob.dst  = zstd->b;

+ -	zstd->zob.size = zstd->nb;

+ -	zstd->zob.pos  = 0;

+ -	int xx = ZSTD_endStream(zstd->_stream, &zstd->zob);

+ -	if (ZSTD_isError(xx))

+ -	    fps->errcookie = ZSTD_getErrorName(xx);

+ -	else if (zstd->zob.pos != fwrite(zstd->b, 1, zstd->zob.pos, zstd->fp))

+ -	    fps->errcookie = "zstdClose fwrite failed.";

+ -	else

+ -	    rc = 0;

+ -	ZSTD_freeCStream(zstd->_stream);

+ +	int xx;

+ +	do {

+ +	  ZSTD_inBuffer zib = { NULL, 0, 0 };

+ +	  zstd->zob.dst  = zstd->b;

+ +	  zstd->zob.size = zstd->nb;

+ +	  zstd->zob.pos  = 0;

+ +	  xx = ZSTD_compressStream2(zstd->_stream, &zstd->zob, &zib, ZSTD_e_end);

+ +	  if (ZSTD_isError(xx)) {

+ +	      fps->errcookie = ZSTD_getErrorName(xx);

+ +	      break;

+ +	  }

+ +	  else if (zstd->zob.pos != fwrite(zstd->b, 1, zstd->zob.pos, zstd->fp)) {

+ +	      fps->errcookie = "zstdClose fwrite failed.";

+ +	      break;

+ +	  }

+ +	  else

+ +	      rc = 0;

+ +	} while (xx != 0);

+ +	ZSTD_freeCCtx(zstd->_stream);

+      }

+  

+      if (zstd->fp && fileno(zstd->fp) > 2)

+ -- 

+ 2.38.1

+ 

file modified
+10 -2
@@ -42,7 +42,7 @@ 

  

  %global rpmver 4.14.3

  #global snapver rc2

- %global rel 24.1

+ %global rel 24.2

  

  %global srcver %{version}%{?snapver:-%{snapver}}

  %global srcdir %{?snapver:testing}%{!?snapver:%{name}-%(echo %{version} | cut -d'.' -f1-2).x}
@@ -157,6 +157,11 @@ 

  # make unversioned %%__python an error unless explicitly overridden

  Patch1002: rpm-4.14.2-unversioned-python.patch

  

+ %if %{with zstd}

+ # multithreaded zstd compression

+ Patch1003: rpm-4.14.3-backport-multithreaded-zstd.patch

+ %endif

+ 

  # fsverity support

  %if %{with libfsverity}

  Patch1950: 0001-Add-RPMTAG_AUTOINSTALLED-reservation.patch
@@ -383,7 +388,7 @@ 

  Requires: findutils sed grep gawk diffutils file patch >= 2.5

  Requires: tar unzip gzip bzip2 cpio xz

  %if %{with zstd}

- Requires: zstd

+ Requires: zstd >= 1.3.8

  %endif

  Requires: pkgconfig >= 1:0.24

  Requires: /usr/bin/gdb-add-index
@@ -832,6 +837,9 @@ 

  %doc doc/librpm/html/*

  

  %changelog

+ * Tue Nov 29 2022 Aleksandr Kazakov <alexkazakov@meta.com> - 4.14.3-24.2

+ - Backport support for multi-threaded zstd compression

+ 

  * Sat Oct 29 2022 Richard Phibel <richardphibel@fb.com> - 4.14.3-24.1

  - Merge upstream changes for Hyperscale

  

Original pull request on GitHub, which was declined because CentOS 8 is EOL and the 4.14.x branch is not going to have new releases: https://github.com/rpm-software-management/rpm/pull/2130

Disclaimer: this is my first time patching RPMs and working with mock/chroot, so I may be doing something dumb. Let me know; any help is appreciated.

Summary

zstd supports multi-threaded compression, and this is already available in rpm 4.17.
Unfortunately, this feature is not available on CentOS 8, which has 4.14.x only.
Adding this feature requires upgrading the dependency on the zstd library and backporting the integration.

Changes:
- Backport of the Support threading for zstd compression PR
- Backport of bug fix fa97556
I couldn't backport autodetection for the number of threads as the getncpus macro is not supported in 4.14.x. I replaced it with sysconf(_SC_NPROCESSORS_ONLN) that is already used in xz.
- 4.14.x will require libzstd version >= 1.3.8.

Testing

Note about multi-threading support in zstd

I ran mock with -r centos-stream-hyperscale-experimental-8-x86_64 as the main branch wasn't working and was missing tons of dependencies. The experimental repo has zstd 1.4.4, which doesn't have multithreading (MT) enabled by default. The library would have to be rebuilt with a macro to enable MT. zstd 1.5.0 has MT enabled by default, though, and it's available in centos-stream-hyperscale-8-x86_64, so I decided to include the main repo in my mock configs and install zstd 1.5.1 instead.

➜  rpm git:(c8s-sig-hyperscale) ✗ cat /etc/mock/centos-stream-hyperscale-experimental-8-x86_64.cfg 
include('templates/centos-stream-8.tpl')
include('templates/epel-8.tpl')
include('templates/epel-next-8.tpl')
include('templates/centos-stream-hyperscale-8.tpl')
include('templates/centos-stream-hyperscale-experimental-8.tpl')

config_opts['root'] = 'centos-stream-hyperscale-experimental-8-x86_64'
config_opts['target_arch'] = 'x86_64'
config_opts['legal_host_arches'] = ('x86_64',)

config_opts['macros']['%dist'] = ".hsx.el8"
config_opts['macros']['%centos_hs'] = "1"

Then I updated the SPECS/rpm.spec locally to require zstd 1.5.0+

Requires: zstd >= 1.3.8

I also tested the PR without these changes: MT wasn't working, and I was getting a warning that zstd does not support multi-threading, with compression falling back to single-threaded mode.

What I tested:
- Ran mock -r centos-stream-hyperscale-experimental-8-x86_64 --sources ./SOURCES --spec ./SPECS/*.spec --postinstall with my patch that finished successfully
- Ran mock -r centos-stream-hyperscale-experimental-8-x86_64 --sources ./SOURCES --spec ./SPECS/*.spec --shell to get inside the chroot
- Set up a basic rpm build inside the chroot with one 200MB file in SOURCES generated with head -c 200MB </dev/urandom >myfile, and put /usr/bin/podman in the test rpm too to add more weight (300 MB). So the test rpm build is ~500MB.
- Ran with default compressor

<mock-chroot> sh-4.4# time -v rpmbuild -bb -v SPECS/hello.spec
...
Wrote: /builddir/build/RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
Executing(%clean): /bin/sh -e /var/tmp/rpm-tmp.Lc2gUG
+ umask 022
+ cd /builddir/build/BUILD
+ cd hello-1.0
+ rm -rf /builddir/build/BUILDROOT/hello-1.0-1.hsx.el8.x86_64
+ exit 0
    Command being timed: "rpmbuild -bb -v SPECS/hello.spec"
    User time (seconds): 124.53
    System time (seconds): 2.66
    Percent of CPU this job got: 96%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 2:12.12
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 41896
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 3
    Minor (reclaiming a frame) page faults: 55327
    Voluntary context switches: 7890
    Involuntary context switches: 17473
    Swaps: 0
    File system inputs: 0
    File system outputs: 1500824
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

<mock-chroot> sh-4.4# du -sh RPMS/hello-1.0-1.hsx.el8.x86_64.rpm 
202M    RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
  • MT compression with cpu based number of threads w19T0.zstdio
<mock-chroot> sh-4.4# time -v rpmbuild --define '_binary_payload w19T0.zstdio' -bb -v SPECS/hello.spec 
...
Wrote: /builddir/build/RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
Executing(%clean): /bin/sh -e /var/tmp/rpm-tmp.l3HgpH
+ umask 022
+ cd /builddir/build/BUILD
+ cd hello-1.0
+ rm -rf /builddir/build/BUILDROOT/hello-1.0-1.hsx.el8.x86_64
+ exit 0
    Command being timed: "rpmbuild --define _binary_payload w19T0.zstdio -bb -v SPECS/hello.spec"
    User time (seconds): 130.39
    System time (seconds): 2.86
    Percent of CPU this job got: 158%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 1:23.94
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 449096
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 158357
    Voluntary context switches: 8363
    Involuntary context switches: 28262
    Swaps: 0
    File system inputs: 0
    File system outputs: 1499936
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
<mock-chroot> sh-4.4# du -sh RPMS/hello-1.0-1.hsx.el8.x86_64.rpm 
202M    RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
  • no threads specified
<mock-chroot> sh-4.4# time -v rpmbuild --define '_binary_payload w19.zstdio' -bb -v SPECS/hello.spec
...
+ rm -rf /builddir/build/BUILDROOT/hello-1.0-1.hsx.el8.x86_64
+ exit 0
    Command being timed: "rpmbuild --define _binary_payload w19.zstdio -bb -v SPECS/hello.spec"
    User time (seconds): 106.24
    System time (seconds): 2.23
    Percent of CPU this job got: 94%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 1:54.48
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 112264
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 74180
    Voluntary context switches: 7649
    Involuntary context switches: 15383
    Swaps: 0
    File system inputs: 0
    File system outputs: 1499872
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
<mock-chroot> sh-4.4# du -sh RPMS/hello-1.0-1.hsx.el8.x86_64.rpm 
202M    RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
  • one thread set
 <mock-chroot> sh-4.4# time -v rpmbuild --define '_binary_payload w19T1.zstdio' -bb -v SPECS/hello.spec
...
Wrote: /builddir/build/RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
Executing(%clean): /bin/sh -e /var/tmp/rpm-tmp.Hyyqzy
+ umask 022
+ cd /builddir/build/BUILD
+ cd hello-1.0
+ rm -rf /builddir/build/BUILDROOT/hello-1.0-1.hsx.el8.x86_64
+ exit 0
    Command being timed: "rpmbuild --define _binary_payload w19T1.zstdio -bb -v SPECS/hello.spec"
    User time (seconds): 118.46
    System time (seconds): 2.42
    Percent of CPU this job got: 96%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 2:04.70
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 300452
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 1
    Minor (reclaiming a frame) page faults: 121284
    Voluntary context switches: 8408
    Involuntary context switches: 15759
    Swaps: 0
    File system inputs: 0
    File system outputs: 1499944
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
<mock-chroot> sh-4.4# du -sh RPMS/hello-1.0-1.hsx.el8.x86_64.rpm 
202M    RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
  • 4 threads set
<mock-chroot> sh-4.4# time -v rpmbuild --define '_binary_payload w19T4.zstdio' -bb -v SPECS/hello.spec
...
Wrote: /builddir/build/RPMS/hello-1.0-1.hsx.el8.x86_64.rpm
Executing(%clean): /bin/sh -e /var/tmp/rpm-tmp.hgAtHo
+ umask 022
+ cd /builddir/build/BUILD
+ cd hello-1.0
+ rm -rf /builddir/build/BUILDROOT/hello-1.0-1.hsx.el8.x86_64
+ exit 0
    Command being timed: "rpmbuild --define _binary_payload w19T4.zstdio -bb -v SPECS/hello.spec"
    User time (seconds): 132.98
    System time (seconds): 3.33
    Percent of CPU this job got: 159%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 1:25.38
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 745580
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 3
    Minor (reclaiming a frame) page faults: 232525
    Voluntary context switches: 8169
    Involuntary context switches: 43975
    Swaps: 0
    File system inputs: 53208
    File system outputs: 1499944
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
<mock-chroot> sh-4.4# du -sh RPMS/hello-1.0-1.hsx.el8.x86_64.rpm 
202M    RPMS/hello-1.0-1.hsx.el8.x86_64.rpm

I also unpacked the rpm and compared the files' md5s with the original files' md5s - they are the same

# unpacking
<mock-chroot> sh-4.4# rpm2cpio hello-1.0-1.hsx.el8.x86_64.rpm | cpio -idmv
./usr/bin/hello-world.sh
./usr/bin/myfile
./usr/bin/podman
./usr/lib/.build-id
./usr/lib/.build-id/ad
./usr/lib/.build-id/ad/f500a8beab5bcb6b66a5fa6d7497517b79468b
467002 blocks
<mock-chroot> sh-4.4# md5sum ./usr/bin/myfile 
88ebe22a347d1c3e99603cb6811b5881  ./usr/bin/myfile
<mock-chroot> sh-4.4# md5sum ./usr/bin/podman 
4520b424e2c11b78973ff2b8249c46f6  ./usr/bin/podman

...
# original files
➜  rpmbuild pwd
/home/alex/rpmbuild
➜  rpmbuild cd SOURCES/hello-1.0 
➜  hello-1.0 md5sum *
b37bd98e4077dc920344c0dcacc8ed4d  hello-world.sh
88ebe22a347d1c3e99603cb6811b5881  myfile
4520b424e2c11b78973ff2b8249c46f6  podman

same as in xz. Not sure if this is the right thing to do. getcpunumber macro is not available in 4.14.x

rebased onto 6d37e9a

a year ago

rebased onto 35d7149

a year ago

same as in xz. Not sure if this is the right thing to do. getcpunumber macro (used in the original PR) is not available in 4.14.x

new api to use multi-threaded compression since this version

change debug message (in upstream) to warning. It's a bit annoying when rpmbuild runs single threaded compression quietly by default without any message

This should be 24.2

The version here should be 4.14.3-24.2

rebased onto 3c9fa8b

a year ago

changed version from 25 to 24.2 as per prev comments

I ran mock with -r centos-stream-hyperscale-experimental-8-x86_64 as the main branch wasn't working and was missing tons of dependencies. The experimental repo has zstd 1.4.4, which doesn't have multithreading (MT) enabled by default. The library would have to be rebuilt with a macro to enable MT. zstd 1.5.0 has MT enabled by default, though, and it's available in centos-stream-hyperscale-8-x86_64, so I decided to include the main repo in my mock configs and install zstd 1.5.1 instead.

I've tagged the missing package in the buildroot, so this should be fixed as soon as the mirrors propagate.

Pull-Request has been merged by dcavalca

a year ago