629b27
From 7bd31ce85b2ed377f495c31fcea2422a07739e24 Mon Sep 17 00:00:00 2001
629b27
From: Matthew Almond <malmond@fb.com>
629b27
Date: Fri, 8 Nov 2019 09:29:43 -0800
629b27
Subject: [PATCH 01/30] RPM with Copy on Write
629b27
629b27
This is part of https://fedoraproject.org/wiki/Changes/RPMCoW
629b27
629b27
The majority of changes are in two new programs:
629b27
629b27
= rpm2extents
629b27
629b27
Modeled as a 'stream processor'. It reads a regular .rpm file on stdin,
629b27
and produces a modified .rpm file on stdout. The lead, signature and
629b27
headers are preserved 1:1 to allow all the normal metadata inspection,
629b27
signature verification to work as expected. Only the 'payload' is
629b27
modified.
629b27
629b27
The primary motivation for this tool is to re-organize the payload as a
629b27
sequence of raw file extents (hence the name). The files are organized
629b27
by their digest identity instead of path/filename. If any digest is
629b27
repeated, then the file is skipped/de-duped. Only regular files are
629b27
represented. All other entries like directories, symlinks, devices are
629b27
fully described in the headers and are omitted.
629b27
629b27
The files are padded so they start on `sysconf(_SC_PAGESIZE)` boundaries
629b27
to permit 'reflink' syscalls to work in the `reflink` plugin.
629b27
629b27
At the end of the file is a footer with 3 sections:
629b27
629b27
1. List of calculated digests of the input stream. This is used in
629b27
   `librepo` because the file *written* is a derivative, and not the
629b27
   same as the repo metadata describes. `rpm2extents` takes one or more
629b27
   positional arguments that describe which digest algorithms are
629b27
   desired. This is often just `SHA256`. This program is only measuring
629b27
   and recording the digest - it does not express an opinion on whether
629b27
   the file is correct. Due to the API on most compression libraries
629b27
   directly reading the source file, the whole file digest is measured
629b27
   using a subprocess and pipes. I don't love it, but it works.
629b27
2. Sorted List of file content digests + offset pairs. This is used in
629b27
   the plugin with a trivial binary search to locate the start of file
629b27
   content. The size is not needed because it's part of normal headers.
629b27
3. (offset of 1., offset of 2., 8 byte MAGIC value) triple
629b27
629b27
= reflink plugin
629b27
629b27
Looks for the 8 byte magic value at the end of the rpm file. If present
629b27
it alters the `RPMTAG_PAYLOADFORMAT` in memory to `clon`, and reads in
629b27
the digest->offset table.
629b27
629b27
`rpmPackageFilesInstall()` in `fsm.c` is
629b27
modified to alter the enumeration strategy from
629b27
`rpmfiNewArchiveReader()` to `rpmfilesIter()` if not `cpio`. This is
629b27
needed because there is no cpio to enumerate. In the same function, if
629b27
`rpmpluginsCallFsmFilePre()` returns `RPMRC_PLUGIN_CONTENTS` then
629b27
`fsmMkfile()` is skipped as it is assumed the plugin did the work.
629b27
629b27
The majority of the work is in `reflink_fsm_file_pre()` - the per file
629b27
hook for RPM plugins. If the file enumerated in
629b27
`rpmPackageFilesInstall()` is a regular file, this function will look up
629b27
the offset in the digest->offset table and will try to reflink it, then
629b27
fall back to a regular copy. If reflinking does work: we will have
629b27
reflinked a whole number of pages, so we truncate the file to the
629b27
expected size. Therefore installing most files does involve two writes:
629b27
the reflink of the full size, then a fork/copy on write for the last
629b27
page worth.
629b27
629b27
If the file passed to `reflink_fsm_file_pre()` is anything other than a
629b27
regular file, it returns `RPMRC_OK` so the normal mechanics of
629b27
`rpmPackageFilesInstall()` are used. That handles directories, symlinks
629b27
and other non-file types.
629b27
629b27
= New API for internal use
629b27
629b27
1. `rpmReadPackageRaw()` is used within `rpm2extents` to read all the
629b27
   headers without trying to validate signatures. This eliminates the
629b27
   runtime dependency on rpmdb.
629b27
2. `rpmteFd()` exposes the Fd behind the rpmte, so plugins can interact
629b27
   with the rpm itself.
629b27
3. `RPMRC_PLUGIN_CONTENTS` in `rpmRC_e` for use in
629b27
   `rpmpluginsCallFsmFilePre()` specifically.
629b27
4. `pgpStringVal()` is used to help parse the command line in
629b27
   `rpm2extents` - the positional arguments are strings, and this
629b27
   converts the values back to the values in the table.
629b27
629b27
Nothing has been removed, and none of the changes are intended to be
629b27
used externally, so I don't think a soname bump is warranted here.
629b27
---
629b27
 Makefile.am         |   6 +-
629b27
 lib/depends.c       |   2 +
629b27
 lib/fsm.c           |  45 +++-
629b27
 lib/package.c       |  40 ++++
629b27
 lib/rpmlib.h        |   9 +
629b27
 lib/rpmplugins.c    |  21 +-
629b27
 lib/rpmte.c         |   5 +
629b27
 lib/rpmte.h         |   2 +
629b27
 lib/rpmtypes.h      |   3 +-
629b27
 macros.in           |   1 +
629b27
 plugins/Makefile.am |   4 +
629b27
 plugins/reflink.c   | 340 +++++++++++++++++++++++++++++
629b27
 rpm2extents.c       | 519 ++++++++++++++++++++++++++++++++++++++++++++
629b27
 rpmio/rpmpgp.c      |  10 +
629b27
 rpmio/rpmpgp.h      |   9 +
629b27
 15 files changed, 1004 insertions(+), 12 deletions(-)
629b27
 create mode 100644 plugins/reflink.c
629b27
 create mode 100644 rpm2extents.c
629b27
629b27
diff --git a/Makefile.am b/Makefile.am
629b27
index e5c75d7b4..288668819 100644
629b27
--- a/Makefile.am
629b27
+++ b/Makefile.am
629b27
@@ -99,7 +99,7 @@ pkginclude_HEADERS += build/rpmfc.h
629b27
 pkginclude_HEADERS += build/rpmspec.h
629b27
 
629b27
 
629b27
-bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec
629b27
+bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec rpm2extents
629b27
 if WITH_ARCHIVE
629b27
 bin_PROGRAMS += 	rpm2archive 
629b27
 endif
629b27
@@ -154,6 +154,10 @@ rpm2cpio_SOURCES =	rpm2cpio.c debug.h system.h
629b27
 rpm2cpio_LDADD =	lib/librpm.la rpmio/librpmio.la
629b27
 rpm2cpio_LDADD +=	@WITH_POPT_LIB@
629b27
 
629b27
+rpm2extents_SOURCES =	rpm2extents.c debug.h system.h
629b27
+rpm2extents_LDADD =	lib/librpm.la rpmio/librpmio.la
629b27
+rpm2extents_LDADD +=	@WITH_POPT_LIB@
629b27
+
629b27
 rpm2archive_SOURCES =	rpm2archive.c debug.h system.h
629b27
 rpm2archive_LDADD =	lib/librpm.la rpmio/librpmio.la
629b27
 rpm2archive_LDADD +=	@WITH_POPT_LIB@ @WITH_ARCHIVE_LIB@
629b27
diff --git a/lib/depends.c b/lib/depends.c
629b27
index 30234df3d..8998afcd3 100644
629b27
--- a/lib/depends.c
629b27
+++ b/lib/depends.c
629b27
@@ -81,6 +81,8 @@ static rpmRC headerCheckPayloadFormat(Header h) {
629b27
      */
629b27
     if (!payloadfmt) return rc;
629b27
 
629b27
+    if (rstreq(payloadfmt, "clon")) return rc;
629b27
+
629b27
     if (!rstreq(payloadfmt, "cpio")) {
629b27
         char *nevra = headerGetAsString(h, RPMTAG_NEVRA);
629b27
         if (payloadfmt && rstreq(payloadfmt, "drpm")) {
629b27
diff --git a/lib/fsm.c b/lib/fsm.c
629b27
index 935a0a5c6..90193c749 100644
629b27
--- a/lib/fsm.c
629b27
+++ b/lib/fsm.c
629b27
@@ -8,6 +8,7 @@
629b27
629b27
 #include <utime.h>
629b27
 #include <errno.h>
629b27
+#include <stdbool.h>
629b27
 #if WITH_CAP
629b27
 #include <sys/capability.h>
629b27
 #endif
629b27
@@ -19,6 +20,7 @@
629b27
 
629b27
 #include "rpmio/rpmio_internal.h"	/* fdInit/FiniDigest */
629b27
 #include "lib/fsm.h"
629b27
+#include "lib/rpmlib.h"
629b27
 #include "lib/rpmte_internal.h"	/* XXX rpmfs */
629b27
 #include "lib/rpmplugins.h"	/* rpm plugins hooks */
629b27
 #include "lib/rpmug.h"
629b27
@@ -52,6 +54,7 @@ struct filedata_s {
629b27
     int stage;
629b27
     int setmeta;
629b27
     int skip;
629b27
+    int plugin_contents;
629b27
     rpmFileAction action;
629b27
     const char *suffix;
629b27
     char *fpath;
629b27
@@ -891,6 +894,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
629b27
     struct filedata_s *fdata = xcalloc(fc, sizeof(*fdata));
629b27
     struct filedata_s *firstlink = NULL;
629b27
 
629b27
+    Header h = rpmteHeader(te);
629b27
+    const char *payloadfmt = headerGetString(h, RPMTAG_PAYLOADFORMAT);
629b27
+    bool cpio = true;
629b27
+
629b27
+    if (payloadfmt && rstreq(payloadfmt, "clon")) {
629b27
+	cpio = false;
629b27
+    }
629b27
+
629b27
     /* transaction id used for temporary path suffix while installing */
629b27
     rasprintf(&tid, ";%08x", (unsigned)rpmtsGetTid(ts));
629b27
 
629b27
@@ -911,12 +922,23 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
629b27
 	/* Remap file perms, owner, and group. */
629b27
 	rc = rpmfiStat(fi, 1, &fp->sb);
629b27
 
629b27
-	setFileState(fs, fx);
629b27
 	fsmDebug(fp->fpath, fp->action, &fp->sb);
629b27
 
629b27
 	/* Run fsm file pre hook for all plugins */
629b27
 	rc = rpmpluginsCallFsmFilePre(plugins, fi, fp->fpath,
629b27
 				      fp->sb.st_mode, fp->action);
629b27
+	fp->plugin_contents = false;
629b27
+	switch (rc) {
629b27
+	case RPMRC_OK:
629b27
+	    setFileState(fs, fx);
629b27
+	    break;
629b27
+	case RPMRC_PLUGIN_CONTENTS:
629b27
+	    fp->plugin_contents = true;
629b27
+	    // reduce reads on cpio to this value. Could be zero if
629b27
+	    // this is from a hard link.
629b27
+	    rc = RPMRC_OK;
629b27
+	    break;
629b27
+	}
629b27
 	fp->stage = FILE_PRE;
629b27
     }
629b27
     fi = rpmfiFree(fi);
629b27
@@ -924,10 +946,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
629b27
     if (rc)
629b27
 	goto exit;
629b27
 
629b27
-    fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
629b27
-    if (fi == NULL) {
629b27
-        rc = RPMERR_BAD_MAGIC;
629b27
-        goto exit;
629b27
+    if (cpio) {
629b27
+	fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
629b27
+	if (fi == NULL) {
629b27
+	    rc = RPMERR_BAD_MAGIC;
629b27
+	    goto exit;
629b27
+	}
629b27
+    } else {
629b27
+	fi = rpmfilesIter(files, RPMFI_ITER_FWD);
629b27
     }
629b27
 
629b27
     /* Detect and create directories not explicitly in package. */
629b27
@@ -969,8 +995,12 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
629b27
 
629b27
             if (S_ISREG(fp->sb.st_mode)) {
629b27
 		if (rc == RPMERR_ENOENT) {
629b27
-		    rc = fsmMkfile(fi, fp, files, psm, nodigest,
629b27
-				   &firstlink, &firstlinkfile);
629b27
+		    if(fp->plugin_contents) {
629b27
+			rc = RPMRC_OK;
629b27
+		    }else {
629b27
+			rc = fsmMkfile(fi, fp, files, psm, nodigest,
629b27
+			    &firstlink, &firstlinkfile);
629b27
+		    }
629b27
 		}
629b27
             } else if (S_ISDIR(fp->sb.st_mode)) {
629b27
                 if (rc == RPMERR_ENOENT) {
629b27
@@ -1078,6 +1108,7 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
629b27
     rpmswAdd(rpmtsOp(ts, RPMTS_OP_DIGEST), fdOp(payload, FDSTAT_DIGEST));
629b27
 
629b27
 exit:
629b27
+    h = headerFree(h);
629b27
     fi = rpmfiFree(fi);
629b27
     Fclose(payload);
629b27
     free(tid);
629b27
diff --git a/lib/package.c b/lib/package.c
629b27
index 281275029..90bd0d8a7 100644
629b27
--- a/lib/package.c
629b27
+++ b/lib/package.c
629b27
@@ -404,5 +404,45 @@ exit:
629b27
     return rc;
629b27
 }
629b27
 
629b27
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp)
629b27
+{
629b27
+    char *msg = NULL;
629b27
+    hdrblob sigblob = hdrblobCreate();
629b27
+    hdrblob blob = hdrblobCreate();
629b27
+    Header h = NULL;
629b27
+    Header sigh = NULL;
629b27
+
629b27
+    rpmRC rc = rpmLeadRead(fd, &msg);
629b27
+    if (rc != RPMRC_OK)
629b27
+	goto exit;
629b27
+
629b27
+    rc = hdrblobRead(fd, 1, 0, RPMTAG_HEADERSIGNATURES, sigblob, &msg);
629b27
+    if (rc != RPMRC_OK)
629b27
+	goto exit;
629b27
+
629b27
+    rc = hdrblobRead(fd, 1, 1, RPMTAG_HEADERIMMUTABLE, blob, &msg);
629b27
+    if (rc != RPMRC_OK)
629b27
+	goto exit;
629b27
+
629b27
+    rc = hdrblobImport(sigblob, 0, &sigh, &msg);
629b27
+    if (rc)
629b27
+	goto exit;
629b27
 
629b27
+    rc = hdrblobImport(blob, 0, &h, &msg);
629b27
+    if (rc)
629b27
+	goto exit;
629b27
 
629b27
+    *sigp = headerLink(sigh);
629b27
+    *hdrp = headerLink(h);
629b27
+
629b27
+exit:
629b27
+    if (rc != RPMRC_OK && msg)
629b27
+	rpmlog(RPMLOG_ERR, "%s: %s\n", Fdescr(fd), msg);
629b27
+    hdrblobFree(sigblob);
629b27
+    hdrblobFree(blob);
629b27
+    headerFree(sigh);
629b27
+    headerFree(h);
629b27
+    free(msg);
629b27
+
629b27
+    return rc;
629b27
+}
629b27
diff --git a/lib/rpmlib.h b/lib/rpmlib.h
629b27
index 0879d04e5..a09ba0daf 100644
629b27
--- a/lib/rpmlib.h
629b27
+++ b/lib/rpmlib.h
629b27
@@ -155,6 +155,15 @@ rpmRC rpmReadHeader(rpmts ts, FD_t fd, Header *hdrp, char ** msg);
629b27
 rpmRC rpmReadPackageFile(rpmts ts, FD_t fd,
629b27
 		const char * fn, Header * hdrp);
629b27
 
629b27
+/** \ingroup header
629b27
+ * Return package signature, header from file handle, no verification.
629b27
+ * @param fd		file handle
629b27
+ * @param[out] sigp		address of signature header (must not be NULL)
629b27
+ * @param[out] hdrp		address of header (must not be NULL)
629b27
+ * @return		RPMRC_OK on success
629b27
+ */
629b27
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp);
629b27
+
629b27
 /** \ingroup rpmtrans
629b27
  * Install source package.
629b27
  * @param ts		transaction set
629b27
diff --git a/lib/rpmplugins.c b/lib/rpmplugins.c
629b27
index 62d75c4cf..c5084d398 100644
629b27
--- a/lib/rpmplugins.c
629b27
+++ b/lib/rpmplugins.c
629b27
@@ -356,13 +356,28 @@ rpmRC rpmpluginsCallFsmFilePre(rpmPlugins plugins, rpmfi fi, const char *path,
629b27
     plugin_fsm_file_pre_func hookFunc;
629b27
     int i;
629b27
     rpmRC rc = RPMRC_OK;
629b27
+    rpmRC hook_rc;
629b27
 
629b27
     for (i = 0; i < plugins->count; i++) {
629b27
 	rpmPlugin plugin = plugins->plugins[i];
629b27
 	RPMPLUGINS_SET_HOOK_FUNC(fsm_file_pre);
629b27
-	if (hookFunc && hookFunc(plugin, fi, path, file_mode, op) == RPMRC_FAIL) {
629b27
-	    rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
629b27
-	    rc = RPMRC_FAIL;
629b27
+	if (hookFunc) {
629b27
+	    hook_rc = hookFunc(plugin, fi, path, file_mode, op);
629b27
+	    if (hook_rc == RPMRC_FAIL) {
629b27
+		rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
629b27
+		rc = RPMRC_FAIL;
629b27
+	    } else if (hook_rc == RPMRC_PLUGIN_CONTENTS && rc != RPMRC_FAIL) {
629b27
+		if (rc == RPMRC_PLUGIN_CONTENTS) {
629b27
+		    /*
629b27
+		    Another plugin already said it'd handle contents. It's undefined how
629b27
+		    these would combine, so treat this as a failure condition.
629b27
+		    */
629b27
+		    rc = RPMRC_FAIL;
629b27
+		} else {
629b27
+		    /* Plugin will handle content */
629b27
+		    rc = RPMRC_PLUGIN_CONTENTS;
629b27
+		}
629b27
+	    }
629b27
 	}
629b27
     }
629b27
 
629b27
diff --git a/lib/rpmte.c b/lib/rpmte.c
629b27
index 3663604e7..d43dc41ad 100644
629b27
--- a/lib/rpmte.c
629b27
+++ b/lib/rpmte.c
629b27
@@ -423,6 +423,11 @@ FD_t rpmteSetFd(rpmte te, FD_t fd)
629b27
     return NULL;
629b27
 }
629b27
 
629b27
+FD_t rpmteFd(rpmte te)
629b27
+{
629b27
+    return (te != NULL ? te->fd : NULL);
629b27
+}
629b27
+
629b27
 fnpyKey rpmteKey(rpmte te)
629b27
 {
629b27
     return (te != NULL ? te->key : NULL);
629b27
diff --git a/lib/rpmte.h b/lib/rpmte.h
629b27
index 81acf7a19..6fc0a9f91 100644
629b27
--- a/lib/rpmte.h
629b27
+++ b/lib/rpmte.h
629b27
@@ -209,6 +209,8 @@ const char * rpmteNEVR(rpmte te);
629b27
  */
629b27
 const char * rpmteNEVRA(rpmte te);
629b27
 
629b27
+FD_t rpmteFd(rpmte te);
629b27
+
629b27
 /** \ingroup rpmte
629b27
  * Retrieve key from transaction element.
629b27
  * @param te		transaction element
629b27
diff --git a/lib/rpmtypes.h b/lib/rpmtypes.h
629b27
index e8e69b506..af2611e9e 100644
629b27
--- a/lib/rpmtypes.h
629b27
+++ b/lib/rpmtypes.h
629b27
@@ -106,7 +106,8 @@ typedef	enum rpmRC_e {
629b27
     RPMRC_NOTFOUND	= 1,	/*!< Generic not found code. */
629b27
     RPMRC_FAIL		= 2,	/*!< Generic failure code. */
629b27
     RPMRC_NOTTRUSTED	= 3,	/*!< Signature is OK, but key is not trusted. */
629b27
-    RPMRC_NOKEY		= 4	/*!< Public key is unavailable. */
629b27
+    RPMRC_NOKEY		= 4,	/*!< Public key is unavailable. */
629b27
+    RPMRC_PLUGIN_CONTENTS = 5     /*!< fsm_file_pre plugin is handling content */
629b27
 } rpmRC;
629b27
 
629b27
 #ifdef __cplusplus
629b27
diff --git a/macros.in b/macros.in
629b27
index e90cefa9a..363252b0f 100644
629b27
--- a/macros.in
629b27
+++ b/macros.in
629b27
@@ -1143,6 +1143,7 @@ package or when debugging this package.\
629b27
 
629b27
 # Transaction plugin macros
629b27
 %__plugindir		%{_libdir}/rpm-plugins
629b27
+%__transaction_reflink		%{__plugindir}/reflink.so
629b27
 %__transaction_systemd_inhibit	%{__plugindir}/systemd_inhibit.so
629b27
 %__transaction_selinux		%{__plugindir}/selinux.so
629b27
 %__transaction_syslog		%{__plugindir}/syslog.so
629b27
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
629b27
index 3a929d0ce..ad0d3bce7 100644
629b27
--- a/plugins/Makefile.am
629b27
+++ b/plugins/Makefile.am
629b27
@@ -42,6 +42,10 @@ prioreset_la_SOURCES = prioreset.c
629b27
 prioreset_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
629b27
 plugins_LTLIBRARIES += prioreset.la
629b27
 
629b27
+reflink_la_SOURCES = reflink.c
629b27
+reflink_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
629b27
+plugins_LTLIBRARIES += reflink.la
629b27
+
629b27
 syslog_la_SOURCES = syslog.c
629b27
 syslog_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
629b27
 plugins_LTLIBRARIES += syslog.la
629b27
diff --git a/plugins/reflink.c b/plugins/reflink.c
629b27
new file mode 100644
629b27
index 000000000..d7f19acd9
629b27
--- /dev/null
629b27
+++ b/plugins/reflink.c
629b27
@@ -0,0 +1,340 @@
629b27
+#include "system.h"
629b27
+
629b27
+#include <errno.h>
629b27
+#include <sys/resource.h>
629b27
+#include <unistd.h>
629b27
+#include <sys/types.h>
629b27
+#include <sys/stat.h>
629b27
+#include <fcntl.h>
629b27
+#if defined(__linux__)
629b27
+#include <linux/fs.h>        /* For FICLONE */
629b27
+#endif
629b27
+
629b27
+#include <rpm/rpmlog.h>
629b27
+#include "lib/rpmlib.h"
629b27
+#include "lib/rpmplugin.h"
629b27
+#include "lib/rpmte_internal.h"
629b27
+#include <rpm/rpmfileutil.h>
629b27
+#include "rpmio/rpmio_internal.h"
629b27
+
629b27
+
629b27
+#include "debug.h"
629b27
+
629b27
+#include <sys/ioctl.h>
629b27
+
629b27
+/* use hash table to remember inode -> ix (for rpmfilesFN(ix)) lookups */
629b27
+#undef HASHTYPE
629b27
+#undef HTKEYTYPE
629b27
+#undef HTDATATYPE
629b27
+#define HASHTYPE inodeIndexHash
629b27
+#define HTKEYTYPE rpm_ino_t
629b27
+#define HTDATATYPE int
629b27
+#include "lib/rpmhash.H"
629b27
+#include "lib/rpmhash.C"
629b27
+
629b27
+/*
629b27
+We use this in find to indicate a key wasn't found. This is an unrecoverable
629b27
+error, but we can at least show a decent error. 0 is never a valid offset
629b27
+because it's the offset of the start of the file.
629b27
+*/
629b27
+#define NOT_FOUND 0
629b27
+
629b27
+#define BUFFER_SIZE (1024 * 128)
629b27
+
629b27
+/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
629b27
+#define MAGIC 3472329499408095051
629b27
+
629b27
+struct reflink_state_s {
629b27
+  /* Stuff that's used across rpms */
629b27
+  long fundamental_block_size;
629b27
+  char *buffer;
629b27
+
629b27
+  /* stuff that's used/updated per psm */
629b27
+  uint32_t keys, keysize;
629b27
+
629b27
+  // table for current rpm, keys * (keysize + sizeof(rpm_loff_t))
629b27
+  unsigned char *table;
629b27
+  FD_t fd;
629b27
+  rpmfiles files;
629b27
+  inodeIndexHash inodeIndexes;
629b27
+};
629b27
+
629b27
+typedef struct reflink_state_s * reflink_state;
629b27
+
629b27
+static int inodeCmp(rpm_ino_t a, rpm_ino_t b)
629b27
+{
629b27
+    return (a != b);
629b27
+}
629b27
+
629b27
+static unsigned int inodeId(rpm_ino_t a)
629b27
+{
629b27
+    /* rpm_ino_t is uint32_t so maps safely to unsigned int */
629b27
+    return (unsigned int)a;
629b27
+}
629b27
+
629b27
+static rpmRC reflink_init(rpmPlugin plugin, rpmts ts) {
629b27
+  reflink_state state = rcalloc(1, sizeof(struct reflink_state_s));
629b27
+
629b27
+  /*
629b27
+  IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset and
629b27
+  length arguments to be aligned to the fundamental block size.
629b27
+
629b27
+  The value of "fundamental block size" is directly related to the system's
629b27
+  page size, so we should use that.
629b27
+  */
629b27
+  state->fundamental_block_size = sysconf(_SC_PAGESIZE);
629b27
+  state->buffer = rcalloc(1, BUFFER_SIZE);
629b27
+  rpmPluginSetData(plugin, state);
629b27
+
629b27
+  return RPMRC_OK;
629b27
+}
629b27
+
629b27
+static void reflink_cleanup(rpmPlugin plugin) {
629b27
+  reflink_state state = rpmPluginGetData(plugin);
629b27
+  free(state->buffer);
629b27
+  free(state);
629b27
+}
629b27
+
629b27
+static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
629b27
+    reflink_state state = rpmPluginGetData(plugin);
629b27
+    state->fd = rpmteFd(te);
629b27
+    if (state->fd == 0) {
629b27
+      rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
629b27
+      return RPMRC_OK;
629b27
+    }
629b27
+    rpm_loff_t current = Ftell(state->fd);
629b27
+    uint64_t magic;
629b27
+    if (Fseek(state->fd, -(sizeof(magic)), SEEK_END) < 0) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
629b27
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
629b27
+        /* yes this gets a bit repetitive */
629b27
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
629b27
+      }
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    size_t len = sizeof(magic);
629b27
+    if (Fread(&magic, len, 1, state->fd) != len) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
629b27
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
629b27
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
629b27
+      }
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    if (magic != MAGIC) {
629b27
+      rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
629b27
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
629b27
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
629b27
+        return RPMRC_FAIL;
629b27
+      }
629b27
+      return RPMRC_OK;
629b27
+    }
629b27
+    rpmlog(RPMLOG_DEBUG, _("reflink: *is* transcoded\n"));
629b27
+    Header h = rpmteHeader(te);
629b27
+
629b27
+    /* replace/add header that main fsm.c can read */
629b27
+    headerDel(h, RPMTAG_PAYLOADFORMAT);
629b27
+    headerPutString(h, RPMTAG_PAYLOADFORMAT, "clon");
629b27
+    headerFree(h);
629b27
+    state->files = rpmteFiles(te);
629b27
+    /* tail of file contains offset_table, offset_checksums
629b27
+       then magic
629b27
+    */
629b27
+    if (Fseek(state->fd, -(sizeof(rpm_loff_t) * 2 + sizeof(magic)), SEEK_END) < 0) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"), state->fd);
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    rpm_loff_t table_start;
629b27
+    len = sizeof(table_start);
629b27
+    if (Fread(&table_start, len, 1, state->fd) != len) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    if (Fseek(state->fd, table_start, SEEK_SET) < 0) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    len = sizeof(state->keys);
629b27
+    if (Fread(&state->keys, len, 1, state->fd) != len) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    len = sizeof(state->keysize);
629b27
+    if (Fread(&state->keysize, len, 1, state->fd) != len) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    rpmlog(RPMLOG_DEBUG, _("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"), table_start, state->keys, state->keysize);
629b27
+    // now get digest table if there is a reason to have one.
629b27
+    if (state->keys == 0 || state->keysize == 0) {
629b27
+      // no files (or no digests(!))
629b27
+      state->table = NULL;
629b27
+    } else {
629b27
+      int table_size = state->keys * (state->keysize + sizeof(rpm_loff_t));
629b27
+      state->table = rcalloc(1, table_size);
629b27
+      if (Fread(state->table, table_size, 1, state->fd) != table_size) {
629b27
+        rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
629b27
+        return RPMRC_FAIL;
629b27
+      }
629b27
+      state->inodeIndexes = inodeIndexHashCreate(state->keys, inodeId, inodeCmp, NULL, NULL);
629b27
+    }
629b27
+
629b27
+    // seek back to original location
629b27
+    // might not be needed if we seek to offset immediately
629b27
+    if (Fseek(state->fd, current, SEEK_SET) < 0) {
629b27
+      rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
629b27
+      return RPMRC_FAIL;
629b27
+    }
629b27
+    return RPMRC_OK;
629b27
+}
629b27
+
629b27
+static rpmRC reflink_psm_post(rpmPlugin plugin, rpmte te, int res)
629b27
+{
629b27
+    reflink_state state = rpmPluginGetData(plugin);
629b27
+    state->files = rpmfilesFree(state->files);
629b27
+    if (state->table) {
629b27
+      free(state->table);
629b27
+      state->table = NULL;
629b27
+    }
629b27
+    if (state->inodeIndexes) {
629b27
+      inodeIndexHashFree(state->inodeIndexes);
629b27
+      state->inodeIndexes = NULL;
629b27
+    }
629b27
+    return RPMRC_OK;
629b27
+}
629b27
+
629b27
+
629b27
+// have a prototype, warnings system
629b27
+rpm_loff_t find(const unsigned char *digest, reflink_state state);
629b27
+
629b27
+rpm_loff_t find(const unsigned char *digest, reflink_state state) {
629b27
+# if defined(__GNUC__)
629b27
+  /* GCC nested function because bsearch's comparison function can't access
629b27
+     state->keysize otherwise
629b27
+  */
629b27
+  int cmpdigest(const void *k1, const void *k2) {
629b27
+    rpmlog(RPMLOG_DEBUG, _("reflink: cmpdigest k1=%p k2=%p\n"), k1, k2);
629b27
+    return memcmp(k1, k2, state->keysize);
629b27
+  }
629b27
+# endif
629b27
+  rpmlog(RPMLOG_DEBUG, _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"), digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t));
629b27
+  char *entry = bsearch(digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t), cmpdigest);
629b27
+  if (entry == NULL) {
629b27
+    return NOT_FOUND;
629b27
+  }
629b27
+  rpm_loff_t offset = *(rpm_loff_t *)(entry + state->keysize);
629b27
+  return offset;
629b27
+}
629b27
+
629b27
+static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path, mode_t file_mode, rpmFsmOp op)
629b27
+{
629b27
+    struct file_clone_range fcr;
629b27
+    rpm_loff_t size;
629b27
+    int dst, rc;
629b27
+    int *hlix;
629b27
+
629b27
+    reflink_state state = rpmPluginGetData(plugin);
629b27
+    if (state->table == NULL) {
629b27
+        // no table means rpm is not in reflink format, so leave. Now.
629b27
+        return RPMRC_OK;
629b27
+    }
629b27
+    if (op == FA_TOUCH) {
629b27
+        // we're not overwriting an existing file
629b27
+        return RPMRC_OK;
629b27
+    }
629b27
+    fcr.dest_offset = 0;
629b27
+    if (S_ISREG(file_mode) && !(rpmfiFFlags(fi) & RPMFILE_GHOST)) {
629b27
+      rpm_ino_t inode = rpmfiFInode(fi);
629b27
+      /* check for hard link entry in table. GetEntry overwrites hlix with the address of the first match */
629b27
+      if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL, NULL)) {
629b27
+        // entry is in table, use hard link
629b27
+        char *fn = rpmfilesFN(state->files, hlix[0]);
629b27
+        if (link(fn, path) != 0) {
629b27
+          rpmlog(RPMLOG_ERR, _("reflink: Unable to hard link %s -> %s due to %s\n"), fn, path, strerror(errno));
629b27
+          free(fn);
629b27
+          return RPMRC_FAIL;
629b27
+        }
629b27
+        free(fn);
629b27
+        return RPMRC_PLUGIN_CONTENTS;
629b27
+      }
629b27
+      /* if we didn't hard link, then we'll track this inode as being created soon */
629b27
+      if (rpmfiFNlink(fi) > 1) {
629b27
+        /* minor optimization: only store files with more than one link */
629b27
+        inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi));
629b27
+      }
629b27
+      /* derived from wfd_open in fsm.c */
629b27
+      mode_t old_umask = umask(0577);
629b27
+      dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
629b27
+      umask(old_umask);
629b27
+      if (dst == -1) {
629b27
+          rpmlog(RPMLOG_ERR, _("reflink: Unable to open %s for writing due to %s, flags = %x\n"), path, strerror(errno), rpmfiFFlags(fi));
629b27
+          return RPMRC_FAIL;
629b27
+      }
629b27
+      size = rpmfiFSize(fi);
629b27
+      if (size > 0) {
629b27
+          /* round src_length down to fundamental_block_size multiple */
629b27
+          fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size;
629b27
+          if ((size % state->fundamental_block_size) > 0) {
629b27
+              /* round up to next fundamental_block_size. We expect the data in the rpm to be similarly padded */
629b27
+              fcr.src_length += state->fundamental_block_size;
629b27
+          }
629b27
+          fcr.src_fd = Fileno(state->fd);
629b27
+          if (fcr.src_fd == -1) {
629b27
+            close(dst);
629b27
+            rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n"));
629b27
+            return RPMRC_FAIL;
629b27
+          }
629b27
+          fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state);
629b27
+          if (fcr.src_offset == NOT_FOUND) {
629b27
+            close(dst);
629b27
+            rpmlog(RPMLOG_ERR, _("reflink: digest not found\n"));
629b27
+            return RPMRC_FAIL;
629b27
+          }
629b27
+          rpmlog(RPMLOG_DEBUG, _("reflink: Reflinking %lu bytes at %lu to %s orig size=%lu, file=%ld\n"), fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
629b27
+          rc = ioctl(dst, FICLONERANGE, &fcr);
629b27
+          if (rc) {
629b27
+            rpmlog(RPMLOG_WARNING, _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"), path, rc, errno, strerror(errno));
629b27
+            if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) {
629b27
+                close(dst);
629b27
+                rpmlog(RPMLOG_ERR, _("reflink: unable to seek on copying bits\n"));
629b27
+                return RPMRC_FAIL;
629b27
+            }
629b27
+            rpm_loff_t left = size;
629b27
+            size_t len, read, written;
629b27
+            while (left) {
629b27
+              len = (left > BUFFER_SIZE ? BUFFER_SIZE : left);
629b27
+              read = Fread(state->buffer, len, 1, state->fd);
629b27
+              if (read != len) {
629b27
+                close(dst);
629b27
+                rpmlog(RPMLOG_ERR, _("reflink: short read on copying bits\n"));
629b27
+                return RPMRC_FAIL;
629b27
+              }
629b27
+              written = write(dst, state->buffer, len);
629b27
+              if (read != written) {
629b27
+                close(dst);
629b27
+                rpmlog(RPMLOG_ERR, _("reflink: short write on copying bits\n"));
629b27
+                return RPMRC_FAIL;
629b27
+              }
629b27
+              left -= len;
629b27
+            }
629b27
+          } else {
629b27
+            /* reflink worked, so truncate */
629b27
+            rc = ftruncate(dst, size);
629b27
+            if (rc) {
629b27
+                rpmlog(RPMLOG_ERR, _("reflink: Unable to truncate %s to %ld due to %s\n"), path, size, strerror(errno));
629b27
+                return RPMRC_FAIL;
629b27
+            }
629b27
+          }
629b27
+      }
629b27
+      close(dst);
629b27
+      return RPMRC_PLUGIN_CONTENTS;
629b27
+    }
629b27
+    return RPMRC_OK;
629b27
+}
629b27
+
629b27
+struct rpmPluginHooks_s reflink_hooks = {
629b27
+    .init = reflink_init,
629b27
+    .cleanup = reflink_cleanup,
629b27
+    .psm_pre = reflink_psm_pre,
629b27
+    .psm_post = reflink_psm_post,
629b27
+    .fsm_file_pre = reflink_fsm_file_pre,
629b27
+};
629b27
diff --git a/rpm2extents.c b/rpm2extents.c
629b27
new file mode 100644
629b27
index 000000000..5662b86a6
629b27
--- /dev/null
629b27
+++ b/rpm2extents.c
629b27
@@ -0,0 +1,519 @@
629b27
+/* rpm2extents: convert payload to inline extents */
629b27
+
629b27
+#include "system.h"
629b27
+
629b27
+#include <rpm/rpmlib.h>		/* rpmReadPackageFile .. */
629b27
+#include <rpm/rpmfi.h>
629b27
+#include <rpm/rpmtag.h>
629b27
+#include <rpm/rpmio.h>
629b27
+#include <rpm/rpmpgp.h>
629b27
+
629b27
+#include <rpm/rpmts.h>
629b27
+#include "lib/rpmlead.h"
629b27
+#include "lib/signature.h"
629b27
+#include "lib/header_internal.h"
629b27
+#include "rpmio/rpmio_internal.h"
629b27
+
629b27
+#include <unistd.h>
629b27
+#include <sys/types.h>
629b27
+#include <sys/wait.h>
629b27
+#include <signal.h>
629b27
+#include <errno.h>
629b27
+#include <string.h>
629b27
+
629b27
+#include "debug.h"
629b27
+
629b27
+/* set of pointers to file digests already written to the output.
629b27
+   The length of the key depends on what the FILEDIGESTALGO is.
629b27
+ */
629b27
+#undef HASHTYPE
629b27
+#undef HTKEYTYPE
629b27
+#undef HTDATATYPE
629b27
+#define HASHTYPE digestSet
629b27
+#define HTKEYTYPE const unsigned char *
629b27
+#include "lib/rpmhash.H"
629b27
+#include "lib/rpmhash.C"
629b27
+
629b27
+/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
629b27
+#define MAGIC 3472329499408095051
629b27
+
629b27
+struct digestoffset {	/* one entry of the digest table written after the file contents */
629b27
+    const unsigned char * digest;	/* digest bytes as returned by rpmfiFDigest(); not owned */
629b27
+    rpm_loff_t pos;	/* output offset at which the content for this digest starts */
629b27
+};
629b27
+
629b27
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit);
629b27
+/* Bytes needed after pos to reach the next multiple of unit (unit must be non-zero). */
629b27
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit)
629b27
+{
629b27
+    return (unit - (pos % unit)) % unit;	/* outer % turns "already aligned" into 0 */
629b27
+}
629b27
+
629b27
+static int digestor(	/* copy fdi to fdo, then describe the copied stream on validationo */
629b27
+    FD_t fdi,	/* source stream; the requested digests are attached to it */
629b27
+    FD_t fdo,	/* receives the bytes copied from fdi */
629b27
+    FD_t validationo,	/* receives input length, digest count and one record per digest */
629b27
+    uint8_t algos[],	/* algorithm ids handed to fdInitDigest()/fdFiniDigest() */
629b27
+    uint32_t algos_len	/* number of entries in algos */
629b27
+)
629b27
+{
629b27
+    ssize_t fdilength;
629b27
+    const char *filedigest, *algo_name;
629b27
+    size_t filedigest_len, len;
629b27
+    uint32_t algo_name_len, algo_digest_len;
629b27
+    int algo;
629b27
+    rpmRC rc = RPMRC_FAIL;	/* returned as int; only becomes RPMRC_OK after every write succeeded */
629b27
+
629b27
+    for (algo = 0; algo < algos_len; algo++)
629b27
+    {
629b27
+        fdInitDigest(fdi, algos[algo], 0);	/* done before the copy below starts */
629b27
+    }
629b27
+    fdilength = ufdCopy(fdi, fdo);	/* -1 is treated as failure */
629b27
+    if (fdilength == -1)
629b27
+    {
629b27
+        fprintf(stderr, _("digest cat failed\n"));
629b27
+        goto exit;
629b27
+    }
629b27
+    /* validation header: result of ufdCopy(), then the number of digests */
629b27
+    len = sizeof(fdilength);	/* NOTE(review): integers go out in host width and byte order */
629b27
+    if (Fwrite(&fdilength, len, 1, validationo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write input length %zd\n"), fdilength);
629b27
+        goto exit;
629b27
+    }
629b27
+    len = sizeof(algos_len);
629b27
+    if (Fwrite(&algos_len, len, 1, validationo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write number of validation digests\n"));
629b27
+        goto exit;
629b27
+    }
629b27
+    for (algo = 0; algo < algos_len; algo++)
629b27
+    {
629b27
+        fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0);
629b27
+        /* NOTE(review): filedigest is never released here - confirm who owns it */
629b27
+        algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]);
629b27
+        algo_name_len = (uint32_t)strlen(algo_name);	/* name is written without a terminator */
629b27
+        algo_digest_len = (uint32_t)filedigest_len;
629b27
+
629b27
+        len = sizeof(algo_name_len);	/* record field 1: length of the algorithm name */
629b27
+        if (Fwrite(&algo_name_len, len, 1, validationo) != len)
629b27
+        {
629b27
+            fprintf(
629b27
+                stderr,
629b27
+                _("Unable to write validation algo name length\n")
629b27
+            );
629b27
+            goto exit;
629b27
+        }
629b27
+        len = sizeof(algo_digest_len);	/* record field 2: length of the digest */
629b27
+        if (Fwrite(&algo_digest_len, len, 1, validationo) != len)
629b27
+        {
629b27
+            fprintf(
629b27
+                stderr,
629b27
+                _("Unable to write number of bytes for validation digest\n")
629b27
+            );
629b27
+            goto exit;
629b27
+        }
629b27
+        if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len)	/* field 3: name bytes */
629b27
+        {
629b27
+            fprintf(stderr, _("Unable to write validation algo name\n"));
629b27
+            goto exit;
629b27
+        }
629b27
+        if (
629b27
+            Fwrite(
629b27
+                filedigest,	/* field 4: digest bytes */
629b27
+                algo_digest_len,
629b27
+                1,
629b27
+                validationo
629b27
+            ) != algo_digest_len
629b27
+        )
629b27
+        {
629b27
+            fprintf(
629b27
+                stderr,
629b27
+                _("Unable to write validation digest value %u, %zu\n"),
629b27
+                algo_digest_len,
629b27
+                filedigest_len
629b27
+            );
629b27
+            goto exit;
629b27
+        }
629b27
+    }
629b27
+    rc = RPMRC_OK;
629b27
+exit:
629b27
+    return rc;
629b27
+}
629b27
+
629b27
+/* Convert the package read from fdi into extent form on stdout: lead,
629b27
+   signature and header, then each distinct regular file's content once on a
629b27
+   page boundary, then the digest table, validationi's data and the trailer.
629b27
+   Returns RPMRC_OK/RPMRC_FAIL; failures before the payload loop call exit(). */
629b27
+static rpmRC process_package(FD_t fdi, FD_t validationi)
629b27
+{
629b27
+    uint32_t diglen;
629b27
+    /* GNU C extension: can use diglen from outer context */
629b27
+    int digestSetCmp(const unsigned char * a, const unsigned char * b)
629b27
+    {
629b27
+        return memcmp(a, b, diglen);
629b27
+    }
629b27
+
629b27
+    unsigned int digestSetHash(const unsigned char * digest)
629b27
+    {
629b27
+        /* assumes sizeof(unsigned int) < diglen */
629b27
+        return *(unsigned int *)digest;
629b27
+    }
629b27
+
629b27
+    int digestoffsetCmp(const void * a, const void * b)
629b27
+    {
629b27
+        return digestSetCmp(
629b27
+            ((struct digestoffset *)a)->digest,
629b27
+            ((struct digestoffset *)b)->digest
629b27
+        );
629b27
+    }
629b27
+
629b27
+    FD_t fdo;
629b27
+    FD_t gzdi;
629b27
+    Header h, sigh;
629b27
+    long fundamental_block_size = sysconf(_SC_PAGESIZE);
629b27
+    rpmRC rc = RPMRC_OK;
629b27
+    rpm_mode_t mode;
629b27
+    char *rpmio_flags = NULL, *zeros;
629b27
+    const unsigned char *digest;
629b27
+    rpm_loff_t pos, size, pad, validation_pos;
629b27
+    uint32_t offset_ix = 0;
629b27
+    size_t len;
629b27
+    int next = 0;
629b27
+
629b27
+    fdo = fdDup(STDOUT_FILENO);
629b27
+
629b27
+    if (rpmReadPackageRaw(fdi, &sigh, &h))
629b27
+    {
629b27
+        fprintf(stderr, _("Error reading package\n"));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    if (rpmLeadWrite(fdo, h))
629b27
+    {
629b27
+        fprintf(
629b27
+            stderr,
629b27
+            _("Unable to write package lead: %s\n"),
629b27
+            Fstrerror(fdo)
629b27
+        );
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    if (rpmWriteSignature(fdo, sigh))
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    if (headerWrite(fdo, h, HEADER_MAGIC_YES))
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    /* Retrieve payload size and compression type. */
629b27
+    {	const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR);
629b27
+        rpmio_flags = rstrscat(NULL, "r.", compr ? compr : "gzip", NULL);
629b27
+    }
629b27
+
629b27
+    gzdi = Fdopen(fdi, rpmio_flags);	/* XXX gzdi == fdi */
629b27
+    free(rpmio_flags);
629b27
+
629b27
+    if (gzdi == NULL)
629b27
+    {
629b27
+        fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    rpmfiles files = rpmfilesNew(NULL, h, 0, RPMFI_KEEPHEADER);
629b27
+    rpmfi fi = rpmfiNewArchiveReader(
629b27
+        gzdi,
629b27
+        files,
629b27
+        RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST
629b27
+    );
629b27
+
629b27
+    /* this is encoded in the file format, so needs to be fixed size (for now?) */
629b27
+    diglen = (uint32_t)rpmDigestLength(rpmfiDigestAlgo(fi));
629b27
+    digestSet ds = digestSetCreate(
629b27
+        rpmfiFC(fi),
629b27
+        digestSetHash,
629b27
+        digestSetCmp,
629b27
+        NULL
629b27
+    );
629b27
+    struct digestoffset offsets[rpmfiFC(fi)];
629b27
+    pos = RPMLEAD_SIZE + headerSizeof(sigh, HEADER_MAGIC_YES);
629b27
+
629b27
+    /* main headers are aligned to 8 byte boundary */
629b27
+    pos += pad_to(pos, 8);
629b27
+    pos += headerSizeof(h, HEADER_MAGIC_YES);
629b27
+
629b27
+    zeros = xcalloc(fundamental_block_size, 1);
629b27
+
629b27
+    while (next >= 0)
629b27
+    {
629b27
+        next = rpmfiNext(fi);
629b27
+        if (next == RPMERR_ITER_END)
629b27
+        {
629b27
+            rc = RPMRC_OK;
629b27
+            break;
629b27
+        }
629b27
+        if (next < 0)	/* any other negative value is an iteration error, not the end */
629b27
+        {
629b27
+            fprintf(stderr, _("rpmfiNext failed with %d\n"), next);
629b27
+            rc = RPMRC_FAIL;
629b27
+            goto exit;
629b27
+        }
629b27
+        mode = rpmfiFMode(fi);
629b27
+        if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi))
629b27
+        {
629b27
+            /* not a regular file, or the archive has no content for this entry */
629b27
+            continue;
629b27
+        }
629b27
+        digest = rpmfiFDigest(fi, NULL, NULL);
629b27
+        if (digestSetGetEntry(ds, digest, NULL))
629b27
+        {
629b27
+            /* This specific digest has already been included, so skip it */
629b27
+            continue;
629b27
+        }
629b27
+        pad = pad_to(pos, fundamental_block_size);
629b27
+        if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
629b27
+        {
629b27
+            fprintf(stderr, _("Unable to write padding\n"));
629b27
+            rc = RPMRC_FAIL;
629b27
+            goto exit;
629b27
+        }
629b27
+        /* round up to next fundamental_block_size */
629b27
+        pos += pad;
629b27
+        digestSetAddEntry(ds, digest);
629b27
+        offsets[offset_ix].digest = digest;
629b27
+        offsets[offset_ix].pos = pos;
629b27
+        offset_ix++;
629b27
+        size = rpmfiFSize(fi);
629b27
+        rc = rpmfiArchiveReadToFile(fi, fdo, 0);
629b27
+        if (rc != RPMRC_OK)
629b27
+        {
629b27
+            fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc);
629b27
+            goto exit;
629b27
+        }
629b27
+        pos += size;
629b27
+    }
629b27
+    Fclose(gzdi);	/* XXX gzdi == fdi */
629b27
+
629b27
+    qsort(offsets, (size_t)offset_ix, sizeof(struct digestoffset),
629b27
+          digestoffsetCmp);	/* the table is written ordered by digest bytes */
629b27
+
629b27
+    len = sizeof(offset_ix);
629b27
+    if (Fwrite(&offset_ix, len, 1, fdo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write length of table\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    len = sizeof(diglen);
629b27
+    if (Fwrite(&diglen, len, 1, fdo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write length of digest\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    len = sizeof(rpm_loff_t);	/* also used for the two trailer offsets below */
629b27
+    for (int x = 0; x < offset_ix; x++)
629b27
+    {
629b27
+        if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen)
629b27
+        {
629b27
+            fprintf(stderr, _("Unable to write digest\n"));
629b27
+            rc = RPMRC_FAIL;
629b27
+            goto exit;
629b27
+        }
629b27
+        if (Fwrite(&offsets[x].pos, len, 1, fdo) != len)
629b27
+        {
629b27
+            fprintf(stderr, _("Unable to write offset\n"));
629b27
+            rc = RPMRC_FAIL;
629b27
+            goto exit;
629b27
+        }
629b27
+    }
629b27
+    validation_pos = (
629b27
+        pos + sizeof(offset_ix) + sizeof(diglen) +
629b27
+        offset_ix * (diglen + sizeof(rpm_loff_t))
629b27
+    );
629b27
+
629b27
+    ssize_t validation_len = ufdCopy(validationi, fdo);
629b27
+    if (validation_len == -1)
629b27
+    {
629b27
+        fprintf(stderr, _("digest table ufdCopy failed\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    /* add more padding so the last file can be cloned. It doesn't matter that
629b27
+       the table and validation etc are in this space. In fact, it's pretty
629b27
+       efficient if it is */
629b27
+    pad = pad_to(
629b27
+        (
629b27
+            validation_pos + validation_len + 2 * sizeof(rpm_loff_t) +
629b27
+            sizeof(uint64_t)
629b27
+        ),
629b27
+        fundamental_block_size
629b27
+    );
629b27
+    if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write final padding\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    if (Fwrite(&pos, len, 1, fdo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write offset of digest table\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    if (Fwrite(&validation_pos, len, 1, fdo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write offset of validation table\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+    uint64_t magic = MAGIC;
629b27
+    len = sizeof(magic);
629b27
+    if (Fwrite(&magic, len, 1, fdo) != len)
629b27
+    {
629b27
+        fprintf(stderr, _("Unable to write magic\n"));
629b27
+        rc = RPMRC_FAIL;
629b27
+        goto exit;
629b27
+    }
629b27
+
629b27
+exit:
629b27
+    free(zeros);	/* released here so the error paths no longer leak it */
629b27
+    rpmfilesFree(files);
629b27
+    rpmfiFree(fi);
629b27
+    headerFree(h);
629b27
+    return rc;
629b27
+}
629b27
+
629b27
+/* Entry point: parse the DIGESTALGO arguments, then fork. The child runs
629b27
+   digestor() on stdin and feeds two pipes; the parent runs process_package()
629b27
+   on those pipes and reaps the child. Fails if either side fails. */
629b27
+int main(int argc, char *argv[])
629b27
+{
629b27
+    rpmRC rc;
629b27
+    int cprc = 0;
629b27
+    uint8_t algos[argc > 1 ? argc - 1 : 1];	/* never a zero-length VLA */
629b27
+    int mainpipefd[2], metapipefd[2];
629b27
+    pid_t cpid, w;
629b27
+    int wstatus;
629b27
+
629b27
+    xsetprogname(argv[0]);	/* Portability call -- see system.h */
629b27
+    rpmReadConfigFiles(NULL, NULL);
629b27
+
629b27
+    if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help")))
629b27
+    {
629b27
+        fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]);
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    if (argc == 1)
629b27
+    {
629b27
+        fprintf(stderr,
629b27
+                _("Need at least one DIGESTALGO parameter, e.g. 'SHA256'\n"));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+
629b27
+    for (int x = 0; x < (argc - 1); x++)
629b27
+    {
629b27
+        if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0)
629b27
+        {
629b27
+            fprintf(stderr,
629b27
+                    _("Unable to resolve '%s' as a digest algorithm, exiting\n"),
629b27
+                    argv[x + 1]);
629b27
+            exit(EXIT_FAILURE);
629b27
+        }
629b27
+    }
629b27
+
629b27
+    if (pipe(mainpipefd) == -1)
629b27
+    {
629b27
+        fprintf(stderr, _("Main pipe failure\n"));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+    if (pipe(metapipefd) == -1)
629b27
+    {
629b27
+        fprintf(stderr, _("Meta pipe failure\n"));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+    cpid = fork();
629b27
+    if (cpid == -1)	/* without this, the parent branch would run with cpid == -1 */
629b27
+    {
629b27
+        fprintf(stderr, _("Fork failure: %s\n"), strerror(errno));
629b27
+        exit(EXIT_FAILURE);
629b27
+    }
629b27
+    if (cpid == 0)
629b27
+    {
629b27
+        /* child: digestor */
629b27
+        close(mainpipefd[0]);
629b27
+        close(metapipefd[0]);
629b27
+        FD_t fdi = fdDup(STDIN_FILENO);
629b27
+        FD_t fdo = fdDup(mainpipefd[1]);
629b27
+        FD_t validationo = fdDup(metapipefd[1]);
629b27
+        rc = digestor(fdi, fdo, validationo, algos, argc - 1);
629b27
+        Fclose(validationo);
629b27
+        Fclose(fdo);
629b27
+        Fclose(fdi);
629b27
+    } else {
629b27
+        /* parent: main program */
629b27
+        close(mainpipefd[1]);
629b27
+        close(metapipefd[1]);
629b27
+        FD_t fdi = fdDup(mainpipefd[0]);
629b27
+        FD_t validationi = fdDup(metapipefd[0]);
629b27
+        rc = process_package(fdi, validationi);
629b27
+        Fclose(validationi);
629b27
+        /* fdi is normally closed through the stacked file gzdi in the function. */
629b27
+        /* wait for child process (digestor for stdin) to complete. */
629b27
+        if (rc != RPMRC_OK)
629b27
+        {
629b27
+            if (kill(cpid, SIGTERM) != 0)	/* cpid > 0 here: fork failure exited above */
629b27
+            {
629b27
+                fprintf(stderr,
629b27
+                        _("Failed to kill digest process when main process failed: %s\n"),
629b27
+                        strerror(errno));
629b27
+            }
629b27
+        }
629b27
+        w = waitpid(cpid, &wstatus, 0);
629b27
+        if (w == -1)
629b27
+        {
629b27
+            fprintf(stderr, _("waitpid failed\n"));
629b27
+            cprc = EXIT_FAILURE;
629b27
+        } else if (WIFEXITED(wstatus))
629b27
+        {
629b27
+            cprc = WEXITSTATUS(wstatus);
629b27
+            if (cprc != 0)
629b27
+            {
629b27
+                fprintf(
629b27
+                    stderr,
629b27
+                    _("Digest process non-zero exit code %d\n"),
629b27
+                    cprc
629b27
+                );
629b27
+            }
629b27
+        } else if (WIFSIGNALED(wstatus))
629b27
+        {
629b27
+            fprintf(
629b27
+                stderr,
629b27
+                _("Digest process was terminated with a signal: %d\n"),
629b27
+                WTERMSIG(wstatus)
629b27
+            );
629b27
+            cprc = EXIT_FAILURE;
629b27
+        } else
629b27
+        {
629b27
+            /* don't think this can happen, but covering all bases */
629b27
+            fprintf(stderr, _("Unhandled circumstance in waitpid\n"));
629b27
+            cprc = EXIT_FAILURE;
629b27
+        }
629b27
+        if (cprc != EXIT_SUCCESS)
629b27
+        {
629b27
+            rc = RPMRC_FAIL;
629b27
+        }
629b27
+    }
629b27
+    if (rc != RPMRC_OK)
629b27
+    {
629b27
+        /* translate rpmRC into generic failure return code. */
629b27
+        return EXIT_FAILURE;
629b27
+    }
629b27
+    return EXIT_SUCCESS;
629b27
+}
629b27
diff --git a/rpmio/rpmpgp.c b/rpmio/rpmpgp.c
629b27
index 015c15a5c..7b972b4a6 100644
629b27
--- a/rpmio/rpmpgp.c
629b27
+++ b/rpmio/rpmpgp.c
629b27
@@ -283,6 +283,16 @@ int pgpValTok(pgpValTbl vs, const char * s, const char * se)
629b27
     return vs->val;
629b27
 }
629b27
 
629b27
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val)
629b27
+{
629b27
+    pgpValTbl tbl = pgpValTable(type);	/* table to search for this value type */
629b27
+    if (tbl == NULL) return -1;	/* no table was returned for type */
629b27
+    int v = pgpValTok(tbl, str, str + strlen(str));	/* str must be non-NULL: strlen() is applied to it */
629b27
+    if (v == -1) return -1;	/* -1 from pgpValTok() is reported as failure */
629b27
+    *val = (uint8_t)v;	/* written only on the success path */
629b27
+    return 0;
629b27
+}
629b27
+
629b27
 /** \ingroup rpmpgp
629b27
  * Decode length from 1, 2, or 5 octet body length encoding, used in
629b27
  * new format packet headers and V4 signature subpackets.
629b27
diff --git a/rpmio/rpmpgp.h b/rpmio/rpmpgp.h
629b27
index c53e29b01..2b57318ba 100644
629b27
--- a/rpmio/rpmpgp.h
629b27
+++ b/rpmio/rpmpgp.h
629b27
@@ -973,6 +973,15 @@ typedef rpmFlags rpmDigestFlags;
629b27
  */
629b27
 const char * pgpValString(pgpValType type, uint8_t val);
629b27
 
629b27
+/** \ingroup rpmpgp
629b27
+ * Return OpenPGP value for a string.
629b27
+ * @param type		type of value
629b27
+ * @param str		string to lookup
629b27
+ * @param[out] val	byte value associated with string (set only on success)
629b27
+ * @return		0 on success else -1
629b27
+ */
629b27
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val);
629b27
+
629b27
 /** \ingroup rpmpgp
629b27
  * Return (native-endian) integer from big-endian representation.
629b27
  * @param s		pointer to big-endian integer
629b27
-- 
629b27
2.35.1
629b27