b3c12f
From 82b454d2ff43b39e1a3b38fded0c2e5caffcd336 Mon Sep 17 00:00:00 2001
b3c12f
From: Matthew Almond <malmond@fb.com>
b3c12f
Date: Fri, 8 Nov 2019 09:29:43 -0800
b3c12f
Subject: [PATCH 1/4] RPM with Copy on Write
b3c12f
b3c12f
This is part of https://fedoraproject.org/wiki/Changes/RPMCoW
b3c12f
b3c12f
The majority of changes are in two new programs:
b3c12f
b3c12f
= rpm2extents
b3c12f
b3c12f
Modeled as a 'stream processor'. It reads a regular .rpm file on stdin,
b3c12f
and produces a modified .rpm file on stdout. The lead, signature and
b3c12f
headers are preserved 1:1 to allow all the normal metadata inspection,
b3c12f
signature verification to work as expected. Only the 'payload' is
b3c12f
modified.
b3c12f
b3c12f
The primary motivation for this tool is to re-organize the payload as a
b3c12f
sequence of raw file extents (hence the name). The files are organized
b3c12f
by their digest identity instead of path/filename. If any digest is
b3c12f
repeated, then the file is skipped/de-duped. Only regular files are
b3c12f
represented. All other entries like directories, symlinks, devices are
b3c12f
fully described in the headers and are omitted.
b3c12f
b3c12f
The files are padded so they start on `sysconf(_SC_PAGESIZE)` boundaries
b3c12f
to permit 'reflink' syscalls to work in the `reflink` plugin.
b3c12f
b3c12f
At the end of the file is a footer with 3 sections:
b3c12f
b3c12f
1. List of calculated digests of the input stream. This is used in
b3c12f
   `librepo` because the file *written* is a derivative, and not the
b3c12f
   same as the repo metadata describes. `rpm2extents` takes one or more
b3c12f
   positional arguments that describe which digest algorithms are
b3c12f
   desired. This is often just `SHA256`. This program is only measuring
b3c12f
   and recording the digest - it does not express an opinion on whether
b3c12f
   the file is correct. Due to the API on most compression libraries
b3c12f
   directly reading the source file, the whole file digest is measured
b3c12f
   using a subprocess and pipes. I don't love it, but it works.
b3c12f
2. Sorted List of file content digests + offset pairs. This is used in
b3c12f
   the plugin with a trivial binary search to locate the start of file
b3c12f
   content. The size is not needed because it's part of normal headers.
b3c12f
3. (offset of 1., offset of 2., 8 byte MAGIC value) triple
b3c12f
b3c12f
= reflink plugin
b3c12f
b3c12f
Looks for the 8 byte magic value at the end of the rpm file. If present
b3c12f
it alters the `RPMTAG_PAYLOADFORMAT` in memory to `clon`, and reads in
b3c12f
the digest->offset table.
b3c12f
b3c12f
`rpmPackageFilesInstall()` in `fsm.c` is
b3c12f
modified to alter the enumeration strategy from
b3c12f
`rpmfiNewArchiveReader()` to `rpmfilesIter()` if not `cpio`. This is
b3c12f
needed because there is no cpio to enumerate. In the same function, if
b3c12f
`rpmpluginsCallFsmFilePre()` returns `RPMRC_PLUGIN_CONTENTS` then
b3c12f
`fsmMkfile()` is skipped as it is assumed the plugin did the work.
b3c12f
b3c12f
The majority of the work is in `reflink_fsm_file_pre()` - the per file
b3c12f
hook for RPM plugins. If the file enumerated in
b3c12f
`rpmPackageFilesInstall()` is a regular file, this function will look up
b3c12f
the offset in the digest->offset table and will try to reflink it, then
b3c12f
fall back to a regular copy. If reflinking does work: we will have
b3c12f
reflinked a whole number of pages, so we truncate the file to the
b3c12f
expected size. Therefore installing most files does involve two writes:
b3c12f
the reflink of the full size, then a fork/copy on write for the last
b3c12f
page worth.
b3c12f
b3c12f
If the file passed to `reflink_fsm_file_pre()` is anything other than a
b3c12f
regular file, it returns `RPMRC_OK` so the normal mechanics of
b3c12f
`rpmPackageFilesInstall()` are used. That handles directories, symlinks
b3c12f
and other non-file types.
b3c12f
b3c12f
= New API for internal use
b3c12f
b3c12f
1. `rpmReadPackageRaw()` is used within `rpm2extents` to read all the
b3c12f
   headers without trying to validate signatures. This eliminates the
b3c12f
   runtime dependency on rpmdb.
b3c12f
2. `rpmteFd()` exposes the Fd behind the rpmte, so plugins can interact
b3c12f
   with the rpm itself.
b3c12f
3. `RPMRC_PLUGIN_CONTENTS` in `rpmRC_e` for use in
b3c12f
   `rpmpluginsCallFsmFilePre()` specifically.
b3c12f
4. `pgpStringVal()` is used to help parse the command line in
b3c12f
   `rpm2extents` - the positional arguments are strings, and this
b3c12f
   converts the values back to the values in the table.
b3c12f
b3c12f
Nothing has been removed, and none of the changes are intended to be
b3c12f
used externally, so I don't think a soname bump is warranted here.
b3c12f
---
b3c12f
 Makefile.am         |   6 +-
b3c12f
 lib/depends.c       |   2 +
b3c12f
 lib/fsm.c           |  50 ++++-
b3c12f
 lib/package.c       |  40 ++++
b3c12f
 lib/rpmlib.h        |   9 +
b3c12f
 lib/rpmplugins.c    |  21 +-
b3c12f
 lib/rpmte.c         |   5 +
b3c12f
 lib/rpmte.h         |   2 +
b3c12f
 lib/rpmtypes.h      |   3 +-
b3c12f
 macros.in           |   1 +
b3c12f
 plugins/Makefile.am |   4 +
b3c12f
 plugins/reflink.c   | 340 +++++++++++++++++++++++++++++
b3c12f
 rpm2extents.c       | 519 ++++++++++++++++++++++++++++++++++++++++++++
b3c12f
 rpmio/rpmpgp.c      |  10 +
b3c12f
 rpmio/rpmpgp.h      |   9 +
b3c12f
 15 files changed, 1006 insertions(+), 15 deletions(-)
b3c12f
 create mode 100644 plugins/reflink.c
b3c12f
 create mode 100644 rpm2extents.c
b3c12f
b3c12f
diff --git a/Makefile.am b/Makefile.am
b3c12f
index aca2f8996..17c1e0e8a 100644
b3c12f
--- a/Makefile.am
b3c12f
+++ b/Makefile.am
b3c12f
@@ -105,7 +105,7 @@ pkginclude_HEADERS += build/rpmfc.h
b3c12f
 pkginclude_HEADERS += build/rpmspec.h
b3c12f
 
b3c12f
 
b3c12f
-bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec
b3c12f
+bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec rpm2extents
b3c12f
 if WITH_ARCHIVE
b3c12f
 bin_PROGRAMS += 	rpm2archive 
b3c12f
 endif
b3c12f
@@ -159,6 +159,10 @@ rpm2cpio_SOURCES =	rpm2cpio.c debug.h system.h
b3c12f
 rpm2cpio_LDADD =	lib/librpm.la rpmio/librpmio.la
b3c12f
 rpm2cpio_LDADD +=	@WITH_POPT_LIB@
b3c12f
 
b3c12f
+rpm2extents_SOURCES =	rpm2extents.c debug.h system.h
b3c12f
+rpm2extents_LDADD =	lib/librpm.la rpmio/librpmio.la
b3c12f
+rpm2extents_LDADD +=	@WITH_POPT_LIB@
b3c12f
+
b3c12f
 rpm2archive_SOURCES =	rpm2archive.c debug.h system.h
b3c12f
 rpm2archive_LDADD =	lib/librpm.la rpmio/librpmio.la
b3c12f
 rpm2archive_LDADD +=	@WITH_POPT_LIB@ @WITH_ARCHIVE_LIB@
b3c12f
diff --git a/lib/depends.c b/lib/depends.c
b3c12f
index 28a4a784d..9e0489bcc 100644
b3c12f
--- a/lib/depends.c
b3c12f
+++ b/lib/depends.c
b3c12f
@@ -80,6 +80,8 @@ static rpmRC headerCheckPayloadFormat(Header h) {
b3c12f
      */
b3c12f
     if (!payloadfmt) return rc;
b3c12f
 
b3c12f
+    if (rstreq(payloadfmt, "clon")) return rc;
b3c12f
+
b3c12f
     if (!rstreq(payloadfmt, "cpio")) {
b3c12f
         char *nevra = headerGetAsString(h, RPMTAG_NEVRA);
b3c12f
         if (payloadfmt && rstreq(payloadfmt, "drpm")) {
b3c12f
diff --git a/lib/fsm.c b/lib/fsm.c
b3c12f
index 35dcda081..03a716474 100644
b3c12f
--- a/lib/fsm.c
b3c12f
+++ b/lib/fsm.c
b3c12f
@@ -7,6 +7,7 @@
b3c12f
 
b3c12f
 #include <utime.h>
b3c12f
 #include <errno.h>
b3c12f
+#include <stdbool.h>
b3c12f
 #if WITH_CAP
b3c12f
 #include <sys/capability.h>
b3c12f
 #endif
b3c12f
@@ -18,6 +19,7 @@
b3c12f
 
b3c12f
 #include "rpmio/rpmio_internal.h"	/* fdInit/FiniDigest */
b3c12f
 #include "lib/fsm.h"
b3c12f
+#include "lib/rpmlib.h"
b3c12f
 #include "lib/rpmte_internal.h"	/* XXX rpmfs */
b3c12f
 #include "lib/rpmplugins.h"	/* rpm plugins hooks */
b3c12f
 #include "lib/rpmug.h"
b3c12f
@@ -835,7 +837,7 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
               rpmpsm psm, char ** failedFile)
b3c12f
 {
b3c12f
     FD_t payload = rpmtePayload(te);
b3c12f
-    rpmfi fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
b3c12f
+    rpmfi fi;
b3c12f
     rpmfs fs = rpmteGetFileStates(te);
b3c12f
     rpmPlugins plugins = rpmtsPlugins(ts);
b3c12f
     struct stat sb;
b3c12f
@@ -850,10 +852,21 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
     char *tid = NULL;
b3c12f
     const char *suffix;
b3c12f
     char *fpath = NULL;
b3c12f
+    Header h = rpmteHeader(te);
b3c12f
+    const char *payloadfmt = headerGetString(h, RPMTAG_PAYLOADFORMAT);
b3c12f
+    bool cpio = true;
b3c12f
 
b3c12f
-    if (fi == NULL) {
b3c12f
-	rc = RPMERR_BAD_MAGIC;
b3c12f
-	goto exit;
b3c12f
+    if (payloadfmt && rstreq(payloadfmt, "clon")) {
b3c12f
+	cpio = false;
b3c12f
+    }
b3c12f
+    if (cpio) {
b3c12f
+	fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
b3c12f
+	if (fi == NULL) {
b3c12f
+	    rc = RPMERR_BAD_MAGIC;
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+    } else {
b3c12f
+	fi = rpmfilesIter(files, RPMFI_ITER_FWD);
b3c12f
     }
b3c12f
 
b3c12f
     /* transaction id used for temporary path suffix while installing */
b3c12f
@@ -893,10 +906,20 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
 	/* Run fsm file pre hook for all plugins */
b3c12f
 	rc = rpmpluginsCallFsmFilePre(plugins, fi, fpath,
b3c12f
 				      sb.st_mode, action);
b3c12f
-	if (rc) {
b3c12f
-	    skip = 1;
b3c12f
-	} else {
b3c12f
+	skip = skip || rpmfiFFlags(fi) & RPMFILE_GHOST;
b3c12f
+	bool plugin_contents = false;
b3c12f
+	switch (rc) {
b3c12f
+	case RPMRC_OK:
b3c12f
 	    setFileState(fs, rpmfiFX(fi));
b3c12f
+	    break;
b3c12f
+	case RPMRC_PLUGIN_CONTENTS:
b3c12f
+	    plugin_contents = true;
b3c12f
+	    // reduce reads on cpio to this value. Could be zero if
b3c12f
+	    // this is from a hard link.
b3c12f
+	    rc = RPMRC_OK;
b3c12f
+	    break;
b3c12f
+	default:
b3c12f
+	    skip = 1;
b3c12f
 	}
b3c12f
 
b3c12f
         if (!skip) {
b3c12f
@@ -926,8 +949,12 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
 
b3c12f
             if (S_ISREG(sb.st_mode)) {
b3c12f
 		if (rc == RPMERR_ENOENT) {
b3c12f
-		    rc = fsmMkfile(fi, fpath, files, psm, nodigest,
b3c12f
-				   &setmeta, &firsthardlink, &firstlinkfile);
b3c12f
+		    if (plugin_contents) {
b3c12f
+			rc = RPMRC_OK;
b3c12f
+		    } else {
b3c12f
+			rc = fsmMkfile(fi, fpath, files, psm, nodigest,
b3c12f
+				&setmeta, &firsthardlink, &firstlinkfile);
b3c12f
+		    }
b3c12f
 		}
b3c12f
             } else if (S_ISDIR(sb.st_mode)) {
b3c12f
                 if (rc == RPMERR_ENOENT) {
b3c12f
@@ -1011,7 +1038,10 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
 exit:
b3c12f
 
b3c12f
     /* No need to bother with close errors on read */
b3c12f
-    rpmfiArchiveClose(fi);
b3c12f
+    if (cpio) {
b3c12f
+	rpmfiArchiveClose(fi);
b3c12f
+    }
b3c12f
+	h = headerFree(h);
b3c12f
     rpmfiFree(fi);
b3c12f
     Fclose(payload);
b3c12f
     free(tid);
b3c12f
diff --git a/lib/package.c b/lib/package.c
b3c12f
index 93a06ebfe..b0c1c2857 100644
b3c12f
--- a/lib/package.c
b3c12f
+++ b/lib/package.c
b3c12f
@@ -407,5 +407,45 @@ rpmRC rpmReadPackageFile(rpmts ts, FD_t fd, const char * fn, Header * hdrp)
b3c12f
     return rc;
b3c12f
 }
b3c12f
 
b3c12f
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp)
b3c12f
+{
b3c12f
+    char *msg = NULL;
b3c12f
+    hdrblob sigblob = hdrblobCreate();
b3c12f
+    hdrblob blob = hdrblobCreate();
b3c12f
+    Header h = NULL;
b3c12f
+    Header sigh = NULL;
b3c12f
+
b3c12f
+    rpmRC rc = rpmLeadRead(fd, &msg);
b3c12f
+    if (rc != RPMRC_OK)
b3c12f
+	goto exit;
b3c12f
+
b3c12f
+    rc = hdrblobRead(fd, 1, 0, RPMTAG_HEADERSIGNATURES, sigblob, &msg);
b3c12f
+    if (rc != RPMRC_OK)
b3c12f
+	goto exit;
b3c12f
+
b3c12f
+    rc = hdrblobRead(fd, 1, 1, RPMTAG_HEADERIMMUTABLE, blob, &msg);
b3c12f
+    if (rc != RPMRC_OK)
b3c12f
+	goto exit;
b3c12f
+
b3c12f
+    rc = hdrblobImport(sigblob, 0, &sigh, &msg);
b3c12f
+    if (rc)
b3c12f
+	goto exit;
b3c12f
 
b3c12f
+    rc = hdrblobImport(blob, 0, &h, &msg);
b3c12f
+    if (rc)
b3c12f
+	goto exit;
b3c12f
 
b3c12f
+    *sigp = headerLink(sigh);
b3c12f
+    *hdrp = headerLink(h);
b3c12f
+
b3c12f
+exit:
b3c12f
+    if (rc != RPMRC_OK && msg)
b3c12f
+	rpmlog(RPMLOG_ERR, "%s: %s\n", Fdescr(fd), msg);
b3c12f
+    hdrblobFree(sigblob);
b3c12f
+    hdrblobFree(blob);
b3c12f
+    headerFree(sigh);
b3c12f
+    headerFree(h);
b3c12f
+    free(msg);
b3c12f
+
b3c12f
+    return rc;
b3c12f
+}
b3c12f
diff --git a/lib/rpmlib.h b/lib/rpmlib.h
b3c12f
index 72ee724e8..4f1a24d14 100644
b3c12f
--- a/lib/rpmlib.h
b3c12f
+++ b/lib/rpmlib.h
b3c12f
@@ -156,6 +156,15 @@ rpmRC rpmReadHeader(rpmts ts, FD_t fd, Header *hdrp, char ** msg);
b3c12f
 rpmRC rpmReadPackageFile(rpmts ts, FD_t fd,
b3c12f
 		const char * fn, Header * hdrp);
b3c12f
 
b3c12f
+/** \ingroup header
b3c12f
+ * Return package signature, header from file handle, no verification.
b3c12f
+ * @param fd		file handle
b3c12f
+ * @param[out] sigp		address of signature header (must not be NULL)
b3c12f
+ * @param[out] hdrp		address of package header (must not be NULL)
b3c12f
+ * @return		RPMRC_OK on success
b3c12f
+ */
b3c12f
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp);
b3c12f
+
b3c12f
 /** \ingroup rpmtrans
b3c12f
  * Install source package.
b3c12f
  * @param ts		transaction set
b3c12f
diff --git a/lib/rpmplugins.c b/lib/rpmplugins.c
b3c12f
index 62d75c4cf..c5084d398 100644
b3c12f
--- a/lib/rpmplugins.c
b3c12f
+++ b/lib/rpmplugins.c
b3c12f
@@ -356,13 +356,28 @@ rpmRC rpmpluginsCallFsmFilePre(rpmPlugins plugins, rpmfi fi, const char *path,
b3c12f
     plugin_fsm_file_pre_func hookFunc;
b3c12f
     int i;
b3c12f
     rpmRC rc = RPMRC_OK;
b3c12f
+    rpmRC hook_rc;
b3c12f
 
b3c12f
     for (i = 0; i < plugins->count; i++) {
b3c12f
 	rpmPlugin plugin = plugins->plugins[i];
b3c12f
 	RPMPLUGINS_SET_HOOK_FUNC(fsm_file_pre);
b3c12f
-	if (hookFunc && hookFunc(plugin, fi, path, file_mode, op) == RPMRC_FAIL) {
b3c12f
-	    rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
b3c12f
-	    rc = RPMRC_FAIL;
b3c12f
+	if (hookFunc) {
b3c12f
+	    hook_rc = hookFunc(plugin, fi, path, file_mode, op);
b3c12f
+	    if (hook_rc == RPMRC_FAIL) {
b3c12f
+		rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
b3c12f
+		rc = RPMRC_FAIL;
b3c12f
+	    } else if (hook_rc == RPMRC_PLUGIN_CONTENTS && rc != RPMRC_FAIL) {
b3c12f
+		if (rc == RPMRC_PLUGIN_CONTENTS) {
b3c12f
+		    /*
b3c12f
+		    Another plugin already said it'd handle contents. It's undefined how
b3c12f
+		    these would combine, so treat this as a failure condition.
b3c12f
+		    */
b3c12f
+		    rc = RPMRC_FAIL;
b3c12f
+		} else {
b3c12f
+		    /* Plugin will handle content */
b3c12f
+		    rc = RPMRC_PLUGIN_CONTENTS;
b3c12f
+		}
b3c12f
+	    }
b3c12f
 	}
b3c12f
     }
b3c12f
 
b3c12f
diff --git a/lib/rpmte.c b/lib/rpmte.c
b3c12f
index fe9782953..ba06c2985 100644
b3c12f
--- a/lib/rpmte.c
b3c12f
+++ b/lib/rpmte.c
b3c12f
@@ -422,6 +422,11 @@ FD_t rpmteSetFd(rpmte te, FD_t fd)
b3c12f
     return NULL;
b3c12f
 }
b3c12f
 
b3c12f
+FD_t rpmteFd(rpmte te)
b3c12f
+{
b3c12f
+    return (te != NULL ? te->fd : NULL);
b3c12f
+}
b3c12f
+
b3c12f
 fnpyKey rpmteKey(rpmte te)
b3c12f
 {
b3c12f
     return (te != NULL ? te->key : NULL);
b3c12f
diff --git a/lib/rpmte.h b/lib/rpmte.h
b3c12f
index 81acf7a19..6fc0a9f91 100644
b3c12f
--- a/lib/rpmte.h
b3c12f
+++ b/lib/rpmte.h
b3c12f
@@ -209,6 +209,8 @@ const char * rpmteNEVR(rpmte te);
b3c12f
  */
b3c12f
 const char * rpmteNEVRA(rpmte te);
b3c12f
 
b3c12f
+FD_t rpmteFd(rpmte te);
b3c12f
+
b3c12f
 /** \ingroup rpmte
b3c12f
  * Retrieve key from transaction element.
b3c12f
  * @param te		transaction element
b3c12f
diff --git a/lib/rpmtypes.h b/lib/rpmtypes.h
b3c12f
index e8e69b506..af2611e9e 100644
b3c12f
--- a/lib/rpmtypes.h
b3c12f
+++ b/lib/rpmtypes.h
b3c12f
@@ -106,7 +106,8 @@ typedef	enum rpmRC_e {
b3c12f
     RPMRC_NOTFOUND	= 1,	/*!< Generic not found code. */
b3c12f
     RPMRC_FAIL		= 2,	/*!< Generic failure code. */
b3c12f
     RPMRC_NOTTRUSTED	= 3,	/*!< Signature is OK, but key is not trusted. */
b3c12f
-    RPMRC_NOKEY		= 4	/*!< Public key is unavailable. */
b3c12f
+    RPMRC_NOKEY		= 4,	/*!< Public key is unavailable. */
b3c12f
+    RPMRC_PLUGIN_CONTENTS = 5     /*!< fsm_file_pre plugin is handling content */
b3c12f
 } rpmRC;
b3c12f
 
b3c12f
 #ifdef __cplusplus
b3c12f
diff --git a/macros.in b/macros.in
b3c12f
index 5778a1f58..15a28c2d2 100644
b3c12f
--- a/macros.in
b3c12f
+++ b/macros.in
b3c12f
@@ -1175,6 +1175,7 @@ package or when debugging this package.\
b3c12f
 
b3c12f
 # Transaction plugin macros
b3c12f
 %__plugindir		%{_libdir}/rpm-plugins
b3c12f
+%__transaction_reflink		%{__plugindir}/reflink.so
b3c12f
 %__transaction_systemd_inhibit	%{__plugindir}/systemd_inhibit.so
b3c12f
 %__transaction_selinux		%{__plugindir}/selinux.so
b3c12f
 %__transaction_syslog		%{__plugindir}/syslog.so
b3c12f
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
b3c12f
index 963d53db4..a21401966 100644
b3c12f
--- a/plugins/Makefile.am
b3c12f
+++ b/plugins/Makefile.am
b3c12f
@@ -33,6 +33,10 @@ prioreset_la_SOURCES = prioreset.c
b3c12f
 prioreset_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
b3c12f
 plugins_LTLIBRARIES += prioreset.la
b3c12f
 
b3c12f
+reflink_la_SOURCES = reflink.c
b3c12f
+reflink_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
b3c12f
+plugins_LTLIBRARIES += reflink.la
b3c12f
+
b3c12f
 syslog_la_SOURCES = syslog.c
b3c12f
 syslog_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
b3c12f
 plugins_LTLIBRARIES += syslog.la
b3c12f
diff --git a/plugins/reflink.c b/plugins/reflink.c
b3c12f
new file mode 100644
b3c12f
index 000000000..d7f19acd9
b3c12f
--- /dev/null
b3c12f
+++ b/plugins/reflink.c
b3c12f
@@ -0,0 +1,340 @@
b3c12f
+#include "system.h"
b3c12f
+
b3c12f
+#include <errno.h>
b3c12f
+#include <sys/resource.h>
b3c12f
+#include <unistd.h>
b3c12f
+#include <sys/types.h>
b3c12f
+#include <sys/stat.h>
b3c12f
+#include <fcntl.h>
b3c12f
+#if defined(__linux__)
b3c12f
+#include <linux/fs.h>        /* For FICLONE */
b3c12f
+#endif
b3c12f
+
b3c12f
+#include <rpm/rpmlog.h>
b3c12f
+#include "lib/rpmlib.h"
b3c12f
+#include "lib/rpmplugin.h"
b3c12f
+#include "lib/rpmte_internal.h"
b3c12f
+#include <rpm/rpmfileutil.h>
b3c12f
+#include "rpmio/rpmio_internal.h"
b3c12f
+
b3c12f
+
b3c12f
+#include "debug.h"
b3c12f
+
b3c12f
+#include <sys/ioctl.h>
b3c12f
+
b3c12f
+/* use hash table to remember inode -> ix (for rpmfilesFN(ix)) lookups */
b3c12f
+#undef HASHTYPE
b3c12f
+#undef HTKEYTYPE
b3c12f
+#undef HTDATATYPE
b3c12f
+#define HASHTYPE inodeIndexHash
b3c12f
+#define HTKEYTYPE rpm_ino_t
b3c12f
+#define HTDATATYPE int
b3c12f
+#include "lib/rpmhash.H"
b3c12f
+#include "lib/rpmhash.C"
b3c12f
+
b3c12f
+/*
b3c12f
+We use this in find to indicate a key wasn't found. This is an unrecoverable
b3c12f
+error, but we can at least show a decent error. 0 is never a valid offset
b3c12f
+because it's the offset of the start of the file.
b3c12f
+*/
b3c12f
+#define NOT_FOUND 0
b3c12f
+
b3c12f
+#define BUFFER_SIZE (1024 * 128)
b3c12f
+
b3c12f
+/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
b3c12f
+#define MAGIC 3472329499408095051
b3c12f
+
b3c12f
+struct reflink_state_s {
b3c12f
+  /* Stuff that's used across rpms */
b3c12f
+  long fundamental_block_size;
b3c12f
+  char *buffer;
b3c12f
+
b3c12f
+  /* stuff that's used/updated per psm */
b3c12f
+  uint32_t keys, keysize;
b3c12f
+
b3c12f
+  // table for current rpm, keys * (keysize + sizeof(rpm_loff_t))
b3c12f
+  unsigned char *table;
b3c12f
+  FD_t fd;
b3c12f
+  rpmfiles files;
b3c12f
+  inodeIndexHash inodeIndexes;
b3c12f
+};
b3c12f
+
b3c12f
+typedef struct reflink_state_s * reflink_state;
b3c12f
+
b3c12f
+static int inodeCmp(rpm_ino_t a, rpm_ino_t b)
b3c12f
+{
b3c12f
+    return (a != b);
b3c12f
+}
b3c12f
+
b3c12f
+static unsigned int inodeId(rpm_ino_t a)
b3c12f
+{
b3c12f
+    /* rpm_ino_t is uint32_t so maps safely to unsigned int */
b3c12f
+    return (unsigned int)a;
b3c12f
+}
b3c12f
+
b3c12f
+static rpmRC reflink_init(rpmPlugin plugin, rpmts ts) {
b3c12f
+  reflink_state state = rcalloc(1, sizeof(struct reflink_state_s));
b3c12f
+
b3c12f
+  /*
b3c12f
+  IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset and
b3c12f
+  length arguments to be aligned to the fundamental block size.
b3c12f
+
b3c12f
+  The value of "fundamental block size" is directly related to the system's
b3c12f
+  page size, so we should use that.
b3c12f
+  */
b3c12f
+  state->fundamental_block_size = sysconf(_SC_PAGESIZE);
b3c12f
+  state->buffer = rcalloc(1, BUFFER_SIZE);
b3c12f
+  rpmPluginSetData(plugin, state);
b3c12f
+
b3c12f
+  return RPMRC_OK;
b3c12f
+}
b3c12f
+
b3c12f
+static void reflink_cleanup(rpmPlugin plugin) {
b3c12f
+  reflink_state state = rpmPluginGetData(plugin);
b3c12f
+  free(state->buffer);
b3c12f
+  free(state);
b3c12f
+}
b3c12f
+
b3c12f
+static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
b3c12f
+    reflink_state state = rpmPluginGetData(plugin);
b3c12f
+    state->fd = rpmteFd(te);
b3c12f
+    if (state->fd == 0) {
b3c12f
+      rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
b3c12f
+      return RPMRC_OK;
b3c12f
+    }
b3c12f
+    rpm_loff_t current = Ftell(state->fd);
b3c12f
+    uint64_t magic;
b3c12f
+    if (Fseek(state->fd, -(sizeof(magic)), SEEK_END) < 0) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
b3c12f
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+        /* yes this gets a bit repetitive */
b3c12f
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
+      }
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    size_t len = sizeof(magic);
b3c12f
+    if (Fread(&magic, len, 1, state->fd) != len) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
b3c12f
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
+      }
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    if (magic != MAGIC) {
b3c12f
+      rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
b3c12f
+      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
+        return RPMRC_FAIL;
b3c12f
+      }
b3c12f
+      return RPMRC_OK;
b3c12f
+    }
b3c12f
+    rpmlog(RPMLOG_DEBUG, _("reflink: *is* transcoded\n"));
b3c12f
+    Header h = rpmteHeader(te);
b3c12f
+
b3c12f
+    /* replace/add header that main fsm.c can read */
b3c12f
+    headerDel(h, RPMTAG_PAYLOADFORMAT);
b3c12f
+    headerPutString(h, RPMTAG_PAYLOADFORMAT, "clon");
b3c12f
+    headerFree(h);
b3c12f
+    state->files = rpmteFiles(te);
b3c12f
+    /* tail of file contains offset_table, offset_checksums
b3c12f
+       then magic
b3c12f
+    */
b3c12f
+    if (Fseek(state->fd, -(sizeof(rpm_loff_t) * 2 + sizeof(magic)), SEEK_END) < 0) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"), state->fd);
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    rpm_loff_t table_start;
b3c12f
+    len = sizeof(table_start);
b3c12f
+    if (Fread(&table_start, len, 1, state->fd) != len) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    if (Fseek(state->fd, table_start, SEEK_SET) < 0) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    len = sizeof(state->keys);
b3c12f
+    if (Fread(&state->keys, len, 1, state->fd) != len) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    len = sizeof(state->keysize);
b3c12f
+    if (Fread(&state->keysize, len, 1, state->fd) != len) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    rpmlog(RPMLOG_DEBUG, _("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"), table_start, state->keys, state->keysize);
b3c12f
+    // now get digest table if there is a reason to have one.
b3c12f
+    if (state->keys == 0 || state->keysize == 0) {
b3c12f
+      // no files (or no digests(!))
b3c12f
+      state->table = NULL;
b3c12f
+    } else {
b3c12f
+      int table_size = state->keys * (state->keysize + sizeof(rpm_loff_t));
b3c12f
+      state->table = rcalloc(1, table_size);
b3c12f
+      if (Fread(state->table, table_size, 1, state->fd) != table_size) {
b3c12f
+        rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
b3c12f
+        return RPMRC_FAIL;
b3c12f
+      }
b3c12f
+      state->inodeIndexes = inodeIndexHashCreate(state->keys, inodeId, inodeCmp, NULL, NULL);
b3c12f
+    }
b3c12f
+
b3c12f
+    // seek back to original location
b3c12f
+    // might not be needed if we seek to offset immediately
b3c12f
+    if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+      rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
+      return RPMRC_FAIL;
b3c12f
+    }
b3c12f
+    return RPMRC_OK;
b3c12f
+}
b3c12f
+
b3c12f
+static rpmRC reflink_psm_post(rpmPlugin plugin, rpmte te, int res)
b3c12f
+{
b3c12f
+    reflink_state state = rpmPluginGetData(plugin);
b3c12f
+    state->files = rpmfilesFree(state->files);
b3c12f
+    if (state->table) {
b3c12f
+      free(state->table);
b3c12f
+      state->table = NULL;
b3c12f
+    }
b3c12f
+    if (state->inodeIndexes) {
b3c12f
+      inodeIndexHashFree(state->inodeIndexes);
b3c12f
+      state->inodeIndexes = NULL;
b3c12f
+    }
b3c12f
+    return RPMRC_OK;
b3c12f
+}
b3c12f
+
b3c12f
+
b3c12f
+// have a prototype, warnings system
b3c12f
+rpm_loff_t find(const unsigned char *digest, reflink_state state);
b3c12f
+
b3c12f
+rpm_loff_t find(const unsigned char *digest, reflink_state state) {
b3c12f
+# if defined(__GNUC__)
b3c12f
+  /* GCC nested function because bsearch's comparison function can't access
b3c12f
+     state->keysize otherwise
b3c12f
+  */
b3c12f
+  int cmpdigest(const void *k1, const void *k2) {
b3c12f
+    rpmlog(RPMLOG_DEBUG, _("reflink: cmpdigest k1=%p k2=%p\n"), k1, k2);
b3c12f
+    return memcmp(k1, k2, state->keysize);
b3c12f
+  }
b3c12f
+# endif
b3c12f
+  rpmlog(RPMLOG_DEBUG, _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"), digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t));
b3c12f
+  char *entry = bsearch(digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t), cmpdigest);
b3c12f
+  if (entry == NULL) {
b3c12f
+    return NOT_FOUND;
b3c12f
+  }
b3c12f
+  rpm_loff_t offset = *(rpm_loff_t *)(entry + state->keysize);
b3c12f
+  return offset;
b3c12f
+}
b3c12f
+
b3c12f
+static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path, mode_t file_mode, rpmFsmOp op)
b3c12f
+{
b3c12f
+    struct file_clone_range fcr;
b3c12f
+    rpm_loff_t size;
b3c12f
+    int dst, rc;
b3c12f
+    int *hlix;
b3c12f
+
b3c12f
+    reflink_state state = rpmPluginGetData(plugin);
b3c12f
+    if (state->table == NULL) {
b3c12f
+        // no table means rpm is not in reflink format, so leave. Now.
b3c12f
+        return RPMRC_OK;
b3c12f
+    }
b3c12f
+    if (op == FA_TOUCH) {
b3c12f
+        // we're not overwriting an existing file
b3c12f
+        return RPMRC_OK;
b3c12f
+    }
b3c12f
+    fcr.dest_offset = 0;
b3c12f
+    if (S_ISREG(file_mode) && !(rpmfiFFlags(fi) & RPMFILE_GHOST)) {
b3c12f
+      rpm_ino_t inode = rpmfiFInode(fi);
b3c12f
+      /* check for hard link entry in table. GetEntry overwrites hlix with the address of the first match */
b3c12f
+      if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL, NULL)) {
b3c12f
+        // entry is in table, use hard link
b3c12f
+        char *fn = rpmfilesFN(state->files, hlix[0]);
b3c12f
+        if (link(fn, path) != 0) {
b3c12f
+          rpmlog(RPMLOG_ERR, _("reflink: Unable to hard link %s -> %s due to %s\n"), fn, path, strerror(errno));
b3c12f
+          free(fn);
b3c12f
+          return RPMRC_FAIL;
b3c12f
+        }
b3c12f
+        free(fn);
b3c12f
+        return RPMRC_PLUGIN_CONTENTS;
b3c12f
+      }
b3c12f
+      /* if we didn't hard link, then we'll track this inode as being created soon */
b3c12f
+      if (rpmfiFNlink(fi) > 1) {
b3c12f
+        /* minor optimization: only store files with more than one link */
b3c12f
+        inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi));
b3c12f
+      }
b3c12f
+      /* derived from wfd_open in fsm.c */
b3c12f
+      mode_t old_umask = umask(0577);
b3c12f
+      dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
b3c12f
+      umask(old_umask);
b3c12f
+      if (dst == -1) {
b3c12f
+          rpmlog(RPMLOG_ERR, _("reflink: Unable to open %s for writing due to %s, flags = %x\n"), path, strerror(errno), rpmfiFFlags(fi));
b3c12f
+          return RPMRC_FAIL;
b3c12f
+      }
b3c12f
+      size = rpmfiFSize(fi);
b3c12f
+      if (size > 0) {
b3c12f
+          /* round src_length down to fundamental_block_size multiple */
b3c12f
+          fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size;
b3c12f
+          if ((size % state->fundamental_block_size) > 0) {
b3c12f
+              /* round up to next fundamental_block_size. We expect the data in the rpm to be similarly padded */
b3c12f
+              fcr.src_length += state->fundamental_block_size;
b3c12f
+          }
b3c12f
+          fcr.src_fd = Fileno(state->fd);
b3c12f
+          if (fcr.src_fd == -1) {
b3c12f
+            close(dst);
b3c12f
+            rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n"));
b3c12f
+            return RPMRC_FAIL;
b3c12f
+          }
b3c12f
+          fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state);
b3c12f
+          if (fcr.src_offset == NOT_FOUND) {
b3c12f
+            close(dst);
b3c12f
+            rpmlog(RPMLOG_ERR, _("reflink: digest not found\n"));
b3c12f
+            return RPMRC_FAIL;
b3c12f
+          }
b3c12f
+          rpmlog(RPMLOG_DEBUG, _("reflink: Reflinking %lu bytes at %lu to %s orig size=%lu, file=%ld\n"), fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
b3c12f
+          rc = ioctl(dst, FICLONERANGE, &fcr);
b3c12f
+          if (rc) {
b3c12f
+            rpmlog(RPMLOG_WARNING, _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"), path, rc, errno, strerror(errno));
b3c12f
+            if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) {
b3c12f
+                close(dst);
b3c12f
+                rpmlog(RPMLOG_ERR, _("reflink: unable to seek on copying bits\n"));
b3c12f
+                return RPMRC_FAIL;
b3c12f
+            }
b3c12f
+            rpm_loff_t left = size;
b3c12f
+            size_t len, read, written;
b3c12f
+            while (left) {
b3c12f
+              len = (left > BUFFER_SIZE ? BUFFER_SIZE : left);
b3c12f
+              read = Fread(state->buffer, len, 1, state->fd);
b3c12f
+              if (read != len) {
b3c12f
+                close(dst);
b3c12f
+                rpmlog(RPMLOG_ERR, _("reflink: short read on copying bits\n"));
b3c12f
+                return RPMRC_FAIL;
b3c12f
+              }
b3c12f
+              written = write(dst, state->buffer, len);
b3c12f
+              if (read != written) {
b3c12f
+                close(dst);
b3c12f
+                rpmlog(RPMLOG_ERR, _("reflink: short write on copying bits\n"));
b3c12f
+                return RPMRC_FAIL;
b3c12f
+              }
b3c12f
+              left -= len;
b3c12f
+            }
b3c12f
+          } else {
b3c12f
+            /* reflink worked, so truncate */
b3c12f
+            rc = ftruncate(dst, size);
b3c12f
+            if (rc) {
b3c12f
+                rpmlog(RPMLOG_ERR, _("reflink: Unable to truncate %s to %ld due to %s\n"), path, size, strerror(errno));
b3c12f
+                return RPMRC_FAIL;
b3c12f
+            }
b3c12f
+          }
b3c12f
+      }
b3c12f
+      close(dst);
b3c12f
+      return RPMRC_PLUGIN_CONTENTS;
b3c12f
+    }
b3c12f
+    return RPMRC_OK;
b3c12f
+}
b3c12f
+
b3c12f
+struct rpmPluginHooks_s reflink_hooks = {
b3c12f
+    .init = reflink_init,
b3c12f
+    .cleanup = reflink_cleanup,
b3c12f
+    .psm_pre = reflink_psm_pre,
b3c12f
+    .psm_post = reflink_psm_post,
b3c12f
+    .fsm_file_pre = reflink_fsm_file_pre,
b3c12f
+};
b3c12f
diff --git a/rpm2extents.c b/rpm2extents.c
b3c12f
new file mode 100644
b3c12f
index 000000000..5662b86a6
b3c12f
--- /dev/null
b3c12f
+++ b/rpm2extents.c
b3c12f
@@ -0,0 +1,519 @@
b3c12f
+/* rpm2extents: convert payload to inline extents */
b3c12f
+
b3c12f
+#include "system.h"
b3c12f
+
b3c12f
+#include <rpm/rpmlib.h>		/* rpmReadPackageFile .. */
b3c12f
+#include <rpm/rpmfi.h>
b3c12f
+#include <rpm/rpmtag.h>
b3c12f
+#include <rpm/rpmio.h>
b3c12f
+#include <rpm/rpmpgp.h>
b3c12f
+
b3c12f
+#include <rpm/rpmts.h>
b3c12f
+#include "lib/rpmlead.h"
b3c12f
+#include "lib/signature.h"
b3c12f
+#include "lib/header_internal.h"
b3c12f
+#include "rpmio/rpmio_internal.h"
b3c12f
+
b3c12f
+#include <unistd.h>
b3c12f
+#include <sys/types.h>
b3c12f
+#include <sys/wait.h>
b3c12f
+#include <signal.h>
b3c12f
+#include <errno.h>
b3c12f
+#include <string.h>
b3c12f
+
b3c12f
+#include "debug.h"
b3c12f
+
b3c12f
+/* hash of void * (pointers) to file digests to offsets within output.
b3c12f
+   The length of the key depends on what the FILEDIGESTALGO is.
b3c12f
+ */
b3c12f
+#undef HASHTYPE
b3c12f
+#undef HTKEYTYPE
b3c12f
+#undef HTDATATYPE
b3c12f
+#define HASHTYPE digestSet
b3c12f
+#define HTKEYTYPE const unsigned char *
b3c12f
+#include "lib/rpmhash.H"
b3c12f
+#include "lib/rpmhash.C"
b3c12f
+
b3c12f
+/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
b3c12f
+#define MAGIC 3472329499408095051
b3c12f
+
b3c12f
+struct digestoffset {
b3c12f
+    const unsigned char * digest;
b3c12f
+    rpm_loff_t pos;
b3c12f
+};
b3c12f
+
b3c12f
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit);
b3c12f
+
b3c12f
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit)
b3c12f
+{
b3c12f
+    return (unit - (pos % unit)) % unit;
b3c12f
+}
b3c12f
+
b3c12f
+static int digestor(
b3c12f
+    FD_t fdi,
b3c12f
+    FD_t fdo,
b3c12f
+    FD_t validationo,
b3c12f
+    uint8_t algos[],
b3c12f
+    uint32_t algos_len
b3c12f
+)
b3c12f
+{
b3c12f
+    ssize_t fdilength;
b3c12f
+    const char *filedigest, *algo_name;
b3c12f
+    size_t filedigest_len, len;
b3c12f
+    uint32_t algo_name_len, algo_digest_len;
b3c12f
+    int algo;
b3c12f
+    rpmRC rc = RPMRC_FAIL;
b3c12f
+
b3c12f
+    for (algo = 0; algo < algos_len; algo++)
b3c12f
+    {
b3c12f
+        fdInitDigest(fdi, algos[algo], 0);
b3c12f
+    }
b3c12f
+    fdilength = ufdCopy(fdi, fdo);
b3c12f
+    if (fdilength == -1)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("digest cat failed\n"));
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+
b3c12f
+    len = sizeof(fdilength);
b3c12f
+    if (Fwrite(&fdilength, len, 1, validationo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write input length %zd\n"), fdilength);
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    len = sizeof(algos_len);
b3c12f
+    if (Fwrite(&algos_len, len, 1, validationo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write number of validation digests\n"));
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    for (algo = 0; algo < algos_len; algo++)
b3c12f
+    {
b3c12f
+        fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0);
b3c12f
+
b3c12f
+        algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]);
b3c12f
+        algo_name_len = (uint32_t)strlen(algo_name);
b3c12f
+        algo_digest_len = (uint32_t)filedigest_len;
b3c12f
+
b3c12f
+        len = sizeof(algo_name_len);
b3c12f
+        if (Fwrite(&algo_name_len, len, 1, validationo) != len)
b3c12f
+        {
b3c12f
+            fprintf(
b3c12f
+                stderr,
b3c12f
+                _("Unable to write validation algo name length\n")
b3c12f
+            );
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        len = sizeof(algo_digest_len);
b3c12f
+        if (Fwrite(&algo_digest_len, len, 1, validationo) != len)
b3c12f
+        {
b3c12f
+            fprintf(
b3c12f
+                stderr,
b3c12f
+                _("Unable to write number of bytes for validation digest\n")
b3c12f
+            );
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("Unable to write validation algo name\n"));
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        if (
b3c12f
+            Fwrite(
b3c12f
+                filedigest,
b3c12f
+                algo_digest_len,
b3c12f
+                1,
b3c12f
+                validationo
b3c12f
+            ) != algo_digest_len
b3c12f
+        )
b3c12f
+        {
b3c12f
+            fprintf(
b3c12f
+                stderr,
b3c12f
+                _("Unable to write validation digest value %u, %zu\n"),
b3c12f
+                algo_digest_len,
b3c12f
+                filedigest_len
b3c12f
+            );
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+    }
b3c12f
+    rc = RPMRC_OK;
b3c12f
+exit:
b3c12f
+    return rc;
b3c12f
+}
b3c12f
+
b3c12f
+static rpmRC process_package(FD_t fdi, FD_t validationi)
b3c12f
+{
b3c12f
+    uint32_t diglen;
b3c12f
+    /* GNU C extension: can use diglen from outer context */
b3c12f
+    int digestSetCmp(const unsigned char * a, const unsigned char * b)
b3c12f
+    {
b3c12f
+        return memcmp(a, b, diglen);
b3c12f
+    }
b3c12f
+
b3c12f
+    unsigned int digestSetHash(const unsigned char * digest)
b3c12f
+    {
b3c12f
+        /* assumes sizeof(unsigned int) < diglen */
b3c12f
+        return *(unsigned int *)digest;
b3c12f
+    }
b3c12f
+
b3c12f
+    int digestoffsetCmp(const void * a, const void * b)
b3c12f
+    {
b3c12f
+        return digestSetCmp(
b3c12f
+            ((struct digestoffset *)a)->digest,
b3c12f
+            ((struct digestoffset *)b)->digest
b3c12f
+        );
b3c12f
+    }
b3c12f
+
b3c12f
+    FD_t fdo;
b3c12f
+    FD_t gzdi;
b3c12f
+    Header h, sigh;
b3c12f
+    long fundamental_block_size = sysconf(_SC_PAGESIZE);
b3c12f
+    rpmRC rc = RPMRC_OK;
b3c12f
+    rpm_mode_t mode;
b3c12f
+    char *rpmio_flags = NULL, *zeros;
b3c12f
+    const unsigned char *digest;
b3c12f
+    rpm_loff_t pos, size, pad, validation_pos;
b3c12f
+    uint32_t offset_ix = 0;
b3c12f
+    size_t len;
b3c12f
+    int next = 0;
b3c12f
+
b3c12f
+    fdo = fdDup(STDOUT_FILENO);
b3c12f
+
b3c12f
+    if (rpmReadPackageRaw(fdi, &sigh, &h))
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Error reading package\n"));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    if (rpmLeadWrite(fdo, h))
b3c12f
+    {
b3c12f
+        fprintf(
b3c12f
+            stderr,
b3c12f
+            _("Unable to write package lead: %s\n"),
b3c12f
+            Fstrerror(fdo)
b3c12f
+        );
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    if (rpmWriteSignature(fdo, sigh))
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    if (headerWrite(fdo, h, HEADER_MAGIC_YES))
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    /* Retrieve payload size and compression type. */
b3c12f
+    {	const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR);
b3c12f
+        rpmio_flags = rstrscat(NULL, "r.", compr ? compr : "gzip", NULL);
b3c12f
+    }
b3c12f
+
b3c12f
+    gzdi = Fdopen(fdi, rpmio_flags);	/* XXX gzdi == fdi */
b3c12f
+    free(rpmio_flags);
b3c12f
+
b3c12f
+    if (gzdi == NULL)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    rpmfiles files = rpmfilesNew(NULL, h, 0, RPMFI_KEEPHEADER);
b3c12f
+    rpmfi fi = rpmfiNewArchiveReader(
b3c12f
+        gzdi,
b3c12f
+        files,
b3c12f
+        RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST
b3c12f
+    );
b3c12f
+
b3c12f
+    /* this is encoded in the file format, so needs to be fixed size (for
b3c12f
+        now?)
b3c12f
+    */
b3c12f
+    diglen = (uint32_t)rpmDigestLength(rpmfiDigestAlgo(fi));
b3c12f
+    digestSet ds = digestSetCreate(
b3c12f
+        rpmfiFC(fi),
b3c12f
+        digestSetHash,
b3c12f
+        digestSetCmp,
b3c12f
+        NULL
b3c12f
+    );
b3c12f
+    struct digestoffset offsets[rpmfiFC(fi)];
b3c12f
+    pos = RPMLEAD_SIZE + headerSizeof(sigh, HEADER_MAGIC_YES);
b3c12f
+
b3c12f
+    /* main headers are aligned to 8 byte boundry */
b3c12f
+    pos += pad_to(pos, 8);
b3c12f
+    pos += headerSizeof(h, HEADER_MAGIC_YES);
b3c12f
+
b3c12f
+    zeros = xcalloc(fundamental_block_size, 1);
b3c12f
+
b3c12f
+    while (next >= 0)
b3c12f
+    {
b3c12f
+        next = rpmfiNext(fi);
b3c12f
+        if (next == RPMERR_ITER_END)
b3c12f
+        {
b3c12f
+            rc = RPMRC_OK;
b3c12f
+            break;
b3c12f
+        }
b3c12f
+        mode = rpmfiFMode(fi);
b3c12f
+        if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi))
b3c12f
+        {
b3c12f
+            /* not a regular file, or the archive doesn't contain any content for
b3c12f
+               this entry
b3c12f
+            */
b3c12f
+            continue;
b3c12f
+        }
b3c12f
+        digest = rpmfiFDigest(fi, NULL, NULL);
b3c12f
+        if (digestSetGetEntry(ds, digest, NULL))
b3c12f
+        {
b3c12f
+            /* This specific digest has already been included, so skip it */
b3c12f
+            continue;
b3c12f
+        }
b3c12f
+        pad = pad_to(pos, fundamental_block_size);
b3c12f
+        if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("Unable to write padding\n"));
b3c12f
+            rc = RPMRC_FAIL;
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        /* round up to next fundamental_block_size */
b3c12f
+        pos += pad;
b3c12f
+        digestSetAddEntry(ds, digest);
b3c12f
+        offsets[offset_ix].digest = digest;
b3c12f
+        offsets[offset_ix].pos = pos;
b3c12f
+        offset_ix++;
b3c12f
+        size = rpmfiFSize(fi);
b3c12f
+        rc = rpmfiArchiveReadToFile(fi, fdo, 0);
b3c12f
+        if (rc != RPMRC_OK)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc);
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        pos += size;
b3c12f
+    }
b3c12f
+    Fclose(gzdi);	/* XXX gzdi == fdi */
b3c12f
+
b3c12f
+    qsort(
b3c12f
+        offsets,
b3c12f
+        (size_t)offset_ix,
b3c12f
+        sizeof(struct digestoffset),
b3c12f
+        digestoffsetCmp
b3c12f
+    );
b3c12f
+
b3c12f
+    len = sizeof(offset_ix);
b3c12f
+    if (Fwrite(&offset_ix, len, 1, fdo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write length of table\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    len = sizeof(diglen);
b3c12f
+    if (Fwrite(&diglen, len, 1, fdo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write length of digest\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    len = sizeof(rpm_loff_t);
b3c12f
+    for (int x = 0; x < offset_ix; x++)
b3c12f
+    {
b3c12f
+        if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("Unable to write digest\n"));
b3c12f
+            rc = RPMRC_FAIL;
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+        if (Fwrite(&offsets[x].pos, len, 1, fdo) != len)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("Unable to write offset\n"));
b3c12f
+            rc = RPMRC_FAIL;
b3c12f
+            goto exit;
b3c12f
+        }
b3c12f
+    }
b3c12f
+    validation_pos = (
b3c12f
+        pos + sizeof(offset_ix) + sizeof(diglen) +
b3c12f
+        offset_ix * (diglen + sizeof(rpm_loff_t))
b3c12f
+    );
b3c12f
+
b3c12f
+    ssize_t validation_len = ufdCopy(validationi, fdo);
b3c12f
+    if (validation_len == -1)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("digest table ufdCopy failed\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    /* add more padding so the last file can be cloned. It doesn't matter that
b3c12f
+       the table and validation etc are in this space. In fact, it's pretty
b3c12f
+       efficient if it is
b3c12f
+    */
b3c12f
+
b3c12f
+    pad = pad_to(
b3c12f
+        (
b3c12f
+            validation_pos + validation_len + 2 * sizeof(rpm_loff_t) +
b3c12f
+            sizeof(uint64_t)
b3c12f
+        ),
b3c12f
+        fundamental_block_size
b3c12f
+    );
b3c12f
+    if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write final padding\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    zeros = _free(zeros);
b3c12f
+    if (Fwrite(&pos, len, 1, fdo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write offset of digest table\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    if (Fwrite(&validation_pos, len, 1, fdo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write offset of validation table\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+    uint64_t magic = MAGIC;
b3c12f
+    len = sizeof(magic);
b3c12f
+    if (Fwrite(&magic, len, 1, fdo) != len)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Unable to write magic\n"));
b3c12f
+        rc = RPMRC_FAIL;
b3c12f
+        goto exit;
b3c12f
+    }
b3c12f
+
b3c12f
+exit:
b3c12f
+    rpmfilesFree(files);
b3c12f
+    rpmfiFree(fi);
b3c12f
+    headerFree(h);
b3c12f
+    return rc;
b3c12f
+}
b3c12f
+
b3c12f
+int main(int argc, char *argv[])
b3c12f
+{
b3c12f
+    rpmRC rc;
b3c12f
+    int cprc = 0;
b3c12f
+    uint8_t algos[argc - 1];
b3c12f
+    int mainpipefd[2];
b3c12f
+    int metapipefd[2];
b3c12f
+    pid_t cpid, w;
b3c12f
+    int wstatus;
b3c12f
+
b3c12f
+    xsetprogname(argv[0]);	/* Portability call -- see system.h */
b3c12f
+    rpmReadConfigFiles(NULL, NULL);
b3c12f
+
b3c12f
+    if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help")))
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]);
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    if (argc == 1)
b3c12f
+    {
b3c12f
+        fprintf(
b3c12f
+            stderr,
b3c12f
+            _("Need at least one DIGESTALGO parameter, e.g. 'SHA256'\n")
b3c12f
+        );
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+
b3c12f
+    for (int x = 0; x < (argc - 1); x++)
b3c12f
+    {
b3c12f
+        if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0)
b3c12f
+        {
b3c12f
+            fprintf(
b3c12f
+                stderr,
b3c12f
+                _("Unable to resolve '%s' as a digest algorithm, exiting\n"),
b3c12f
+                argv[x + 1]
b3c12f
+            );
b3c12f
+            exit(EXIT_FAILURE);
b3c12f
+        }
b3c12f
+    }
b3c12f
+
b3c12f
+
b3c12f
+    if (pipe(mainpipefd) == -1)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Main pipe failure\n"));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+    if (pipe(metapipefd) == -1)
b3c12f
+    {
b3c12f
+        fprintf(stderr, _("Meta pipe failure\n"));
b3c12f
+        exit(EXIT_FAILURE);
b3c12f
+    }
b3c12f
+    cpid = fork();
b3c12f
+    if (cpid == 0)
b3c12f
+    {
b3c12f
+        /* child: digestor */
b3c12f
+        close(mainpipefd[0]);
b3c12f
+        close(metapipefd[0]);
b3c12f
+        FD_t fdi = fdDup(STDIN_FILENO);
b3c12f
+        FD_t fdo = fdDup(mainpipefd[1]);
b3c12f
+        FD_t validationo = fdDup(metapipefd[1]);
b3c12f
+        rc = digestor(fdi, fdo, validationo, algos, argc - 1);
b3c12f
+        Fclose(validationo);
b3c12f
+        Fclose(fdo);
b3c12f
+        Fclose(fdi);
b3c12f
+    } else {
b3c12f
+        /* parent: main program */
b3c12f
+        close(mainpipefd[1]);
b3c12f
+        close(metapipefd[1]);
b3c12f
+        FD_t fdi = fdDup(mainpipefd[0]);
b3c12f
+        FD_t validationi = fdDup(metapipefd[0]);
b3c12f
+        rc = process_package(fdi, validationi);
b3c12f
+        Fclose(validationi);
b3c12f
+        /* fdi is normally closed through the stacked file gzdi in the function. */
b3c12f
+        /* wait for child process (digestor for stdin) to complete. */
b3c12f
+        if (rc != RPMRC_OK)
b3c12f
+        {
b3c12f
+            if (kill(cpid, SIGTERM) != 0)
b3c12f
+            {
b3c12f
+                fprintf(
b3c12f
+                    stderr,
b3c12f
+                    _("Failed to kill digest process when main process failed: %s\n"),
b3c12f
+                    strerror(errno)
b3c12f
+                );
b3c12f
+            }
b3c12f
+        }
b3c12f
+        w = waitpid(cpid, &wstatus, 0);
b3c12f
+        if (w == -1)
b3c12f
+        {
b3c12f
+            fprintf(stderr, _("waitpid failed\n"));
b3c12f
+            cprc = EXIT_FAILURE;
b3c12f
+        } else if (WIFEXITED(wstatus))
b3c12f
+        {
b3c12f
+            cprc = WEXITSTATUS(wstatus);
b3c12f
+            if (cprc != 0)
b3c12f
+            {
b3c12f
+                fprintf(
b3c12f
+                    stderr,
b3c12f
+                    _("Digest process non-zero exit code %d\n"),
b3c12f
+                    cprc
b3c12f
+                );
b3c12f
+            }
b3c12f
+        } else if (WIFSIGNALED(wstatus))
b3c12f
+        {
b3c12f
+            fprintf(
b3c12f
+                stderr,
b3c12f
+                _("Digest process was terminated with a signal: %d\n"),
b3c12f
+                WTERMSIG(wstatus)
b3c12f
+            );
b3c12f
+            cprc = EXIT_FAILURE;
b3c12f
+        } else
b3c12f
+        {
b3c12f
+            /* don't think this can happen, but covering all bases */
b3c12f
+            fprintf(stderr, _("Unhandled circumstance in waitpid\n"));
b3c12f
+            cprc = EXIT_FAILURE;
b3c12f
+        }
b3c12f
+        if (cprc != EXIT_SUCCESS)
b3c12f
+        {
b3c12f
+            rc = RPMRC_FAIL;
b3c12f
+        }
b3c12f
+    }
b3c12f
+    if (rc != RPMRC_OK)
b3c12f
+    {
b3c12f
+        /* translate rpmRC into generic failure return code. */
b3c12f
+        return EXIT_FAILURE;
b3c12f
+    }
b3c12f
+    return EXIT_SUCCESS;
b3c12f
+}
b3c12f
diff --git a/rpmio/rpmpgp.c b/rpmio/rpmpgp.c
b3c12f
index d1d2e7de3..2b58de167 100644
b3c12f
--- a/rpmio/rpmpgp.c
b3c12f
+++ b/rpmio/rpmpgp.c
b3c12f
@@ -298,6 +298,16 @@ int pgpValTok(pgpValTbl vs, const char * s, const char * se)
b3c12f
     return vs->val;
b3c12f
 }
b3c12f
 
b3c12f
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val)
b3c12f
+{
b3c12f
+    pgpValTbl tbl = pgpValTable(type);
b3c12f
+    if (tbl == NULL) return -1;
b3c12f
+    int v = pgpValTok(tbl, str, str + strlen(str));
b3c12f
+    if (v == -1) return -1;
b3c12f
+    *val = (uint8_t)v;
b3c12f
+    return 0;
b3c12f
+}
b3c12f
+
b3c12f
 /** \ingroup rpmpgp
b3c12f
  * Decode length from 1, 2, or 5 octet body length encoding, used in
b3c12f
  * new format packet headers and V4 signature subpackets.
b3c12f
diff --git a/rpmio/rpmpgp.h b/rpmio/rpmpgp.h
b3c12f
index 1614750d6..4626a2efc 100644
b3c12f
--- a/rpmio/rpmpgp.h
b3c12f
+++ b/rpmio/rpmpgp.h
b3c12f
@@ -973,6 +973,15 @@ typedef rpmFlags rpmDigestFlags;
b3c12f
  */
b3c12f
 const char * pgpValString(pgpValType type, uint8_t val);
b3c12f
 
b3c12f
+/** \ingroup rpmpgp
b3c12f
+ * Return OpenPGP value for a string.
b3c12f
+ * @param type		type of value
b3c12f
+ * @param str		string to lookup
b3c12f
+ * @param[out] val  byte value associated with string
b3c12f
+ * @return		0 on success else -1
b3c12f
+ */
b3c12f
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val);
b3c12f
+
b3c12f
 /** \ingroup rpmpgp
b3c12f
  * Return (native-endian) integer from big-endian representation.
b3c12f
  * @param s		pointer to big-endian integer
b3c12f
b3c12f
From 9de362d128634768543e1999763fb1371313c40d Mon Sep 17 00:00:00 2001
b3c12f
From: Matthew Almond <malmond@fb.com>
b3c12f
Date: Sun, 31 Jan 2021 12:30:33 -0800
b3c12f
Subject: [PATCH 2/4] Remove use of bool type for consistency
b3c12f
b3c12f
---
b3c12f
 lib/fsm.c | 9 ++++-----
b3c12f
 1 file changed, 4 insertions(+), 5 deletions(-)
b3c12f
b3c12f
diff --git a/lib/fsm.c b/lib/fsm.c
b3c12f
index 03a716474..7966ac2f1 100644
b3c12f
--- a/lib/fsm.c
b3c12f
+++ b/lib/fsm.c
b3c12f
@@ -7,7 +7,6 @@
b3c12f
 
b3c12f
 #include <utime.h>
b3c12f
 #include <errno.h>
b3c12f
-#include <stdbool.h>
b3c12f
 #if WITH_CAP
b3c12f
 #include <sys/capability.h>
b3c12f
 #endif
b3c12f
@@ -854,10 +853,10 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
     char *fpath = NULL;
b3c12f
     Header h = rpmteHeader(te);
b3c12f
     const char *payloadfmt = headerGetString(h, RPMTAG_PAYLOADFORMAT);
b3c12f
-    bool cpio = true;
b3c12f
+    int cpio = 1;
b3c12f
 
b3c12f
     if (payloadfmt && rstreq(payloadfmt, "clon")) {
b3c12f
-	cpio = false;
b3c12f
+	cpio = 0;
b3c12f
     }
b3c12f
     if (cpio) {
b3c12f
 	fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
b3c12f
@@ -907,13 +906,13 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
b3c12f
 	rc = rpmpluginsCallFsmFilePre(plugins, fi, fpath,
b3c12f
 				      sb.st_mode, action);
b3c12f
 	skip = skip || rpmfiFFlags(fi) & RPMFILE_GHOST;
b3c12f
-	bool plugin_contents = false;
b3c12f
+	int plugin_contents = 0;
b3c12f
 	switch (rc) {
b3c12f
 	case RPMRC_OK:
b3c12f
 	    setFileState(fs, rpmfiFX(fi));
b3c12f
 	    break;
b3c12f
 	case RPMRC_PLUGIN_CONTENTS:
b3c12f
-	    plugin_contents = true;
b3c12f
+	    plugin_contents = 1;
b3c12f
 	    // reduce reads on cpio to this value. Could be zero if
b3c12f
 	    // this is from a hard link.
b3c12f
 	    rc = RPMRC_OK;
b3c12f
b3c12f
From 91f7284e961cdbecdfec5beedbce03ee2f0fbd85 Mon Sep 17 00:00:00 2001
b3c12f
From: Matthew Almond <malmond@fb.com>
b3c12f
Date: Sun, 31 Jan 2021 13:51:16 -0800
b3c12f
Subject: [PATCH 3/4] Match formatting/style of existing code
b3c12f
b3c12f
The existing code contains some variability in formatting. I'm not sure
b3c12f
if { is meant to be on the end of the line, or on a new line, but I've
b3c12f
standardized on the former.
b3c12f
b3c12f
The indentation is intended to match the existing convention: 4 column
b3c12f
indent, but 8 column wide tab characters. This is easy to follow/use in
b3c12f
vim, but is surprisingly difficult to get right in vscode. I am doing
b3c12f
this reformat here and now, so future changes will build on top of it.
b3c12f
b3c12f
I'm keen to fold the patches together, but for now, I'm trying to keep
b3c12f
the history of #1470 linear so everyone can follow along.
b3c12f
---
b3c12f
 lib/rpmplugins.c  |   6 +-
b3c12f
 plugins/reflink.c | 407 ++++++++++++++++++---------------
b3c12f
 rpm2extents.c     | 562 ++++++++++++++++++++--------------------------
b3c12f
 3 files changed, 462 insertions(+), 513 deletions(-)
b3c12f
b3c12f
diff --git a/lib/rpmplugins.c b/lib/rpmplugins.c
b3c12f
index c5084d398..3da3097af 100644
b3c12f
--- a/lib/rpmplugins.c
b3c12f
+++ b/lib/rpmplugins.c
b3c12f
@@ -368,9 +368,9 @@ rpmRC rpmpluginsCallFsmFilePre(rpmPlugins plugins, rpmfi fi, const char *path,
b3c12f
 		rc = RPMRC_FAIL;
b3c12f
 	    } else if (hook_rc == RPMRC_PLUGIN_CONTENTS && rc != RPMRC_FAIL) {
b3c12f
 		if (rc == RPMRC_PLUGIN_CONTENTS) {
b3c12f
-		    /*
b3c12f
-		    Another plugin already said it'd handle contents. It's undefined how
b3c12f
-		    these would combine, so treat this as a failure condition.
b3c12f
+		    /* Another plugin already said it'd handle contents. It's
b3c12f
+		     * undefined how these would combine, so treat this as a
b3c12f
+		     * failure condition.
b3c12f
 		    */
b3c12f
 		    rc = RPMRC_FAIL;
b3c12f
 		} else {
b3c12f
diff --git a/plugins/reflink.c b/plugins/reflink.c
b3c12f
index d7f19acd9..9eaa87094 100644
b3c12f
--- a/plugins/reflink.c
b3c12f
+++ b/plugins/reflink.c
b3c12f
@@ -32,31 +32,32 @@
b3c12f
 #include "lib/rpmhash.H"
b3c12f
 #include "lib/rpmhash.C"
b3c12f
 
b3c12f
-/*
b3c12f
-We use this in find to indicate a key wasn't found. This is an unrecoverable
b3c12f
-error, but we can at least show a decent error. 0 is never a valid offset
b3c12f
-because it's the offset of the start of the file.
b3c12f
-*/
b3c12f
+/* We use this in find to indicate a key wasn't found. This is an
b3c12f
+ * unrecoverable error, but we can at least show a decent error. 0 is never a
b3c12f
+ * valid offset because it's the offset of the start of the file.
b3c12f
+ */
b3c12f
 #define NOT_FOUND 0
b3c12f
 
b3c12f
 #define BUFFER_SIZE (1024 * 128)
b3c12f
 
b3c12f
-/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
b3c12f
+/* magic value at end of file (64 bits) that indicates this is a transcoded
b3c12f
+ * rpm.
b3c12f
+ */
b3c12f
 #define MAGIC 3472329499408095051
b3c12f
 
b3c12f
 struct reflink_state_s {
b3c12f
-  /* Stuff that's used across rpms */
b3c12f
-  long fundamental_block_size;
b3c12f
-  char *buffer;
b3c12f
+    /* Stuff that's used across rpms */
b3c12f
+    long fundamental_block_size;
b3c12f
+    char *buffer;
b3c12f
 
b3c12f
-  /* stuff that's used/updated per psm */
b3c12f
-  uint32_t keys, keysize;
b3c12f
+    /* stuff that's used/updated per psm */
b3c12f
+    uint32_t keys, keysize;
b3c12f
 
b3c12f
-  // table for current rpm, keys * (keysize + sizeof(rpm_loff_t))
b3c12f
-  unsigned char *table;
b3c12f
-  FD_t fd;
b3c12f
-  rpmfiles files;
b3c12f
-  inodeIndexHash inodeIndexes;
b3c12f
+    /* table for current rpm, keys * (keysize + sizeof(rpm_loff_t)) */
b3c12f
+    unsigned char *table;
b3c12f
+    FD_t fd;
b3c12f
+    rpmfiles files;
b3c12f
+    inodeIndexHash inodeIndexes;
b3c12f
 };
b3c12f
 
b3c12f
 typedef struct reflink_state_s * reflink_state;
b3c12f
@@ -73,60 +74,62 @@ static unsigned int inodeId(rpm_ino_t a)
b3c12f
 }
b3c12f
 
b3c12f
 static rpmRC reflink_init(rpmPlugin plugin, rpmts ts) {
b3c12f
-  reflink_state state = rcalloc(1, sizeof(struct reflink_state_s));
b3c12f
+    reflink_state state = rcalloc(1, sizeof(struct reflink_state_s));
b3c12f
 
b3c12f
-  /*
b3c12f
-  IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset and
b3c12f
-  length arguments to be aligned to the fundamental block size.
b3c12f
+    /* IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset
b3c12f
+     * and length arguments to be aligned to the fundamental block size.
b3c12f
+     *
b3c12f
+     * The value of "fundamental block size" is directly related to the
b3c12f
+     * system's page size, so we should use that.
b3c12f
+     */
b3c12f
+    state->fundamental_block_size = sysconf(_SC_PAGESIZE);
b3c12f
+    state->buffer = rcalloc(1, BUFFER_SIZE);
b3c12f
+    rpmPluginSetData(plugin, state);
b3c12f
 
b3c12f
-  The value of "fundamental block size" is directly related to the system's
b3c12f
-  page size, so we should use that.
b3c12f
-  */
b3c12f
-  state->fundamental_block_size = sysconf(_SC_PAGESIZE);
b3c12f
-  state->buffer = rcalloc(1, BUFFER_SIZE);
b3c12f
-  rpmPluginSetData(plugin, state);
b3c12f
-
b3c12f
-  return RPMRC_OK;
b3c12f
+    return RPMRC_OK;
b3c12f
 }
b3c12f
 
b3c12f
 static void reflink_cleanup(rpmPlugin plugin) {
b3c12f
-  reflink_state state = rpmPluginGetData(plugin);
b3c12f
-  free(state->buffer);
b3c12f
-  free(state);
b3c12f
+    reflink_state state = rpmPluginGetData(plugin);
b3c12f
+    free(state->buffer);
b3c12f
+    free(state);
b3c12f
 }
b3c12f
 
b3c12f
 static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
b3c12f
     reflink_state state = rpmPluginGetData(plugin);
b3c12f
     state->fd = rpmteFd(te);
b3c12f
     if (state->fd == 0) {
b3c12f
-      rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
b3c12f
-      return RPMRC_OK;
b3c12f
+	rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
b3c12f
+	return RPMRC_OK;
b3c12f
     }
b3c12f
     rpm_loff_t current = Ftell(state->fd);
b3c12f
     uint64_t magic;
b3c12f
     if (Fseek(state->fd, -(sizeof(magic)), SEEK_END) < 0) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
b3c12f
-      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
-        /* yes this gets a bit repetitive */
b3c12f
-        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
-      }
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
b3c12f
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+	    /* yes this gets a bit repetitive */
b3c12f
+	    rpmlog(RPMLOG_ERR,
b3c12f
+		 _("reflink: unable to seek back to original location\n"));
b3c12f
+	}
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     size_t len = sizeof(magic);
b3c12f
     if (Fread(&magic, len, 1, state->fd) != len) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
b3c12f
-      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
-        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
-      }
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
b3c12f
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+	    rpmlog(RPMLOG_ERR,
b3c12f
+		   _("reflink: unable to seek back to original location\n"));
b3c12f
+	}
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     if (magic != MAGIC) {
b3c12f
-      rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
b3c12f
-      if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
-        rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
-        return RPMRC_FAIL;
b3c12f
-      }
b3c12f
-      return RPMRC_OK;
b3c12f
+	rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
b3c12f
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
+	    rpmlog(RPMLOG_ERR,
b3c12f
+		   _("reflink: unable to seek back to original location\n"));
b3c12f
+	    return RPMRC_FAIL;
b3c12f
+	}
b3c12f
+	return RPMRC_OK;
b3c12f
     }
b3c12f
     rpmlog(RPMLOG_DEBUG, _("reflink: *is* transcoded\n"));
b3c12f
     Header h = rpmteHeader(te);
b3c12f
@@ -136,53 +139,60 @@ static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
b3c12f
     headerPutString(h, RPMTAG_PAYLOADFORMAT, "clon");
b3c12f
     headerFree(h);
b3c12f
     state->files = rpmteFiles(te);
b3c12f
-    /* tail of file contains offset_table, offset_checksums
b3c12f
-       then magic
b3c12f
-    */
b3c12f
+    /* tail of file contains offset_table, offset_checksums then magic */
b3c12f
     if (Fseek(state->fd, -(sizeof(rpm_loff_t) * 2 + sizeof(magic)), SEEK_END) < 0) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"), state->fd);
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"),
b3c12f
+	       state->fd);
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     rpm_loff_t table_start;
b3c12f
     len = sizeof(table_start);
b3c12f
     if (Fread(&table_start, len, 1, state->fd) != len) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     if (Fseek(state->fd, table_start, SEEK_SET) < 0) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     len = sizeof(state->keys);
b3c12f
     if (Fread(&state->keys, len, 1, state->fd) != len) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     len = sizeof(state->keysize);
b3c12f
     if (Fread(&state->keysize, len, 1, state->fd) != len) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
-    rpmlog(RPMLOG_DEBUG, _("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"), table_start, state->keys, state->keysize);
b3c12f
-    // now get digest table if there is a reason to have one.
b3c12f
+    rpmlog(
b3c12f
+	RPMLOG_DEBUG,
b3c12f
+	_("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"),
b3c12f
+	table_start, state->keys, state->keysize
b3c12f
+    );
b3c12f
+    /* now get digest table if there is a reason to have one. */
b3c12f
     if (state->keys == 0 || state->keysize == 0) {
b3c12f
-      // no files (or no digests(!))
b3c12f
-      state->table = NULL;
b3c12f
+	/* no files (or no digests(!)) */
b3c12f
+	state->table = NULL;
b3c12f
     } else {
b3c12f
-      int table_size = state->keys * (state->keysize + sizeof(rpm_loff_t));
b3c12f
-      state->table = rcalloc(1, table_size);
b3c12f
-      if (Fread(state->table, table_size, 1, state->fd) != table_size) {
b3c12f
-        rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
b3c12f
-        return RPMRC_FAIL;
b3c12f
-      }
b3c12f
-      state->inodeIndexes = inodeIndexHashCreate(state->keys, inodeId, inodeCmp, NULL, NULL);
b3c12f
+	int table_size = state->keys * (state->keysize + sizeof(rpm_loff_t));
b3c12f
+	state->table = rcalloc(1, table_size);
b3c12f
+	if (Fread(state->table, table_size, 1, state->fd) != table_size) {
b3c12f
+	    rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
b3c12f
+	    return RPMRC_FAIL;
b3c12f
+	}
b3c12f
+	state->inodeIndexes = inodeIndexHashCreate(
b3c12f
+	    state->keys, inodeId, inodeCmp, NULL, NULL
b3c12f
+	);
b3c12f
     }
b3c12f
 
b3c12f
-    // seek back to original location
b3c12f
-    // might not be needed if we seek to offset immediately
b3c12f
+    /* Seek back to original location.
b3c12f
+     * Might not be needed if we seek to offset immediately
b3c12f
+     */
b3c12f
     if (Fseek(state->fd, current, SEEK_SET) < 0) {
b3c12f
-      rpmlog(RPMLOG_ERR, _("reflink: unable to seek back to original location\n"));
b3c12f
-      return RPMRC_FAIL;
b3c12f
+	rpmlog(RPMLOG_ERR,
b3c12f
+	       _("reflink: unable to seek back to original location\n"));
b3c12f
+	return RPMRC_FAIL;
b3c12f
     }
b3c12f
     return RPMRC_OK;
b3c12f
 }
b3c12f
@@ -192,40 +202,45 @@ static rpmRC reflink_psm_post(rpmPlugin plugin, rpmte te, int res)
b3c12f
     reflink_state state = rpmPluginGetData(plugin);
b3c12f
     state->files = rpmfilesFree(state->files);
b3c12f
     if (state->table) {
b3c12f
-      free(state->table);
b3c12f
-      state->table = NULL;
b3c12f
+	free(state->table);
b3c12f
+	state->table = NULL;
b3c12f
     }
b3c12f
     if (state->inodeIndexes) {
b3c12f
-      inodeIndexHashFree(state->inodeIndexes);
b3c12f
-      state->inodeIndexes = NULL;
b3c12f
+	inodeIndexHashFree(state->inodeIndexes);
b3c12f
+	state->inodeIndexes = NULL;
b3c12f
     }
b3c12f
     return RPMRC_OK;
b3c12f
 }
b3c12f
 
b3c12f
 
b3c12f
-// have a prototype, warnings system
b3c12f
+/* Prototype declared up front to silence missing-prototype warnings. */
b3c12f
 rpm_loff_t find(const unsigned char *digest, reflink_state state);
b3c12f
 
b3c12f
 rpm_loff_t find(const unsigned char *digest, reflink_state state) {
b3c12f
 # if defined(__GNUC__)
b3c12f
-  /* GCC nested function because bsearch's comparison function can't access
b3c12f
-     state-keysize otherwise
b3c12f
-  */
b3c12f
-  int cmpdigest(const void *k1, const void *k2) {
b3c12f
-    rpmlog(RPMLOG_DEBUG, _("reflink: cmpdigest k1=%p k2=%p\n"), k1, k2);
b3c12f
-    return memcmp(k1, k2, state->keysize);
b3c12f
-  }
b3c12f
+    /* GCC nested function because bsearch's comparison function can't access
b3c12f
+     * state->keysize otherwise
b3c12f
+     */
b3c12f
+    int cmpdigest(const void *k1, const void *k2) {
b3c12f
+	rpmlog(RPMLOG_DEBUG, _("reflink: cmpdigest k1=%p k2=%p\n"), k1, k2);
b3c12f
+	return memcmp(k1, k2, state->keysize);
b3c12f
+    }
b3c12f
 # endif
b3c12f
-  rpmlog(RPMLOG_DEBUG, _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"), digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t));
b3c12f
-  char *entry = bsearch(digest, state->table, state->keys, state->keysize + sizeof(rpm_loff_t), cmpdigest);
b3c12f
-  if (entry == NULL) {
b3c12f
-    return NOT_FOUND;
b3c12f
-  }
b3c12f
-  rpm_loff_t offset = *(rpm_loff_t *)(entry + state->keysize);
b3c12f
-  return offset;
b3c12f
+    rpmlog(RPMLOG_DEBUG,
b3c12f
+	   _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"),
b3c12f
+	   digest, state->table, state->keys,
b3c12f
+	   state->keysize + sizeof(rpm_loff_t));
b3c12f
+    char *entry = bsearch(digest, state->table, state->keys,
b3c12f
+			  state->keysize + sizeof(rpm_loff_t), cmpdigest);
b3c12f
+    if (entry == NULL) {
b3c12f
+	return NOT_FOUND;
b3c12f
+    }
b3c12f
+    rpm_loff_t offset = *(rpm_loff_t *)(entry + state->keysize);
b3c12f
+    return offset;
b3c12f
 }
b3c12f
 
b3c12f
-static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path, mode_t file_mode, rpmFsmOp op)
b3c12f
+static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path,
b3c12f
+                                  mode_t file_mode, rpmFsmOp op)
b3c12f
 {
b3c12f
     struct file_clone_range fcr;
b3c12f
     rpm_loff_t size;
b3c12f
@@ -234,99 +249,119 @@ static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path,
b3c12f
 
b3c12f
     reflink_state state = rpmPluginGetData(plugin);
b3c12f
     if (state->table == NULL) {
b3c12f
-        // no table means rpm is not in reflink format, so leave. Now.
b3c12f
-        return RPMRC_OK;
b3c12f
+	/* no table means rpm is not in reflink format, so leave. Now. */
b3c12f
+	return RPMRC_OK;
b3c12f
     }
b3c12f
     if (op == FA_TOUCH) {
b3c12f
-        // we're not overwriting an existing file
b3c12f
-        return RPMRC_OK;
b3c12f
+	/* we're not overwriting an existing file. */
b3c12f
+	return RPMRC_OK;
b3c12f
     }
b3c12f
     fcr.dest_offset = 0;
b3c12f
     if (S_ISREG(file_mode) && !(rpmfiFFlags(fi) & RPMFILE_GHOST)) {
b3c12f
-      rpm_ino_t inode = rpmfiFInode(fi);
b3c12f
-      /* check for hard link entry in table. GetEntry overwrites hlix with the address of the first match */
b3c12f
-      if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL, NULL)) {
b3c12f
-        // entry is in table, use hard link
b3c12f
-        char *fn = rpmfilesFN(state->files, hlix[0]);
b3c12f
-        if (link(fn, path) != 0) {
b3c12f
-          rpmlog(RPMLOG_ERR, _("reflink: Unable to hard link %s -> %s due to %s\n"), fn, path, strerror(errno));
b3c12f
-          free(fn);
b3c12f
-          return RPMRC_FAIL;
b3c12f
-        }
b3c12f
-        free(fn);
b3c12f
-        return RPMRC_PLUGIN_CONTENTS;
b3c12f
-      }
b3c12f
-      /* if we didn't hard link, then we'll track this inode as being created soon */
b3c12f
-      if (rpmfiFNlink(fi) > 1) {
b3c12f
-        /* minor optimization: only store files with more than one link */
b3c12f
-        inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi));
b3c12f
-      }
b3c12f
-      /* derived from wfd_open in fsm.c */
b3c12f
-      mode_t old_umask = umask(0577);
b3c12f
-      dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
b3c12f
-      umask(old_umask);
b3c12f
-      if (dst == -1) {
b3c12f
-          rpmlog(RPMLOG_ERR, _("reflink: Unable to open %s for writing due to %s, flags = %x\n"), path, strerror(errno), rpmfiFFlags(fi));
b3c12f
-          return RPMRC_FAIL;
b3c12f
-      }
b3c12f
-      size = rpmfiFSize(fi);
b3c12f
-      if (size > 0) {
b3c12f
-          /* round src_length down to fundamental_block_size multiple */
b3c12f
-          fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size;
b3c12f
-          if ((size % state->fundamental_block_size) > 0) {
b3c12f
-              /* round up to next fundamental_block_size. We expect the data in the rpm to be similarly padded */
b3c12f
-              fcr.src_length += state->fundamental_block_size;
b3c12f
-          }
b3c12f
-          fcr.src_fd = Fileno(state->fd);
b3c12f
-          if (fcr.src_fd == -1) {
b3c12f
-            close(dst);
b3c12f
-            rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n"));
b3c12f
-            return RPMRC_FAIL;
b3c12f
-          }
b3c12f
-          fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state);
b3c12f
-          if (fcr.src_offset == NOT_FOUND) {
b3c12f
-            close(dst);
b3c12f
-            rpmlog(RPMLOG_ERR, _("reflink: digest not found\n"));
b3c12f
-            return RPMRC_FAIL;
b3c12f
-          }
b3c12f
-          rpmlog(RPMLOG_DEBUG, _("reflink: Reflinking %lu bytes at %lu to %s orig size=%lu, file=%ld\n"), fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
b3c12f
-          rc = ioctl(dst, FICLONERANGE, &fcr);
b3c12f
-          if (rc) {
b3c12f
-            rpmlog(RPMLOG_WARNING, _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"), path, rc, errno, strerror(errno));
b3c12f
-            if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) {
b3c12f
-                close(dst);
b3c12f
-                rpmlog(RPMLOG_ERR, _("reflink: unable to seek on copying bits\n"));
b3c12f
-                return RPMRC_FAIL;
b3c12f
-            }
b3c12f
-            rpm_loff_t left = size;
b3c12f
-            size_t len, read, written;
b3c12f
-            while (left) {
b3c12f
-              len = (left > BUFFER_SIZE ? BUFFER_SIZE : left);
b3c12f
-              read = Fread(state->buffer, len, 1, state->fd);
b3c12f
-              if (read != len) {
b3c12f
-                close(dst);
b3c12f
-                rpmlog(RPMLOG_ERR, _("reflink: short read on copying bits\n"));
b3c12f
-                return RPMRC_FAIL;
b3c12f
-              }
b3c12f
-              written = write(dst, state->buffer, len);
b3c12f
-              if (read != written) {
b3c12f
-                close(dst);
b3c12f
-                rpmlog(RPMLOG_ERR, _("reflink: short write on copying bits\n"));
b3c12f
-                return RPMRC_FAIL;
b3c12f
-              }
b3c12f
-              left -= len;
b3c12f
-            }
b3c12f
-          } else {
b3c12f
-            /* reflink worked, so truncate */
b3c12f
-            rc = ftruncate(dst, size);
b3c12f
-            if (rc) {
b3c12f
-                rpmlog(RPMLOG_ERR, _("reflink: Unable to truncate %s to %ld due to %s\n"), path, size, strerror(errno));
b3c12f
-                return RPMRC_FAIL;
b3c12f
-            }
b3c12f
-          }
b3c12f
-      }
b3c12f
-      close(dst);
b3c12f
-      return RPMRC_PLUGIN_CONTENTS;
b3c12f
+	rpm_ino_t inode = rpmfiFInode(fi);
b3c12f
+	/* check for hard link entry in table. GetEntry overwrites hlix with
b3c12f
+	 * the address of the first match.
b3c12f
+	 */
b3c12f
+	if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL,
b3c12f
+	                           NULL)) {
b3c12f
+	    /* entry is in table, use hard link */
b3c12f
+	    char *fn = rpmfilesFN(state->files, hlix[0]);
b3c12f
+	    if (link(fn, path) != 0) {
b3c12f
+		rpmlog(RPMLOG_ERR,
b3c12f
+		       _("reflink: Unable to hard link %s -> %s due to %s\n"),
b3c12f
+		       fn, path, strerror(errno));
b3c12f
+		free(fn);
b3c12f
+		return RPMRC_FAIL;
b3c12f
+	    }
b3c12f
+	    free(fn);
b3c12f
+	    return RPMRC_PLUGIN_CONTENTS;
b3c12f
+	}
b3c12f
+	/* if we didn't hard link, then we'll track this inode as being
b3c12f
+	 * created soon
b3c12f
+	 */
b3c12f
+	if (rpmfiFNlink(fi) > 1) {
b3c12f
+	    /* minor optimization: only store files with more than one link */
b3c12f
+	    inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi));
b3c12f
+	}
b3c12f
+	/* derived from wfd_open in fsm.c */
b3c12f
+	mode_t old_umask = umask(0577);
b3c12f
+	dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
b3c12f
+	umask(old_umask);
b3c12f
+	if (dst == -1) {
b3c12f
+	    rpmlog(RPMLOG_ERR,
b3c12f
+		   _("reflink: Unable to open %s for writing due to %s, flags = %x\n"),
b3c12f
+		   path, strerror(errno), rpmfiFFlags(fi));
b3c12f
+	    return RPMRC_FAIL;
b3c12f
+	}
b3c12f
+	size = rpmfiFSize(fi);
b3c12f
+	if (size > 0) {
b3c12f
+	    /* round src_length down to fundamental_block_size multiple */
b3c12f
+	    fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size;
b3c12f
+	    if ((size % state->fundamental_block_size) > 0) {
b3c12f
+		/* round up to next fundamental_block_size. We expect the data
b3c12f
+		 * in the rpm to be similarly padded.
b3c12f
+		 */
b3c12f
+		fcr.src_length += state->fundamental_block_size;
b3c12f
+	    }
b3c12f
+	    fcr.src_fd = Fileno(state->fd);
b3c12f
+	    if (fcr.src_fd == -1) {
b3c12f
+		close(dst);
b3c12f
+		rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n"));
b3c12f
+		return RPMRC_FAIL;
b3c12f
+	    }
b3c12f
+	    fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state);
b3c12f
+	    if (fcr.src_offset == NOT_FOUND) {
b3c12f
+		close(dst);
b3c12f
+		rpmlog(RPMLOG_ERR, _("reflink: digest not found\n"));
b3c12f
+		return RPMRC_FAIL;
b3c12f
+	    }
b3c12f
+	    rpmlog(RPMLOG_DEBUG,
b3c12f
+	           _("reflink: Reflinking %lu bytes at %lu to %s orig size=%lu, file=%ld\n"),
b3c12f
+		   fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
b3c12f
+	    rc = ioctl(dst, FICLONERANGE, &fcr);
b3c12f
+	    if (rc) {
b3c12f
+		rpmlog(RPMLOG_WARNING,
b3c12f
+		       _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"),
b3c12f
+		       path, rc, errno, strerror(errno));
b3c12f
+		if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) {
b3c12f
+		    close(dst);
b3c12f
+		    rpmlog(RPMLOG_ERR,
b3c12f
+			   _("reflink: unable to seek on copying bits\n"));
b3c12f
+		    return RPMRC_FAIL;
b3c12f
+		}
b3c12f
+		rpm_loff_t left = size;
b3c12f
+		size_t len, read, written;
b3c12f
+		while (left) {
b3c12f
+		    len = (left > BUFFER_SIZE ? BUFFER_SIZE : left);
b3c12f
+		    read = Fread(state->buffer, len, 1, state->fd);
b3c12f
+		    if (read != len) {
b3c12f
+			close(dst);
b3c12f
+			rpmlog(RPMLOG_ERR,
b3c12f
+			       _("reflink: short read on copying bits\n"));
b3c12f
+			return RPMRC_FAIL;
b3c12f
+		    }
b3c12f
+		    written = write(dst, state->buffer, len);
b3c12f
+		    if (read != written) {
b3c12f
+			close(dst);
b3c12f
+			rpmlog(RPMLOG_ERR,
b3c12f
+			       _("reflink: short write on copying bits\n"));
b3c12f
+			return RPMRC_FAIL;
b3c12f
+		    }
b3c12f
+		    left -= len;
b3c12f
+		}
b3c12f
+	    } else {
b3c12f
+		/* reflink worked, so truncate */
b3c12f
+		rc = ftruncate(dst, size);
b3c12f
+		if (rc) {
b3c12f
+		    rpmlog(RPMLOG_ERR,
b3c12f
+			   _("reflink: Unable to truncate %s to %ld due to %s\n"),
b3c12f
+			   path, size, strerror(errno));
b3c12f
+		     return RPMRC_FAIL;
b3c12f
+		}
b3c12f
+	    }
b3c12f
+	}
b3c12f
+	close(dst);
b3c12f
+	return RPMRC_PLUGIN_CONTENTS;
b3c12f
     }
b3c12f
     return RPMRC_OK;
b3c12f
 }
b3c12f
diff --git a/rpm2extents.c b/rpm2extents.c
b3c12f
index 5662b86a6..c111be0a2 100644
b3c12f
--- a/rpm2extents.c
b3c12f
+++ b/rpm2extents.c
b3c12f
@@ -24,7 +24,7 @@
b3c12f
 #include "debug.h"
b3c12f
 
b3c12f
 /* hash of void * (pointers) to file digests to offsets within output.
b3c12f
-   The length of the key depends on what the FILEDIGESTALGO is.
b3c12f
+ * The length of the key depends on what the FILEDIGESTALGO is.
b3c12f
  */
b3c12f
 #undef HASHTYPE
b3c12f
 #undef HTKEYTYPE
b3c12f
@@ -34,7 +34,9 @@
b3c12f
 #include "lib/rpmhash.H"
b3c12f
 #include "lib/rpmhash.C"
b3c12f
 
b3c12f
-/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
b3c12f
+/* magic value at end of file (64 bits) that indicates this is a transcoded
b3c12f
+ * rpm.
b3c12f
+ */
b3c12f
 #define MAGIC 3472329499408095051
b3c12f
 
b3c12f
 struct digestoffset {
b3c12f
@@ -64,77 +66,54 @@ static int digestor(
b3c12f
     int algo;
b3c12f
     rpmRC rc = RPMRC_FAIL;
b3c12f
 
b3c12f
-    for (algo = 0; algo < algos_len; algo++)
b3c12f
-    {
b3c12f
-        fdInitDigest(fdi, algos[algo], 0);
b3c12f
+    for (algo = 0; algo < algos_len; algo++) {
b3c12f
+	fdInitDigest(fdi, algos[algo], 0);
b3c12f
     }
b3c12f
     fdilength = ufdCopy(fdi, fdo);
b3c12f
-    if (fdilength == -1)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("digest cat failed\n"));
b3c12f
-        goto exit;
b3c12f
+    if (fdilength == -1) {
b3c12f
+	fprintf(stderr, _("digest cat failed\n"));
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
 
b3c12f
     len = sizeof(fdilength);
b3c12f
-    if (Fwrite(&fdilength, len, 1, validationo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write input length %zd\n"), fdilength);
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&fdilength, len, 1, validationo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write input length %zd\n"), fdilength);
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     len = sizeof(algos_len);
b3c12f
-    if (Fwrite(&algos_len, len, 1, validationo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write number of validation digests\n"));
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&algos_len, len, 1, validationo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write number of validation digests\n"));
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
-    for (algo = 0; algo < algos_len; algo++)
b3c12f
-    {
b3c12f
-        fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0);
b3c12f
-
b3c12f
-        algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]);
b3c12f
-        algo_name_len = (uint32_t)strlen(algo_name);
b3c12f
-        algo_digest_len = (uint32_t)filedigest_len;
b3c12f
-
b3c12f
-        len = sizeof(algo_name_len);
b3c12f
-        if (Fwrite(&algo_name_len, len, 1, validationo) != len)
b3c12f
-        {
b3c12f
-            fprintf(
b3c12f
-                stderr,
b3c12f
-                _("Unable to write validation algo name length\n")
b3c12f
-            );
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        len = sizeof(algo_digest_len);
b3c12f
-        if (Fwrite(&algo_digest_len, len, 1, validationo) != len)
b3c12f
-        {
b3c12f
-            fprintf(
b3c12f
-                stderr,
b3c12f
-                _("Unable to write number of bytes for validation digest\n")
b3c12f
-            );
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("Unable to write validation algo name\n"));
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        if (
b3c12f
-            Fwrite(
b3c12f
-                filedigest,
b3c12f
-                algo_digest_len,
b3c12f
-                1,
b3c12f
-                validationo
b3c12f
-            ) != algo_digest_len
b3c12f
-        )
b3c12f
-        {
b3c12f
-            fprintf(
b3c12f
-                stderr,
b3c12f
-                _("Unable to write validation digest value %u, %zu\n"),
b3c12f
-                algo_digest_len,
b3c12f
-                filedigest_len
b3c12f
-            );
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
+    for (algo = 0; algo < algos_len; algo++) {
b3c12f
+	fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0);
b3c12f
+
b3c12f
+	algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]);
b3c12f
+	algo_name_len = (uint32_t)strlen(algo_name);
b3c12f
+	algo_digest_len = (uint32_t)filedigest_len;
b3c12f
+
b3c12f
+	len = sizeof(algo_name_len);
b3c12f
+	if (Fwrite(&algo_name_len, len, 1, validationo) != len) {
b3c12f
+	    fprintf(stderr,
b3c12f
+		    _("Unable to write validation algo name length\n"));
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+	len = sizeof(algo_digest_len);
b3c12f
+	if (Fwrite(&algo_digest_len, len, 1, validationo) != len) {
b3c12f
+	    fprintf(stderr,
b3c12f
+		    _("Unable to write number of bytes for validation digest\n"));
b3c12f
+	     goto exit;
b3c12f
+	}
b3c12f
+	if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len) {
b3c12f
+	    fprintf(stderr, _("Unable to write validation algo name\n"));
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+	if (Fwrite(filedigest, algo_digest_len, 1, validationo ) != algo_digest_len) {
b3c12f
+	    fprintf(stderr,
b3c12f
+		    _("Unable to write validation digest value %u, %zu\n"),
b3c12f
+		    algo_digest_len, filedigest_len);
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
     }
b3c12f
     rc = RPMRC_OK;
b3c12f
 exit:
b3c12f
@@ -145,23 +124,20 @@ static rpmRC process_package(FD_t fdi, FD_t validationi)
b3c12f
 {
b3c12f
     uint32_t diglen;
b3c12f
     /* GNU C extension: can use diglen from outer context */
b3c12f
-    int digestSetCmp(const unsigned char * a, const unsigned char * b)
b3c12f
-    {
b3c12f
-        return memcmp(a, b, diglen);
b3c12f
+    int digestSetCmp(const unsigned char * a, const unsigned char * b) {
b3c12f
+	return memcmp(a, b, diglen);
b3c12f
     }
b3c12f
 
b3c12f
-    unsigned int digestSetHash(const unsigned char * digest)
b3c12f
-    {
b3c12f
+    unsigned int digestSetHash(const unsigned char * digest) {
b3c12f
         /* assumes sizeof(unsigned int) < diglen */
b3c12f
         return *(unsigned int *)digest;
b3c12f
     }
b3c12f
 
b3c12f
-    int digestoffsetCmp(const void * a, const void * b)
b3c12f
-    {
b3c12f
-        return digestSetCmp(
b3c12f
-            ((struct digestoffset *)a)->digest,
b3c12f
-            ((struct digestoffset *)b)->digest
b3c12f
-        );
b3c12f
+    int digestoffsetCmp(const void * a, const void * b) {
b3c12f
+	return digestSetCmp(
b3c12f
+	    ((struct digestoffset *)a)->digest,
b3c12f
+	    ((struct digestoffset *)b)->digest
b3c12f
+	);
b3c12f
     }
b3c12f
 
b3c12f
     FD_t fdo;
b3c12f
@@ -179,65 +155,52 @@ static rpmRC process_package(FD_t fdi, FD_t validationi)
b3c12f
 
b3c12f
     fdo = fdDup(STDOUT_FILENO);
b3c12f
 
b3c12f
-    if (rpmReadPackageRaw(fdi, &sigh, &h))
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Error reading package\n"));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (rpmReadPackageRaw(fdi, &sigh, &h)) {
b3c12f
+	fprintf(stderr, _("Error reading package\n"));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
     if (rpmLeadWrite(fdo, h))
b3c12f
     {
b3c12f
-        fprintf(
b3c12f
-            stderr,
b3c12f
-            _("Unable to write package lead: %s\n"),
b3c12f
-            Fstrerror(fdo)
b3c12f
-        );
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+	fprintf(stderr, _("Unable to write package lead: %s\n"),
b3c12f
+		Fstrerror(fdo));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
-    if (rpmWriteSignature(fdo, sigh))
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (rpmWriteSignature(fdo, sigh)) {
b3c12f
+	fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
-    if (headerWrite(fdo, h, HEADER_MAGIC_YES))
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (headerWrite(fdo, h, HEADER_MAGIC_YES)) {
b3c12f
+	fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
     /* Retrieve payload size and compression type. */
b3c12f
-    {	const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR);
b3c12f
-        rpmio_flags = rstrscat(NULL, "r.", compr ? compr : "gzip", NULL);
b3c12f
+    {
b3c12f
+	const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR);
b3c12f
+	rpmio_flags = rstrscat(NULL, "r.", compr ? compr : "gzip", NULL);
b3c12f
     }
b3c12f
 
b3c12f
     gzdi = Fdopen(fdi, rpmio_flags);	/* XXX gzdi == fdi */
b3c12f
     free(rpmio_flags);
b3c12f
 
b3c12f
-    if (gzdi == NULL)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (gzdi == NULL) {
b3c12f
+	fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
     rpmfiles files = rpmfilesNew(NULL, h, 0, RPMFI_KEEPHEADER);
b3c12f
-    rpmfi fi = rpmfiNewArchiveReader(
b3c12f
-        gzdi,
b3c12f
-        files,
b3c12f
-        RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST
b3c12f
-    );
b3c12f
+    rpmfi fi = rpmfiNewArchiveReader(gzdi, files,
b3c12f
+				     RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST);
b3c12f
 
b3c12f
     /* this is encoded in the file format, so needs to be fixed size (for
b3c12f
-        now?)
b3c12f
-    */
b3c12f
+     * now?)
b3c12f
+     */
b3c12f
     diglen = (uint32_t)rpmDigestLength(rpmfiDigestAlgo(fi));
b3c12f
-    digestSet ds = digestSetCreate(
b3c12f
-        rpmfiFC(fi),
b3c12f
-        digestSetHash,
b3c12f
-        digestSetCmp,
b3c12f
-        NULL
b3c12f
-    );
b3c12f
+    digestSet ds = digestSetCreate(rpmfiFC(fi), digestSetHash, digestSetCmp,
b3c12f
+				   NULL);
b3c12f
     struct digestoffset offsets[rpmfiFC(fi)];
b3c12f
     pos = RPMLEAD_SIZE + headerSizeof(sigh, HEADER_MAGIC_YES);
b3c12f
 
b3c12f
@@ -247,139 +210,114 @@ static rpmRC process_package(FD_t fdi, FD_t validationi)
b3c12f
 
b3c12f
     zeros = xcalloc(fundamental_block_size, 1);
b3c12f
 
b3c12f
-    while (next >= 0)
b3c12f
-    {
b3c12f
-        next = rpmfiNext(fi);
b3c12f
-        if (next == RPMERR_ITER_END)
b3c12f
-        {
b3c12f
-            rc = RPMRC_OK;
b3c12f
-            break;
b3c12f
-        }
b3c12f
-        mode = rpmfiFMode(fi);
b3c12f
-        if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi))
b3c12f
-        {
b3c12f
-            /* not a regular file, or the archive doesn't contain any content for
b3c12f
-               this entry
b3c12f
-            */
b3c12f
-            continue;
b3c12f
-        }
b3c12f
-        digest = rpmfiFDigest(fi, NULL, NULL);
b3c12f
-        if (digestSetGetEntry(ds, digest, NULL))
b3c12f
-        {
b3c12f
-            /* This specific digest has already been included, so skip it */
b3c12f
-            continue;
b3c12f
-        }
b3c12f
-        pad = pad_to(pos, fundamental_block_size);
b3c12f
-        if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("Unable to write padding\n"));
b3c12f
-            rc = RPMRC_FAIL;
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        /* round up to next fundamental_block_size */
b3c12f
-        pos += pad;
b3c12f
-        digestSetAddEntry(ds, digest);
b3c12f
-        offsets[offset_ix].digest = digest;
b3c12f
-        offsets[offset_ix].pos = pos;
b3c12f
-        offset_ix++;
b3c12f
-        size = rpmfiFSize(fi);
b3c12f
-        rc = rpmfiArchiveReadToFile(fi, fdo, 0);
b3c12f
-        if (rc != RPMRC_OK)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc);
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        pos += size;
b3c12f
+    while (next >= 0) {
b3c12f
+	next = rpmfiNext(fi);
b3c12f
+	if (next == RPMERR_ITER_END) {
b3c12f
+	    rc = RPMRC_OK;
b3c12f
+	    break;
b3c12f
+	}
b3c12f
+	mode = rpmfiFMode(fi);
b3c12f
+	if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi)) {
b3c12f
+	    /* not a regular file, or the archive doesn't contain any content
b3c12f
+	     * for this entry.
b3c12f
+	    */
b3c12f
+	    continue;
b3c12f
+	}
b3c12f
+	digest = rpmfiFDigest(fi, NULL, NULL);
b3c12f
+	if (digestSetGetEntry(ds, digest, NULL)) {
b3c12f
+	    /* This specific digest has already been included, so skip it. */
b3c12f
+	    continue;
b3c12f
+	}
b3c12f
+	pad = pad_to(pos, fundamental_block_size);
b3c12f
+	if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) {
b3c12f
+	    fprintf(stderr, _("Unable to write padding\n"));
b3c12f
+	    rc = RPMRC_FAIL;
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+	/* round up to next fundamental_block_size */
b3c12f
+	pos += pad;
b3c12f
+	digestSetAddEntry(ds, digest);
b3c12f
+	offsets[offset_ix].digest = digest;
b3c12f
+	offsets[offset_ix].pos = pos;
b3c12f
+	offset_ix++;
b3c12f
+	size = rpmfiFSize(fi);
b3c12f
+	rc = rpmfiArchiveReadToFile(fi, fdo, 0);
b3c12f
+	if (rc != RPMRC_OK) {
b3c12f
+	    fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc);
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+	pos += size;
b3c12f
     }
b3c12f
     Fclose(gzdi);	/* XXX gzdi == fdi */
b3c12f
 
b3c12f
-    qsort(
b3c12f
-        offsets,
b3c12f
-        (size_t)offset_ix,
b3c12f
-        sizeof(struct digestoffset),
b3c12f
-        digestoffsetCmp
b3c12f
-    );
b3c12f
+    qsort(offsets, (size_t)offset_ix, sizeof(struct digestoffset),
b3c12f
+	  digestoffsetCmp);
b3c12f
 
b3c12f
     len = sizeof(offset_ix);
b3c12f
-    if (Fwrite(&offset_ix, len, 1, fdo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write length of table\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&offset_ix, len, 1, fdo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write length of table\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     len = sizeof(diglen);
b3c12f
-    if (Fwrite(&diglen, len, 1, fdo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write length of digest\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&diglen, len, 1, fdo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write length of digest\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     len = sizeof(rpm_loff_t);
b3c12f
-    for (int x = 0; x < offset_ix; x++)
b3c12f
-    {
b3c12f
-        if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("Unable to write digest\n"));
b3c12f
-            rc = RPMRC_FAIL;
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
-        if (Fwrite(&offsets[x].pos, len, 1, fdo) != len)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("Unable to write offset\n"));
b3c12f
-            rc = RPMRC_FAIL;
b3c12f
-            goto exit;
b3c12f
-        }
b3c12f
+    for (int x = 0; x < offset_ix; x++) {
b3c12f
+	if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen) {
b3c12f
+	    fprintf(stderr, _("Unable to write digest\n"));
b3c12f
+	    rc = RPMRC_FAIL;
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
+	if (Fwrite(&offsets[x].pos, len, 1, fdo) != len) {
b3c12f
+	    fprintf(stderr, _("Unable to write offset\n"));
b3c12f
+	    rc = RPMRC_FAIL;
b3c12f
+	    goto exit;
b3c12f
+	}
b3c12f
     }
b3c12f
     validation_pos = (
b3c12f
-        pos + sizeof(offset_ix) + sizeof(diglen) +
b3c12f
-        offset_ix * (diglen + sizeof(rpm_loff_t))
b3c12f
+	pos + sizeof(offset_ix) + sizeof(diglen) +
b3c12f
+	offset_ix * (diglen + sizeof(rpm_loff_t))
b3c12f
     );
b3c12f
 
b3c12f
     ssize_t validation_len = ufdCopy(validationi, fdo);
b3c12f
-    if (validation_len == -1)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("digest table ufdCopy failed\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (validation_len == -1) {
b3c12f
+	fprintf(stderr, _("digest table ufdCopy failed\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     /* add more padding so the last file can be cloned. It doesn't matter that
b3c12f
-       the table and validation etc are in this space. In fact, it's pretty
b3c12f
-       efficient if it is
b3c12f
+     * the table and validation etc are in this space. In fact, it's pretty
b3c12f
+     * efficient if it is.
b3c12f
     */
b3c12f
 
b3c12f
-    pad = pad_to(
b3c12f
-        (
b3c12f
-            validation_pos + validation_len + 2 * sizeof(rpm_loff_t) +
b3c12f
-            sizeof(uint64_t)
b3c12f
-        ),
b3c12f
-        fundamental_block_size
b3c12f
-    );
b3c12f
-    if (Fwrite(zeros, sizeof(char), pad, fdo) != pad)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write final padding\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    pad = pad_to((validation_pos + validation_len + 2 * sizeof(rpm_loff_t) +
b3c12f
+		 sizeof(uint64_t)), fundamental_block_size);
b3c12f
+    if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) {
b3c12f
+	fprintf(stderr, _("Unable to write final padding\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     zeros = _free(zeros);
b3c12f
-    if (Fwrite(&pos, len, 1, fdo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write offset of digest table\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&pos, len, 1, fdo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write offset of digest table\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
-    if (Fwrite(&validation_pos, len, 1, fdo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write offset of validation table\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&validation_pos, len, 1, fdo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write offset of validation table\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
     uint64_t magic = MAGIC;
b3c12f
     len = sizeof(magic);
b3c12f
-    if (Fwrite(&magic, len, 1, fdo) != len)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Unable to write magic\n"));
b3c12f
-        rc = RPMRC_FAIL;
b3c12f
-        goto exit;
b3c12f
+    if (Fwrite(&magic, len, 1, fdo) != len) {
b3c12f
+	fprintf(stderr, _("Unable to write magic\n"));
b3c12f
+	rc = RPMRC_FAIL;
b3c12f
+	goto exit;
b3c12f
     }
b3c12f
 
b3c12f
 exit:
b3c12f
@@ -389,8 +327,7 @@ static rpmRC process_package(FD_t fdi, FD_t validationi)
b3c12f
     return rc;
b3c12f
 }
b3c12f
 
b3c12f
-int main(int argc, char *argv[])
b3c12f
-{
b3c12f
+int main(int argc, char *argv[]) {
b3c12f
     rpmRC rc;
b3c12f
     int cprc = 0;
b3c12f
     uint8_t algos[argc - 1];
b3c12f
@@ -402,118 +339,95 @@ int main(int argc, char *argv[])
b3c12f
     xsetprogname(argv[0]);	/* Portability call -- see system.h */
b3c12f
     rpmReadConfigFiles(NULL, NULL);
b3c12f
 
b3c12f
-    if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help")))
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]);
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help"))) {
b3c12f
+	fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]);
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
-    if (argc == 1)
b3c12f
-    {
b3c12f
-        fprintf(
b3c12f
-            stderr,
b3c12f
-            _("Need at least one DIGESTALGO parameter, e.g. 'SHA256'\n")
b3c12f
-        );
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (argc == 1) {
b3c12f
+	fprintf(stderr,
b3c12f
+		_("Need at least one DIGESTALGO parameter, e.g. 'SHA256'\n"));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
 
b3c12f
-    for (int x = 0; x < (argc - 1); x++)
b3c12f
-    {
b3c12f
-        if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0)
b3c12f
-        {
b3c12f
-            fprintf(
b3c12f
-                stderr,
b3c12f
-                _("Unable to resolve '%s' as a digest algorithm, exiting\n"),
b3c12f
-                argv[x + 1]
b3c12f
-            );
b3c12f
-            exit(EXIT_FAILURE);
b3c12f
-        }
b3c12f
+    for (int x = 0; x < (argc - 1); x++) {
b3c12f
+	if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0)
b3c12f
+	{
b3c12f
+	    fprintf(stderr,
b3c12f
+		    _("Unable to resolve '%s' as a digest algorithm, exiting\n"),
b3c12f
+		    argv[x + 1]);
b3c12f
+	    exit(EXIT_FAILURE);
b3c12f
+	}
b3c12f
     }
b3c12f
 
b3c12f
 
b3c12f
-    if (pipe(mainpipefd) == -1)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Main pipe failure\n"));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (pipe(mainpipefd) == -1) {
b3c12f
+	fprintf(stderr, _("Main pipe failure\n"));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
-    if (pipe(metapipefd) == -1)
b3c12f
-    {
b3c12f
-        fprintf(stderr, _("Meta pipe failure\n"));
b3c12f
-        exit(EXIT_FAILURE);
b3c12f
+    if (pipe(metapipefd) == -1) {
b3c12f
+	fprintf(stderr, _("Meta pipe failure\n"));
b3c12f
+	exit(EXIT_FAILURE);
b3c12f
     }
b3c12f
     cpid = fork();
b3c12f
-    if (cpid == 0)
b3c12f
-    {
b3c12f
-        /* child: digestor */
b3c12f
-        close(mainpipefd[0]);
b3c12f
-        close(metapipefd[0]);
b3c12f
-        FD_t fdi = fdDup(STDIN_FILENO);
b3c12f
-        FD_t fdo = fdDup(mainpipefd[1]);
b3c12f
-        FD_t validationo = fdDup(metapipefd[1]);
b3c12f
-        rc = digestor(fdi, fdo, validationo, algos, argc - 1);
b3c12f
-        Fclose(validationo);
b3c12f
-        Fclose(fdo);
b3c12f
-        Fclose(fdi);
b3c12f
+    if (cpid == 0) {
b3c12f
+	/* child: digestor */
b3c12f
+	close(mainpipefd[0]);
b3c12f
+	close(metapipefd[0]);
b3c12f
+	FD_t fdi = fdDup(STDIN_FILENO);
b3c12f
+	FD_t fdo = fdDup(mainpipefd[1]);
b3c12f
+	FD_t validationo = fdDup(metapipefd[1]);
b3c12f
+	rc = digestor(fdi, fdo, validationo, algos, argc - 1);
b3c12f
+	Fclose(validationo);
b3c12f
+	Fclose(fdo);
b3c12f
+	Fclose(fdi);
b3c12f
     } else {
b3c12f
-        /* parent: main program */
b3c12f
-        close(mainpipefd[1]);
b3c12f
-        close(metapipefd[1]);
b3c12f
-        FD_t fdi = fdDup(mainpipefd[0]);
b3c12f
-        FD_t validationi = fdDup(metapipefd[0]);
b3c12f
-        rc = process_package(fdi, validationi);
b3c12f
-        Fclose(validationi);
b3c12f
-        /* fdi is normally closed through the stacked file gzdi in the function. */
b3c12f
-        /* wait for child process (digestor for stdin) to complete. */
b3c12f
-        if (rc != RPMRC_OK)
b3c12f
-        {
b3c12f
-            if (kill(cpid, SIGTERM) != 0)
b3c12f
-            {
b3c12f
-                fprintf(
b3c12f
-                    stderr,
b3c12f
-                    _("Failed to kill digest process when main process failed: %s\n"),
b3c12f
-                    strerror(errno)
b3c12f
-                );
b3c12f
-            }
b3c12f
-        }
b3c12f
-        w = waitpid(cpid, &wstatus, 0);
b3c12f
-        if (w == -1)
b3c12f
-        {
b3c12f
-            fprintf(stderr, _("waitpid failed\n"));
b3c12f
-            cprc = EXIT_FAILURE;
b3c12f
-        } else if (WIFEXITED(wstatus))
b3c12f
-        {
b3c12f
-            cprc = WEXITSTATUS(wstatus);
b3c12f
-            if (cprc != 0)
b3c12f
-            {
b3c12f
-                fprintf(
b3c12f
-                    stderr,
b3c12f
-                    _("Digest process non-zero exit code %d\n"),
b3c12f
-                    cprc
b3c12f
-                );
b3c12f
-            }
b3c12f
-        } else if (WIFSIGNALED(wstatus))
b3c12f
-        {
b3c12f
-            fprintf(
b3c12f
-                stderr,
b3c12f
-                _("Digest process was terminated with a signal: %d\n"),
b3c12f
-                WTERMSIG(wstatus)
b3c12f
-            );
b3c12f
-            cprc = EXIT_FAILURE;
b3c12f
-        } else
b3c12f
-        {
b3c12f
-            /* don't think this can happen, but covering all bases */
b3c12f
-            fprintf(stderr, _("Unhandled circumstance in waitpid\n"));
b3c12f
-            cprc = EXIT_FAILURE;
b3c12f
-        }
b3c12f
-        if (cprc != EXIT_SUCCESS)
b3c12f
-        {
b3c12f
-            rc = RPMRC_FAIL;
b3c12f
-        }
b3c12f
+	/* parent: main program */
b3c12f
+	close(mainpipefd[1]);
b3c12f
+	close(metapipefd[1]);
b3c12f
+	FD_t fdi = fdDup(mainpipefd[0]);
b3c12f
+	FD_t validationi = fdDup(metapipefd[0]);
b3c12f
+	rc = process_package(fdi, validationi);
b3c12f
+	Fclose(validationi);
b3c12f
+	/* fdi is normally closed through the stacked file gzdi in the
b3c12f
+	 * function.
b3c12f
+	 * Wait for child process (digestor for stdin) to complete.
b3c12f
+	 */
b3c12f
+	if (rc != RPMRC_OK) {
b3c12f
+	    if (kill(cpid, SIGTERM) != 0) {
b3c12f
+		fprintf(stderr,
b3c12f
+		        _("Failed to kill digest process when main process failed: %s\n"),
b3c12f
+			strerror(errno));
b3c12f
+	    }
b3c12f
+	}
b3c12f
+	w = waitpid(cpid, &wstatus, 0);
b3c12f
+	if (w == -1) {
b3c12f
+	    fprintf(stderr, _("waitpid failed\n"));
b3c12f
+	    cprc = EXIT_FAILURE;
b3c12f
+	} else if (WIFEXITED(wstatus)) {
b3c12f
+	    cprc = WEXITSTATUS(wstatus);
b3c12f
+	    if (cprc != 0) {
b3c12f
+		fprintf(stderr,
b3c12f
+			_("Digest process non-zero exit code %d\n"),
b3c12f
+			cprc);
b3c12f
+	    }
b3c12f
+	} else if (WIFSIGNALED(wstatus)) {
b3c12f
+	    fprintf(stderr,
b3c12f
+		    _("Digest process was terminated with a signal: %d\n"),
b3c12f
+		    WTERMSIG(wstatus));
b3c12f
+	    cprc = EXIT_FAILURE;
b3c12f
+	} else {
b3c12f
+	    /* Don't think this can happen, but covering all bases */
b3c12f
+	    fprintf(stderr, _("Unhandled circumstance in waitpid\n"));
b3c12f
+	    cprc = EXIT_FAILURE;
b3c12f
+	}
b3c12f
+	if (cprc != EXIT_SUCCESS) {
b3c12f
+	    rc = RPMRC_FAIL;
b3c12f
+	}
b3c12f
     }
b3c12f
-    if (rc != RPMRC_OK)
b3c12f
-    {
b3c12f
-        /* translate rpmRC into generic failure return code. */
b3c12f
-        return EXIT_FAILURE;
b3c12f
+    if (rc != RPMRC_OK) {
b3c12f
+	/* translate rpmRC into generic failure return code. */
b3c12f
+	return EXIT_FAILURE;
b3c12f
     }
b3c12f
     return EXIT_SUCCESS;
b3c12f
 }
b3c12f
b3c12f
From 19694b76508d83bc83201441ff2c2721d45c4d1d Mon Sep 17 00:00:00 2001
b3c12f
From: Matthew Almond <malmond@fb.com>
b3c12f
Date: Sun, 31 Jan 2021 15:24:25 -0800
b3c12f
Subject: [PATCH 4/4] Fix printf formatting in reflink.c
b3c12f
b3c12f
Some printf format specifiers did not match the sizes of their
b3c12f
arguments. This should eliminate the error messages.
b3c12f
---
b3c12f
 plugins/reflink.c | 2 +-
b3c12f
 1 file changed, 1 insertion(+), 1 deletion(-)
b3c12f
b3c12f
diff --git a/plugins/reflink.c b/plugins/reflink.c
b3c12f
index 9eaa87094..513887604 100644
b3c12f
--- a/plugins/reflink.c
b3c12f
+++ b/plugins/reflink.c
b3c12f
@@ -316,7 +316,7 @@ static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path,
b3c12f
 		return RPMRC_FAIL;
b3c12f
 	    }
b3c12f
 	    rpmlog(RPMLOG_DEBUG,
b3c12f
-	           _("reflink: Reflinking %lu bytes at %lu to %s orig size=%lu, file=%ld\n"),
b3c12f
+	           _("reflink: Reflinking %llu bytes at %llu to %s orig size=%ld, file=%lld\n"),
b3c12f
 		   fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
b3c12f
 	    rc = ioctl(dst, FICLONERANGE, &fcr);
b3c12f
 	    if (rc) {