dcavalca / rpms / rpm

Forked from rpms/rpm 2 years ago
Clone
Blob Blame History Raw
diff --git a/Makefile.am b/Makefile.am
index e5c75d7b4..288668819 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -99,7 +99,7 @@ pkginclude_HEADERS += build/rpmfc.h
 pkginclude_HEADERS += build/rpmspec.h
 
 
-bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec
+bin_PROGRAMS =		rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec rpm2extents
 if WITH_ARCHIVE
 bin_PROGRAMS += 	rpm2archive 
 endif
@@ -154,6 +154,10 @@ rpm2cpio_SOURCES =	rpm2cpio.c debug.h system.h
 rpm2cpio_LDADD =	lib/librpm.la rpmio/librpmio.la
 rpm2cpio_LDADD +=	@WITH_POPT_LIB@
 
+rpm2extents_SOURCES =	rpm2extents.c debug.h system.h
+rpm2extents_LDADD =	lib/librpm.la rpmio/librpmio.la
+rpm2extents_LDADD +=	@WITH_POPT_LIB@
+
 rpm2archive_SOURCES =	rpm2archive.c debug.h system.h
 rpm2archive_LDADD =	lib/librpm.la rpmio/librpmio.la
 rpm2archive_LDADD +=	@WITH_POPT_LIB@ @WITH_ARCHIVE_LIB@
diff --git a/lib/depends.c b/lib/depends.c
index 30234df3d..8998afcd3 100644
--- a/lib/depends.c
+++ b/lib/depends.c
@@ -81,6 +81,8 @@ static rpmRC headerCheckPayloadFormat(Header h) {
      */
     if (!payloadfmt) return rc;
 
+    if (rstreq(payloadfmt, "clon")) return rc;
+
     if (!rstreq(payloadfmt, "cpio")) {
         char *nevra = headerGetAsString(h, RPMTAG_NEVRA);
         if (payloadfmt && rstreq(payloadfmt, "drpm")) {
diff --git a/lib/fsm.c b/lib/fsm.c
index 935a0a5c6..feda3750c 100644
--- a/lib/fsm.c
+++ b/lib/fsm.c
@@ -19,6 +19,7 @@
 
 #include "rpmio/rpmio_internal.h"	/* fdInit/FiniDigest */
 #include "lib/fsm.h"
+#include "lib/rpmlib.h"
 #include "lib/rpmte_internal.h"	/* XXX rpmfs */
 #include "lib/rpmplugins.h"	/* rpm plugins hooks */
 #include "lib/rpmug.h"
@@ -52,6 +53,7 @@ struct filedata_s {
     int stage;
     int setmeta;
     int skip;
+    int plugin_contents;
     rpmFileAction action;
     const char *suffix;
     char *fpath;
@@ -891,6 +893,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
     struct filedata_s *fdata = xcalloc(fc, sizeof(*fdata));
     struct filedata_s *firstlink = NULL;
 
+    Header h = rpmteHeader(te);
+    const char *payloadfmt = headerGetString(h, RPMTAG_PAYLOADFORMAT);
+    int cpio = 1;
+
+    if (payloadfmt && rstreq(payloadfmt, "clon")) {
+	cpio = 0;
+    }
+
     /* transaction id used for temporary path suffix while installing */
     rasprintf(&tid, ";%08x", (unsigned)rpmtsGetTid(ts));
 
@@ -911,12 +921,23 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
 	/* Remap file perms, owner, and group. */
 	rc = rpmfiStat(fi, 1, &fp->sb);
 
-	setFileState(fs, fx);
 	fsmDebug(fp->fpath, fp->action, &fp->sb);
 
 	/* Run fsm file pre hook for all plugins */
 	rc = rpmpluginsCallFsmFilePre(plugins, fi, fp->fpath,
 				      fp->sb.st_mode, fp->action);
+	fp->plugin_contents = 0;
+	switch (rc) {
+	case RPMRC_OK:
+	    setFileState(fs, fx);
+	    break;
+	case RPMRC_PLUGIN_CONTENTS:
+	    fp->plugin_contents = 1;
+	    // reduce reads on cpio to this value. Could be zero if
+	    // this is from a hard link.
+	    rc = RPMRC_OK;
+	    break;
+	}
 	fp->stage = FILE_PRE;
     }
     fi = rpmfiFree(fi);
@@ -924,10 +945,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
     if (rc)
 	goto exit;
 
-    fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
-    if (fi == NULL) {
-        rc = RPMERR_BAD_MAGIC;
-        goto exit;
+    if (cpio) {
+	fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE);
+	if (fi == NULL) {
+	    rc = RPMERR_BAD_MAGIC;
+	    goto exit;
+	}
+    } else {
+	fi = rpmfilesIter(files, RPMFI_ITER_FWD);
     }
 
     /* Detect and create directories not explicitly in package. */
@@ -969,8 +994,12 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
 
             if (S_ISREG(fp->sb.st_mode)) {
 		if (rc == RPMERR_ENOENT) {
-		    rc = fsmMkfile(fi, fp, files, psm, nodigest,
-				   &firstlink, &firstlinkfile);
+		    if(fp->plugin_contents) {
+			rc = RPMRC_OK;
+		    }else {
+			rc = fsmMkfile(fi, fp, files, psm, nodigest,
+			    &firstlink, &firstlinkfile);
+		    }
 		}
             } else if (S_ISDIR(fp->sb.st_mode)) {
                 if (rc == RPMERR_ENOENT) {
@@ -1078,6 +1107,7 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
     rpmswAdd(rpmtsOp(ts, RPMTS_OP_DIGEST), fdOp(payload, FDSTAT_DIGEST));
 
 exit:
+    h = headerFree(h);
     fi = rpmfiFree(fi);
     Fclose(payload);
     free(tid);
diff --git a/lib/package.c b/lib/package.c
index 281275029..90bd0d8a7 100644
--- a/lib/package.c
+++ b/lib/package.c
@@ -404,5 +404,45 @@ rpmRC rpmReadPackageFile(rpmts ts, FD_t fd, const char * fn, Header * hdrp)
     return rc;
 }
 
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp)
+{
+    char *msg = NULL;
+    hdrblob sigblob = hdrblobCreate();
+    hdrblob blob = hdrblobCreate();
+    Header h = NULL;
+    Header sigh = NULL;
+
+    rpmRC rc = rpmLeadRead(fd, &msg);
+    if (rc != RPMRC_OK)
+	goto exit;
+
+    rc = hdrblobRead(fd, 1, 0, RPMTAG_HEADERSIGNATURES, sigblob, &msg);
+    if (rc != RPMRC_OK)
+	goto exit;
+
+    rc = hdrblobRead(fd, 1, 1, RPMTAG_HEADERIMMUTABLE, blob, &msg);
+    if (rc != RPMRC_OK)
+	goto exit;
+
+    rc = hdrblobImport(sigblob, 0, &sigh, &msg);
+    if (rc)
+	goto exit;
 
+    rc = hdrblobImport(blob, 0, &h, &msg);
+    if (rc)
+	goto exit;
 
+    *sigp = headerLink(sigh);
+    *hdrp = headerLink(h);
+
+exit:
+    if (rc != RPMRC_OK && msg)
+	rpmlog(RPMLOG_ERR, "%s: %s\n", Fdescr(fd), msg);
+    hdrblobFree(sigblob);
+    hdrblobFree(blob);
+    headerFree(sigh);
+    headerFree(h);
+    free(msg);
+
+    return rc;
+}
diff --git a/lib/rpmlib.h b/lib/rpmlib.h
index 0879d04e5..a09ba0daf 100644
--- a/lib/rpmlib.h
+++ b/lib/rpmlib.h
@@ -155,6 +155,15 @@ rpmRC rpmReadHeader(rpmts ts, FD_t fd, Header *hdrp, char ** msg);
 rpmRC rpmReadPackageFile(rpmts ts, FD_t fd,
 		const char * fn, Header * hdrp);
 
+/** \ingroup header
+ * Return package signature, header from file handle, no verification.
+ * @param fd		file handle
+ * @param[out] sigp		address of header (or NULL)
+ * @param[out] hdrp		address of header (or NULL)
+ * @return		RPMRC_OK on success
+ */
+rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp);
+
 /** \ingroup rpmtrans
  * Install source package.
  * @param ts		transaction set
diff --git a/lib/rpmplugins.c b/lib/rpmplugins.c
index 62d75c4cf..3da3097af 100644
--- a/lib/rpmplugins.c
+++ b/lib/rpmplugins.c
@@ -356,13 +356,28 @@ rpmRC rpmpluginsCallFsmFilePre(rpmPlugins plugins, rpmfi fi, const char *path,
     plugin_fsm_file_pre_func hookFunc;
     int i;
     rpmRC rc = RPMRC_OK;
+    rpmRC hook_rc;
 
     for (i = 0; i < plugins->count; i++) {
 	rpmPlugin plugin = plugins->plugins[i];
 	RPMPLUGINS_SET_HOOK_FUNC(fsm_file_pre);
-	if (hookFunc && hookFunc(plugin, fi, path, file_mode, op) == RPMRC_FAIL) {
-	    rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
-	    rc = RPMRC_FAIL;
+	if (hookFunc) {
+	    hook_rc = hookFunc(plugin, fi, path, file_mode, op);
+	    if (hook_rc == RPMRC_FAIL) {
+		rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name);
+		rc = RPMRC_FAIL;
+	    } else if (hook_rc == RPMRC_PLUGIN_CONTENTS && rc != RPMRC_FAIL) {
+		if (rc == RPMRC_PLUGIN_CONTENTS) {
+		    /* Another plugin already said it'd handle contents. It's
+		     * undefined how these would combine, so treat this as a
+		     * failure condition.
+		    */
+		    rc = RPMRC_FAIL;
+		} else {
+		    /* Plugin will handle content */
+		    rc = RPMRC_PLUGIN_CONTENTS;
+		}
+	    }
 	}
     }
 
diff --git a/lib/rpmte.c b/lib/rpmte.c
index 3663604e7..d43dc41ad 100644
--- a/lib/rpmte.c
+++ b/lib/rpmte.c
@@ -423,6 +423,11 @@ FD_t rpmteSetFd(rpmte te, FD_t fd)
     return NULL;
 }
 
+FD_t rpmteFd(rpmte te)
+{
+    return (te != NULL ? te->fd : NULL);
+}
+
 fnpyKey rpmteKey(rpmte te)
 {
     return (te != NULL ? te->key : NULL);
diff --git a/lib/rpmte.h b/lib/rpmte.h
index 81acf7a19..6fc0a9f91 100644
--- a/lib/rpmte.h
+++ b/lib/rpmte.h
@@ -209,6 +209,8 @@ const char * rpmteNEVR(rpmte te);
  */
 const char * rpmteNEVRA(rpmte te);
 
+FD_t rpmteFd(rpmte te);
+
 /** \ingroup rpmte
  * Retrieve key from transaction element.
  * @param te		transaction element
diff --git a/lib/rpmtypes.h b/lib/rpmtypes.h
index e8e69b506..af2611e9e 100644
--- a/lib/rpmtypes.h
+++ b/lib/rpmtypes.h
@@ -106,7 +106,8 @@ typedef	enum rpmRC_e {
     RPMRC_NOTFOUND	= 1,	/*!< Generic not found code. */
     RPMRC_FAIL		= 2,	/*!< Generic failure code. */
     RPMRC_NOTTRUSTED	= 3,	/*!< Signature is OK, but key is not trusted. */
-    RPMRC_NOKEY		= 4	/*!< Public key is unavailable. */
+    RPMRC_NOKEY		= 4,	/*!< Public key is unavailable. */
+    RPMRC_PLUGIN_CONTENTS = 5     /*!< fsm_file_pre plugin is handling content */
 } rpmRC;
 
 #ifdef __cplusplus
diff --git a/macros.in b/macros.in
index e90cefa9a..363252b0f 100644
--- a/macros.in
+++ b/macros.in
@@ -1143,6 +1143,7 @@ package or when debugging this package.\
 
 # Transaction plugin macros
 %__plugindir		%{_libdir}/rpm-plugins
+%__transaction_reflink		%{__plugindir}/reflink.so
 %__transaction_systemd_inhibit	%{__plugindir}/systemd_inhibit.so
 %__transaction_selinux		%{__plugindir}/selinux.so
 %__transaction_syslog		%{__plugindir}/syslog.so
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 3a929d0ce..ad0d3bce7 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -42,6 +42,10 @@ prioreset_la_SOURCES = prioreset.c
 prioreset_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
 plugins_LTLIBRARIES += prioreset.la
 
+reflink_la_SOURCES = reflink.c
+reflink_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
+plugins_LTLIBRARIES += reflink.la
+
 syslog_la_SOURCES = syslog.c
 syslog_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la
 plugins_LTLIBRARIES += syslog.la
diff --git a/plugins/reflink.c b/plugins/reflink.c
new file mode 100644
index 000000000..513887604
--- /dev/null
+++ b/plugins/reflink.c
@@ -0,0 +1,375 @@
+#include "system.h"
+
+#include <errno.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#if defined(__linux__)
+#include <linux/fs.h>        /* For FICLONE */
+#endif
+
+#include <rpm/rpmlog.h>
+#include "lib/rpmlib.h"
+#include "lib/rpmplugin.h"
+#include "lib/rpmte_internal.h"
+#include <rpm/rpmfileutil.h>
+#include "rpmio/rpmio_internal.h"
+
+
+#include "debug.h"
+
+#include <sys/ioctl.h>
+
+/* use hash table to remember inode -> ix (for rpmfilesFN(ix)) lookups */
+#undef HASHTYPE
+#undef HTKEYTYPE
+#undef HTDATATYPE
+#define HASHTYPE inodeIndexHash
+#define HTKEYTYPE rpm_ino_t
+#define HTDATATYPE int
+#include "lib/rpmhash.H"
+#include "lib/rpmhash.C"
+
+/* We use this in find to indicate a key wasn't found. This is an
+ * unrecoverable error, but we can at least show a decent error. 0 is never a
+ * valid offset because it's the offset of the start of the file.
+ */
+#define NOT_FOUND 0
+
+#define BUFFER_SIZE (1024 * 128)
+
+/* magic value at end of file (64 bits) that indicates this is a transcoded
+ * rpm.
+ */
+#define MAGIC 3472329499408095051
+
+struct reflink_state_s {
+    /* Stuff that's used across rpms */
+    long fundamental_block_size;
+    char *buffer;
+
+    /* stuff that's used/updated per psm */
+    uint32_t keys, keysize;
+
+    /* table for current rpm, keys * (keysize + sizeof(rpm_loff_t)) */
+    unsigned char *table;
+    FD_t fd;
+    rpmfiles files;
+    inodeIndexHash inodeIndexes;
+};
+
+typedef struct reflink_state_s * reflink_state;
+
+static int inodeCmp(rpm_ino_t a, rpm_ino_t b)
+{
+    return (a != b);
+}
+
+static unsigned int inodeId(rpm_ino_t a)
+{
+    /* rpm_ino_t is uint32_t so maps safely to unsigned int */
+    return (unsigned int)a;
+}
+
+static rpmRC reflink_init(rpmPlugin plugin, rpmts ts) {
+    reflink_state state = rcalloc(1, sizeof(struct reflink_state_s));
+
+    /* IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset
+     * and length arguments to be aligned to the fundamental block size.
+     *
+     * The value of "fundamental block size" is directly related to the
+     * system's page size, so we should use that.
+     */
+    state->fundamental_block_size = sysconf(_SC_PAGESIZE);
+    state->buffer = rcalloc(1, BUFFER_SIZE);
+    rpmPluginSetData(plugin, state);
+
+    return RPMRC_OK;
+}
+
+static void reflink_cleanup(rpmPlugin plugin) {
+    reflink_state state = rpmPluginGetData(plugin);
+    free(state->buffer);
+    free(state);
+}
+
+static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
+    reflink_state state = rpmPluginGetData(plugin);
+    state->fd = rpmteFd(te);
+    if (state->fd == 0) {
+	rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
+	return RPMRC_OK;
+    }
+    rpm_loff_t current = Ftell(state->fd);
+    uint64_t magic;
+    if (Fseek(state->fd, -(sizeof(magic)), SEEK_END) < 0) {
+	rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
+	    /* yes this gets a bit repetitive */
+	    rpmlog(RPMLOG_ERR,
+		 _("reflink: unable to seek back to original location\n"));
+	}
+	return RPMRC_FAIL;
+    }
+    size_t len = sizeof(magic);
+    if (Fread(&magic, len, 1, state->fd) != len) {
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
+	    rpmlog(RPMLOG_ERR,
+		   _("reflink: unable to seek back to original location\n"));
+	}
+	return RPMRC_FAIL;
+    }
+    if (magic != MAGIC) {
+	rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
+	if (Fseek(state->fd, current, SEEK_SET) < 0) {
+	    rpmlog(RPMLOG_ERR,
+		   _("reflink: unable to seek back to original location\n"));
+	    return RPMRC_FAIL;
+	}
+	return RPMRC_OK;
+    }
+    rpmlog(RPMLOG_DEBUG, _("reflink: *is* transcoded\n"));
+    Header h = rpmteHeader(te);
+
+    /* replace/add header that main fsm.c can read */
+    headerDel(h, RPMTAG_PAYLOADFORMAT);
+    headerPutString(h, RPMTAG_PAYLOADFORMAT, "clon");
+    headerFree(h);
+    state->files = rpmteFiles(te);
+    /* tail of file contains offset_table, offset_checksums then magic */
+    if (Fseek(state->fd, -(sizeof(rpm_loff_t) * 2 + sizeof(magic)), SEEK_END) < 0) {
+	rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"),
+	       state->fd);
+	return RPMRC_FAIL;
+    }
+    rpm_loff_t table_start;
+    len = sizeof(table_start);
+    if (Fread(&table_start, len, 1, state->fd) != len) {
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
+	return RPMRC_FAIL;
+    }
+    if (Fseek(state->fd, table_start, SEEK_SET) < 0) {
+	rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
+	return RPMRC_FAIL;
+    }
+    len = sizeof(state->keys);
+    if (Fread(&state->keys, len, 1, state->fd) != len) {
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
+	return RPMRC_FAIL;
+    }
+    len = sizeof(state->keysize);
+    if (Fread(&state->keysize, len, 1, state->fd) != len) {
+	rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
+	return RPMRC_FAIL;
+    }
+    rpmlog(
+	RPMLOG_DEBUG,
+	_("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"),
+	table_start, state->keys, state->keysize
+    );
+    /* now get digest table if there is a reason to have one. */
+    if (state->keys == 0 || state->keysize == 0) {
+	/* no files (or no digests(!)) */
+	state->table = NULL;
+    } else {
+	int table_size = state->keys * (state->keysize + sizeof(rpm_loff_t));
+	state->table = rcalloc(1, table_size);
+	if (Fread(state->table, table_size, 1, state->fd) != table_size) {
+	    rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
+	    return RPMRC_FAIL;
+	}
+	state->inodeIndexes = inodeIndexHashCreate(
+	    state->keys, inodeId, inodeCmp, NULL, NULL
+	);
+    }
+
+    /* Seek back to original location.
+     * Might not be needed if we seek to offset immediately
+     */
+    if (Fseek(state->fd, current, SEEK_SET) < 0) {
+	rpmlog(RPMLOG_ERR,
+	       _("reflink: unable to seek back to original location\n"));
+	return RPMRC_FAIL;
+    }
+    return RPMRC_OK;
+}
+
+static rpmRC reflink_psm_post(rpmPlugin plugin, rpmte te, int res)
+{
+    reflink_state state = rpmPluginGetData(plugin);
+    state->files = rpmfilesFree(state->files);
+    if (state->table) {
+	free(state->table);
+	state->table = NULL;
+    }
+    if (state->inodeIndexes) {
+	inodeIndexHashFree(state->inodeIndexes);
+	state->inodeIndexes = NULL;
+    }
+    return RPMRC_OK;
+}
+
+
+/* have a prototype, warnings system */
+rpm_loff_t find(const unsigned char *digest, reflink_state state);
+
+rpm_loff_t find(const unsigned char *digest, reflink_state state) {
+# if defined(__GNUC__)
+    /* GCC nested function because bsearch's comparison function can't access
+     * state-keysize otherwise
+     */
+    int cmpdigest(const void *k1, const void *k2) {
+	rpmlog(RPMLOG_DEBUG, _("reflink: cmpdigest k1=%p k2=%p\n"), k1, k2);
+	return memcmp(k1, k2, state->keysize);
+    }
+# endif
+    rpmlog(RPMLOG_DEBUG,
+	   _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"),
+	   digest, state->table, state->keys,
+	   state->keysize + sizeof(rpm_loff_t));
+    char *entry = bsearch(digest, state->table, state->keys,
+			  state->keysize + sizeof(rpm_loff_t), cmpdigest);
+    if (entry == NULL) {
+	return NOT_FOUND;
+    }
+    rpm_loff_t offset = *(rpm_loff_t *)(entry + state->keysize);
+    return offset;
+}
+
+static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path,
+                                  mode_t file_mode, rpmFsmOp op)
+{
+    struct file_clone_range fcr;
+    rpm_loff_t size;
+    int dst, rc;
+    int *hlix;
+
+    reflink_state state = rpmPluginGetData(plugin);
+    if (state->table == NULL) {
+	/* no table means rpm is not in reflink format, so leave. Now. */
+	return RPMRC_OK;
+    }
+    if (op == FA_TOUCH) {
+	/* we're not overwriting an existing file. */
+	return RPMRC_OK;
+    }
+    fcr.dest_offset = 0;
+    if (S_ISREG(file_mode) && !(rpmfiFFlags(fi) & RPMFILE_GHOST)) {
+	rpm_ino_t inode = rpmfiFInode(fi);
+	/* check for hard link entry in table. GetEntry overwrites hlix with
+	 * the address of the first match.
+	 */
+	if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL,
+	                           NULL)) {
+	    /* entry is in table, use hard link */
+	    char *fn = rpmfilesFN(state->files, hlix[0]);
+	    if (link(fn, path) != 0) {
+		rpmlog(RPMLOG_ERR,
+		       _("reflink: Unable to hard link %s -> %s due to %s\n"),
+		       fn, path, strerror(errno));
+		free(fn);
+		return RPMRC_FAIL;
+	    }
+	    free(fn);
+	    return RPMRC_PLUGIN_CONTENTS;
+	}
+	/* if we didn't hard link, then we'll track this inode as being
+	 * created soon
+	 */
+	if (rpmfiFNlink(fi) > 1) {
+	    /* minor optimization: only store files with more than one link */
+	    inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi));
+	}
+	/* derived from wfd_open in fsm.c */
+	mode_t old_umask = umask(0577);
+	dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
+	umask(old_umask);
+	if (dst == -1) {
+	    rpmlog(RPMLOG_ERR,
+		   _("reflink: Unable to open %s for writing due to %s, flags = %x\n"),
+		   path, strerror(errno), rpmfiFFlags(fi));
+	    return RPMRC_FAIL;
+	}
+	size = rpmfiFSize(fi);
+	if (size > 0) {
+	    /* round src_length down to fundamental_block_size multiple */
+	    fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size;
+	    if ((size % state->fundamental_block_size) > 0) {
+		/* round up to next fundamental_block_size. We expect the data
+		 * in the rpm to be similarly padded.
+		 */
+		fcr.src_length += state->fundamental_block_size;
+	    }
+	    fcr.src_fd = Fileno(state->fd);
+	    if (fcr.src_fd == -1) {
+		close(dst);
+		rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n"));
+		return RPMRC_FAIL;
+	    }
+	    fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state);
+	    if (fcr.src_offset == NOT_FOUND) {
+		close(dst);
+		rpmlog(RPMLOG_ERR, _("reflink: digest not found\n"));
+		return RPMRC_FAIL;
+	    }
+	    rpmlog(RPMLOG_DEBUG,
+	           _("reflink: Reflinking %llu bytes at %llu to %s orig size=%ld, file=%lld\n"),
+		   fcr.src_length, fcr.src_offset, path, size, fcr.src_fd);
+	    rc = ioctl(dst, FICLONERANGE, &fcr);
+	    if (rc) {
+		rpmlog(RPMLOG_WARNING,
+		       _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"),
+		       path, rc, errno, strerror(errno));
+		if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) {
+		    close(dst);
+		    rpmlog(RPMLOG_ERR,
+			   _("reflink: unable to seek on copying bits\n"));
+		    return RPMRC_FAIL;
+		}
+		rpm_loff_t left = size;
+		size_t len, read, written;
+		while (left) {
+		    len = (left > BUFFER_SIZE ? BUFFER_SIZE : left);
+		    read = Fread(state->buffer, len, 1, state->fd);
+		    if (read != len) {
+			close(dst);
+			rpmlog(RPMLOG_ERR,
+			       _("reflink: short read on copying bits\n"));
+			return RPMRC_FAIL;
+		    }
+		    written = write(dst, state->buffer, len);
+		    if (read != written) {
+			close(dst);
+			rpmlog(RPMLOG_ERR,
+			       _("reflink: short write on copying bits\n"));
+			return RPMRC_FAIL;
+		    }
+		    left -= len;
+		}
+	    } else {
+		/* reflink worked, so truncate */
+		rc = ftruncate(dst, size);
+		if (rc) {
+		    rpmlog(RPMLOG_ERR,
+			   _("reflink: Unable to truncate %s to %ld due to %s\n"),
+			   path, size, strerror(errno));
+		     return RPMRC_FAIL;
+		}
+	    }
+	}
+	close(dst);
+	return RPMRC_PLUGIN_CONTENTS;
+    }
+    return RPMRC_OK;
+}
+
+struct rpmPluginHooks_s reflink_hooks = {
+    .init = reflink_init,
+    .cleanup = reflink_cleanup,
+    .psm_pre = reflink_psm_pre,
+    .psm_post = reflink_psm_post,
+    .fsm_file_pre = reflink_fsm_file_pre,
+};
diff --git a/rpm2extents.c b/rpm2extents.c
new file mode 100644
index 000000000..c111be0a2
--- /dev/null
+++ b/rpm2extents.c
@@ -0,0 +1,433 @@
+/* rpm2extents: convert payload to inline extents */
+
+#include "system.h"
+
+#include <rpm/rpmlib.h>		/* rpmReadPackageFile .. */
+#include <rpm/rpmfi.h>
+#include <rpm/rpmtag.h>
+#include <rpm/rpmio.h>
+#include <rpm/rpmpgp.h>
+
+#include <rpm/rpmts.h>
+#include "lib/rpmlead.h"
+#include "lib/signature.h"
+#include "lib/header_internal.h"
+#include "rpmio/rpmio_internal.h"
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+
+#include "debug.h"
+
+/* hash of void * (pointers) to file digests to offsets within output.
+ * The length of the key depends on what the FILEDIGESTALGO is.
+ */
+#undef HASHTYPE
+#undef HTKEYTYPE
+#undef HTDATATYPE
+#define HASHTYPE digestSet
+#define HTKEYTYPE const unsigned char *
+#include "lib/rpmhash.H"
+#include "lib/rpmhash.C"
+
+/* magic value at end of file (64 bits) that indicates this is a transcoded
+ * rpm.
+ */
+#define MAGIC 3472329499408095051
+
+struct digestoffset {
+    const unsigned char * digest;
+    rpm_loff_t pos;
+};
+
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit);
+
+rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit)
+{
+    return (unit - (pos % unit)) % unit;
+}
+
+static int digestor(
+    FD_t fdi,
+    FD_t fdo,
+    FD_t validationo,
+    uint8_t algos[],
+    uint32_t algos_len
+)
+{
+    ssize_t fdilength;
+    const char *filedigest, *algo_name;
+    size_t filedigest_len, len;
+    uint32_t algo_name_len, algo_digest_len;
+    int algo;
+    rpmRC rc = RPMRC_FAIL;
+
+    for (algo = 0; algo < algos_len; algo++) {
+	fdInitDigest(fdi, algos[algo], 0);
+    }
+    fdilength = ufdCopy(fdi, fdo);
+    if (fdilength == -1) {
+	fprintf(stderr, _("digest cat failed\n"));
+	goto exit;
+    }
+
+    len = sizeof(fdilength);
+    if (Fwrite(&fdilength, len, 1, validationo) != len) {
+	fprintf(stderr, _("Unable to write input length %zd\n"), fdilength);
+	goto exit;
+    }
+    len = sizeof(algos_len);
+    if (Fwrite(&algos_len, len, 1, validationo) != len) {
+	fprintf(stderr, _("Unable to write number of validation digests\n"));
+	goto exit;
+    }
+    for (algo = 0; algo < algos_len; algo++) {
+	fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0);
+
+	algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]);
+	algo_name_len = (uint32_t)strlen(algo_name);
+	algo_digest_len = (uint32_t)filedigest_len;
+
+	len = sizeof(algo_name_len);
+	if (Fwrite(&algo_name_len, len, 1, validationo) != len) {
+	    fprintf(stderr,
+		    _("Unable to write validation algo name length\n"));
+	    goto exit;
+	}
+	len = sizeof(algo_digest_len);
+	if (Fwrite(&algo_digest_len, len, 1, validationo) != len) {
+	    fprintf(stderr,
+		    _("Unable to write number of bytes for validation digest\n"));
+	     goto exit;
+	}
+	if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len) {
+	    fprintf(stderr, _("Unable to write validation algo name\n"));
+	    goto exit;
+	}
+	if (Fwrite(filedigest, algo_digest_len, 1, validationo ) != algo_digest_len) {
+	    fprintf(stderr,
+		    _("Unable to write validation digest value %u, %zu\n"),
+		    algo_digest_len, filedigest_len);
+	    goto exit;
+	}
+    }
+    rc = RPMRC_OK;
+exit:
+    return rc;
+}
+
+static rpmRC process_package(FD_t fdi, FD_t validationi)
+{
+    uint32_t diglen;
+    /* GNU C extension: can use diglen from outer context */
+    int digestSetCmp(const unsigned char * a, const unsigned char * b) {
+	return memcmp(a, b, diglen);
+    }
+
+    unsigned int digestSetHash(const unsigned char * digest) {
+        /* assumes sizeof(unsigned int) < diglen */
+        return *(unsigned int *)digest;
+    }
+
+    int digestoffsetCmp(const void * a, const void * b) {
+	return digestSetCmp(
+	    ((struct digestoffset *)a)->digest,
+	    ((struct digestoffset *)b)->digest
+	);
+    }
+
+    FD_t fdo;
+    FD_t gzdi;
+    Header h, sigh;
+    long fundamental_block_size = sysconf(_SC_PAGESIZE);
+    rpmRC rc = RPMRC_OK;
+    rpm_mode_t mode;
+    char *rpmio_flags = NULL, *zeros;
+    const unsigned char *digest;
+    rpm_loff_t pos, size, pad, validation_pos;
+    uint32_t offset_ix = 0;
+    size_t len;
+    int next = 0;
+
+    fdo = fdDup(STDOUT_FILENO);
+
+    if (rpmReadPackageRaw(fdi, &sigh, &h)) {
+	fprintf(stderr, _("Error reading package\n"));
+	exit(EXIT_FAILURE);
+    }
+
+    if (rpmLeadWrite(fdo, h))
+    {
+	fprintf(stderr, _("Unable to write package lead: %s\n"),
+		Fstrerror(fdo));
+	exit(EXIT_FAILURE);
+    }
+
+    if (rpmWriteSignature(fdo, sigh)) {
+	fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo));
+	exit(EXIT_FAILURE);
+    }
+
+    if (headerWrite(fdo, h, HEADER_MAGIC_YES)) {
+	fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo));
+	exit(EXIT_FAILURE);
+    }
+
+    /* Retrieve payload size and compression type. */
+    {
+	const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR);
+	rpmio_flags = rstrscat(NULL, "r.", compr ? compr : "gzip", NULL);
+    }
+
+    gzdi = Fdopen(fdi, rpmio_flags);	/* XXX gzdi == fdi */
+    free(rpmio_flags);
+
+    if (gzdi == NULL) {
+	fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi));
+	exit(EXIT_FAILURE);
+    }
+
+    rpmfiles files = rpmfilesNew(NULL, h, 0, RPMFI_KEEPHEADER);
+    rpmfi fi = rpmfiNewArchiveReader(gzdi, files,
+				     RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST);
+
+    /* this is encoded in the file format, so needs to be fixed size (for
+     * now?)
+     */
+    diglen = (uint32_t)rpmDigestLength(rpmfiDigestAlgo(fi));
+    digestSet ds = digestSetCreate(rpmfiFC(fi), digestSetHash, digestSetCmp,
+				   NULL);
+    struct digestoffset offsets[rpmfiFC(fi)];
+    pos = RPMLEAD_SIZE + headerSizeof(sigh, HEADER_MAGIC_YES);
+
+    /* main headers are aligned to 8 byte boundry */
+    pos += pad_to(pos, 8);
+    pos += headerSizeof(h, HEADER_MAGIC_YES);
+
+    zeros = xcalloc(fundamental_block_size, 1);
+
+    while (next >= 0) {
+	next = rpmfiNext(fi);
+	if (next == RPMERR_ITER_END) {
+	    rc = RPMRC_OK;
+	    break;
+	}
+	mode = rpmfiFMode(fi);
+	if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi)) {
+	    /* not a regular file, or the archive doesn't contain any content
+	     * for this entry.
+	    */
+	    continue;
+	}
+	digest = rpmfiFDigest(fi, NULL, NULL);
+	if (digestSetGetEntry(ds, digest, NULL)) {
+	    /* This specific digest has already been included, so skip it. */
+	    continue;
+	}
+	pad = pad_to(pos, fundamental_block_size);
+	if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) {
+	    fprintf(stderr, _("Unable to write padding\n"));
+	    rc = RPMRC_FAIL;
+	    goto exit;
+	}
+	/* round up to next fundamental_block_size */
+	pos += pad;
+	digestSetAddEntry(ds, digest);
+	offsets[offset_ix].digest = digest;
+	offsets[offset_ix].pos = pos;
+	offset_ix++;
+	size = rpmfiFSize(fi);
+	rc = rpmfiArchiveReadToFile(fi, fdo, 0);
+	if (rc != RPMRC_OK) {
+	    fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc);
+	    goto exit;
+	}
+	pos += size;
+    }
+    Fclose(gzdi);	/* XXX gzdi == fdi */
+
+    qsort(offsets, (size_t)offset_ix, sizeof(struct digestoffset),
+	  digestoffsetCmp);
+
+    len = sizeof(offset_ix);
+    if (Fwrite(&offset_ix, len, 1, fdo) != len) {
+	fprintf(stderr, _("Unable to write length of table\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    len = sizeof(diglen);
+    if (Fwrite(&diglen, len, 1, fdo) != len) {
+	fprintf(stderr, _("Unable to write length of digest\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    len = sizeof(rpm_loff_t);
+    for (int x = 0; x < offset_ix; x++) {
+	if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen) {
+	    fprintf(stderr, _("Unable to write digest\n"));
+	    rc = RPMRC_FAIL;
+	    goto exit;
+	}
+	if (Fwrite(&offsets[x].pos, len, 1, fdo) != len) {
+	    fprintf(stderr, _("Unable to write offset\n"));
+	    rc = RPMRC_FAIL;
+	    goto exit;
+	}
+    }
+    validation_pos = (
+	pos + sizeof(offset_ix) + sizeof(diglen) +
+	offset_ix * (diglen + sizeof(rpm_loff_t))
+    );
+
+    ssize_t validation_len = ufdCopy(validationi, fdo);
+    if (validation_len == -1) {
+	fprintf(stderr, _("digest table ufdCopy failed\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    /* add more padding so the last file can be cloned. It doesn't matter that
+     * the table and validation etc are in this space. In fact, it's pretty
+     * efficient if it is.
+    */
+
+    pad = pad_to((validation_pos + validation_len + 2 * sizeof(rpm_loff_t) +
+		 sizeof(uint64_t)), fundamental_block_size);
+    if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) {
+	fprintf(stderr, _("Unable to write final padding\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    zeros = _free(zeros);
+    if (Fwrite(&pos, len, 1, fdo) != len) {
+	fprintf(stderr, _("Unable to write offset of digest table\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    if (Fwrite(&validation_pos, len, 1, fdo) != len) {
+	fprintf(stderr, _("Unable to write offset of validation table\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+    uint64_t magic = MAGIC;
+    len = sizeof(magic);
+    if (Fwrite(&magic, len, 1, fdo) != len) {
+	fprintf(stderr, _("Unable to write magic\n"));
+	rc = RPMRC_FAIL;
+	goto exit;
+    }
+
+exit:
+    rpmfilesFree(files);
+    rpmfiFree(fi);
+    headerFree(h);
+    return rc;
+}
+
+int main(int argc, char *argv[]) {
+    rpmRC rc;
+    int cprc = 0;
+    uint8_t algos[argc - 1];
+    int mainpipefd[2];
+    int metapipefd[2];
+    pid_t cpid, w;
+    int wstatus;
+
+    xsetprogname(argv[0]);	/* Portability call -- see system.h */
+    rpmReadConfigFiles(NULL, NULL);
+
+    if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help"))) {
+	fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]);
+	exit(EXIT_FAILURE);
+    }
+
+    if (argc == 1) {
+	fprintf(stderr,
+		_("Need at least one DIGESTALGO parameter, e.g. 'SHA256'\n"));
+	exit(EXIT_FAILURE);
+    }
+
+    for (int x = 0; x < (argc - 1); x++) {
+	if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0)
+	{
+	    fprintf(stderr,
+		    _("Unable to resolve '%s' as a digest algorithm, exiting\n"),
+		    argv[x + 1]);
+	    exit(EXIT_FAILURE);
+	}
+    }
+
+
+    if (pipe(mainpipefd) == -1) {
+	fprintf(stderr, _("Main pipe failure\n"));
+	exit(EXIT_FAILURE);
+    }
+    if (pipe(metapipefd) == -1) {
+	fprintf(stderr, _("Meta pipe failure\n"));
+	exit(EXIT_FAILURE);
+    }
+    cpid = fork();
+    if (cpid == 0) {
+	/* child: digestor */
+	close(mainpipefd[0]);
+	close(metapipefd[0]);
+	FD_t fdi = fdDup(STDIN_FILENO);
+	FD_t fdo = fdDup(mainpipefd[1]);
+	FD_t validationo = fdDup(metapipefd[1]);
+	rc = digestor(fdi, fdo, validationo, algos, argc - 1);
+	Fclose(validationo);
+	Fclose(fdo);
+	Fclose(fdi);
+    } else {
+	/* parent: main program */
+	close(mainpipefd[1]);
+	close(metapipefd[1]);
+	FD_t fdi = fdDup(mainpipefd[0]);
+	FD_t validationi = fdDup(metapipefd[0]);
+	rc = process_package(fdi, validationi);
+	Fclose(validationi);
+	/* fdi is normally closed through the stacked file gzdi in the
+	 * function.
+	 * Wait for child process (digestor for stdin) to complete.
+	 */
+	if (rc != RPMRC_OK) {
+	    if (kill(cpid, SIGTERM) != 0) {
+		fprintf(stderr,
+		        _("Failed to kill digest process when main process failed: %s\n"),
+			strerror(errno));
+	    }
+	}
+	w = waitpid(cpid, &wstatus, 0);
+	if (w == -1) {
+	    fprintf(stderr, _("waitpid failed\n"));
+	    cprc = EXIT_FAILURE;
+	} else if (WIFEXITED(wstatus)) {
+	    cprc = WEXITSTATUS(wstatus);
+	    if (cprc != 0) {
+		fprintf(stderr,
+			_("Digest process non-zero exit code %d\n"),
+			cprc);
+	    }
+	} else if (WIFSIGNALED(wstatus)) {
+	    fprintf(stderr,
+		    _("Digest process was terminated with a signal: %d\n"),
+		    WTERMSIG(wstatus));
+	    cprc = EXIT_FAILURE;
+	} else {
+	    /* Don't think this can happen, but covering all bases */
+	    fprintf(stderr, _("Unhandled circumstance in waitpid\n"));
+	    cprc = EXIT_FAILURE;
+	}
+	if (cprc != EXIT_SUCCESS) {
+	    rc = RPMRC_FAIL;
+	}
+    }
+    if (rc != RPMRC_OK) {
+	/* translate rpmRC into generic failure return code. */
+	return EXIT_FAILURE;
+    }
+    return EXIT_SUCCESS;
+}
diff --git a/rpmio/rpmpgp.c b/rpmio/rpmpgp.c
index 015c15a5c..7b972b4a6 100644
--- a/rpmio/rpmpgp.c
+++ b/rpmio/rpmpgp.c
@@ -283,6 +283,16 @@ int pgpValTok(pgpValTbl vs, const char * s, const char * se)
     return vs->val;
 }
 
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val)
+{
+    pgpValTbl tbl = pgpValTable(type);
+    if (tbl == NULL) return -1;
+    int v = pgpValTok(tbl, str, str + strlen(str));
+    if (v == -1) return -1;
+    *val = (uint8_t)v;
+    return 0;
+}
+
 /** \ingroup rpmpgp
  * Decode length from 1, 2, or 5 octet body length encoding, used in
  * new format packet headers and V4 signature subpackets.
diff --git a/rpmio/rpmpgp.h b/rpmio/rpmpgp.h
index c53e29b01..2b57318ba 100644
--- a/rpmio/rpmpgp.h
+++ b/rpmio/rpmpgp.h
@@ -973,6 +973,15 @@ typedef rpmFlags rpmDigestFlags;
  */
 const char * pgpValString(pgpValType type, uint8_t val);
 
+/** \ingroup rpmpgp
+ * Return  OpenPGP value for a string.
+ * @param type		type of value
+ * @param str		string to lookup
+ * @param[out] val  byte value associated with string
+ * @return		0 on success else -1
+ */
+int pgpStringVal(pgpValType type, const char *str, uint8_t *val);
+
 /** \ingroup rpmpgp
  * Return (native-endian) integer from big-endian representation.
  * @param s		pointer to big-endian integer