From 8b6ae95abeb85a5c529d52c430d80f6bb9fe878c Mon Sep 17 00:00:00 2001 From: chantra Date: Mar 01 2022 18:12:19 +0000 Subject: PR1470 (RPM CoW) on top of 1534. https://github.com/chantra/rpm/compare/master...cow.diff --- diff --git a/master...cow.diff b/master...cow.diff new file mode 100644 index 0000000..0e4160c --- /dev/null +++ b/master...cow.diff @@ -0,0 +1,1176 @@ +diff --git a/Makefile.am b/Makefile.am +index e5c75d7b4..288668819 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -99,7 +99,7 @@ pkginclude_HEADERS += build/rpmfc.h + pkginclude_HEADERS += build/rpmspec.h + + +-bin_PROGRAMS = rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec ++bin_PROGRAMS = rpm rpm2cpio rpmbuild rpmdb rpmkeys rpmsign rpmspec rpm2extents + if WITH_ARCHIVE + bin_PROGRAMS += rpm2archive + endif +@@ -154,6 +154,10 @@ rpm2cpio_SOURCES = rpm2cpio.c debug.h system.h + rpm2cpio_LDADD = lib/librpm.la rpmio/librpmio.la + rpm2cpio_LDADD += @WITH_POPT_LIB@ + ++rpm2extents_SOURCES = rpm2extents.c debug.h system.h ++rpm2extents_LDADD = lib/librpm.la rpmio/librpmio.la ++rpm2extents_LDADD += @WITH_POPT_LIB@ ++ + rpm2archive_SOURCES = rpm2archive.c debug.h system.h + rpm2archive_LDADD = lib/librpm.la rpmio/librpmio.la + rpm2archive_LDADD += @WITH_POPT_LIB@ @WITH_ARCHIVE_LIB@ +diff --git a/lib/depends.c b/lib/depends.c +index 30234df3d..8998afcd3 100644 +--- a/lib/depends.c ++++ b/lib/depends.c +@@ -81,6 +81,8 @@ static rpmRC headerCheckPayloadFormat(Header h) { + */ + if (!payloadfmt) return rc; + ++ if (rstreq(payloadfmt, "clon")) return rc; ++ + if (!rstreq(payloadfmt, "cpio")) { + char *nevra = headerGetAsString(h, RPMTAG_NEVRA); + if (payloadfmt && rstreq(payloadfmt, "drpm")) { +diff --git a/lib/fsm.c b/lib/fsm.c +index 935a0a5c6..feda3750c 100644 +--- a/lib/fsm.c ++++ b/lib/fsm.c +@@ -19,6 +19,7 @@ + + #include "rpmio/rpmio_internal.h" /* fdInit/FiniDigest */ + #include "lib/fsm.h" ++#include "lib/rpmlib.h" + #include "lib/rpmte_internal.h" /* XXX rpmfs */ + #include "lib/rpmplugins.h" /* 
rpm plugins hooks */ + #include "lib/rpmug.h" +@@ -52,6 +53,7 @@ struct filedata_s { + int stage; + int setmeta; + int skip; ++ int plugin_contents; + rpmFileAction action; + const char *suffix; + char *fpath; +@@ -891,6 +893,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files, + struct filedata_s *fdata = xcalloc(fc, sizeof(*fdata)); + struct filedata_s *firstlink = NULL; + ++ Header h = rpmteHeader(te); ++ const char *payloadfmt = headerGetString(h, RPMTAG_PAYLOADFORMAT); ++ int cpio = 1; ++ ++ if (payloadfmt && rstreq(payloadfmt, "clon")) { ++ cpio = 0; ++ } ++ + /* transaction id used for temporary path suffix while installing */ + rasprintf(&tid, ";%08x", (unsigned)rpmtsGetTid(ts)); + +@@ -911,12 +921,23 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files, + /* Remap file perms, owner, and group. */ + rc = rpmfiStat(fi, 1, &fp->sb); + +- setFileState(fs, fx); + fsmDebug(fp->fpath, fp->action, &fp->sb); + + /* Run fsm file pre hook for all plugins */ + rc = rpmpluginsCallFsmFilePre(plugins, fi, fp->fpath, + fp->sb.st_mode, fp->action); ++ fp->plugin_contents = 0; ++ switch (rc) { ++ case RPMRC_OK: ++ setFileState(fs, fx); ++ break; ++ case RPMRC_PLUGIN_CONTENTS: ++ fp->plugin_contents = 1; ++ // reduce reads on cpio to this value. Could be zero if ++ // this is from a hard link. ++ rc = RPMRC_OK; ++ break; ++ } + fp->stage = FILE_PRE; + } + fi = rpmfiFree(fi); +@@ -924,10 +945,14 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files, + if (rc) + goto exit; + +- fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE); +- if (fi == NULL) { +- rc = RPMERR_BAD_MAGIC; +- goto exit; ++ if (cpio) { ++ fi = rpmfiNewArchiveReader(payload, files, RPMFI_ITER_READ_ARCHIVE); ++ if (fi == NULL) { ++ rc = RPMERR_BAD_MAGIC; ++ goto exit; ++ } ++ } else { ++ fi = rpmfilesIter(files, RPMFI_ITER_FWD); + } + + /* Detect and create directories not explicitly in package. 
*/
+@@ -969,8 +994,12 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
+ 
+             if (S_ISREG(fp->sb.st_mode)) {
+                 if (rc == RPMERR_ENOENT) {
+-                    rc = fsmMkfile(fi, fp, files, psm, nodigest,
+-                                   &firstlink, &firstlinkfile);
++                    if(fp->plugin_contents) {
++                        rc = RPMRC_OK;
++                    }else {
++                        rc = fsmMkfile(fi, fp, files, psm, nodigest,
++                                       &firstlink, &firstlinkfile);
++                    }
+                 }
+             } else if (S_ISDIR(fp->sb.st_mode)) {
+                 if (rc == RPMERR_ENOENT) {
+@@ -1078,6 +1107,7 @@ int rpmPackageFilesInstall(rpmts ts, rpmte te, rpmfiles files,
+     rpmswAdd(rpmtsOp(ts, RPMTS_OP_DIGEST), fdOp(payload, FDSTAT_DIGEST));
+ 
+ exit:
++    h = headerFree(h);
+     fi = rpmfiFree(fi);
+     Fclose(payload);
+     free(tid);
+diff --git a/lib/package.c b/lib/package.c
+index 281275029..90bd0d8a7 100644
+--- a/lib/package.c
++++ b/lib/package.c
+@@ -404,5 +404,45 @@ rpmRC rpmReadPackageFile(rpmts ts, FD_t fd, const char * fn, Header * hdrp)
+     return rc;
+ }
+ 
++rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp)
++{
++    char *msg = NULL;
++    hdrblob sigblob = hdrblobCreate();
++    hdrblob blob = hdrblobCreate();
++    Header h = NULL;
++    Header sigh = NULL;
++    /* Read lead, signature header and main header; nothing is verified. */
++    rpmRC rc = rpmLeadRead(fd, &msg);
++    if (rc != RPMRC_OK)
++        goto exit;
++
++    rc = hdrblobRead(fd, 1, 0, RPMTAG_HEADERSIGNATURES, sigblob, &msg);
++    if (rc != RPMRC_OK)
++        goto exit;
++
++    rc = hdrblobRead(fd, 1, 1, RPMTAG_HEADERIMMUTABLE, blob, &msg);
++    if (rc != RPMRC_OK)
++        goto exit;
++
++    rc = hdrblobImport(sigblob, 0, &sigh, &msg);
++    if (rc)
++        goto exit;
+ 
++    rc = hdrblobImport(blob, 0, &h, &msg);
++    if (rc)
++        goto exit;
+ 
++    if (sigp) *sigp = headerLink(sigh);	/* outputs are optional: rpmlib.h documents "(or NULL)" */
++    if (hdrp) *hdrp = headerLink(h);	/* caller owns the extra reference taken here */
++
++exit:
++    if (rc != RPMRC_OK && msg)
++        rpmlog(RPMLOG_ERR, "%s: %s\n", Fdescr(fd), msg);
++    hdrblobFree(sigblob);
++    hdrblobFree(blob);
++    headerFree(sigh);
++    headerFree(h);
++    free(msg);
++
++    return rc;
++}
+diff --git a/lib/rpmlib.h b/lib/rpmlib.h
+index 0879d04e5..a09ba0daf 100644
+--- a/lib/rpmlib.h
++++ b/lib/rpmlib.h
+@@ -155,6 +155,15 @@ rpmRC 
rpmReadHeader(rpmts ts, FD_t fd, Header *hdrp, char ** msg); + rpmRC rpmReadPackageFile(rpmts ts, FD_t fd, + const char * fn, Header * hdrp); + ++/** \ingroup header ++ * Return package signature, header from file handle, no verification. ++ * @param fd file handle ++ * @param[out] sigp address of header (or NULL) ++ * @param[out] hdrp address of header (or NULL) ++ * @return RPMRC_OK on success ++ */ ++rpmRC rpmReadPackageRaw(FD_t fd, Header * sigp, Header * hdrp); ++ + /** \ingroup rpmtrans + * Install source package. + * @param ts transaction set +diff --git a/lib/rpmplugins.c b/lib/rpmplugins.c +index 62d75c4cf..3da3097af 100644 +--- a/lib/rpmplugins.c ++++ b/lib/rpmplugins.c +@@ -356,13 +356,28 @@ rpmRC rpmpluginsCallFsmFilePre(rpmPlugins plugins, rpmfi fi, const char *path, + plugin_fsm_file_pre_func hookFunc; + int i; + rpmRC rc = RPMRC_OK; ++ rpmRC hook_rc; + + for (i = 0; i < plugins->count; i++) { + rpmPlugin plugin = plugins->plugins[i]; + RPMPLUGINS_SET_HOOK_FUNC(fsm_file_pre); +- if (hookFunc && hookFunc(plugin, fi, path, file_mode, op) == RPMRC_FAIL) { +- rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name); +- rc = RPMRC_FAIL; ++ if (hookFunc) { ++ hook_rc = hookFunc(plugin, fi, path, file_mode, op); ++ if (hook_rc == RPMRC_FAIL) { ++ rpmlog(RPMLOG_ERR, "Plugin %s: hook fsm_file_pre failed\n", plugin->name); ++ rc = RPMRC_FAIL; ++ } else if (hook_rc == RPMRC_PLUGIN_CONTENTS && rc != RPMRC_FAIL) { ++ if (rc == RPMRC_PLUGIN_CONTENTS) { ++ /* Another plugin already said it'd handle contents. It's ++ * undefined how these would combine, so treat this as a ++ * failure condition. 
++ */ ++ rc = RPMRC_FAIL; ++ } else { ++ /* Plugin will handle content */ ++ rc = RPMRC_PLUGIN_CONTENTS; ++ } ++ } + } + } + +diff --git a/lib/rpmte.c b/lib/rpmte.c +index 3663604e7..d43dc41ad 100644 +--- a/lib/rpmte.c ++++ b/lib/rpmte.c +@@ -423,6 +423,11 @@ FD_t rpmteSetFd(rpmte te, FD_t fd) + return NULL; + } + ++FD_t rpmteFd(rpmte te) ++{ ++ return (te != NULL ? te->fd : NULL); ++} ++ + fnpyKey rpmteKey(rpmte te) + { + return (te != NULL ? te->key : NULL); +diff --git a/lib/rpmte.h b/lib/rpmte.h +index 81acf7a19..6fc0a9f91 100644 +--- a/lib/rpmte.h ++++ b/lib/rpmte.h +@@ -209,6 +209,8 @@ const char * rpmteNEVR(rpmte te); + */ + const char * rpmteNEVRA(rpmte te); + ++FD_t rpmteFd(rpmte te); ++ + /** \ingroup rpmte + * Retrieve key from transaction element. + * @param te transaction element +diff --git a/lib/rpmtypes.h b/lib/rpmtypes.h +index e8e69b506..af2611e9e 100644 +--- a/lib/rpmtypes.h ++++ b/lib/rpmtypes.h +@@ -106,7 +106,8 @@ typedef enum rpmRC_e { + RPMRC_NOTFOUND = 1, /*!< Generic not found code. */ + RPMRC_FAIL = 2, /*!< Generic failure code. */ + RPMRC_NOTTRUSTED = 3, /*!< Signature is OK, but key is not trusted. */ +- RPMRC_NOKEY = 4 /*!< Public key is unavailable. */ ++ RPMRC_NOKEY = 4, /*!< Public key is unavailable. 
*/ ++ RPMRC_PLUGIN_CONTENTS = 5 /*!< fsm_file_pre plugin is handling content */ + } rpmRC; + + #ifdef __cplusplus +diff --git a/macros.in b/macros.in +index e90cefa9a..363252b0f 100644 +--- a/macros.in ++++ b/macros.in +@@ -1143,6 +1143,7 @@ package or when debugging this package.\ + + # Transaction plugin macros + %__plugindir %{_libdir}/rpm-plugins ++%__transaction_reflink %{__plugindir}/reflink.so + %__transaction_systemd_inhibit %{__plugindir}/systemd_inhibit.so + %__transaction_selinux %{__plugindir}/selinux.so + %__transaction_syslog %{__plugindir}/syslog.so +diff --git a/plugins/Makefile.am b/plugins/Makefile.am +index 3a929d0ce..ad0d3bce7 100644 +--- a/plugins/Makefile.am ++++ b/plugins/Makefile.am +@@ -42,6 +42,10 @@ prioreset_la_SOURCES = prioreset.c + prioreset_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la + plugins_LTLIBRARIES += prioreset.la + ++reflink_la_SOURCES = reflink.c ++reflink_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la ++plugins_LTLIBRARIES += reflink.la ++ + syslog_la_SOURCES = syslog.c + syslog_la_LIBADD = $(top_builddir)/lib/librpm.la $(top_builddir)/rpmio/librpmio.la + plugins_LTLIBRARIES += syslog.la +diff --git a/plugins/reflink.c b/plugins/reflink.c +new file mode 100644 +index 000000000..513887604 +--- /dev/null ++++ b/plugins/reflink.c +@@ -0,0 +1,375 @@ ++#include "system.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#if defined(__linux__) ++#include /* For FICLONE */ ++#endif ++ ++#include ++#include "lib/rpmlib.h" ++#include "lib/rpmplugin.h" ++#include "lib/rpmte_internal.h" ++#include ++#include "rpmio/rpmio_internal.h" ++ ++ ++#include "debug.h" ++ ++#include ++ ++/* use hash table to remember inode -> ix (for rpmfilesFN(ix)) lookups */ ++#undef HASHTYPE ++#undef HTKEYTYPE ++#undef HTDATATYPE ++#define HASHTYPE inodeIndexHash ++#define HTKEYTYPE rpm_ino_t ++#define HTDATATYPE int ++#include "lib/rpmhash.H" ++#include "lib/rpmhash.C" ++ 
++/* We use this in find to indicate a key wasn't found. This is an ++ * unrecoverable error, but we can at least show a decent error. 0 is never a ++ * valid offset because it's the offset of the start of the file. ++ */ ++#define NOT_FOUND 0 ++ ++#define BUFFER_SIZE (1024 * 128) ++ ++/* magic value at end of file (64 bits) that indicates this is a transcoded ++ * rpm. ++ */ ++#define MAGIC 3472329499408095051 ++ ++struct reflink_state_s { ++ /* Stuff that's used across rpms */ ++ long fundamental_block_size; ++ char *buffer; ++ ++ /* stuff that's used/updated per psm */ ++ uint32_t keys, keysize; ++ ++ /* table for current rpm, keys * (keysize + sizeof(rpm_loff_t)) */ ++ unsigned char *table; ++ FD_t fd; ++ rpmfiles files; ++ inodeIndexHash inodeIndexes; ++}; ++ ++typedef struct reflink_state_s * reflink_state; ++ ++static int inodeCmp(rpm_ino_t a, rpm_ino_t b) ++{ ++ return (a != b); ++} ++ ++static unsigned int inodeId(rpm_ino_t a) ++{ ++ /* rpm_ino_t is uint32_t so maps safely to unsigned int */ ++ return (unsigned int)a; ++} ++ ++static rpmRC reflink_init(rpmPlugin plugin, rpmts ts) { ++ reflink_state state = rcalloc(1, sizeof(struct reflink_state_s)); ++ ++ /* IOCTL-FICLONERANGE(2): ...Disk filesystems generally require the offset ++ * and length arguments to be aligned to the fundamental block size. ++ * ++ * The value of "fundamental block size" is directly related to the ++ * system's page size, so we should use that. 
++     */
++    state->fundamental_block_size = sysconf(_SC_PAGESIZE);
++    state->buffer = rcalloc(1, BUFFER_SIZE);
++    rpmPluginSetData(plugin, state);
++
++    return RPMRC_OK;
++}
++
++static void reflink_cleanup(rpmPlugin plugin) {
++    reflink_state state = rpmPluginGetData(plugin);
++    free(state->buffer);
++    free(state);
++}
++/* psm_pre hook: detect a transcoded rpm by its trailing MAGIC and load its digest table. */
++static rpmRC reflink_psm_pre(rpmPlugin plugin, rpmte te) {
++    reflink_state state = rpmPluginGetData(plugin);
++    state->fd = rpmteFd(te);
++    if (state->fd == 0) {
++        rpmlog(RPMLOG_DEBUG, _("reflink: fd = 0, no install\n"));
++        return RPMRC_OK;
++    }
++    rpm_loff_t current = Ftell(state->fd);
++    uint64_t magic;
++    if (Fseek(state->fd, -(off_t)sizeof(magic), SEEK_END) < 0) { /* negate as off_t, not size_t */
++        rpmlog(RPMLOG_ERR, _("reflink: failed to seek for magic\n"));
++        if (Fseek(state->fd, current, SEEK_SET) < 0) {
++            /* yes this gets a bit repetitive */
++            rpmlog(RPMLOG_ERR,
++                   _("reflink: unable to seek back to original location\n"));
++        }
++        return RPMRC_FAIL;
++    }
++    size_t len = sizeof(magic);
++    if (Fread(&magic, len, 1, state->fd) != len) {
++        rpmlog(RPMLOG_ERR, _("reflink: unable to read magic\n"));
++        if (Fseek(state->fd, current, SEEK_SET) < 0) {
++            rpmlog(RPMLOG_ERR,
++                   _("reflink: unable to seek back to original location\n"));
++        }
++        return RPMRC_FAIL;
++    }
++    if (magic != MAGIC) {
++        rpmlog(RPMLOG_DEBUG, _("reflink: not transcoded\n"));
++        if (Fseek(state->fd, current, SEEK_SET) < 0) {
++            rpmlog(RPMLOG_ERR,
++                   _("reflink: unable to seek back to original location\n"));
++            return RPMRC_FAIL;
++        }
++        return RPMRC_OK;
++    }
++    rpmlog(RPMLOG_DEBUG, _("reflink: *is* transcoded\n"));
++    Header h = rpmteHeader(te);
++
++    /* replace/add header that main fsm.c can read */
++    headerDel(h, RPMTAG_PAYLOADFORMAT);
++    headerPutString(h, RPMTAG_PAYLOADFORMAT, "clon");
++    headerFree(h);
++    state->files = rpmteFiles(te);
++    /* tail of file contains offset_table, offset_checksums then magic */
++    if (Fseek(state->fd, -(off_t)(sizeof(rpm_loff_t) * 2 + sizeof(magic)), SEEK_END) < 0) {
++        rpmlog(RPMLOG_ERR, _("reflink: failed to seek for tail %p\n"),
++               state->fd);
++        return RPMRC_FAIL;
++    }
++    rpm_loff_t table_start;
++    len = sizeof(table_start);
++    if (Fread(&table_start, len, 1, state->fd) != len) {
++        rpmlog(RPMLOG_ERR, _("reflink: unable to read table_start\n"));
++        return RPMRC_FAIL;
++    }
++    if (Fseek(state->fd, table_start, SEEK_SET) < 0) {
++        rpmlog(RPMLOG_ERR, _("reflink: unable to seek to table_start\n"));
++        return RPMRC_FAIL;
++    }
++    len = sizeof(state->keys);
++    if (Fread(&state->keys, len, 1, state->fd) != len) {
++        rpmlog(RPMLOG_ERR, _("reflink: unable to read number of keys\n"));
++        return RPMRC_FAIL;
++    }
++    len = sizeof(state->keysize);
++    if (Fread(&state->keysize, len, 1, state->fd) != len) {
++        rpmlog(RPMLOG_ERR, _("reflink: unable to read keysize\n"));
++        return RPMRC_FAIL;
++    }
++    rpmlog(
++        RPMLOG_DEBUG,
++        _("reflink: table_start=0x%lx, keys=%d, keysize=%d\n"),
++        table_start, state->keys, state->keysize
++    );
++    /* now get digest table if there is a reason to have one. */
++    if (state->keys == 0 || state->keysize == 0) {
++        /* no files (or no digests(!)) */
++        state->table = NULL;
++    } else {
++        size_t table_size = (size_t)state->keys * (state->keysize + sizeof(rpm_loff_t)); /* not int: keys/keysize come from the file */
++        state->table = rcalloc(1, table_size);
++        if (Fread(state->table, table_size, 1, state->fd) != table_size) {
++            rpmlog(RPMLOG_ERR, _("reflink: unable to read table\n"));
++            return RPMRC_FAIL;
++        }
++        state->inodeIndexes = inodeIndexHashCreate(
++            state->keys, inodeId, inodeCmp, NULL, NULL
++        );
++    }
++
++    /* Seek back to original location.
++     * Might not be needed if we seek to offset immediately
++     */
++    if (Fseek(state->fd, current, SEEK_SET) < 0) {
++        rpmlog(RPMLOG_ERR,
++               _("reflink: unable to seek back to original location\n"));
++        return RPMRC_FAIL;
++    }
++    return RPMRC_OK;
++}
++
++static rpmRC reflink_psm_post(rpmPlugin plugin, rpmte te, int res)
++{
++    reflink_state state = rpmPluginGetData(plugin);
++    state->files = rpmfilesFree(state->files);
++    if (state->table) {
++        free(state->table);
++        state->table = NULL;
++    }
++    if (state->inodeIndexes) {
++        inodeIndexHashFree(state->inodeIndexes);
++        state->inodeIndexes = NULL;
++    }
++    return RPMRC_OK;
++}
++
++
++/* have a prototype, warnings system */
++rpm_loff_t find(const unsigned char *digest, reflink_state state);
++
++rpm_loff_t find(const unsigned char *digest, reflink_state state) {
++    /* Plain binary search of the sorted digest table: a nested bsearch()
++     * comparator is a GCC-only extension that clang does not compile. */
++    size_t stride = state->keysize + sizeof(rpm_loff_t);
++    size_t lo = 0, hi = (digest && state->table) ? state->keys : 0;
++    rpmlog(RPMLOG_DEBUG,
++           _("reflink: bsearch(key=%p, base=%p, nmemb=%d, size=%lu)\n"),
++           digest, state->table, state->keys,
++           (unsigned long)stride);
++    while (lo < hi) {
++        size_t mid = lo + (hi - lo) / 2;
++        const unsigned char *entry = state->table + mid * stride;
++        int cmp = memcmp(digest, entry, state->keysize);
++        if (cmp == 0) {
++            rpm_loff_t offset; /* entry + keysize may be unaligned */
++            memcpy(&offset, entry + state->keysize, sizeof(offset));
++            return offset;
++        }
++        if (cmp < 0) hi = mid; else lo = mid + 1;
++    }
++    return NOT_FOUND;
++}
++
++static rpmRC reflink_fsm_file_pre(rpmPlugin plugin, rpmfi fi, const char* path,
++                                  mode_t file_mode, rpmFsmOp op)
++{
++    struct file_clone_range fcr;
++    rpm_loff_t size;
++    int dst, rc;
++    int *hlix;
++
++    reflink_state state = rpmPluginGetData(plugin);
++    if (state->table == NULL) {
++        /* no table means rpm is not in reflink format, so leave. Now.
*/ ++ return RPMRC_OK; ++ } ++ if (op == FA_TOUCH) { ++ /* we're not overwriting an existing file. */ ++ return RPMRC_OK; ++ } ++ fcr.dest_offset = 0; ++ if (S_ISREG(file_mode) && !(rpmfiFFlags(fi) & RPMFILE_GHOST)) { ++ rpm_ino_t inode = rpmfiFInode(fi); ++ /* check for hard link entry in table. GetEntry overwrites hlix with ++ * the address of the first match. ++ */ ++ if (inodeIndexHashGetEntry(state->inodeIndexes, inode, &hlix, NULL, ++ NULL)) { ++ /* entry is in table, use hard link */ ++ char *fn = rpmfilesFN(state->files, hlix[0]); ++ if (link(fn, path) != 0) { ++ rpmlog(RPMLOG_ERR, ++ _("reflink: Unable to hard link %s -> %s due to %s\n"), ++ fn, path, strerror(errno)); ++ free(fn); ++ return RPMRC_FAIL; ++ } ++ free(fn); ++ return RPMRC_PLUGIN_CONTENTS; ++ } ++ /* if we didn't hard link, then we'll track this inode as being ++ * created soon ++ */ ++ if (rpmfiFNlink(fi) > 1) { ++ /* minor optimization: only store files with more than one link */ ++ inodeIndexHashAddEntry(state->inodeIndexes, inode, rpmfiFX(fi)); ++ } ++ /* derived from wfd_open in fsm.c */ ++ mode_t old_umask = umask(0577); ++ dst = open(path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR); ++ umask(old_umask); ++ if (dst == -1) { ++ rpmlog(RPMLOG_ERR, ++ _("reflink: Unable to open %s for writing due to %s, flags = %x\n"), ++ path, strerror(errno), rpmfiFFlags(fi)); ++ return RPMRC_FAIL; ++ } ++ size = rpmfiFSize(fi); ++ if (size > 0) { ++ /* round src_length down to fundamental_block_size multiple */ ++ fcr.src_length = size / state->fundamental_block_size * state->fundamental_block_size; ++ if ((size % state->fundamental_block_size) > 0) { ++ /* round up to next fundamental_block_size. We expect the data ++ * in the rpm to be similarly padded. 
++ */ ++ fcr.src_length += state->fundamental_block_size; ++ } ++ fcr.src_fd = Fileno(state->fd); ++ if (fcr.src_fd == -1) { ++ close(dst); ++ rpmlog(RPMLOG_ERR, _("reflink: src fd lookup failed\n")); ++ return RPMRC_FAIL; ++ } ++ fcr.src_offset = find(rpmfiFDigest(fi, NULL, NULL), state); ++ if (fcr.src_offset == NOT_FOUND) { ++ close(dst); ++ rpmlog(RPMLOG_ERR, _("reflink: digest not found\n")); ++ return RPMRC_FAIL; ++ } ++ rpmlog(RPMLOG_DEBUG, ++ _("reflink: Reflinking %llu bytes at %llu to %s orig size=%ld, file=%lld\n"), ++ fcr.src_length, fcr.src_offset, path, size, fcr.src_fd); ++ rc = ioctl(dst, FICLONERANGE, &fcr); ++ if (rc) { ++ rpmlog(RPMLOG_WARNING, ++ _("reflink: falling back to copying bits for %s due to %d, %d = %s\n"), ++ path, rc, errno, strerror(errno)); ++ if (Fseek(state->fd, fcr.src_offset, SEEK_SET) < 0) { ++ close(dst); ++ rpmlog(RPMLOG_ERR, ++ _("reflink: unable to seek on copying bits\n")); ++ return RPMRC_FAIL; ++ } ++ rpm_loff_t left = size; ++ size_t len, read, written; ++ while (left) { ++ len = (left > BUFFER_SIZE ? 
BUFFER_SIZE : left);
++                    read = Fread(state->buffer, len, 1, state->fd);
++                    if (read != len) {
++                        close(dst);
++                        rpmlog(RPMLOG_ERR,
++                               _("reflink: short read on copying bits\n"));
++                        return RPMRC_FAIL;
++                    }
++                    written = write(dst, state->buffer, len);
++                    if (read != written) {
++                        close(dst);
++                        rpmlog(RPMLOG_ERR,
++                               _("reflink: short write on copying bits\n"));
++                        return RPMRC_FAIL;
++                    }
++                    left -= len;
++                }
++            } else {
++                /* reflink worked, so truncate */
++                if (ftruncate(dst, size) != 0) {
++                    rpmlog(RPMLOG_ERR,
++                           _("reflink: Unable to truncate %s to %ld due to %s\n"),
++                           path, size, strerror(errno));
++                    close(dst); /* after logging so errno is intact; was leaked here */
++                    return RPMRC_FAIL;
++                }
++            }
++        }
++        close(dst);
++        return RPMRC_PLUGIN_CONTENTS;
++    }
++    return RPMRC_OK;
++}
++
++struct rpmPluginHooks_s reflink_hooks = {
++    .init = reflink_init,
++    .cleanup = reflink_cleanup,
++    .psm_pre = reflink_psm_pre,
++    .psm_post = reflink_psm_post,
++    .fsm_file_pre = reflink_fsm_file_pre,
++};
+diff --git a/rpm2extents.c b/rpm2extents.c
+new file mode 100644
+index 000000000..c111be0a2
+--- /dev/null
++++ b/rpm2extents.c
+@@ -0,0 +1,433 @@
++/* rpm2extents: convert payload to inline extents */
++
++#include "system.h"
++
++#include /* rpmReadPackageFile .. */
++#include
++#include
++#include
++#include
++
++#include
++#include "lib/rpmlead.h"
++#include "lib/signature.h"
++#include "lib/header_internal.h"
++#include "rpmio/rpmio_internal.h"
++
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "debug.h"
++
++/* hash of void * (pointers) to file digests to offsets within output.
++ * The length of the key depends on what the FILEDIGESTALGO is.
++ */
++#undef HASHTYPE
++#undef HTKEYTYPE
++#undef HTDATATYPE
++#define HASHTYPE digestSet
++#define HTKEYTYPE const unsigned char *
++#include "lib/rpmhash.H"
++#include "lib/rpmhash.C"
++
++/* magic value at end of file (64 bits) that indicates this is a transcoded
++ * rpm.
++ */ ++#define MAGIC 3472329499408095051 ++ ++struct digestoffset { ++ const unsigned char * digest; ++ rpm_loff_t pos; ++}; ++ ++rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit); ++ ++rpm_loff_t pad_to(rpm_loff_t pos, rpm_loff_t unit) ++{ ++ return (unit - (pos % unit)) % unit; ++} ++ ++static int digestor( ++ FD_t fdi, ++ FD_t fdo, ++ FD_t validationo, ++ uint8_t algos[], ++ uint32_t algos_len ++) ++{ ++ ssize_t fdilength; ++ const char *filedigest, *algo_name; ++ size_t filedigest_len, len; ++ uint32_t algo_name_len, algo_digest_len; ++ int algo; ++ rpmRC rc = RPMRC_FAIL; ++ ++ for (algo = 0; algo < algos_len; algo++) { ++ fdInitDigest(fdi, algos[algo], 0); ++ } ++ fdilength = ufdCopy(fdi, fdo); ++ if (fdilength == -1) { ++ fprintf(stderr, _("digest cat failed\n")); ++ goto exit; ++ } ++ ++ len = sizeof(fdilength); ++ if (Fwrite(&fdilength, len, 1, validationo) != len) { ++ fprintf(stderr, _("Unable to write input length %zd\n"), fdilength); ++ goto exit; ++ } ++ len = sizeof(algos_len); ++ if (Fwrite(&algos_len, len, 1, validationo) != len) { ++ fprintf(stderr, _("Unable to write number of validation digests\n")); ++ goto exit; ++ } ++ for (algo = 0; algo < algos_len; algo++) { ++ fdFiniDigest(fdi, algos[algo], (void **)&filedigest, &filedigest_len, 0); ++ ++ algo_name = pgpValString(PGPVAL_HASHALGO, algos[algo]); ++ algo_name_len = (uint32_t)strlen(algo_name); ++ algo_digest_len = (uint32_t)filedigest_len; ++ ++ len = sizeof(algo_name_len); ++ if (Fwrite(&algo_name_len, len, 1, validationo) != len) { ++ fprintf(stderr, ++ _("Unable to write validation algo name length\n")); ++ goto exit; ++ } ++ len = sizeof(algo_digest_len); ++ if (Fwrite(&algo_digest_len, len, 1, validationo) != len) { ++ fprintf(stderr, ++ _("Unable to write number of bytes for validation digest\n")); ++ goto exit; ++ } ++ if (Fwrite(algo_name, algo_name_len, 1, validationo) != algo_name_len) { ++ fprintf(stderr, _("Unable to write validation algo name\n")); ++ goto exit; ++ } ++ if 
(Fwrite(filedigest, algo_digest_len, 1, validationo ) != algo_digest_len) { ++ fprintf(stderr, ++ _("Unable to write validation digest value %u, %zu\n"), ++ algo_digest_len, filedigest_len); ++ goto exit; ++ } ++ } ++ rc = RPMRC_OK; ++exit: ++ return rc; ++} ++ ++static rpmRC process_package(FD_t fdi, FD_t validationi) ++{ ++ uint32_t diglen; ++ /* GNU C extension: can use diglen from outer context */ ++ int digestSetCmp(const unsigned char * a, const unsigned char * b) { ++ return memcmp(a, b, diglen); ++ } ++ ++ unsigned int digestSetHash(const unsigned char * digest) { ++ /* assumes sizeof(unsigned int) < diglen */ ++ return *(unsigned int *)digest; ++ } ++ ++ int digestoffsetCmp(const void * a, const void * b) { ++ return digestSetCmp( ++ ((struct digestoffset *)a)->digest, ++ ((struct digestoffset *)b)->digest ++ ); ++ } ++ ++ FD_t fdo; ++ FD_t gzdi; ++ Header h, sigh; ++ long fundamental_block_size = sysconf(_SC_PAGESIZE); ++ rpmRC rc = RPMRC_OK; ++ rpm_mode_t mode; ++ char *rpmio_flags = NULL, *zeros; ++ const unsigned char *digest; ++ rpm_loff_t pos, size, pad, validation_pos; ++ uint32_t offset_ix = 0; ++ size_t len; ++ int next = 0; ++ ++ fdo = fdDup(STDOUT_FILENO); ++ ++ if (rpmReadPackageRaw(fdi, &sigh, &h)) { ++ fprintf(stderr, _("Error reading package\n")); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (rpmLeadWrite(fdo, h)) ++ { ++ fprintf(stderr, _("Unable to write package lead: %s\n"), ++ Fstrerror(fdo)); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (rpmWriteSignature(fdo, sigh)) { ++ fprintf(stderr, _("Unable to write signature: %s\n"), Fstrerror(fdo)); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (headerWrite(fdo, h, HEADER_MAGIC_YES)) { ++ fprintf(stderr, _("Unable to write headers: %s\n"), Fstrerror(fdo)); ++ exit(EXIT_FAILURE); ++ } ++ ++ /* Retrieve payload size and compression type. */ ++ { ++ const char *compr = headerGetString(h, RPMTAG_PAYLOADCOMPRESSOR); ++ rpmio_flags = rstrscat(NULL, "r.", compr ? 
compr : "gzip", NULL); ++ } ++ ++ gzdi = Fdopen(fdi, rpmio_flags); /* XXX gzdi == fdi */ ++ free(rpmio_flags); ++ ++ if (gzdi == NULL) { ++ fprintf(stderr, _("cannot re-open payload: %s\n"), Fstrerror(gzdi)); ++ exit(EXIT_FAILURE); ++ } ++ ++ rpmfiles files = rpmfilesNew(NULL, h, 0, RPMFI_KEEPHEADER); ++ rpmfi fi = rpmfiNewArchiveReader(gzdi, files, ++ RPMFI_ITER_READ_ARCHIVE_CONTENT_FIRST); ++ ++ /* this is encoded in the file format, so needs to be fixed size (for ++ * now?) ++ */ ++ diglen = (uint32_t)rpmDigestLength(rpmfiDigestAlgo(fi)); ++ digestSet ds = digestSetCreate(rpmfiFC(fi), digestSetHash, digestSetCmp, ++ NULL); ++ struct digestoffset offsets[rpmfiFC(fi)]; ++ pos = RPMLEAD_SIZE + headerSizeof(sigh, HEADER_MAGIC_YES); ++ ++ /* main headers are aligned to 8 byte boundry */ ++ pos += pad_to(pos, 8); ++ pos += headerSizeof(h, HEADER_MAGIC_YES); ++ ++ zeros = xcalloc(fundamental_block_size, 1); ++ ++ while (next >= 0) { ++ next = rpmfiNext(fi); ++ if (next == RPMERR_ITER_END) { ++ rc = RPMRC_OK; ++ break; ++ } ++ mode = rpmfiFMode(fi); ++ if (!S_ISREG(mode) || !rpmfiArchiveHasContent(fi)) { ++ /* not a regular file, or the archive doesn't contain any content ++ * for this entry. ++ */ ++ continue; ++ } ++ digest = rpmfiFDigest(fi, NULL, NULL); ++ if (digestSetGetEntry(ds, digest, NULL)) { ++ /* This specific digest has already been included, so skip it. 
*/ ++ continue; ++ } ++ pad = pad_to(pos, fundamental_block_size); ++ if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) { ++ fprintf(stderr, _("Unable to write padding\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ /* round up to next fundamental_block_size */ ++ pos += pad; ++ digestSetAddEntry(ds, digest); ++ offsets[offset_ix].digest = digest; ++ offsets[offset_ix].pos = pos; ++ offset_ix++; ++ size = rpmfiFSize(fi); ++ rc = rpmfiArchiveReadToFile(fi, fdo, 0); ++ if (rc != RPMRC_OK) { ++ fprintf(stderr, _("rpmfiArchiveReadToFile failed with %d\n"), rc); ++ goto exit; ++ } ++ pos += size; ++ } ++ Fclose(gzdi); /* XXX gzdi == fdi */ ++ ++ qsort(offsets, (size_t)offset_ix, sizeof(struct digestoffset), ++ digestoffsetCmp); ++ ++ len = sizeof(offset_ix); ++ if (Fwrite(&offset_ix, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write length of table\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ len = sizeof(diglen); ++ if (Fwrite(&diglen, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write length of digest\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ len = sizeof(rpm_loff_t); ++ for (int x = 0; x < offset_ix; x++) { ++ if (Fwrite(offsets[x].digest, diglen, 1, fdo) != diglen) { ++ fprintf(stderr, _("Unable to write digest\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ if (Fwrite(&offsets[x].pos, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write offset\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ } ++ validation_pos = ( ++ pos + sizeof(offset_ix) + sizeof(diglen) + ++ offset_ix * (diglen + sizeof(rpm_loff_t)) ++ ); ++ ++ ssize_t validation_len = ufdCopy(validationi, fdo); ++ if (validation_len == -1) { ++ fprintf(stderr, _("digest table ufdCopy failed\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ /* add more padding so the last file can be cloned. It doesn't matter that ++ * the table and validation etc are in this space. In fact, it's pretty ++ * efficient if it is. 
++ */ ++ ++ pad = pad_to((validation_pos + validation_len + 2 * sizeof(rpm_loff_t) + ++ sizeof(uint64_t)), fundamental_block_size); ++ if (Fwrite(zeros, sizeof(char), pad, fdo) != pad) { ++ fprintf(stderr, _("Unable to write final padding\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ zeros = _free(zeros); ++ if (Fwrite(&pos, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write offset of digest table\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ if (Fwrite(&validation_pos, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write offset of validation table\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ uint64_t magic = MAGIC; ++ len = sizeof(magic); ++ if (Fwrite(&magic, len, 1, fdo) != len) { ++ fprintf(stderr, _("Unable to write magic\n")); ++ rc = RPMRC_FAIL; ++ goto exit; ++ } ++ ++exit: ++ rpmfilesFree(files); ++ rpmfiFree(fi); ++ headerFree(h); ++ return rc; ++} ++ ++int main(int argc, char *argv[]) { ++ rpmRC rc; ++ int cprc = 0; ++ uint8_t algos[argc - 1]; ++ int mainpipefd[2]; ++ int metapipefd[2]; ++ pid_t cpid, w; ++ int wstatus; ++ ++ xsetprogname(argv[0]); /* Portability call -- see system.h */ ++ rpmReadConfigFiles(NULL, NULL); ++ ++ if (argc > 1 && (rstreq(argv[1], "-h") || rstreq(argv[1], "--help"))) { ++ fprintf(stderr, _("Usage: %s [DIGESTALGO]...\n"), argv[0]); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (argc == 1) { ++ fprintf(stderr, ++ _("Need at least one DIGESTALGO parameter, e.g. 
'SHA256'\n")); ++ exit(EXIT_FAILURE); ++ } ++ ++ for (int x = 0; x < (argc - 1); x++) { ++ if (pgpStringVal(PGPVAL_HASHALGO, argv[x + 1], &algos[x]) != 0) ++ { ++ fprintf(stderr, ++ _("Unable to resolve '%s' as a digest algorithm, exiting\n"), ++ argv[x + 1]); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ ++ if (pipe(mainpipefd) == -1) { ++ fprintf(stderr, _("Main pipe failure\n")); ++ exit(EXIT_FAILURE); ++ } ++ if (pipe(metapipefd) == -1) { ++ fprintf(stderr, _("Meta pipe failure\n")); ++ exit(EXIT_FAILURE); ++ } ++ cpid = fork(); ++ if (cpid == 0) { ++ /* child: digestor */ ++ close(mainpipefd[0]); ++ close(metapipefd[0]); ++ FD_t fdi = fdDup(STDIN_FILENO); ++ FD_t fdo = fdDup(mainpipefd[1]); ++ FD_t validationo = fdDup(metapipefd[1]); ++ rc = digestor(fdi, fdo, validationo, algos, argc - 1); ++ Fclose(validationo); ++ Fclose(fdo); ++ Fclose(fdi); ++ } else { ++ /* parent: main program */ ++ close(mainpipefd[1]); ++ close(metapipefd[1]); ++ FD_t fdi = fdDup(mainpipefd[0]); ++ FD_t validationi = fdDup(metapipefd[0]); ++ rc = process_package(fdi, validationi); ++ Fclose(validationi); ++ /* fdi is normally closed through the stacked file gzdi in the ++ * function. ++ * Wait for child process (digestor for stdin) to complete. 
++ */ ++ if (rc != RPMRC_OK) { ++ if (kill(cpid, SIGTERM) != 0) { ++ fprintf(stderr, ++ _("Failed to kill digest process when main process failed: %s\n"), ++ strerror(errno)); ++ } ++ } ++ w = waitpid(cpid, &wstatus, 0); ++ if (w == -1) { ++ fprintf(stderr, _("waitpid failed\n")); ++ cprc = EXIT_FAILURE; ++ } else if (WIFEXITED(wstatus)) { ++ cprc = WEXITSTATUS(wstatus); ++ if (cprc != 0) { ++ fprintf(stderr, ++ _("Digest process non-zero exit code %d\n"), ++ cprc); ++ } ++ } else if (WIFSIGNALED(wstatus)) { ++ fprintf(stderr, ++ _("Digest process was terminated with a signal: %d\n"), ++ WTERMSIG(wstatus)); ++ cprc = EXIT_FAILURE; ++ } else { ++ /* Don't think this can happen, but covering all bases */ ++ fprintf(stderr, _("Unhandled circumstance in waitpid\n")); ++ cprc = EXIT_FAILURE; ++ } ++ if (cprc != EXIT_SUCCESS) { ++ rc = RPMRC_FAIL; ++ } ++ } ++ if (rc != RPMRC_OK) { ++ /* translate rpmRC into generic failure return code. */ ++ return EXIT_FAILURE; ++ } ++ return EXIT_SUCCESS; ++} +diff --git a/rpmio/rpmpgp.c b/rpmio/rpmpgp.c +index 015c15a5c..7b972b4a6 100644 +--- a/rpmio/rpmpgp.c ++++ b/rpmio/rpmpgp.c +@@ -283,6 +283,16 @@ int pgpValTok(pgpValTbl vs, const char * s, const char * se) + return vs->val; + } + ++int pgpStringVal(pgpValType type, const char *str, uint8_t *val) ++{ ++ pgpValTbl tbl = pgpValTable(type); ++ if (tbl == NULL) return -1; ++ int v = pgpValTok(tbl, str, str + strlen(str)); ++ if (v == -1) return -1; ++ *val = (uint8_t)v; ++ return 0; ++} ++ + /** \ingroup rpmpgp + * Decode length from 1, 2, or 5 octet body length encoding, used in + * new format packet headers and V4 signature subpackets. +diff --git a/rpmio/rpmpgp.h b/rpmio/rpmpgp.h +index c53e29b01..2b57318ba 100644 +--- a/rpmio/rpmpgp.h ++++ b/rpmio/rpmpgp.h +@@ -973,6 +973,15 @@ typedef rpmFlags rpmDigestFlags; + */ + const char * pgpValString(pgpValType type, uint8_t val); + ++/** \ingroup rpmpgp ++ * Return OpenPGP value for a string. 
++ * @param type type of value ++ * @param str string to lookup ++ * @param[out] val byte value associated with string ++ * @return 0 on success else -1 ++ */ ++int pgpStringVal(pgpValType type, const char *str, uint8_t *val); ++ + /** \ingroup rpmpgp + * Return (native-endian) integer from big-endian representation. + * @param s pointer to big-endian integer diff --git a/measure.patch b/measure.patch index 46ce540..b0c580f 100644 --- a/measure.patch +++ b/measure.patch @@ -14,9 +14,10 @@ diff --git a/macros.in b/macros.in index 3cc8a3555..c8a087959 100644 --- a/macros.in +++ b/macros.in -@@ -1173,5 +1173,6 @@ package or when debugging this package.\ +@@ -1173,6 +1173,7 @@ package or when debugging this package.\ # Transaction plugin macros %__plugindir %{_libdir}/rpm-plugins + %__transaction_reflink %{__plugindir}/reflink.so +%__transaction_measure %{__plugindir}/measure.so %__transaction_systemd_inhibit %{__plugindir}/systemd_inhibit.so %__transaction_selinux %{__plugindir}/selinux.so diff --git a/rpm.spec b/rpm.spec index 4e38dd8..388c103 100644 --- a/rpm.spec +++ b/rpm.spec @@ -135,6 +135,8 @@ Patch1983: 0034-rpmsign-Adopting-PKCS11-opaque-keys-support-in-libfsverity-for-f %endif Patch9989: 1534.patch +Patch9991: https://github.com/chantra/rpm/compare/master...cow.diff +Provides: rpm(pr1470) Patch9999: measure.patch # Partially GPL/LGPL dual-licensed and some bits with BSD @@ -369,6 +371,13 @@ Obsoletes: fapolicyd-dnf-plugin %description plugin-fapolicyd %{summary}. +%package plugin-reflink +Summary: Rpm plugin for reflink functionality +Requires: rpm-libs%{_isa} = %{version}-%{release} + +%description plugin-reflink +%{summary}. 
+ %package plugin-prioreset Summary: Rpm plugin for resetting scriptlet priorities for SysV init Requires: rpm-libs%{_isa} = %{version}-%{release} @@ -611,6 +620,10 @@ fi %{_libdir}/rpm-plugins/fapolicyd.so %{_mandir}/man8/rpm-plugin-fapolicyd.8* +%files plugin-reflink +%{_bindir}/rpm2extents +%{_libdir}/rpm-plugins/reflink.so + %files plugin-prioreset %{_libdir}/rpm-plugins/prioreset.so %{_mandir}/man8/rpm-plugin-prioreset.8*