Blob Blame History Raw
From 638dd0a374b5936a5bb1558468cfd4a973f36907 Mon Sep 17 00:00:00 2001
From: Matthew Almond <malmond@fb.com>
Date: Mon, 21 Sep 2020 15:20:07 -0700
Subject: [PATCH 1/2] Add support for rpm2extents transcoder

Two related parts:

1. If `LIBREPO_TRANSCODE_RPMS` environment is set to a program (with parameters) then downloads are piped through it.
2. Transcoded RPMS by definition will not have the same bits on disk as downloaded. This is inherent. The transcoder is tasked with measuring the bits that enter stdin and storing a copy of the digest(s) seen in the footer. `librepo` can then use these stored digests instead if the environment variable is set.

This is part of changes described in https://fedoraproject.org/wiki/Changes/RPMCoW
---
 librepo/checksum.c   | 111 ++++++++++++++++++++++++++++++++-
 librepo/downloader.c | 145 ++++++++++++++++++++++++++++++++++++++++++-
 librepo/rcodes.h     |   2 +
 3 files changed, 254 insertions(+), 4 deletions(-)

diff --git a/librepo/checksum.c b/librepo/checksum.c
index db370404..b1d3d20a 100644
--- a/librepo/checksum.c
+++ b/librepo/checksum.c
@@ -39,6 +39,9 @@
 #define BUFFER_SIZE             2048
 #define MAX_CHECKSUM_NAME_LEN   7
 
+/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
+#define MAGIC 3472329499408095051
+
 LrChecksumType
 lr_checksum_type(const char *type)
 {
@@ -102,6 +105,100 @@ lr_checksum_type_to_str(LrChecksumType type)
     return NULL;
 }
 
+char *
+lr_checksum_cow_fd(LrChecksumType type, int fd, GError **err)
+{
+    struct __attribute__ ((__packed__)) csum_offset_magic {
+        off64_t csum_offset;
+        uint64_t magic;
+    };
+    struct __attribute__ ((__packed__)) orig_size_algos_len {
+        ssize_t orig_size;
+        uint32_t algos_len;
+    };
+    struct __attribute__ ((__packed__)) algo_len_digest_len {
+        uint32_t algo_len;
+        uint32_t digest_len;
+    };
+
+    struct csum_offset_magic csum_offset_magic;
+    struct orig_size_algos_len orig_size_algos_len;
+    struct algo_len_digest_len algo_len_digest_len;
+    char *algo, *checksum;
+    unsigned char *digest;
+    size_t len = sizeof(csum_offset_magic);
+
+    if (g_getenv("LIBREPO_TRANSCODE_RPMS") == NULL) {
+        g_debug("Transcoding not enabled, skipping path");
+        return NULL;
+    }
+    if (lseek(fd, -len, SEEK_END) == -1) {
+        g_warning("seek for transcode failed, probably too small");
+        return NULL;
+    }
+    if (read(fd, &csum_offset_magic, len) != len) {
+        g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                    "Cannot read csum_offset, magic. size = %lu", len);
+        return NULL;
+    }
+    if (csum_offset_magic.magic != MAGIC) {
+        g_debug("Not transcoded");
+        return NULL;
+    }
+    g_debug("Is transcoded");
+    if (lseek(fd, csum_offset_magic.csum_offset, SEEK_SET) == -1) {
+        g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                    "seek for transcode csum_offset failed");
+        return NULL;
+    }
+    len = sizeof(orig_size_algos_len);
+    if (read(fd, &orig_size_algos_len, len) != len) {
+        g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                    "Cannot read orig_size_algos_len");
+        return NULL;
+    }
+    while (orig_size_algos_len.algos_len > 0) {
+        len = sizeof(algo_len_digest_len);
+        if (read(fd, &algo_len_digest_len, len) != len) {
+            g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                        "Cannot read algo_len_digest_len");
+            return NULL;
+        }
+
+        len = algo_len_digest_len.algo_len;
+        algo = lr_malloc0(len + 1);
+        if (read(fd, algo, len) != len) {
+            g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                        "Cannot read algo");
+            lr_free(algo);
+            return NULL;
+        }
+        len = algo_len_digest_len.digest_len;
+        digest = lr_malloc0(len);
+        if (read(fd, digest, len) != len) {
+            g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
+                        "Cannot read digest");
+            lr_free(algo);
+            lr_free(digest);
+            return NULL;
+        }
+        if (lr_checksum_type(algo) == type) {
+            /* found it, do the same as lr_checksum_fd does */
+            checksum = lr_malloc0(sizeof(char) * (len * 2 + 1));
+            for (size_t x = 0; x < len; x++) {
+                sprintf(checksum+(x*2), "%02x", digest[x]);
+            }
+            lr_free(algo);
+            lr_free(digest);
+            return checksum;
+        }
+        lr_free(algo);
+        lr_free(digest);
+        orig_size_algos_len.algos_len--;
+    }
+    return NULL;
+}
+
 char *
 lr_checksum_fd(LrChecksumType type, int fd, GError **err)
 {
@@ -259,9 +356,17 @@ lr_checksum_fd_compare(LrChecksumType type,
         }
     }
 
-    checksum = lr_checksum_fd(type, fd, err);
-    if (!checksum)
-        return FALSE;
+    checksum = lr_checksum_cow_fd(type, fd, err);
+    if (checksum) {
+        // if checksum is found in CoW package, do not cache it in xattr
+        // because looking this up is nearly constant time (cheap) but
+        // is not valid when CoW is not enabled in RPM.
+        caching = FALSE;
+    } else {
+        checksum = lr_checksum_fd(type, fd, err);
+        if (!checksum)
+            return FALSE;
+    }
 
     *matches = (strcmp(expected, checksum)) ? FALSE : TRUE;
 
diff --git a/librepo/downloader.c b/librepo/downloader.c
index c5278fbc..5326eb7a 100644
--- a/librepo/downloader.c
+++ b/librepo/downloader.c
@@ -32,6 +32,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/time.h>
+#include <sys/wait.h>
 #include <sys/xattr.h>
 #include <fcntl.h>
 #include <curl/curl.h>
@@ -151,6 +152,10 @@ typedef struct {
     FILE *f; /*!<
         fdopened file descriptor from LrDownloadTarget and used
         in curl_handle. */
+    FILE *writef; /*!<
+        the fd to write data to. Could be a subprocess. */
+    pid_t pid; /*!<
+        the pid of a transcoder. */
     char errorbuffer[CURL_ERROR_SIZE]; /*!<
         Error buffer used in curl handle */
     GSList *tried_mirrors; /*!<
@@ -614,7 +619,7 @@ lr_writecb(char *ptr, size_t size, size_t nmemb, void *userdata)
     if (range_start <= 0 && range_end <= 0) {
         // Write everything curl give to you
         target->writecb_recieved += all;
-        return fwrite(ptr, size, nmemb, target->f);
+        return fwrite(ptr, size, nmemb, target->writef);
     }
 
     /* Deal with situation when user wants only specific byte range of the
@@ -1428,6 +1433,136 @@ open_target_file(LrTarget *target, GError **err)
     return f;
 }
 
+/** Maybe transcode the file
+ */
+void
+maybe_transcode(LrTarget *target, GError **err)
+{
+    const char *e = g_getenv("LIBREPO_TRANSCODE_RPMS");
+    int transcoder_stdin[2], fd;
+    pid_t pid;
+    FILE *out;
+    _cleanup_strv_free_ gchar **args = NULL;
+    target->writef = NULL;
+    if (!e) {
+        g_debug("Not transcoding");
+        target->writef = target->f;
+        return;
+    }
+    if (g_str_has_suffix(target->target->path, ".rpm") == FALSE) {
+        g_debug("Not transcoding %s due to name", target->target->path);
+        target->writef = target->f;
+        return;
+    }
+    g_debug("Transcoding %s", target->target->path);
+    args = g_strsplit(e, " ", -1);
+    if (args[0] == NULL) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "transcode env empty");
+        return;
+    }
+    if (pipe(transcoder_stdin) != 0) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "input pipe creation failed: %s",
+                    g_strerror(errno));
+        return;
+    }
+    /** librepo collects the 'write' ends of the pipes. We must mark these as
+     * FD_CLOEXEC so a second download/transcode does not inherit them and
+     * hold them open, as it'll prevent an EOF and cause a deadlock.
+    */
+    if (fcntl(transcoder_stdin[1], F_SETFD, FD_CLOEXEC) != 0) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "input pipe write close-on-fork failed: %s",
+                    g_strerror(errno));
+        return;
+    }
+    pid = fork();
+    if (pid == -1) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "fork failed: %s",
+                    g_strerror(errno));
+        return;
+    }
+    if (pid == 0) {
+        /* child */
+        if (dup2(transcoder_stdin[0], STDIN_FILENO) == -1) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "dup2 of stdin failed: %s",
+                        g_strerror(errno));
+            return;
+        }
+        close(transcoder_stdin[0]);
+        close(transcoder_stdin[1]);
+        fd = fileno(target->f);
+        if (fd == -1) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "fileno for target failed");
+            return;
+        }
+        if (dup2(fd, STDOUT_FILENO) == -1) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "dup2 of stdout failed: %s",
+                        g_strerror(errno));
+            return;
+        }
+        if (execv(args[0], args) == -1) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "execv failed: %s", g_strerror(errno));
+        }
+        /* we never get here, but appease static analysis */
+        return;
+    } else {
+        /* parent */
+        close(transcoder_stdin[0]);
+        out = fdopen(transcoder_stdin[1], "w");
+        if (out == NULL) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "fdopen failed: %s",
+                        g_strerror(errno));
+            return;
+        }
+        target->pid = pid;
+        target->writef = out;
+        /* resuming a transcode is not yet implemented */
+        target->resume = FALSE;
+    }
+}
+
+void
+cleanup_transcode(LrTarget *target, GError **err)
+{
+    int wstatus, trc;
+    if (!target->writef) {
+        return;
+    }
+    if (target->writef == target->f) {
+        return;
+    }
+    fclose(target->writef);
+    if(waitpid(target->pid, &wstatus, 0) == -1) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "transcode waitpid failed: %s", g_strerror(errno));
+    } else if (WIFEXITED(wstatus)) {
+        trc = WEXITSTATUS(wstatus);
+        if (trc != 0) {
+            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                        "transcode process non-zero exit code %d", trc);
+        }
+    } else if (WIFSIGNALED(wstatus)) {
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "transcode process was terminated with a signal: %d",
+                    WTERMSIG(wstatus));
+    } else {
+        /* don't think this can happen, but covering all bases */
+        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+                    "transcode unhandled circumstance in waitpid");
+    }
+    target->writef = NULL;
+    /* pid is only valid if writef is not NULL */
+    /* target->pid = -1; */
+}
+
 /** Prepare next transfer
  */
 static gboolean
@@ -1509,6 +1644,9 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
     target->f = open_target_file(target, err);
     if (!target->f)
         goto fail;
+    maybe_transcode(target, err);
+    if (!target->writef)
+        goto fail;
     target->writecb_recieved = 0;
     target->writecb_required_range_written = FALSE;
 
@@ -1684,6 +1822,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
         curl_easy_cleanup(target->curl_handle);
         target->curl_handle = NULL;
     }
+    cleanup_transcode(target, err);
     if (target->f != NULL) {
         fclose(target->f);
         target->f = NULL;
@@ -2254,6 +2393,8 @@ check_transfer_statuses(LrDownload *dd, GError **err)
         if (transfer_err)  // Transfer was unsuccessful
             goto transfer_error;
 
+        cleanup_transcode(target, err);
+
         //
         // Checksum checking
         //
@@ -2348,6 +2489,7 @@ check_transfer_statuses(LrDownload *dd, GError **err)
         target->curl_handle = NULL;
         g_free(target->headercb_interrupt_reason);
         target->headercb_interrupt_reason = NULL;
+        cleanup_transcode(target, err);
         fclose(target->f);
         target->f = NULL;
         if (target->curl_rqheaders) {
@@ -2751,6 +2893,7 @@ lr_download(GSList *targets,
             curl_multi_remove_handle(dd.multi_handle, target->curl_handle);
             curl_easy_cleanup(target->curl_handle);
             target->curl_handle = NULL;
+            cleanup_transcode(target, err);
             fclose(target->f);
             target->f = NULL;
             g_free(target->headercb_interrupt_reason);
diff --git a/librepo/rcodes.h b/librepo/rcodes.h
index dcbeb413..fd65bd60 100644
--- a/librepo/rcodes.h
+++ b/librepo/rcodes.h
@@ -125,6 +125,8 @@ typedef enum {
         key/group not found, ...) */
     LRE_ZCK, /*!<
         (41) Zchunk error (error reading zchunk file, ...) */
+    LRE_TRANSCODE, /*!<
+        (42) Transcode error (env empty, ...) */
     LRE_UNKNOWNERROR, /*!<
         (xx) unknown error - sentinel of error codes enum */
 } LrRc; /*!< Return codes */

From 566400afbc489c228b88f4126b7d440681e08824 Mon Sep 17 00:00:00 2001
From: Matthew Almond <malmond@fb.com>
Date: Fri, 29 Jan 2021 23:14:28 -0800
Subject: [PATCH 2/2] Fix hard download error on 404 etc

I've observed errors when some mirrors are not completely synced. The
library tries to download a file, but gets a 404 error. This means we
need to retry with another mirror, not crash out. This was crashing
because setting `err` in `clean_transcode()` was firing the assert at
the start of `truncate_transfer_file()`. Note this failure mode was most
common with 404's, but any transfer error could likely have turned
fatal, for invalid reasons.

We use `cleanup_transcode()` in two contexts.

1. Within `check_transfer_statuses()`. The first call here happens
   during a normal download after `check_finished_transfer_status()`.
   The cleanup checks for errors, and any here will be flagged as a
   `transfer_err` (not a general, err).
2. In 3 other places where an error has already occurred. We need to
   wait for the program to exit (and it should stop due to a SIGPIPE
   or short read from stdin), but we don't need to set an error because
   something already is handling one. It doesn't matter that the
   transcoder crashed because we're not going to use the output anyway,
   and we are likely to retry.
---
 librepo/downloader.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/librepo/downloader.c b/librepo/downloader.c
index 5326eb7a..39567a19 100644
--- a/librepo/downloader.c
+++ b/librepo/downloader.c
@@ -1530,8 +1530,12 @@ maybe_transcode(LrTarget *target, GError **err)
 }
 
 void
-cleanup_transcode(LrTarget *target, GError **err)
+cleanup_transcode(LrTarget *target, GError **transfer_err)
 {
+    /** transfer_err can be NULL if we're using this to clean up a failed
+     * transfer. In that circumstance g_set_error does nothing which is fine,
+     * we don't need to pile on a second failure reason.
+     */
     int wstatus, trc;
     if (!target->writef) {
         return;
@@ -1541,21 +1545,21 @@ cleanup_transcode(LrTarget *target, GError **err)
     }
     fclose(target->writef);
     if(waitpid(target->pid, &wstatus, 0) == -1) {
-        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+        g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
                     "transcode waitpid failed: %s", g_strerror(errno));
     } else if (WIFEXITED(wstatus)) {
         trc = WEXITSTATUS(wstatus);
         if (trc != 0) {
-            g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+            g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
                         "transcode process non-zero exit code %d", trc);
         }
     } else if (WIFSIGNALED(wstatus)) {
-        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+        g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
                     "transcode process was terminated with a signal: %d",
                     WTERMSIG(wstatus));
     } else {
         /* don't think this can happen, but covering all bases */
-        g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
+        g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
                     "transcode unhandled circumstance in waitpid");
     }
     target->writef = NULL;
@@ -1822,7 +1826,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
         curl_easy_cleanup(target->curl_handle);
         target->curl_handle = NULL;
     }
-    cleanup_transcode(target, err);
+    cleanup_transcode(target, NULL);
     if (target->f != NULL) {
         fclose(target->f);
         target->f = NULL;
@@ -2390,11 +2394,11 @@ check_transfer_statuses(LrDownload *dd, GError **err)
         if (!ret)  // Error
             return FALSE;
 
+        cleanup_transcode(target, &transfer_err);
+
         if (transfer_err)  // Transfer was unsuccessful
             goto transfer_error;
 
-        cleanup_transcode(target, err);
-
         //
         // Checksum checking
         //
@@ -2489,7 +2493,7 @@ check_transfer_statuses(LrDownload *dd, GError **err)
         target->curl_handle = NULL;
         g_free(target->headercb_interrupt_reason);
         target->headercb_interrupt_reason = NULL;
-        cleanup_transcode(target, err);
+        cleanup_transcode(target, NULL);
         fclose(target->f);
         target->f = NULL;
         if (target->curl_rqheaders) {
@@ -2893,7 +2897,7 @@ lr_download(GSList *targets,
             curl_multi_remove_handle(dd.multi_handle, target->curl_handle);
             curl_easy_cleanup(target->curl_handle);
             target->curl_handle = NULL;
-            cleanup_transcode(target, err);
+            cleanup_transcode(target, NULL);
             fclose(target->f);
             target->f = NULL;
             g_free(target->headercb_interrupt_reason);