From 97dceb8f926e6868989e7f6be84e2bccb7c6f40e Mon Sep 17 00:00:00 2001 From: chantra Date: Feb 26 2022 03:54:02 +0000 Subject: import librepo from c9s, add support for rpm2extents --- diff --git a/222.patch b/222.patch new file mode 100644 index 0000000..a9bf194 --- /dev/null +++ b/222.patch @@ -0,0 +1,493 @@ +From 638dd0a374b5936a5bb1558468cfd4a973f36907 Mon Sep 17 00:00:00 2001 +From: Matthew Almond +Date: Mon, 21 Sep 2020 15:20:07 -0700 +Subject: [PATCH 1/2] Add support for rpm2extents transcoder + +Two related parts: + +1. If `LIBREPO_TRANSCODE_RPMS` environment is set to a program (with parameters) then downloads are piped through it. +2. Transcoded RPMS by definition will not have the same bits on disk as downloaded. This is inherent. The transcoder is tasked with measuring the bits that enter stdin and storing a copy of the digest(s) seen in the footer. `librepo` can then use these stored digests instead if the environment variable is set. + +This is part of changes described in https://fedoraproject.org/wiki/Changes/RPMCoW +--- + librepo/checksum.c | 111 ++++++++++++++++++++++++++++++++- + librepo/downloader.c | 145 ++++++++++++++++++++++++++++++++++++++++++- + librepo/rcodes.h | 2 + + 3 files changed, 254 insertions(+), 4 deletions(-) + +diff --git a/librepo/checksum.c b/librepo/checksum.c +index db370404..b1d3d20a 100644 +--- a/librepo/checksum.c ++++ b/librepo/checksum.c +@@ -39,6 +39,9 @@ + #define BUFFER_SIZE 2048 + #define MAX_CHECKSUM_NAME_LEN 7 + ++/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */ ++#define MAGIC 3472329499408095051 ++ + LrChecksumType + lr_checksum_type(const char *type) + { +@@ -102,6 +105,100 @@ lr_checksum_type_to_str(LrChecksumType type) + return NULL; + } + ++char * ++lr_checksum_cow_fd(LrChecksumType type, int fd, GError **err) ++{ ++ struct __attribute__ ((__packed__)) csum_offset_magic { ++ off64_t csum_offset; ++ uint64_t magic; ++ }; ++ struct __attribute__ ((__packed__)) orig_size_algos_len { ++ ssize_t orig_size; ++ uint32_t algos_len; ++ }; ++ struct __attribute__ ((__packed__)) algo_len_digest_len { ++ uint32_t algo_len; ++ uint32_t digest_len; ++ }; ++ ++ struct csum_offset_magic csum_offset_magic; ++ struct orig_size_algos_len orig_size_algos_len; ++ struct algo_len_digest_len algo_len_digest_len; ++ char *algo, *checksum; ++ unsigned char *digest; ++ size_t len = sizeof(csum_offset_magic); ++ ++ if (g_getenv("LIBREPO_TRANSCODE_RPMS") == NULL) { ++ g_debug("Transcoding not enabled, skipping path"); ++ return NULL; ++ } ++ if (lseek(fd, -len, SEEK_END) == -1) { ++ g_warning("seek for transcode failed, probably too small"); ++ return NULL; ++ } ++ if (read(fd, &csum_offset_magic, len) != len) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "Cannot read csum_offset, magic. size = %lu", len); ++ return NULL; ++ } ++ if (csum_offset_magic.magic != MAGIC) { ++ g_debug("Not transcoded"); ++ return NULL; ++ } ++ g_debug("Is transcoded"); ++ if (lseek(fd, csum_offset_magic.csum_offset, SEEK_SET) == -1) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "seek for transcode csum_offset failed"); ++ return NULL; ++ } ++ len = sizeof(orig_size_algos_len); ++ if (read(fd, &orig_size_algos_len, len) != len) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "Cannot read orig_size_algos_len"); ++ return NULL; ++ } ++ while (orig_size_algos_len.algos_len > 0) { ++ len = sizeof(algo_len_digest_len); ++ if (read(fd, &algo_len_digest_len, len) != len) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "Cannot read algo_len_digest_len"); ++ return NULL; ++ } ++ ++ len = algo_len_digest_len.algo_len; ++ algo = lr_malloc0(len + 1); ++ if (read(fd, algo, len) != len) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "Cannot read algo"); ++ lr_free(algo); ++ return NULL; ++ } ++ len = algo_len_digest_len.digest_len; ++ digest = lr_malloc0(len); ++ if (read(fd, digest, len) != len) { ++ g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE, ++ "Cannot read digest"); ++ lr_free(algo); ++ lr_free(digest); ++ return NULL; ++ } ++ if (lr_checksum_type(algo) == type) { ++ /* found it, do the same as lr_checksum_fd does */ ++ checksum = lr_malloc0(sizeof(char) * (len * 2 + 1)); ++ for (size_t x = 0; x < len; x++) { ++ sprintf(checksum+(x*2), "%02x", digest[x]); ++ } ++ lr_free(algo); ++ lr_free(digest); ++ return checksum; ++ } ++ lr_free(algo); ++ lr_free(digest); ++ orig_size_algos_len.algos_len--; ++ } ++ return NULL; ++} ++ + char * + lr_checksum_fd(LrChecksumType type, int fd, GError **err) + { +@@ -259,9 +356,17 @@ lr_checksum_fd_compare(LrChecksumType type, + } + } + +- checksum = lr_checksum_fd(type, fd, err); +- if (!checksum) +- return FALSE; ++ checksum = lr_checksum_cow_fd(type, fd, err); ++ if (checksum) { ++ // if checksum is found in CoW package, do not cache it in xattr ++ // because looking this up is nearly constant time (cheap) but ++ // is not valid when CoW is not enabled in RPM. ++ caching = FALSE; ++ } else { ++ checksum = lr_checksum_fd(type, fd, err); ++ if (!checksum) ++ return FALSE; ++ } + + *matches = (strcmp(expected, checksum)) ? FALSE : TRUE; + +diff --git a/librepo/downloader.c b/librepo/downloader.c +index c5278fbc..5326eb7a 100644 +--- a/librepo/downloader.c ++++ b/librepo/downloader.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -151,6 +152,10 @@ typedef struct { + FILE *f; /*!< + fdopened file descriptor from LrDownloadTarget and used + in curl_handle. */ ++ FILE *writef; /*!< ++ the fd to write data to. Could be a subprocess. */ ++ pid_t pid; /*!< ++ the pid of a transcoder. */ + char errorbuffer[CURL_ERROR_SIZE]; /*!< + Error buffer used in curl handle */ + GSList *tried_mirrors; /*!< +@@ -614,7 +619,7 @@ lr_writecb(char *ptr, size_t size, size_t nmemb, void *userdata) + if (range_start <= 0 && range_end <= 0) { + // Write everything curl give to you + target->writecb_recieved += all; +- return fwrite(ptr, size, nmemb, target->f); ++ return fwrite(ptr, size, nmemb, target->writef); + } + + /* Deal with situation when user wants only specific byte range of the +@@ -1428,6 +1433,136 @@ open_target_file(LrTarget *target, GError **err) + return f; + } + ++/** Maybe transcode the file ++ */ ++void ++maybe_transcode(LrTarget *target, GError **err) ++{ ++ const char *e = g_getenv("LIBREPO_TRANSCODE_RPMS"); ++ int transcoder_stdin[2], fd; ++ pid_t pid; ++ FILE *out; ++ _cleanup_strv_free_ gchar **args = NULL; ++ target->writef = NULL; ++ if (!e) { ++ g_debug("Not transcoding"); ++ target->writef = target->f; ++ return; ++ } ++ if (g_str_has_suffix(target->target->path, ".rpm") == FALSE) { ++ g_debug("Not transcoding %s due to name", target->target->path); ++ target->writef = target->f; ++ return; ++ } ++ g_debug("Transcoding %s", target->target->path); ++ args = g_strsplit(e, " ", -1); ++ if (args[0] == NULL) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "transcode env empty"); ++ return; ++ } ++ if (pipe(transcoder_stdin) != 0) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "input pipe creation failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ /** librepo collects the 'write' ends of the pipes. We must mark these as ++ * FD_CLOEXEC so a second download/transcode does not inherit them and ++ * hold them open, as it'll prevent an EOF and cause a deadlock. ++ */ ++ if (fcntl(transcoder_stdin[1], F_SETFD, FD_CLOEXEC) != 0) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "input pipe write close-on-fork failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ pid = fork(); ++ if (pid == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "fork failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ if (pid == 0) { ++ /* child */ ++ if (dup2(transcoder_stdin[0], STDIN_FILENO) == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "dup2 of stdin failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ close(transcoder_stdin[0]); ++ close(transcoder_stdin[1]); ++ fd = fileno(target->f); ++ if (fd == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "fileno for target failed"); ++ return; ++ } ++ if (dup2(fd, STDOUT_FILENO) == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "dup2 of stdout failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ if (execv(args[0], args) == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "execv failed: %s", g_strerror(errno)); ++ } ++ /* we never get here, but appease static analysis */ ++ return; ++ } else { ++ /* parent */ ++ close(transcoder_stdin[0]); ++ out = fdopen(transcoder_stdin[1], "w"); ++ if (out == NULL) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "fdopen failed: %s", ++ g_strerror(errno)); ++ return; ++ } ++ target->pid = pid; ++ target->writef = out; ++ /* resuming a transcode is not yet implemented */ ++ target->resume = FALSE; ++ } ++} ++ ++void ++cleanup_transcode(LrTarget *target, GError **err) ++{ ++ int wstatus, trc; ++ if (!target->writef) { ++ return; ++ } ++ if (target->writef == target->f) { ++ return; ++ } ++ fclose(target->writef); ++ if(waitpid(target->pid, &wstatus, 0) == -1) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "transcode waitpid failed: %s", g_strerror(errno)); ++ } else if (WIFEXITED(wstatus)) { ++ trc = WEXITSTATUS(wstatus); ++ if (trc != 0) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "transcode process non-zero exit code %d", trc); ++ } ++ } else if (WIFSIGNALED(wstatus)) { ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "transcode process was terminated with a signal: %d", ++ WTERMSIG(wstatus)); ++ } else { ++ /* don't think this can happen, but covering all bases */ ++ g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ "transcode unhandled circumstance in waitpid"); ++ } ++ target->writef = NULL; ++ /* pid is only valid if writef is not NULL */ ++ /* target->pid = -1; */ ++} ++ + /** Prepare next transfer + */ + static gboolean +@@ -1509,6 +1644,9 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err) + target->f = open_target_file(target, err); + if (!target->f) + goto fail; ++ maybe_transcode(target, err); ++ if (!target->writef) ++ goto fail; + target->writecb_recieved = 0; + target->writecb_required_range_written = FALSE; + +@@ -1684,6 +1822,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err) + curl_easy_cleanup(target->curl_handle); + target->curl_handle = NULL; + } ++ cleanup_transcode(target, err); + if (target->f != NULL) { + fclose(target->f); + target->f = NULL; +@@ -2254,6 +2393,8 @@ check_transfer_statuses(LrDownload *dd, GError **err) + if (transfer_err) // Transfer was unsuccessful + goto transfer_error; + ++ cleanup_transcode(target, err); ++ + // + // Checksum checking + // +@@ -2348,6 +2489,7 @@ check_transfer_statuses(LrDownload *dd, GError **err) + target->curl_handle = NULL; + g_free(target->headercb_interrupt_reason); + target->headercb_interrupt_reason = NULL; ++ cleanup_transcode(target, err); + fclose(target->f); + target->f = NULL; + if (target->curl_rqheaders) { +@@ -2751,6 +2893,7 @@ lr_download(GSList *targets, + curl_multi_remove_handle(dd.multi_handle, target->curl_handle); + curl_easy_cleanup(target->curl_handle); + target->curl_handle = NULL; ++ cleanup_transcode(target, err); + fclose(target->f); + target->f = NULL; + g_free(target->headercb_interrupt_reason); +diff --git a/librepo/rcodes.h b/librepo/rcodes.h +index dcbeb413..fd65bd60 100644 +--- a/librepo/rcodes.h ++++ b/librepo/rcodes.h +@@ -125,6 +125,8 @@ typedef enum { + key/group not found, ...) */ + LRE_ZCK, /*!< + (41) Zchunk error (error reading zchunk file, ...) */ ++ LRE_TRANSCODE, /*!< ++ (42) Transcode error (env empty, ...) */ + LRE_UNKNOWNERROR, /*!< + (xx) unknown error - sentinel of error codes enum */ + } LrRc; /*!< Return codes */ + +From 566400afbc489c228b88f4126b7d440681e08824 Mon Sep 17 00:00:00 2001 +From: Matthew Almond +Date: Fri, 29 Jan 2021 23:14:28 -0800 +Subject: [PATCH 2/2] Fix hard download error on 404 etc + +I've observed errors when some mirrors are not completely synced. The +library tries to download a file, but gets a 404 error. This means we +need to retry with another mirror, not crash out. This was crashing +because setting `err` in `clean_transcode()` was firing the assert at +the start of `truncate_transfer_file()`. Note this failure mode was most +common with 404's, but any transfer error could likely have turned +fatal, for invalid reasons. + +We use `cleanup_transcode()` in two contexts. + +1. Within `check_transfer_statuses()`. The first call here happens + during a normal download after `check_finished_transfer_status()`. + The cleanup checks for errors, and any here will be flagged as a + `transfer_err` (not a general, err). +2. In 3 other places where an error has already occurred. We need to + wait for the program to exit (and it should stop due to a SIGPIPE + or short read from stdin), but we don't need to set an error because + something already is handling one. It doesn't matter that the + transcoder crashed because we're not going to use the output anyway, + and we are likely to retry. +--- + librepo/downloader.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/librepo/downloader.c b/librepo/downloader.c +index 5326eb7a..39567a19 100644 +--- a/librepo/downloader.c ++++ b/librepo/downloader.c +@@ -1530,8 +1530,12 @@ maybe_transcode(LrTarget *target, GError **err) + } + + void +-cleanup_transcode(LrTarget *target, GError **err) ++cleanup_transcode(LrTarget *target, GError **transfer_err) + { ++ /** transfer_err can be NULL if we're using this to clean up a failed ++ * transfer. In that circumstance g_set_error does nothing which is fine, ++ * we don't need to pile on a second failure reason. ++ */ + int wstatus, trc; + if (!target->writef) { + return; +@@ -1541,21 +1545,21 @@ cleanup_transcode(LrTarget *target, GError **err) + } + fclose(target->writef); + if(waitpid(target->pid, &wstatus, 0) == -1) { +- g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, + "transcode waitpid failed: %s", g_strerror(errno)); + } else if (WIFEXITED(wstatus)) { + trc = WEXITSTATUS(wstatus); + if (trc != 0) { +- g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, + "transcode process non-zero exit code %d", trc); + } + } else if (WIFSIGNALED(wstatus)) { +- g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, + "transcode process was terminated with a signal: %d", + WTERMSIG(wstatus)); + } else { + /* don't think this can happen, but covering all bases */ +- g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, ++ g_set_error(transfer_err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE, + "transcode unhandled circumstance in waitpid"); + } + target->writef = NULL; +@@ -1822,7 +1826,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err) + curl_easy_cleanup(target->curl_handle); + target->curl_handle = NULL; + } +- cleanup_transcode(target, err); ++ cleanup_transcode(target, NULL); + if (target->f != NULL) { + fclose(target->f); + target->f = NULL; +@@ -2390,11 +2394,11 @@ check_transfer_statuses(LrDownload *dd, GError **err) + if (!ret) // Error + return FALSE; + ++ cleanup_transcode(target, &transfer_err); ++ + if (transfer_err) // Transfer was unsuccessful + goto transfer_error; + +- cleanup_transcode(target, err); +- + // + // Checksum checking + // +@@ -2489,7 +2493,7 @@ check_transfer_statuses(LrDownload *dd, GError **err) + target->curl_handle = NULL; + g_free(target->headercb_interrupt_reason); + target->headercb_interrupt_reason = NULL; +- cleanup_transcode(target, err); ++ cleanup_transcode(target, NULL); + fclose(target->f); + target->f = NULL; + if (target->curl_rqheaders) { +@@ -2893,7 +2897,7 @@ lr_download(GSList *targets, + curl_multi_remove_handle(dd.multi_handle, target->curl_handle); + curl_easy_cleanup(target->curl_handle); + target->curl_handle = NULL; +- cleanup_transcode(target, err); ++ cleanup_transcode(target, NULL); + fclose(target->f); + target->f = NULL; + g_free(target->headercb_interrupt_reason); diff --git a/librepo.spec b/librepo.spec index 5992046..0401546 100644 --- a/librepo.spec +++ b/librepo.spec @@ -12,12 +12,14 @@ Name: librepo Version: 1.14.2 -Release: 1%{?dist} +Release: 1.1%{?dist} Summary: Repodata downloading library License: LGPLv2+ URL: https://github.com/rpm-software-management/librepo Source0: %{url}/archive/%{version}/%{name}-%{version}.tar.gz +Patch9999: https://github.com/rpm-software-management/librepo/pull/222.patch +Provides: librepo(pr222) BuildRequires: cmake BuildRequires: gcc @@ -96,6 +98,10 @@ Python 3 bindings for the librepo library. %{python3_sitearch}/%{name}/ %changelog +* Fri Feb 25 2022 Manu Bretelle - 1.14.2-1.1 +- Import c9s librepo +- Add support for rpm2extents digest (PR#222) + * Mon Oct 25 2021 Pavla Kratochvilova - 1.14.2-1 - Update to 1.14.2 - Reduce time to load metadata