diff --git a/269.patch b/269.patch new file mode 100644 index 0000000..6e3b50e --- /dev/null +++ b/269.patch @@ -0,0 +1,263 @@ +From b4d103c6dec2aa0f8461e1ca78ad23d692e68d36 Mon Sep 17 00:00:00 2001 +From: Matthew Almond +Date: Thu, 20 May 2021 13:35:13 -0700 +Subject: [PATCH 1/3] Add option --skip-filelists + +This is a site-local optimization. Some packages and repos include an +enormous number of files. This is extremely expensive if said repo is +also fast changing. + +Impact of skipping filelists: breaking ability to resolve file/path +based dependencies, `-f` (file ownership) and `-l` (list) options in +repoquery. +--- + doc/createrepo_c.8 | 3 +++ + src/cmd_parser.c | 2 ++ + src/cmd_parser.h | 1 + + src/createrepo_c.c | 1 + + src/dumper_thread.c | 5 +++++ + src/dumper_thread.h | 1 + + src/parsehdr.c | 3 ++- + src/parsehdr.h | 1 + + 8 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8 +index e9b3fc2d..10702f48 100644 +--- a/doc/createrepo_c.8 ++++ b/doc/createrepo_c.8 +@@ -213,5 +213,8 @@ Exit with retval 2 if there were any errors during processing + .SS \-\-ignore\-lock + .sp + Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong. ++.SS \-\-skip\-filelists ++.sp ++Expert (risky) option: Skip filelist generation. + .\" Generated by docutils manpage writer. + . +diff --git a/src/cmd_parser.c b/src/cmd_parser.c +index bbefa080..0ecf7f99 100644 +--- a/src/cmd_parser.c ++++ b/src/cmd_parser.c +@@ -224,6 +224,8 @@ static GOptionEntry expert_entries[] = + "own risk! 
If two createrepos run simultaneously, then the state of the " + "generated metadata is not guaranteed - it can be inconsistent and wrong.", + NULL }, ++ { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists), ++ "Expert (risky) option: Skip filelist generation.", NULL}, + { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }, + }; + +diff --git a/src/cmd_parser.h b/src/cmd_parser.h +index 32bcf992..5eff9d9d 100644 +--- a/src/cmd_parser.h ++++ b/src/cmd_parser.h +@@ -57,6 +57,7 @@ struct CmdOptions { + char *general_compress_type;/*!< which compression type to use (even for + primary, filelists and other xml) */ + gboolean skip_symlinks; /*!< ignore symlinks of packages */ ++ gboolean skip_filelists; /*!< Skip creating filelists */ + gint changelog_limit; /*!< number of changelog messages in + other.(xml|sqlite) */ + gboolean unique_md_filenames; /*!< include the file checksums in +diff --git a/src/createrepo_c.c b/src/createrepo_c.c +index f4f45445..9dd288e5 100644 +--- a/src/createrepo_c.c ++++ b/src/createrepo_c.c +@@ -1253,6 +1253,7 @@ main(int argc, char **argv) + user_data.checksum_type = cmd_options->checksum_type; + user_data.checksum_cachedir = cmd_options->checksum_cachedir; + user_data.skip_symlinks = cmd_options->skip_symlinks; ++ user_data.skip_filelists = cmd_options->skip_filelists; + user_data.repodir_name_len = strlen(in_dir); + user_data.task_count = task_count; + user_data.package_count = 0; +diff --git a/src/dumper_thread.c b/src/dumper_thread.c +index 119f3bd8..f7c4e356 100644 +--- a/src/dumper_thread.c ++++ b/src/dumper_thread.c +@@ -431,6 +431,11 @@ cr_dumper_thread(gpointer data, gpointer user_data) + if (udata->checksum_cachedir) + hdrrflags = CR_HDRR_LOADHDRID | CR_HDRR_LOADSIGNATURES; + ++ ++ // Load filelists, unless --skip-filelists is passed. 
++ if (udata->skip_filelists) ++ hdrrflags |= CR_HDRR_SKIPFILES; ++ + // Get stat info about file + if (udata->old_metadata && !(udata->skip_stat)) { + if (stat(task->full_path, &stat_buf) == -1) { +diff --git a/src/dumper_thread.h b/src/dumper_thread.h +index 60f984d7..654991fc 100644 +--- a/src/dumper_thread.h ++++ b/src/dumper_thread.h +@@ -66,6 +66,7 @@ struct UserData { + cr_ChecksumType checksum_type; // Constant representing selected checksum + const char *checksum_cachedir; // Dir with cached checksums + gboolean skip_symlinks; // Skip symlinks ++ gboolean skip_filelists; // Skip filelists + long task_count; // Total number of task to process + long package_count; // Total number of packages processed + +diff --git a/src/parsehdr.c b/src/parsehdr.c +index 2775bf31..97bb01e4 100644 +--- a/src/parsehdr.c ++++ b/src/parsehdr.c +@@ -253,7 +253,8 @@ cr_package_from_header(Header hdr, + assert(x == dir_count); + } + +- if (headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) && ++ if (!(hdrrflags & CR_HDRR_SKIPFILES) && ++ headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) && + headerGet(hdr, RPMTAG_DIRINDEXES, indexes, flags) && + headerGet(hdr, RPMTAG_BASENAMES, filenames, flags) && + headerGet(hdr, RPMTAG_FILEFLAGS, fileflags, flags) && +diff --git a/src/parsehdr.h b/src/parsehdr.h +index 032accad..e7a4a4aa 100644 +--- a/src/parsehdr.h ++++ b/src/parsehdr.h +@@ -39,6 +39,7 @@ typedef enum { + CR_HDRR_NONE = (1 << 0), + CR_HDRR_LOADHDRID = (1 << 1), /*!< Load hdrid */ + CR_HDRR_LOADSIGNATURES = (1 << 2), /*!< Load siggpg and siggpg */ ++ CR_HDRR_SKIPFILES = (1 << 3), /*!< Skip filelists */ + } cr_HeaderReadingFlags; + + /** Read data from header and return filled cr_Package structure. + +From da623813071274201d2bf75a7df25def03222f11 Mon Sep 17 00:00:00 2001 +From: Matthew Almond +Date: Fri, 4 Jun 2021 11:48:23 -0700 +Subject: [PATCH 2/3] Expand documentation, add warning + +Help text: + +``` +$ createrepo_c --help-expert +Usage: + createrepo_c [OPTION?] 
+ +Program that creates a repomd (xml-based rpm metadata) repository from a set of rpms. + +Expert (risky) options + --ignore-lock Expert (risky) option: +Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format ".repodata.time.microseconds.pid/" will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed - it can be inconsistent and wrong. + --skip-filelists Expert (risky) option: +Skip filelist generation, potentially saving significant bandwidth for repos with large numbers of files in packages. NOTE: Use this option on your own risk! This is a site-local optimization and should not be used for public repos. The site operator warrants the filenames in the packages in the repo are not named as required in other packages. The site operator also warrants that all clients do not need to use repoquery -l and -f to list or find packages that own a given file. +``` + +Example run: + +``` +$ createrepo_c --skip-filelists . +Directory walk started +Directory walk done - 9 packages +Temporary output repo path: ./.repodata/ +Preparing sqlite DBs +Warning: Expert option: --skip-filelists for site-local optimization active +Pool started (with 5 workers) +Pool finished +``` +--- + doc/createrepo_c.8 | 2 +- + src/cmd_parser.c | 9 ++++++++- + src/createrepo_c.c | 4 ++++ + 3 files changed, 13 insertions(+), 2 deletions(-) + +diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8 +index 10702f48..0788929d 100644 +--- a/doc/createrepo_c.8 ++++ b/doc/createrepo_c.8 +@@ -215,6 +215,6 @@ Exit with retval 2 if there were any errors during processing + Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. 
For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong. + .SS \-\-skip\-filelists + .sp +-Expert (risky) option: Skip filelist generation. ++Expert (risky) option: Skip filelist generation, potentially saving significant bandwidth for repos with large numbers of files in packages. NOTE: Use this option on your own risk! This is a site-local optimization and should not be used for public repos. The site operator warrants the filenames in the packages in the repo are not named as required in other packages. The site operator also warrants that all clients do not need to use repoquery -l and -f to list or find packages that own a given file. + .\" Generated by docutils manpage writer. + . +diff --git a/src/cmd_parser.c b/src/cmd_parser.c +index 0ecf7f99..92a2ca9d 100644 +--- a/src/cmd_parser.c ++++ b/src/cmd_parser.c +@@ -225,7 +225,14 @@ static GOptionEntry expert_entries[] = + "generated metadata is not guaranteed - it can be inconsistent and wrong.", + NULL }, + { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists), +- "Expert (risky) option: Skip filelist generation.", NULL}, ++ "Expert (risky) option: Skip filelist generation, potentially saving " ++ "significant bandwidth for repos with large numbers of files in " ++ "packages. NOTE: Use this option on your own risk! This is a site-local " ++ "optimization and should not be used for public repos. The site " ++ "operator warrants the filenames in the packages in the repo are not " ++ "named as required in other packages. 
The site operator also warrants " ++ "that all clients do not need to use repoquery -l and -f to list or " ++ "find packages that own a given file.", NULL}, + { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }, + }; + +diff --git a/src/createrepo_c.c b/src/createrepo_c.c +index 9dd288e5..f942b1d6 100644 +--- a/src/createrepo_c.c ++++ b/src/createrepo_c.c +@@ -1282,6 +1282,10 @@ main(int argc, char **argv) + g_mutex_init(&(user_data.mutex_old_md)); + g_mutex_init(&(user_data.mutex_deltatargetpackages)); + ++ if (cmd_options->skip_filelists) { ++ g_warning("Expert option: --skip-filelists for site-local optimization active"); ++ } ++ + g_debug("Thread pool user data ready"); + + // Start pool + +From 1688eb2a379aa440769d07eae71d0d44318f1afa Mon Sep 17 00:00:00 2001 +From: Matthew Almond +Date: Mon, 7 Jun 2021 15:47:42 -0700 +Subject: [PATCH 3/3] Reworded + +--- + doc/createrepo_c.8 | 2 +- + src/cmd_parser.c | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8 +index 0788929d..8e1e8b35 100644 +--- a/doc/createrepo_c.8 ++++ b/doc/createrepo_c.8 +@@ -215,6 +215,6 @@ Exit with retval 2 if there were any errors during processing + Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong. + .SS \-\-skip\-filelists + .sp +-Expert (risky) option: Skip filelist generation, potentially saving significant bandwidth for repos with large numbers of files in packages. NOTE: Use this option on your own risk! This is a site-local optimization and should not be used for public repos. 
The site operator warrants the filenames in the packages in the repo are not named as required in other packages. The site operator also warrants that all clients do not need to use repoquery -l and -f to list or find packages that own a given file. ++Expert (dangerous) option: Skip filelist generation, potentially saving significant bandwidth for repos with large numbers of files in packages. NOTE: Use this option on your own risk! This is a site-local optimization and should not be used for public repos. This option is known to break dependency resolution for any packages which depend on files provided by packages within this repository (e.g. /bin/bash). It will also prevent repoquery -l and -f from listing files owned by a package or discovering which package owns a particular file. + .\" Generated by docutils manpage writer. + . +diff --git a/src/cmd_parser.c b/src/cmd_parser.c +index 92a2ca9d..8c355fb5 100644 +--- a/src/cmd_parser.c ++++ b/src/cmd_parser.c +@@ -225,14 +225,14 @@ static GOptionEntry expert_entries[] = + "generated metadata is not guaranteed - it can be inconsistent and wrong.", + NULL }, + { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists), +- "Expert (risky) option: Skip filelist generation, potentially saving " +- "significant bandwidth for repos with large numbers of files in " ++ "Expert (dangerous) option: Skip filelist generation, potentially " ++ "saving significant bandwidth for repos with large numbers of files in " + "packages. NOTE: Use this option on your own risk! This is a site-local " +- "optimization and should not be used for public repos. The site " +- "operator warrants the filenames in the packages in the repo are not " +- "named as required in other packages. The site operator also warrants " +- "that all clients do not need to use repoquery -l and -f to list or " +- "find packages that own a given file.", NULL}, ++ "optimization and should not be used for public repos.
This option is " ++ "known to break dependency resolution for any packages which depend on " ++ "files provided by packages within this repository, e.g. /bin/bash). It " ++ "will also prevent using repoquery -l and -f from listing files owned " ++ "by a package or discover which package owns a particular file.", NULL}, + { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }, + }; + diff --git a/createrepo_c.spec b/createrepo_c.spec index 9f9b01e..5656020 100644 --- a/createrepo_c.spec +++ b/createrepo_c.spec @@ -25,7 +25,7 @@ Summary: Creates a common metadata repository Name: createrepo_c Version: 0.17.7 -Release: 4%{?dist} +Release: 4.1%{?dist} License: GPLv2+ URL: https://github.com/rpm-software-management/createrepo_c Source0: %{url}/archive/%{version}/%{name}-%{version}.tar.gz @@ -36,6 +36,11 @@ Patch4: 0004-Revert-Wrap-c-api-to-python-for-parsing-metadata-tog.patch Patch5: 0005-Revert-Add-c-tests-for-parsing-metadata-together.patch Patch6: 0006-Revert-Add-c-API-for-parsing-metadata-together.patch +# Hyperscale patches +# Add option --skip-filelists +Patch100: https://github.com/rpm-software-management/createrepo_c/pull/269.patch +Provides: createrepo_c(pr269) + BuildRequires: cmake BuildRequires: gcc BuildRequires: bzip2-devel @@ -181,6 +186,10 @@ ln -sr %{buildroot}%{_bindir}/modifyrepo_c %{buildroot}%{_bindir}/modifyrepo %{python3_sitearch}/%{name}-%{version}-py%{python3_version}.egg-info %changelog +* Wed Aug 24 2022 Davide Cavalca - 0.17.7-4.1 +- Hyperscale build +- Backport PR269 for --skip-filelists + * Mon Jun 6 2022 Lukas Hrazky - 0.17.7-4 - Revert addition of new API for parsing main metadata together (RhBug:2063141)