From b4d103c6dec2aa0f8461e1ca78ad23d692e68d36 Mon Sep 17 00:00:00 2001 From: Matthew Almond Date: Thu, 20 May 2021 13:35:13 -0700 Subject: [PATCH] Add option --skip-filelists This is a site-local optimization. Some packages and repos include an enormous number of files, which makes filelist generation extremely expensive when the repo is also fast-changing. Impact of skipping filelists: it breaks the ability to resolve file/path-based dependencies, as well as the `-f` (file ownership) and `-l` (list) options in repoquery. --- doc/createrepo_c.8 | 3 +++ src/cmd_parser.c | 2 ++ src/cmd_parser.h | 1 + src/createrepo_c.c | 1 + src/dumper_thread.c | 5 +++++ src/dumper_thread.h | 1 + src/parsehdr.c | 3 ++- src/parsehdr.h | 1 + 8 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8 index e9b3fc2..10702f4 100644 --- a/doc/createrepo_c.8 +++ b/doc/createrepo_c.8 @@ -213,5 +213,8 @@ Exit with retval 2 if there were any errors during processing .SS \-\-ignore\-lock .sp Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong. +.SS \-\-skip\-filelists +.sp +Expert (risky) option: Skip filelist generation. .\" Generated by docutils manpage writer. . diff --git a/src/cmd_parser.c b/src/cmd_parser.c index bbefa08..0ecf7f9 100644 --- a/src/cmd_parser.c +++ b/src/cmd_parser.c @@ -224,6 +224,8 @@ static GOptionEntry expert_entries[] = "own risk! 
If two createrepos run simultaneously, then the state of the " "generated metadata is not guaranteed - it can be inconsistent and wrong.", NULL }, + { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists), + "Expert (risky) option: Skip filelist generation.", NULL}, { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }, }; diff --git a/src/cmd_parser.h b/src/cmd_parser.h index 32bcf99..5eff9d9 100644 --- a/src/cmd_parser.h +++ b/src/cmd_parser.h @@ -57,6 +57,7 @@ struct CmdOptions { char *general_compress_type;/*!< which compression type to use (even for primary, filelists and other xml) */ gboolean skip_symlinks; /*!< ignore symlinks of packages */ + gboolean skip_filelists; /*!< Skip creating filelists */ gint changelog_limit; /*!< number of changelog messages in other.(xml|sqlite) */ gboolean unique_md_filenames; /*!< include the file checksums in diff --git a/src/createrepo_c.c b/src/createrepo_c.c index f4f4544..9dd288e 100644 --- a/src/createrepo_c.c +++ b/src/createrepo_c.c @@ -1253,6 +1253,7 @@ main(int argc, char **argv) user_data.checksum_type = cmd_options->checksum_type; user_data.checksum_cachedir = cmd_options->checksum_cachedir; user_data.skip_symlinks = cmd_options->skip_symlinks; + user_data.skip_filelists = cmd_options->skip_filelists; user_data.repodir_name_len = strlen(in_dir); user_data.task_count = task_count; user_data.package_count = 0; diff --git a/src/dumper_thread.c b/src/dumper_thread.c index 119f3bd..f7c4e35 100644 --- a/src/dumper_thread.c +++ b/src/dumper_thread.c @@ -431,6 +431,11 @@ cr_dumper_thread(gpointer data, gpointer user_data) if (udata->checksum_cachedir) hdrrflags = CR_HDRR_LOADHDRID | CR_HDRR_LOADSIGNATURES; + + // Load filelists, unless --skip-filelists is passed. 
+ if (udata->skip_filelists) + hdrrflags |= CR_HDRR_SKIPFILES; + // Get stat info about file if (udata->old_metadata && !(udata->skip_stat)) { if (stat(task->full_path, &stat_buf) == -1) { diff --git a/src/dumper_thread.h b/src/dumper_thread.h index 60f984d..654991f 100644 --- a/src/dumper_thread.h +++ b/src/dumper_thread.h @@ -66,6 +66,7 @@ struct UserData { cr_ChecksumType checksum_type; // Constant representing selected checksum const char *checksum_cachedir; // Dir with cached checksums gboolean skip_symlinks; // Skip symlinks + gboolean skip_filelists; // Skip filelists long task_count; // Total number of task to process long package_count; // Total number of packages processed diff --git a/src/parsehdr.c b/src/parsehdr.c index 2775bf3..97bb01e 100644 --- a/src/parsehdr.c +++ b/src/parsehdr.c @@ -253,7 +253,8 @@ cr_package_from_header(Header hdr, assert(x == dir_count); } - if (headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) && + if (!(hdrrflags & CR_HDRR_SKIPFILES) && + headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) && headerGet(hdr, RPMTAG_DIRINDEXES, indexes, flags) && headerGet(hdr, RPMTAG_BASENAMES, filenames, flags) && headerGet(hdr, RPMTAG_FILEFLAGS, fileflags, flags) && diff --git a/src/parsehdr.h b/src/parsehdr.h index 032acca..e7a4a4a 100644 --- a/src/parsehdr.h +++ b/src/parsehdr.h @@ -39,6 +39,7 @@ typedef enum { CR_HDRR_NONE = (1 << 0), CR_HDRR_LOADHDRID = (1 << 1), /*!< Load hdrid */ CR_HDRR_LOADSIGNATURES = (1 << 2), /*!< Load siggpg and siggpg */ + CR_HDRR_SKIPFILES = (1 << 3), /*!< Skip filelists */ } cr_HeaderReadingFlags; /** Read data from header and return filled cr_Package structure.