Blob Blame History Raw
From b4d103c6dec2aa0f8461e1ca78ad23d692e68d36 Mon Sep 17 00:00:00 2001
From: Matthew Almond <malmond@fb.com>
Date: Thu, 20 May 2021 13:35:13 -0700
Subject: [PATCH] Add option --skip-filelists

This is a site-local optimization. Some packages and repos include an
enormous number of files, so generating filelists for them is extremely
expensive, especially when the repo is also fast-changing.

Impact of skipping filelists: it breaks resolution of file/path-based
dependencies, as well as the `-f` (file ownership) and `-l` (list)
options in repoquery.
---
 doc/createrepo_c.8  | 3 +++
 src/cmd_parser.c    | 2 ++
 src/cmd_parser.h    | 1 +
 src/createrepo_c.c  | 1 +
 src/dumper_thread.c | 5 +++++
 src/dumper_thread.h | 1 +
 src/parsehdr.c      | 3 ++-
 src/parsehdr.h      | 1 +
 8 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8
index e9b3fc2..10702f4 100644
--- a/doc/createrepo_c.8
+++ b/doc/createrepo_c.8
@@ -213,5 +213,8 @@ Exit with retval 2 if there were any errors during processing
 .SS \-\-ignore\-lock
 .sp
 Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong.
+.SS \-\-skip\-filelists
+.sp
+Expert (risky) option: Skip filelist generation.
 .\" Generated by docutils manpage writer.
 .
diff --git a/src/cmd_parser.c b/src/cmd_parser.c
index bbefa08..0ecf7f9 100644
--- a/src/cmd_parser.c
+++ b/src/cmd_parser.c
@@ -224,6 +224,8 @@ static GOptionEntry expert_entries[] =
       "own risk! If two createrepos run simultaneously, then the state of the "
       "generated metadata is not guaranteed - it can be inconsistent and wrong.",
       NULL },
+    { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists),
+      "Expert (risky) option: Skip filelist generation.", NULL},
     { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL },
 };
 
diff --git a/src/cmd_parser.h b/src/cmd_parser.h
index 32bcf99..5eff9d9 100644
--- a/src/cmd_parser.h
+++ b/src/cmd_parser.h
@@ -57,6 +57,7 @@ struct CmdOptions {
     char *general_compress_type;/*!< which compression type to use (even for
                                      primary, filelists and other xml) */
     gboolean skip_symlinks;     /*!< ignore symlinks of packages */
+    gboolean skip_filelists;    /*!< Skip creating filelists */
     gint changelog_limit;       /*!< number of changelog messages in
                                      other.(xml|sqlite) */
     gboolean unique_md_filenames;       /*!< include the file checksums in
diff --git a/src/createrepo_c.c b/src/createrepo_c.c
index f4f4544..9dd288e 100644
--- a/src/createrepo_c.c
+++ b/src/createrepo_c.c
@@ -1253,6 +1253,7 @@ main(int argc, char **argv)
     user_data.checksum_type     = cmd_options->checksum_type;
     user_data.checksum_cachedir = cmd_options->checksum_cachedir;
     user_data.skip_symlinks     = cmd_options->skip_symlinks;
+    user_data.skip_filelists    = cmd_options->skip_filelists;
     user_data.repodir_name_len  = strlen(in_dir);
     user_data.task_count        = task_count;
     user_data.package_count     = 0;
diff --git a/src/dumper_thread.c b/src/dumper_thread.c
index 119f3bd..f7c4e35 100644
--- a/src/dumper_thread.c
+++ b/src/dumper_thread.c
@@ -431,6 +431,11 @@ cr_dumper_thread(gpointer data, gpointer user_data)
     if (udata->checksum_cachedir)
         hdrrflags = CR_HDRR_LOADHDRID | CR_HDRR_LOADSIGNATURES;
 
+
+    // Skip reading filelists when --skip-filelists is passed.
+    if (udata->skip_filelists)
+        hdrrflags |= CR_HDRR_SKIPFILES;
+
     // Get stat info about file
     if (udata->old_metadata && !(udata->skip_stat)) {
         if (stat(task->full_path, &stat_buf) == -1) {
diff --git a/src/dumper_thread.h b/src/dumper_thread.h
index 60f984d..654991f 100644
--- a/src/dumper_thread.h
+++ b/src/dumper_thread.h
@@ -66,6 +66,7 @@ struct UserData {
     cr_ChecksumType checksum_type;  // Constant representing selected checksum
     const char *checksum_cachedir;  // Dir with cached checksums
     gboolean skip_symlinks;         // Skip symlinks
+    gboolean skip_filelists;        // Skip filelists
     long task_count;                // Total number of task to process
     long package_count;             // Total number of packages processed
 
diff --git a/src/parsehdr.c b/src/parsehdr.c
index 2775bf3..97bb01e 100644
--- a/src/parsehdr.c
+++ b/src/parsehdr.c
@@ -253,7 +253,8 @@ cr_package_from_header(Header hdr,
         assert(x == dir_count);
     }
 
-    if (headerGet(hdr, RPMTAG_FILENAMES,  full_filenames,  flags) &&
+    if (!(hdrrflags & CR_HDRR_SKIPFILES) &&
+        headerGet(hdr, RPMTAG_FILENAMES,  full_filenames,  flags) &&
         headerGet(hdr, RPMTAG_DIRINDEXES, indexes,  flags) &&
         headerGet(hdr, RPMTAG_BASENAMES,  filenames, flags) &&
         headerGet(hdr, RPMTAG_FILEFLAGS,  fileflags, flags) &&
diff --git a/src/parsehdr.h b/src/parsehdr.h
index 032acca..e7a4a4a 100644
--- a/src/parsehdr.h
+++ b/src/parsehdr.h
@@ -39,6 +39,7 @@ typedef enum {
     CR_HDRR_NONE            = (1 << 0),
     CR_HDRR_LOADHDRID       = (1 << 1), /*!< Load hdrid */
     CR_HDRR_LOADSIGNATURES  = (1 << 2), /*!< Load siggpg and siggpg */
+    CR_HDRR_SKIPFILES       = (1 << 3), /*!< Skip filelists */
 } cr_HeaderReadingFlags;
 
 /** Read data from header and return filled cr_Package structure.