Blame SOURCES/0003-Correct-pkg-count-in-headers-if-there-were-invalid-pkgs-RhBug1596211.patch

6cec19
From dfe7218f07ffa70b73c51c71b0f051be926b6d92 Mon Sep 17 00:00:00 2001
6cec19
From: Aleš Matěj <amatej@redhat.com>
6cec19
Date: Tue, 14 May 2019 16:48:13 +0200
6cec19
Subject: [PATCH] Correct pkg count in headers if there were invalid pkgs (RhBug:1596211)
6cec19
6cec19
---
6cec19
 src/createrepo_c.c  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
6cec19
 src/dumper_thread.c |   4 +++-
6cec19
 src/dumper_thread.h |   3 ++-
6cec19
 src/threads.c       |  23 +++++++++++++++++++++++
6cec19
 src/threads.h       |   5 +++++
6cec19
 src/xml_file.c      | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6cec19
 src/xml_file.h      |  15 +++++++++++++++
6cec19
 7 files changed, 260 insertions(+), 16 deletions(-)
6cec19
6cec19
diff --git a/src/createrepo_c.c b/src/createrepo_c.c
6cec19
index e16ae34..67c2752 100644
6cec19
--- a/src/createrepo_c.c
6cec19
+++ b/src/createrepo_c.c
6cec19
@@ -124,7 +124,7 @@ fill_pool(GThreadPool *pool,
6cec19
           struct CmdOptions *cmd_options,
6cec19
           GSList **current_pkglist,
6cec19
           FILE *output_pkg_list,
6cec19
-          long *package_count,
6cec19
+          long *task_count,
6cec19
           int  media_id)
6cec19
 {
6cec19
     GQueue queue = G_QUEUE_INIT;
6cec19
@@ -259,13 +259,13 @@ fill_pool(GThreadPool *pool,
6cec19
 
6cec19
     // Push sorted tasks into the thread pool
6cec19
     while ((task = g_queue_pop_head(&queue)) != NULL) {
6cec19
-        task->id = *package_count;
6cec19
+        task->id = *task_count;
6cec19
         task->media_id = media_id;
6cec19
         g_thread_pool_push(pool, task, NULL);
6cec19
-        ++*package_count;
6cec19
+        ++*task_count;
6cec19
     }
6cec19
 
6cec19
-    return *package_count;
6cec19
+    return *task_count;
6cec19
 }
6cec19
 
6cec19
 
6cec19
@@ -321,6 +321,27 @@ prepare_cache_dir(struct CmdOptions *cmd_options,
6cec19
     return TRUE;
6cec19
 }
6cec19
 
6cec19
+/** Check if task finished without error, if yes
6cec19
+ *  use content stats of the new file
6cec19
+ *
6cec19
+ * @param task          Rewrite pkg count task
6cec19
+ * @param filename      Name of file with wrong package count
6cec19
+ * @param exit_val      If errors occurred, set createrepo_c exit value
6cec19
+ * @param content_stat  Content stats for filename
6cec19
+ *
6cec19
+ */
6cec19
+static void
6cec19
+error_check_and_set_content_stat(cr_CompressionTask *task, char *filename, int *exit_val, cr_ContentStat **content_stat){
6cec19
+    if (task->err) {
6cec19
+        g_critical("Cannot rewrite pkg count in %s: %s",
6cec19
+                   filename, task->err->message);
6cec19
+        *exit_val = 2;
6cec19
+    }else{
6cec19
+        cr_contentstat_free(*content_stat, NULL);
6cec19
+        *content_stat = task->stat;
6cec19
+        task->stat = NULL;
6cec19
+    }
6cec19
+}
6cec19
 
6cec19
 int
6cec19
 main(int argc, char **argv)
6cec19
@@ -478,7 +499,7 @@ main(int argc, char **argv)
6cec19
                                           NULL);
6cec19
     g_debug("Thread pool ready");
6cec19
 
6cec19
-    long package_count = 0;
6cec19
+    long task_count = 0;
6cec19
     GSList *current_pkglist = NULL;
6cec19
     /* ^^^ List with basenames of files which will be processed */
6cec19
 
6cec19
@@ -490,26 +511,26 @@ main(int argc, char **argv)
6cec19
                   cmd_options,
6cec19
                   &current_pkglist,
6cec19
                   output_pkg_list,
6cec19
-                  &package_count,
6cec19
+                  &task_count,
6cec19
                   media_id);
6cec19
         g_free(tmp_in_dir);
6cec19
     }
6cec19
 
6cec19
-    g_debug("Package count: %ld", package_count);
6cec19
-    g_message("Directory walk done - %ld packages", package_count);
6cec19
+    g_debug("Package count: %ld", task_count);
6cec19
+    g_message("Directory walk done - %ld packages", task_count);
6cec19
 
6cec19
     if (output_pkg_list)
6cec19
         fclose(output_pkg_list);
6cec19
 
6cec19
 
6cec19
     // Load old metadata if --update
6cec19
     cr_Metadata *old_metadata = NULL;
6cec19
     struct cr_MetadataLocation *old_metadata_location = NULL;
6cec19
 
6cec19
-    if (!package_count)
6cec19
+    if (!task_count)
6cec19
         g_debug("No packages found - skipping metadata loading");
6cec19
 
6cec19
-    if (package_count && cmd_options->update) {
6cec19
+    if (task_count && cmd_options->update) {
6cec19
         int ret;
6cec19
         old_metadata = cr_metadata_new(CR_HT_KEY_FILENAME, 1, current_pkglist);
6cec19
         cr_metadata_set_dupaction(old_metadata, CR_HT_DUPACT_REMOVEALL);
6cec19
@@ -741,9 +762,9 @@ main(int argc, char **argv)
6cec19
 
6cec19
     // Set number of packages
6cec19
     g_debug("Setting number of packages");
6cec19
-    cr_xmlfile_set_num_of_pkgs(pri_cr_file, package_count, NULL);
6cec19
-    cr_xmlfile_set_num_of_pkgs(fil_cr_file, package_count, NULL);
6cec19
-    cr_xmlfile_set_num_of_pkgs(oth_cr_file, package_count, NULL);
6cec19
+    cr_xmlfile_set_num_of_pkgs(pri_cr_file, task_count, NULL);
6cec19
+    cr_xmlfile_set_num_of_pkgs(fil_cr_file, task_count, NULL);
6cec19
+    cr_xmlfile_set_num_of_pkgs(oth_cr_file, task_count, NULL);
6cec19
 
6cec19
     // Open sqlite databases
6cec19
     gchar *pri_db_filename = NULL;
6cec19
@@ -832,7 +853,8 @@ main(int argc, char **argv)
6cec19
     user_data.checksum_cachedir = cmd_options->checksum_cachedir;
6cec19
     user_data.skip_symlinks     = cmd_options->skip_symlinks;
6cec19
     user_data.repodir_name_len  = strlen(in_dir);
6cec19
-    user_data.package_count     = package_count;
6cec19
+    user_data.task_count        = task_count;
6cec19
+    user_data.package_count     = 0;
6cec19
     user_data.skip_stat         = cmd_options->skip_stat;
6cec19
     user_data.old_metadata      = old_metadata;
6cec19
     user_data.mutex_pri         = g_mutex_new();
6cec19
@@ -876,6 +898,59 @@ main(int argc, char **argv)
6cec19
     cr_xmlfile_close(fil_cr_file, NULL);
6cec19
     cr_xmlfile_close(oth_cr_file, NULL);
6cec19
 
6cec19
+
6cec19
+    /* At the time of writing xml metadata headers we haven't yet parsed all
6cec19
+     * the packages and we don't know whether there were some invalid ones,
6cec19
+     * therefore we write the task count into the headers instead of the actual package count.
6cec19
+     * If there actually were some invalid packages we have to correct this value;
6cec19
+     * unfortunately, that means we have to decompress the metadata files, change
6cec19
+     * the package count value and compress them again.
6cec19
+     */
6cec19
+    if (user_data.package_count != user_data.task_count){
6cec19
+        g_message("Warning: There were some invalid packages: we have to recompress other, filelists and primary xml metadata files in order to have correct package counts");
6cec19
+
6cec19
+        GThreadPool *rewrite_pkg_count_pool = g_thread_pool_new(cr_rewrite_pkg_count_thread,
6cec19
+                                                                &user_data, 3, FALSE, NULL);
6cec19
+
6cec19
+        cr_CompressionTask *pri_rewrite_pkg_count_task;
6cec19
+        cr_CompressionTask *fil_rewrite_pkg_count_task;
6cec19
+        cr_CompressionTask *oth_rewrite_pkg_count_task;
6cec19
+
6cec19
+        pri_rewrite_pkg_count_task = cr_compressiontask_new(pri_xml_filename,
6cec19
+                                                            NULL,
6cec19
+                                                            xml_compression,
6cec19
+                                                            cmd_options->repomd_checksum_type,
6cec19
+                                                            1,
6cec19
+                                                            &tmp_err);
6cec19
+        g_thread_pool_push(rewrite_pkg_count_pool, pri_rewrite_pkg_count_task, NULL);
6cec19
+
6cec19
+        fil_rewrite_pkg_count_task = cr_compressiontask_new(fil_xml_filename,
6cec19
+                                                            NULL,
6cec19
+                                                            xml_compression,
6cec19
+                                                            cmd_options->repomd_checksum_type,
6cec19
+                                                            1,
6cec19
+                                                            &tmp_err);
6cec19
+        g_thread_pool_push(rewrite_pkg_count_pool, fil_rewrite_pkg_count_task, NULL);
6cec19
+
6cec19
+        oth_rewrite_pkg_count_task = cr_compressiontask_new(oth_xml_filename,
6cec19
+                                                            NULL,
6cec19
+                                                            xml_compression,
6cec19
+                                                            cmd_options->repomd_checksum_type,
6cec19
+                                                            1,
6cec19
+                                                            &tmp_err);
6cec19
+        g_thread_pool_push(rewrite_pkg_count_pool, oth_rewrite_pkg_count_task, NULL);
6cec19
+
6cec19
+        g_thread_pool_free(rewrite_pkg_count_pool, FALSE, TRUE);
6cec19
+
6cec19
+        error_check_and_set_content_stat(pri_rewrite_pkg_count_task, pri_xml_filename, &exit_val, &pri_stat);
6cec19
+        error_check_and_set_content_stat(fil_rewrite_pkg_count_task, fil_xml_filename, &exit_val, &fil_stat);
6cec19
+        error_check_and_set_content_stat(oth_rewrite_pkg_count_task, oth_xml_filename, &exit_val, &oth_stat);
6cec19
+
6cec19
+        cr_compressiontask_free(pri_rewrite_pkg_count_task, NULL);
6cec19
+        cr_compressiontask_free(fil_rewrite_pkg_count_task, NULL);
6cec19
+        cr_compressiontask_free(oth_rewrite_pkg_count_task, NULL);
6cec19
+    }
6cec19
+
6cec19
     g_queue_free(user_data.buffer);
6cec19
     g_mutex_free(user_data.mutex_buffer);
6cec19
     g_cond_free(user_data.cond_pri);
6cec19
diff --git a/src/dumper_thread.c b/src/dumper_thread.c
6cec19
index fbaa5be..e282f96 100644
6cec19
--- a/src/dumper_thread.c
6cec19
+++ b/src/dumper_thread.c
6cec19
@@ -74,6 +74,8 @@ write_pkg(long id,
6cec19
     g_mutex_lock(udata->mutex_pri);
6cec19
     while (udata->id_pri != id)
6cec19
         g_cond_wait (udata->cond_pri, udata->mutex_pri);
6cec19
+
6cec19
+    udata->package_count++;
6cec19
     ++udata->id_pri;
6cec19
     cr_xmlfile_add_chunk(udata->pri_f, (const char *) res.primary, &tmp_err);
6cec19
     if (tmp_err) {
6cec19
@@ -476,7 +478,7 @@ cr_dumper_thread(gpointer data, gpointer user_data)
6cec19
 
6cec19
     if (g_queue_get_length(udata->buffer) < MAX_TASK_BUFFER_LEN
6cec19
         && udata->id_pri != task->id
6cec19
-        && udata->package_count > (task->id + 1))
6cec19
+        && udata->task_count > (task->id + 1))
6cec19
     {
6cec19
         // If:
6cec19
         //  * this isn't our turn
6cec19
diff --git a/src/dumper_thread.h b/src/dumper_thread.h
6cec19
index ed21053..4e18869 100644
6cec19
--- a/src/dumper_thread.h
6cec19
+++ b/src/dumper_thread.h
6cec19
@@ -61,7 +61,8 @@ struct UserData {
6cec19
     cr_ChecksumType checksum_type;  // Constant representing selected checksum
6cec19
     const char *checksum_cachedir;  // Dir with cached checksums
6cec19
     gboolean skip_symlinks;         // Skip symlinks
6cec19
-    long package_count;             // Total number of packages to process
6cec19
+    long task_count;                // Total number of tasks to process
6cec19
+    long package_count;             // Total number of packages processed
6cec19
 
6cec19
     // Update stuff
6cec19
     gboolean skip_stat;             // Skip stat() while updating
6cec19
diff --git a/src/threads.c b/src/threads.c
6cec19
index aee07d1..844e900 100644
6cec19
--- a/src/threads.c
6cec19
+++ b/src/threads.c
6cec19
@@ -21,6 +21,7 @@
6cec19
 #include "threads.h"
6cec19
 #include "error.h"
6cec19
 #include "misc.h"
6cec19
+#include "dumper_thread.h"
6cec19
 
6cec19
 #define ERR_DOMAIN      CREATEREPO_C_ERROR
6cec19
 
6cec19
@@ -108,6 +109,28 @@ cr_compressing_thread(gpointer data, G_GNUC_UNUSED gpointer user_data)
6cec19
     }
6cec19
 }
6cec19
 
6cec19
+void
6cec19
+cr_rewrite_pkg_count_thread(gpointer data, gpointer user_data)
6cec19
+{
6cec19
+    cr_CompressionTask *task = data;
6cec19
+    struct UserData *ud = user_data;
6cec19
+    GError *tmp_err = NULL;
6cec19
+
6cec19
+    assert(task);
6cec19
+
6cec19
+    cr_rewrite_header_package_count(task->src,
6cec19
+                                    task->type,
6cec19
+                                    ud->package_count,
6cec19
+                                    ud->task_count,
6cec19
+                                    task->stat,
6cec19
+                                    &tmp_err);
6cec19
+
6cec19
+    if (tmp_err) {
6cec19
+        // Error encountered
6cec19
+        g_propagate_error(&task->err, tmp_err);
6cec19
+    }
6cec19
+}
6cec19
+
6cec19
 /** Parallel Repomd Record Fill */
6cec19
 
6cec19
 cr_RepomdRecordFillTask *
6cec19
diff --git a/src/threads.h b/src/threads.h
6cec19
index 2d554cd..19ba917 100644
6cec19
--- a/src/threads.h
6cec19
+++ b/src/threads.h
6cec19
@@ -150,6 +150,11 @@ cr_repomdrecordfilltask_free(cr_RepomdRecordFillTask *task, GError **err);
6cec19
 void
6cec19
 cr_repomd_record_fill_thread(gpointer data, gpointer user_data);
6cec19
 
6cec19
+/** Function for GThread Pool.
6cec19
+ */
6cec19
+void
6cec19
+cr_rewrite_pkg_count_thread(gpointer data, gpointer user_data);
6cec19
+
6cec19
 /** @} */
6cec19
 
6cec19
 #ifdef __cplusplus
6cec19
diff --git a/src/xml_file.c b/src/xml_file.c
6cec19
index 65fb945..1d670ae 100644
6cec19
--- a/src/xml_file.c
6cec19
+++ b/src/xml_file.c
6cec19
@@ -18,8 +18,10 @@
6cec19
  */
6cec19
 
6cec19
 #include <glib.h>
6cec19
+#include <glib/gstdio.h>
6cec19
 #include <assert.h>
6cec19
 #include "xml_file.h"
6cec19
+#include <errno.h>
6cec19
 #include "error.h"
6cec19
 #include "xml_dump.h"
6cec19
 #include "compression_wrapper.h"
6cec19
@@ -40,6 +42,9 @@
6cec19
 #define XML_PRESTODELTA_HEADER  XML_HEADER"<prestodelta>\n"
6cec19
 #define XML_UPDATEINFO_HEADER   XML_HEADER"<updates>\n"
6cec19
 
6cec19
+#define XML_MAX_HEADER_SIZE     300
6cec19
+#define XML_RECOMPRESS_BUFFER_SIZE   8192
6cec19
+
6cec19
 #define XML_PRIMARY_FOOTER      "</metadata>"
6cec19
 #define XML_FILELISTS_FOOTER    "</filelists>"
6cec19
 #define XML_OTHER_FOOTER        "</otherdata>"
6cec19
@@ -317,3 +322,121 @@ cr_xmlfile_close(cr_XmlFile *f, GError **err)
6cec19
 
6cec19
     return CRE_OK;
6cec19
 }
6cec19
+
6cec19
+static int
6cec19
+write_modified_header(int task_count,
6cec19
+                      int package_count,
6cec19
+                      cr_XmlFile *cr_file,
6cec19
+                      gchar *header_buf,
6cec19
+                      int header_len,
6cec19
+                      GError **err)
6cec19
+{
6cec19
+    GError *tmp_err = NULL;
6cec19
+    gchar *package_count_string;
6cec19
+    gchar *task_count_string;
6cec19
+    int bytes_written = 0;
6cec19
+    int package_count_string_len = rasprintf(&package_count_string, "packages=\"%i\"", package_count);
6cec19
+    int task_count_string_len = rasprintf(&task_count_string, "packages=\"%i\"", task_count);
6cec19
+
6cec19
+    gchar *pointer_to_pkgs = strstr(header_buf, task_count_string);
6cec19
+    if (!pointer_to_pkgs){
6cec19
+        g_free(package_count_string);
6cec19
+        g_free(task_count_string);
6cec19
+        return 0;
6cec19
+    }
6cec19
+    gchar *pointer_to_pkgs_end = pointer_to_pkgs + task_count_string_len;
6cec19
+
6cec19
+    bytes_written += cr_write(cr_file->f, header_buf, pointer_to_pkgs - header_buf, &tmp_err);
6cec19
+    if (!tmp_err)
6cec19
+        bytes_written += cr_write(cr_file->f, package_count_string, package_count_string_len, &tmp_err);
6cec19
+    if (!tmp_err)
6cec19
+        bytes_written += cr_write(cr_file->f, pointer_to_pkgs_end, header_len - (pointer_to_pkgs_end - header_buf), &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while writing header part:");
6cec19
+        g_free(package_count_string);
6cec19
+        g_free(task_count_string);
6cec19
+        return 0;
6cec19
+    }
6cec19
+    g_free(package_count_string);
6cec19
+    g_free(task_count_string);
6cec19
+    return bytes_written;
6cec19
+}
6cec19
+
6cec19
+void
6cec19
+cr_rewrite_header_package_count(gchar *original_filename,
6cec19
+                                cr_CompressionType xml_compression,
6cec19
+                                int package_count,
6cec19
+                                int task_count,
6cec19
+                                cr_ContentStat *file_stat,
6cec19
+                                GError **err)
6cec19
+{
6cec19
+    GError *tmp_err = NULL;
6cec19
+    CR_FILE *original_file = cr_open(original_filename, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while reopening for reading:");
6cec19
+        return;
6cec19
+    }
6cec19
+
6cec19
+    gchar *tmp_xml_filename = g_strconcat(original_filename, ".tmp", NULL);
6cec19
+    cr_XmlFile *new_file = cr_xmlfile_sopen_primary(tmp_xml_filename,
6cec19
+                                                    xml_compression,
6cec19
+                                                    file_stat,
6cec19
+                                                    &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while opening for writing:");
6cec19
+        cr_close(original_file, NULL);
6cec19
+        g_free(tmp_xml_filename);
6cec19
+        return;
6cec19
+    }
6cec19
+
6cec19
+    gchar header_buf[XML_MAX_HEADER_SIZE];
6cec19
+    int len_read = cr_read(original_file, header_buf, XML_MAX_HEADER_SIZE, &tmp_err);
6cec19
+    if (!tmp_err)
6cec19
+        write_modified_header(task_count, package_count, new_file, header_buf, len_read, &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while recompressing:");
6cec19
+        cr_xmlfile_close(new_file, NULL);
6cec19
+        cr_close(original_file, NULL);
6cec19
+        g_free(tmp_xml_filename);
6cec19
+        return;
6cec19
+    }
6cec19
+    //Copy the rest of the file
6cec19
+    gchar copy_buf[XML_RECOMPRESS_BUFFER_SIZE];
6cec19
+    while(len_read)
6cec19
+    {
6cec19
+        len_read = cr_read(original_file, copy_buf, XML_RECOMPRESS_BUFFER_SIZE, &tmp_err);
6cec19
+        if (!tmp_err)
6cec19
+            cr_write(new_file->f, copy_buf, len_read, &tmp_err);
6cec19
+        if (tmp_err) {
6cec19
+            g_propagate_prefixed_error(err, tmp_err, "Error encountered while recompressing:");
6cec19
+            cr_xmlfile_close(new_file, NULL);
6cec19
+            cr_close(original_file, NULL);
6cec19
+            g_free(tmp_xml_filename);
6cec19
+            return;
6cec19
+        }
6cec19
+    }
6cec19
+
6cec19
+    new_file->header = 1;
6cec19
+    new_file->footer = 1;
6cec19
+
6cec19
+    cr_xmlfile_close(new_file, &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while writing:");
6cec19
+        cr_close(original_file, NULL);
6cec19
+        g_free(tmp_xml_filename);
6cec19
+        return;
6cec19
+    }
6cec19
+    cr_close(original_file, &tmp_err);
6cec19
+    if (tmp_err) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while writing:");
6cec19
+        g_free(tmp_xml_filename);
6cec19
+        return;
6cec19
+    }
6cec19
+
6cec19
+    if (g_rename(tmp_xml_filename, original_filename) == -1) {
6cec19
+        g_propagate_prefixed_error(err, tmp_err, "Error encountered while renaming:");
6cec19
+        g_free(tmp_xml_filename);
6cec19
+        return;
6cec19
+    }
6cec19
+    g_free(tmp_xml_filename);
6cec19
+}
6cec19
diff --git a/src/xml_file.h b/src/xml_file.h
6cec19
index 96ef5e3..6ac4c97 100644
6cec19
--- a/src/xml_file.h
6cec19
+++ b/src/xml_file.h
6cec19
@@ -221,6 +221,21 @@ int cr_xmlfile_add_chunk(cr_XmlFile *f, const char *chunk, GError **err);
6cec19
  */
6cec19
 int cr_xmlfile_close(cr_XmlFile *f, GError **err);
6cec19
 
6cec19
+/** Rewrite package count field in repodata header in xml file.
6cec19
+ * In order to do this we have to decompress and, after the change,
6cec19
+ * compress the whole file again, so an entirely new file is created.
6cec19
+ * @param original_filename     Current file with wrong value in header
6cec19
+ * @param package_count         Actual package count (desired value in header)
6cec19
+ * @param task_count            Task count (current value in header)
6cec19
+ * @param file_stat             cr_ContentStat for stats of the new file, it will be modified
6cec19
+ * @param err                   **GError
6cec19
+ */
6cec19
+void cr_rewrite_header_package_count(gchar *original_filename,
6cec19
+                                     cr_CompressionType xml_compression,
6cec19
+                                     int package_count,
6cec19
+                                     int task_count,
6cec19
+                                     cr_ContentStat *file_stat,
6cec19
+                                     GError **err);
6cec19
 
6cec19
 /** @} */
6cec19
 
6cec19
--
6cec19
libgit2 0.27.8
6cec19