Blob Blame History Raw
commit 2f832212a0ba5fc8d2ac448b0053ae34b134352d
Author: Michal Domonkos <mdomonko@redhat.com>
Date:   Fri Nov 25 18:39:35 2016 +0100

    createrepo: allow xz and bz2 for xml files. BZ 1125437
    
    Currently, we don't honor --compress-type for the
    primary/filelists/other.xml files and always force gz, because libxml2
    (used by yum-metadata-parser) previously did not support anything other
    than gz.
    
    This has since been worked around in yum by decompressing the files
    first before passing them to yum-metadata-parser, a.k.a. y-m-p (commit
    cfe43e8).  If we do the same in createrepo (which uses y-m-p to generate
    the sqlite files), we can enable these additional compression types for
    primary/filelists/other.xml -- and that's what this commit does.
    
    Note that libxml2 also natively supports xz in addition to gz so we only
    need to do the decompression for bz2.

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 9e89afc..6d56ff8 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -447,11 +447,10 @@ class MetaDataGenerator:
 
     def _setupPrimary(self):
         # setup the primary metadata file
-        # FIXME - make this be  conf.compress_type once y-m-p is fixed
-        fpz = self.conf.primaryfile + '.' + 'gz'
+        fpz = self.conf.primaryfile + '.' + self.conf.compress_type
         primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
                                        fpz)
-        fo = compressOpen(primaryfilepath, 'w', 'gz')
+        fo = compressOpen(primaryfilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
             ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -460,11 +459,10 @@ class MetaDataGenerator:
 
     def _setupFilelists(self):
         # setup the filelist file
-        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
-        fpz = self.conf.filelistsfile + '.' + 'gz'
+        fpz = self.conf.filelistsfile + '.' + self.conf.compress_type
         filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
                                     fpz)
-        fo = compressOpen(filelistpath, 'w', 'gz')
+        fo = compressOpen(filelistpath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                  ' packages="%s">' % self.pkgcount)
@@ -472,11 +470,10 @@ class MetaDataGenerator:
 
     def _setupOther(self):
         # setup the other file
-        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
-        fpz = self.conf.otherfile + '.' + 'gz'
+        fpz = self.conf.otherfile + '.' + self.conf.compress_type
         otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
                                      fpz)
-        fo = compressOpen(otherfilepath, 'w', 'gz')
+        fo = compressOpen(otherfilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                  ' packages="%s">' %
@@ -1217,21 +1214,34 @@ class MetaDataGenerator:
             rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)
 
         for (rpm_file, ftype) in workfiles:
-            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
-            # self.conf.compress_type
-            if ftype in ('other', 'filelists', 'primary'):
-                rpm_file = rpm_file + '.' + 'gz'
-            elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
+            unpath = os.path.join(repopath, rpm_file)
+            if (ftype in ('other', 'filelists', 'primary')
+                    or (rpm_file.find('.') != -1 and rpm_file.split('.')[-1]
+                        not in _available_compression)):
                 rpm_file = rpm_file + '.' + self.conf.compress_type
             complete_path = os.path.join(repopath, rpm_file)
             zfo = compressOpen(complete_path)
+            dfo = None
+            if (self.conf.compress_type == 'bz2' and self.conf.database and
+                    ftype in ('other', 'filelists', 'primary')):
+                # yum-metadata-parser doesn't understand bz2 so let's write the
+                # decompressed data to a file and pass that via gen_func
+                # instead of the compressed version
+                dfo = open(unpath, 'w')
             # This is misc.checksum() done locally so we can get the size too.
             data = misc.Checksums([sumtype])
-            while data.read(zfo, 2**16):
-                pass
+            while True:
+                chunk = data.read(zfo, 2**16)
+                if not chunk:
+                    break
+                if dfo is not None:
+                    dfo.write(chunk)
             uncsum = data.hexdigest(sumtype)
             unsize = len(data)
             zfo.close()
+            if dfo is not None:
+                dfo.close()
+
             csum = misc.checksum(sumtype, complete_path)
             timestamp = os.stat(complete_path)[8]
 
@@ -1244,21 +1254,29 @@ class MetaDataGenerator:
                         self.callback.log("Starting %s db creation: %s" % (ftype,
                                                                   time.ctime()))
 
+                gen_func = None
                 if ftype == 'primary':
-                    #FIXME - in theory some sort of try/except  here
-                    # TypeError appears to be raised, sometimes :(
-                    rp.getPrimary(complete_path, csum)
-
+                    gen_func = rp.getPrimary
                 elif ftype == 'filelists':
-                    #FIXME and here
-                    rp.getFilelists(complete_path, csum)
-
+                    gen_func = rp.getFilelists
                 elif ftype == 'other':
-                    #FIXME and here
-                    rp.getOtherdata(complete_path, csum)
+                    gen_func = rp.getOtherdata
+                if gen_func is not None:
+                    if dfo is None:
+                        #FIXME - in theory some sort of try/except  here
+                        # TypeError appears to be raised, sometimes :(
+                        gen_func(complete_path, csum)
+                    else:
+                        #FIXME and here
+                        gen_func(unpath, uncsum)
+                        os.unlink(unpath)
 
                 if ftype in ['primary', 'filelists', 'other']:
-                    tmp_result_name = '%s.xml.gz.sqlite' % ftype
+                    if dfo is None:
+                        compress_ext = '.%s' % self.conf.compress_type
+                    else:
+                        compress_ext = ''
+                    tmp_result_name = '%s.xml%s.sqlite' % (ftype, compress_ext)
                     tmp_result_path = os.path.join(repopath, tmp_result_name)
                     good_name = '%s.sqlite' % ftype
                     resultpath = os.path.join(repopath, good_name)
@@ -1323,13 +1341,8 @@ class MetaDataGenerator:
             data.openchecksum = (sumtype, uncsum)
 
             if self.conf.unique_md_filenames:
-                if ftype in ('primary', 'filelists', 'other'):
-                    compress = 'gz'
-                else:
-                    compress = self.conf.compress_type
-                
                 main_name = '.'.join(rpm_file.split('.')[:-1])
-                res_file = '%s-%s.%s' % (csum, main_name, compress)
+                res_file = '%s-%s.%s' % (csum, main_name, self.conf.compress_type)
                 orig_file = os.path.join(repopath, rpm_file)
                 dest_file = os.path.join(repopath, res_file)
                 os.rename(orig_file, dest_file)