commit 2f832212a0ba5fc8d2ac448b0053ae34b134352d Author: Michal Domonkos Date: Fri Nov 25 18:39:35 2016 +0100 createrepo: allow xz and bz2 for xml files. BZ 1125437 Currently, we don't honor --compress-type for the primary/filelists/other.xml files and always force gz because libxml2 (used by yum-metadata-parser, y-m-p) did not previously support anything other than gz. This has since been worked around in yum by decompressing the files before passing them to y-m-p (commit cfe43e8). If we do the same in createrepo (which uses y-m-p to generate the sqlite files), we can enable these additional compress types for primary/filelists/other.xml -- and that's what this commit does. Note that libxml2 natively supports xz in addition to gz, so we only need to do the decompression for bz2. diff --git a/createrepo/__init__.py b/createrepo/__init__.py index 9e89afc..6d56ff8 100644 --- a/createrepo/__init__.py +++ b/createrepo/__init__.py @@ -447,11 +447,10 @@ class MetaDataGenerator: def _setupPrimary(self): # setup the primary metadata file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.primaryfile + '.' + 'gz' + fpz = self.conf.primaryfile + '.' + self.conf.compress_type primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(primaryfilepath, 'w', 'gz') + fo = compressOpen(primaryfilepath, 'w', self.conf.compress_type) fo.write('\n') fo.write('' % @@ -460,11 +459,10 @@ class MetaDataGenerator: def _setupFilelists(self): # setup the filelist file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.filelistsfile + '.' + 'gz' + fpz = self.conf.filelistsfile + '.' 
+ self.conf.compress_type filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(filelistpath, 'w', 'gz') + fo = compressOpen(filelistpath, 'w', self.conf.compress_type) fo.write('\n') fo.write('' % self.pkgcount) @@ -472,11 +470,10 @@ class MetaDataGenerator: def _setupOther(self): # setup the other file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.otherfile + '.' + 'gz' + fpz = self.conf.otherfile + '.' + self.conf.compress_type otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(otherfilepath, 'w', 'gz') + fo = compressOpen(otherfilepath, 'w', self.conf.compress_type) fo.write('\n') fo.write('' % @@ -1217,21 +1214,34 @@ class MetaDataGenerator: rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None) for (rpm_file, ftype) in workfiles: - # when we fix y-m-p and non-gzipped xml files - then we can make this just add - # self.conf.compress_type - if ftype in ('other', 'filelists', 'primary'): - rpm_file = rpm_file + '.' + 'gz' - elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression: + unpath = os.path.join(repopath, rpm_file) + if (ftype in ('other', 'filelists', 'primary') + or (rpm_file.find('.') != -1 and rpm_file.split('.')[-1] + not in _available_compression)): rpm_file = rpm_file + '.' + self.conf.compress_type complete_path = os.path.join(repopath, rpm_file) zfo = compressOpen(complete_path) + dfo = None + if (self.conf.compress_type == 'bz2' and self.conf.database and + ftype in ('other', 'filelists', 'primary')): + # yum-metadata-parser doesn't understand bz2 so let's write the + # decompressed data to a file and pass that via gen_func + # instead of the compressed version + dfo = open(unpath, 'w') # This is misc.checksum() done locally so we can get the size too. 
data = misc.Checksums([sumtype]) - while data.read(zfo, 2**16): - pass + while True: + chunk = data.read(zfo, 2**16) + if not chunk: + break + if dfo is not None: + dfo.write(chunk) uncsum = data.hexdigest(sumtype) unsize = len(data) zfo.close() + if dfo is not None: + dfo.close() + csum = misc.checksum(sumtype, complete_path) timestamp = os.stat(complete_path)[8] @@ -1244,21 +1254,29 @@ class MetaDataGenerator: self.callback.log("Starting %s db creation: %s" % (ftype, time.ctime())) + gen_func = None if ftype == 'primary': - #FIXME - in theory some sort of try/except here - # TypeError appears to be raised, sometimes :( - rp.getPrimary(complete_path, csum) - + gen_func = rp.getPrimary elif ftype == 'filelists': - #FIXME and here - rp.getFilelists(complete_path, csum) - + gen_func = rp.getFilelists elif ftype == 'other': - #FIXME and here - rp.getOtherdata(complete_path, csum) + gen_func = rp.getOtherdata + if gen_func is not None: + if dfo is None: + #FIXME - in theory some sort of try/except here + # TypeError appears to be raised, sometimes :( + gen_func(complete_path, csum) + else: + #FIXME and here + gen_func(unpath, uncsum) + os.unlink(unpath) if ftype in ['primary', 'filelists', 'other']: - tmp_result_name = '%s.xml.gz.sqlite' % ftype + if dfo is None: + compress_ext = '.%s' % self.conf.compress_type + else: + compress_ext = '' + tmp_result_name = '%s.xml%s.sqlite' % (ftype, compress_ext) tmp_result_path = os.path.join(repopath, tmp_result_name) good_name = '%s.sqlite' % ftype resultpath = os.path.join(repopath, good_name) @@ -1323,13 +1341,8 @@ class MetaDataGenerator: data.openchecksum = (sumtype, uncsum) if self.conf.unique_md_filenames: - if ftype in ('primary', 'filelists', 'other'): - compress = 'gz' - else: - compress = self.conf.compress_type - main_name = '.'.join(rpm_file.split('.')[:-1]) - res_file = '%s-%s.%s' % (csum, main_name, compress) + res_file = '%s-%s.%s' % (csum, main_name, self.conf.compress_type) orig_file = os.path.join(repopath, 
rpm_file) dest_file = os.path.join(repopath, res_file) os.rename(orig_file, dest_file)