diff --git a/.createrepo.metadata b/.createrepo.metadata new file mode 100644 index 0000000..d47d83a --- /dev/null +++ b/.createrepo.metadata @@ -0,0 +1 @@ +e4ff7aac85eb2c8d077910e6921c621b4eae256e SOURCES/createrepo-0.9.9.tar.gz diff --git a/README.md b/README.md deleted file mode 100644 index 0e7897f..0000000 --- a/README.md +++ /dev/null @@ -1,5 +0,0 @@ -The master branch has no content - -Look at the c7 branch if you are working with CentOS-7, or the c4/c5/c6 branch for CentOS-4, 5 or 6 - -If you find this file in a distro specific branch, it means that no content has been checked in yet diff --git a/SOURCES/createrepo-head.patch b/SOURCES/createrepo-head.patch new file mode 100644 index 0000000..f478085 --- /dev/null +++ b/SOURCES/createrepo-head.patch @@ -0,0 +1,1990 @@ +diff --git a/Makefile b/Makefile +index 60bb9db..0b5738b 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,4 +1,5 @@ + PKGNAME = createrepo ++ALIASES = mergerepo modifyrepo genpkgmetadata.py mergerepo.py modifyrepo.py + VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec) + RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec) + CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE)) +@@ -26,6 +27,8 @@ docdir = + includedir = ${prefix}/include + oldincludedir = /usr/include + mandir = ${prefix}/share/man ++compdir = $(shell pkg-config --variable=completionsdir bash-completion) ++compdir := $(or $(compdir), "/etc/bash_completion.d") + + pkgdatadir = $(datadir)/$(PKGNAME) + pkglibdir = $(libdir)/$(PKGNAME) +@@ -33,7 +36,7 @@ pkgincludedir = $(includedir)/$(PKGNAME) + top_builddir = + + # all dirs +-DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(sysconfdir)/bash_completion.d \ ++DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(compdir) \ + $(DESTDIR)$(pkgdatadir) $(DESTDIR)$(mandir) + + +@@ -65,7 +68,8 @@ check: + + install: all installdirs + $(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir) +- $(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(sysconfdir)/bash_completion.d ++ $(INSTALL_DATA) 
$(PKGNAME).bash $(DESTDIR)$(compdir)/$(PKGNAME) ++ (cd $(DESTDIR)$(compdir); for n in $(ALIASES); do ln -s $(PKGNAME) $$n; done) + for subdir in $(SUBDIRS) ; do \ + $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \ + done +diff --git a/createrepo.bash b/createrepo.bash +index 54ac8b2..14b43d8 100644 +--- a/createrepo.bash ++++ b/createrepo.bash +@@ -1,11 +1,22 @@ + # bash completion for createrepo and friends + ++_cr_compress_type() ++{ ++ COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \ ++ | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) ) ++} ++ ++_cr_checksum_type() ++{ ++ COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$1" ) ) ++} ++ + _cr_createrepo() + { + COMPREPLY=() + + case $3 in +- --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\ ++ --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\ + --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size) + return 0 + ;; +@@ -18,8 +29,8 @@ _cr_createrepo() + COMPREPLY=( $( compgen -f -o plusdirs -X '!*.xml' -- "$2" ) ) + return 0 + ;; +- -s|--sumtype) +- COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$2" ) ) ++ -s|--checksum) ++ _cr_checksum_type "$2" + return 0 + ;; + -i|--pkglist|--read-pkgs-list) +@@ -30,10 +41,24 @@ _cr_createrepo() + COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) ) + return 0 + ;; ++ --retain-old-md) ++ COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) ) ++ return 0 ++ ;; + --num-deltas) + COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) ) + return 0 + ;; ++ --workers) ++ local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null ) ++ [[ -z $max || $max -lt $min ]] && max=$min ++ COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) ) ++ return 0 ++ ;; ++ --compress-type) ++ _cr_compress_type "$1" "$2" ++ return 0 ++ ;; + esac + + if [[ $2 == -* ]] ; then +@@ -42,9 +67,9 @@ _cr_createrepo() + --cachedir --checkts --no-database --update --update-md-path + 
--skip-stat --split --pkglist --includepkg --outputdir + --skip-symlinks --changelog-limit --unique-md-filenames +- --simple-md-filenames --distro --content --repo --revision --deltas +- --oldpackagedirs --num-deltas --read-pkgs-list +- --max-delta-rpm-size --workers' -- "$2" ) ) ++ --simple-md-filenames --retain-old-md --distro --content --repo ++ --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list ++ --max-delta-rpm-size --workers --compress-type' -- "$2" ) ) + else + COMPREPLY=( $( compgen -d -- "$2" ) ) + fi +@@ -63,10 +88,14 @@ _cr_mergerepo() + COMPREPLY=( $( compgen -d -- "$2" ) ) + return 0 + ;; ++ --compress-type) ++ _cr_compress_type "" "$2" ++ return 0 ++ ;; + esac + + COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database +- --outputdir --nogroups --noupdateinfo' -- "$2" ) ) ++ --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) ) + } && + complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py + +@@ -78,17 +107,27 @@ _cr_modifyrepo() + --version|-h|--help|--mdtype) + return 0 + ;; ++ --compress-type) ++ _cr_compress_type "" "$2" ++ return 0 ++ ;; ++ -s|--checksum) ++ _cr_checksum_type "$2" ++ return 0 ++ ;; + esac + + if [[ $2 == -* ]] ; then +- COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) ) ++ COMPREPLY=( $( compgen -W '--version --help --mdtype --remove ++ --compress --no-compress --compress-type --checksum ++ --unique-md-filenames --simple-md-filenames' -- "$2" ) ) + return 0 + fi + + local i argnum=1 + for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do + if [[ ${COMP_WORDS[i]} != -* && +- ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then ++ ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then + argnum=$(( argnum+1 )) + fi + done +diff --git a/createrepo.spec b/createrepo.spec +index 1e491cd..9a2179b 100644 +--- a/createrepo.spec ++++ b/createrepo.spec +@@ -1,5 +1,17 @@ + %{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; 
print get_python_lib()")} + ++%if ! 0%{?rhel} ++# we don't have this in rhel yet... ++BuildRequires: bash-completion ++%endif ++ ++# disable broken /usr/lib/rpm/brp-python-bytecompile ++%define __os_install_post %{nil} ++%define compdir %(pkg-config --variable=completionsdir bash-completion) ++%if "%{compdir}" == "" ++%define compdir "/etc/bash_completion.d" ++%endif ++ + Summary: Creates a common metadata repository + Name: createrepo + Version: 0.9.9 +@@ -11,7 +23,7 @@ URL: http://createrepo.baseurl.org/ + BuildRoot: %{_tmppath}/%{name}-%{version}root + BuildArchitectures: noarch + Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python +-Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm ++Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma + + %description + This utility will generate a common metadata repository from a directory of +@@ -32,7 +44,7 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install + %defattr(-, root, root) + %dir %{_datadir}/%{name} + %doc ChangeLog README COPYING COPYING.lib +-%{_sysconfdir}/bash_completion.d/ ++%(dirname %{compdir}) + %{_datadir}/%{name}/* + %{_bindir}/%{name} + %{_bindir}/modifyrepo +@@ -43,6 +55,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install + %{python_sitelib}/createrepo + + %changelog ++* Fri Sep 9 2011 Seth Vidal ++- add lzma dep ++ + * Wed Jan 26 2011 Seth Vidal + - bump to 0.9.9 + - add worker.py +diff --git a/createrepo/__init__.py b/createrepo/__init__.py +index 8f2538e..1b18a9f 100644 +--- a/createrepo/__init__.py ++++ b/createrepo/__init__.py +@@ -26,15 +26,16 @@ import tempfile + import stat + import fcntl + import subprocess ++from select import select + +-from yum import misc, Errors, to_unicode +-from yum.repoMDObject import RepoMD, RepoMDError, RepoData ++from yum import misc, Errors ++from yum.repoMDObject import RepoMD, RepoData + from yum.sqlutils import executeSQL + from yum.packageSack import MetaSack +-from yum.packages 
import YumAvailablePackage, YumLocalPackage ++from yum.packages import YumAvailablePackage + + import rpmUtils.transaction +-from utils import _, errorprint, MDError ++from utils import _, errorprint, MDError, lzma, _available_compression + import readMetadata + try: + import sqlite3 as sqlite +@@ -46,8 +47,9 @@ try: + except ImportError: + pass + +-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \ ++from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \ + checksum_and_rename, split_list_into_equal_chunks ++from utils import num_cpus_online + import deltarpms + + __version__ = '0.9.9' +@@ -74,7 +76,7 @@ class MetaDataConfig(object): + self.deltadir = None + self.delta_relative = 'drpms/' + self.oldpackage_paths = [] # where to look for the old packages - +- self.deltafile = 'prestodelta.xml.gz' ++ self.deltafile = 'prestodelta.xml' + self.num_deltas = 1 # number of older versions to delta (max) + self.max_delta_rpm_size = 100000000 + self.update_md_path = None +@@ -86,9 +88,9 @@ class MetaDataConfig(object): + self.skip_symlinks = False + self.pkglist = [] + self.database_only = False +- self.primaryfile = 'primary.xml.gz' +- self.filelistsfile = 'filelists.xml.gz' +- self.otherfile = 'other.xml.gz' ++ self.primaryfile = 'primary.xml' ++ self.filelistsfile = 'filelists.xml' ++ self.otherfile = 'other.xml' + self.repomdfile = 'repomd.xml' + self.tempdir = '.repodata' + self.finaldir = 'repodata' +@@ -108,8 +110,10 @@ class MetaDataConfig(object): + self.collapse_glibc_requires = True + self.workers = 1 # number of workers to fork off to grab metadata from the pkgs + self.worker_cmd = '/usr/share/createrepo/worker.py' +- + #self.worker_cmd = './worker.py' # helpful when testing ++ self.retain_old_md = 0 ++ self.compress_type = 'compat' ++ + + class SimpleMDCallBack(object): + def errorlog(self, thing): +@@ -141,10 +145,23 @@ class MetaDataGenerator: + self.files = [] + self.rpmlib_reqs = {} + self.read_pkgs = [] ++ 
self.compat_compress = False + + if not self.conf.directory and not self.conf.directories: + raise MDError, "No directory given on which to run." +- ++ ++ if self.conf.compress_type == 'compat': ++ self.compat_compress = True ++ self.conf.compress_type = None ++ ++ if not self.conf.compress_type: ++ self.conf.compress_type = 'gz' ++ ++ if self.conf.compress_type not in utils._available_compression: ++ raise MDError, "Compression %s not available: Please choose from: %s" \ ++ % (self.conf.compress_type, ', '.join(utils._available_compression)) ++ ++ + if not self.conf.directories: # just makes things easier later + self.conf.directories = [self.conf.directory] + if not self.conf.directory: # ensure we have both in the config object +@@ -290,14 +307,13 @@ class MetaDataGenerator: + + def extension_visitor(filelist, dirname, names): + for fn in names: ++ fn = os.path.join(dirname, fn) + if os.path.isdir(fn): + continue + if self.conf.skip_symlinks and os.path.islink(fn): + continue + elif fn[-extlen:].lower() == '%s' % (ext): +- relativepath = dirname.replace(startdir, "", 1) +- relativepath = relativepath.lstrip("/") +- filelist.append(os.path.join(relativepath, fn)) ++ filelist.append(fn[len(startdir):]) + + filelist = [] + startdir = directory + '/' +@@ -311,7 +327,7 @@ class MetaDataGenerator: + def checkTimeStamps(self): + """check the timestamp of our target dir. If it is not newer than + the repodata return False, else True""" +- if self.conf.checkts: ++ if self.conf.checkts and self.conf.mdtimestamp: + dn = os.path.join(self.conf.basedir, self.conf.directory) + files = self.getFileList(dn, '.rpm') + files = self.trimRpms(files) +@@ -410,9 +426,11 @@ class MetaDataGenerator: + + def _setupPrimary(self): + # setup the primary metadata file ++ # FIXME - make this be conf.compress_type once y-m-p is fixed ++ fpz = self.conf.primaryfile + '.' 
+ 'gz' + primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, +- self.conf.primaryfile) +- fo = _gzipOpen(primaryfilepath, 'w') ++ fpz) ++ fo = compressOpen(primaryfilepath, 'w', 'gz') + fo.write('\n') + fo.write('' % +@@ -421,9 +439,11 @@ class MetaDataGenerator: + + def _setupFilelists(self): + # setup the filelist file ++ # FIXME - make this be conf.compress_type once y-m-p is fixed ++ fpz = self.conf.filelistsfile + '.' + 'gz' + filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, +- self.conf.filelistsfile) +- fo = _gzipOpen(filelistpath, 'w') ++ fpz) ++ fo = compressOpen(filelistpath, 'w', 'gz') + fo.write('\n') + fo.write('' % self.pkgcount) +@@ -431,9 +451,11 @@ class MetaDataGenerator: + + def _setupOther(self): + # setup the other file ++ # FIXME - make this be conf.compress_type once y-m-p is fixed ++ fpz = self.conf.otherfile + '.' + 'gz' + otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, +- self.conf.otherfile) +- fo = _gzipOpen(otherfilepath, 'w') ++ fpz) ++ fo = compressOpen(otherfilepath, 'w', 'gz') + fo.write('\n') + fo.write('' % +@@ -442,9 +464,10 @@ class MetaDataGenerator: + + def _setupDelta(self): + # setup the other file ++ fpz = self.conf.deltafile + '.' 
+ self.conf.compress_type + deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, +- self.conf.deltafile) +- fo = _gzipOpen(deltafilepath, 'w') ++ fpz) ++ fo = compressOpen(deltafilepath, 'w', self.conf.compress_type) + fo.write('\n') + fo.write('\n') + return fo +@@ -520,6 +543,7 @@ class MetaDataGenerator: + # go on their merry way + + newpkgs = [] ++ keptpkgs = [] + if self.conf.update: + # if we're in --update mode then only act on the new/changed pkgs + for pkg in pkglist: +@@ -530,39 +554,13 @@ class MetaDataGenerator: + old_pkg = pkg + if pkg.find("://") != -1: + old_pkg = os.path.basename(pkg) +- nodes = self.oldData.getNodes(old_pkg) +- if nodes is not None: # we have a match in the old metadata ++ old_po = self.oldData.getNodes(old_pkg) ++ if old_po: # we have a match in the old metadata + if self.conf.verbose: + self.callback.log(_("Using data from old metadata for %s") + % pkg) +- (primarynode, filenode, othernode) = nodes +- +- for node, outfile in ((primarynode, self.primaryfile), +- (filenode, self.flfile), +- (othernode, self.otherfile)): +- if node is None: +- break +- +- if self.conf.baseurl: +- anode = node.children +- while anode is not None: +- if anode.type != "element": +- anode = anode.next +- continue +- if anode.name == "location": +- anode.setProp('xml:base', self.conf.baseurl) +- anode = anode.next +- +- output = node.serialize('UTF-8', self.conf.pretty) +- if output: +- outfile.write(output) +- else: +- if self.conf.verbose: +- self.callback.log(_("empty serialize on write to" \ +- "%s in %s") % (outfile, pkg)) +- outfile.write('\n') +- +- self.oldData.freeNodes(pkg) ++ keptpkgs.append((pkg, old_po)) ++ + #FIXME - if we're in update and we have deltas enabled + # check the presto data for this pkg and write its info back out + # to our deltafile +@@ -584,32 +582,45 @@ class MetaDataGenerator: + po = None + if isinstance(pkg, YumAvailablePackage): + po = pkg +- self.read_pkgs.append(po.localpath) ++ 
self.read_pkgs.append(po.localPkg()) + + # if we're dealing with remote pkgs - pitch it over to doing + # them one at a time, for now. + elif pkg.find('://') != -1: +- po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) ++ po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir) + self.read_pkgs.append(pkg) + + if po: +- self.primaryfile.write(po.xml_dump_primary_metadata()) +- self.flfile.write(po.xml_dump_filelists_metadata()) +- self.otherfile.write(po.xml_dump_other_metadata( +- clog_limit=self.conf.changelog_limit)) ++ keptpkgs.append((pkg, po)) + continue + + pkgfiles.append(pkg) +- +- ++ ++ keptpkgs.sort(reverse=True) ++ # keptkgs is a list of (filename, po), pkgfiles is a list if filenames. ++ # Need to write them in sorted(filename) order. We loop over pkgfiles, ++ # inserting keptpkgs in right spots (using the upto argument). ++ def save_keptpkgs(upto): ++ while keptpkgs and (upto is None or keptpkgs[-1][0] < upto): ++ filename, po = keptpkgs.pop() ++ # reset baseurl in the old pkg ++ po.basepath = self.conf.baseurl ++ self.primaryfile.write(po.xml_dump_primary_metadata()) ++ self.flfile.write(po.xml_dump_filelists_metadata()) ++ self.otherfile.write(po.xml_dump_other_metadata( ++ clog_limit=self.conf.changelog_limit)) ++ + if pkgfiles: + # divide that list by the number of workers and fork off that many + # workers to tmpdirs + # waitfor the workers to finish and as each one comes in + # open the files they created and write them out to our metadata + # add up the total pkg counts and return that value +- worker_tmp_path = tempfile.mkdtemp() +- worker_chunks = utils.split_list_into_equal_chunks(pkgfiles, self.conf.workers) ++ self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later ++ if self.conf.workers < 1: ++ self.conf.workers = num_cpus_online() ++ pkgfiles.sort() ++ worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers) + worker_cmd_dict = {} + worker_jobs = {} 
+ base_worker_cmdline = [self.conf.worker_cmd, +@@ -617,7 +628,8 @@ class MetaDataGenerator: + '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, + '--pkgoptions=_cachedir=%s' % self.conf.cachedir, + '--pkgoptions=_baseurl=%s' % self.conf.baseurl, +- '--globalopts=clog_limit=%s' % self.conf.changelog_limit,] ++ '--globalopts=clog_limit=%s' % self.conf.changelog_limit, ++ '--globalopts=sumtype=%s' % self.conf.sumtype, ] + + if self.conf.quiet: + base_worker_cmdline.append('--quiet') +@@ -626,15 +638,14 @@ class MetaDataGenerator: + base_worker_cmdline.append('--verbose') + + for worker_num in range(self.conf.workers): +- # make the worker directory ++ pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num ++ f = open(pkl, 'w') ++ f.write('\n'.join(worker_chunks[worker_num])) ++ f.close() ++ + workercmdline = [] + workercmdline.extend(base_worker_cmdline) +- thisdir = worker_tmp_path + '/' + str(worker_num) +- if checkAndMakeDir(thisdir): +- workercmdline.append('--tmpmdpath=%s' % thisdir) +- else: +- raise MDError, "Unable to create worker path: %s" % thisdir +- workercmdline.extend(worker_chunks[worker_num]) ++ workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num)) + worker_cmd_dict[worker_num] = workercmdline + + +@@ -647,49 +658,60 @@ class MetaDataGenerator: + stderr=subprocess.PIPE) + worker_jobs[num] = job + +- gimmebreak = 0 +- while gimmebreak != len(worker_jobs.keys()): +- gimmebreak = 0 +- for (num,job) in worker_jobs.items(): +- if job.poll() is not None: +- gimmebreak+=1 +- line = job.stdout.readline() +- if line: ++ files = self.primaryfile, self.flfile, self.otherfile ++ def log_messages(num): ++ job = worker_jobs[num] ++ while True: ++ # check stdout and stderr ++ for stream in select((job.stdout, job.stderr), (), ())[0]: ++ line = stream.readline() ++ if line: break ++ else: ++ return # EOF, EOF ++ if stream is job.stdout: ++ if line.startswith('*** '): ++ # get data, save to local files ++ 
for out, size in zip(files, line[4:].split()): ++ out.write(stream.read(int(size))) ++ return + self.callback.log('Worker %s: %s' % (num, line.rstrip())) +- line = job.stderr.readline() +- if line: ++ else: + self.callback.errorlog('Worker %s: %s' % (num, line.rstrip())) ++ ++ for i, pkg in enumerate(pkgfiles): ++ # insert cached packages ++ save_keptpkgs(pkg) ++ ++ # save output to local files ++ log_messages(i % self.conf.workers) ++ ++ for (num, job) in worker_jobs.items(): ++ # process remaining messages on stderr ++ log_messages(num) ++ ++ if job.wait() != 0: ++ msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode ++ self.callback.errorlog(msg) ++ raise MDError, msg + +- + if not self.conf.quiet: + self.callback.log("Workers Finished") +- # finished with workers +- # go to their dirs and add the contents +- if not self.conf.quiet: +- self.callback.log("Gathering worker results") +- for num in range(self.conf.workers): +- for (fn, fo) in (('primary.xml', self.primaryfile), +- ('filelists.xml', self.flfile), +- ('other.xml', self.otherfile)): +- fnpath = worker_tmp_path + '/' + str(num) + '/' + fn +- if os.path.exists(fnpath): +- fo.write(open(fnpath, 'r').read()) +- + + for pkgfile in pkgfiles: + if self.conf.deltas: +- po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) +- self._do_delta_rpm_package(po) ++ try: ++ po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) ++ self._do_delta_rpm_package(po) ++ except MDError, e: ++ errorprint(e) ++ continue + self.read_pkgs.append(pkgfile) + ++ save_keptpkgs(None) # append anything left + return self.current_pkg + + + def closeMetadataDocs(self): +- if not self.conf.quiet: +- self.callback.log('') +- +- + # save them up to the tmp locations: + if not self.conf.quiet: + self.callback.log(_('Saving Primary metadata')) +@@ -784,7 +806,6 @@ class MetaDataGenerator: + return self._old_package_dict + + self._old_package_dict = {} +- opl = [] + for d in 
self.conf.oldpackage_paths: + for f in self.getFileList(d, '.rpm'): + fp = d + '/' + f +@@ -833,7 +854,7 @@ class MetaDataGenerator: + return ' '.join(results) + + def _createRepoDataObject(self, mdfile, mdtype, compress=True, +- compress_type='gzip', attribs={}): ++ compress_type=None, attribs={}): + """return random metadata as RepoData object to be added to RepoMD + mdfile = complete path to file + mdtype = the metadata type to use +@@ -843,15 +864,13 @@ class MetaDataGenerator: + sfile = os.path.basename(mdfile) + fo = open(mdfile, 'r') + outdir = os.path.join(self.conf.outputdir, self.conf.tempdir) ++ if not compress_type: ++ compress_type = self.conf.compress_type + if compress: +- if compress_type == 'gzip': +- sfile = '%s.gz' % sfile +- outfn = os.path.join(outdir, sfile) +- output = GzipFile(filename = outfn, mode='wb') +- elif compress_type == 'bzip2': +- sfile = '%s.bz2' % sfile +- outfn = os.path.join(outdir, sfile) +- output = BZ2File(filename = outfn, mode='wb') ++ sfile = '%s.%s' % (sfile, compress_type) ++ outfn = os.path.join(outdir, sfile) ++ output = compressOpen(outfn, mode='wb', compress_type=compress_type) ++ + else: + outfn = os.path.join(outdir, sfile) + output = open(outfn, 'w') +@@ -874,14 +893,13 @@ class MetaDataGenerator: + + thisdata = RepoData() + thisdata.type = mdtype +- baseloc = None + thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile)) + thisdata.checksum = (self.conf.sumtype, csum) + if compress: + thisdata.openchecksum = (self.conf.sumtype, open_csum) + + thisdata.size = str(os.stat(outfn).st_size) +- thisdata.timestamp = str(os.stat(outfn).st_mtime) ++ thisdata.timestamp = str(int(os.stat(outfn).st_mtime)) + for (k, v) in attribs.items(): + setattr(thisdata, k, str(v)) + +@@ -925,9 +943,14 @@ class MetaDataGenerator: + rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None) + + for (rpm_file, ftype) in workfiles: ++ # when we fix y-m-p and non-gzipped xml files - then we can make this 
just add ++ # self.conf.compress_type ++ if ftype in ('other', 'filelists', 'primary'): ++ rpm_file = rpm_file + '.' + 'gz' ++ elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression: ++ rpm_file = rpm_file + '.' + self.conf.compress_type + complete_path = os.path.join(repopath, rpm_file) +- +- zfo = _gzipOpen(complete_path) ++ zfo = compressOpen(complete_path) + # This is misc.checksum() done locally so we can get the size too. + data = misc.Checksums([sumtype]) + while data.read(zfo, 2**16): +@@ -966,14 +989,20 @@ class MetaDataGenerator: + good_name = '%s.sqlite' % ftype + resultpath = os.path.join(repopath, good_name) + ++ # compat compression for rhel5 compatibility from fedora :( ++ compress_type = self.conf.compress_type ++ if self.compat_compress: ++ compress_type = 'bz2' ++ + # rename from silly name to not silly name + os.rename(tmp_result_path, resultpath) +- compressed_name = '%s.bz2' % good_name ++ compressed_name = '%s.%s' % (good_name, compress_type) + result_compressed = os.path.join(repopath, compressed_name) + db_csums[ftype] = misc.checksum(sumtype, resultpath) + + # compress the files +- bzipFile(resultpath, result_compressed) ++ ++ compressFile(resultpath, result_compressed, compress_type) + # csum the compressed file + db_compressed_sums[ftype] = misc.checksum(sumtype, + result_compressed) +@@ -983,8 +1012,8 @@ class MetaDataGenerator: + os.unlink(resultpath) + + if self.conf.unique_md_filenames: +- csum_compressed_name = '%s-%s.bz2' % ( +- db_compressed_sums[ftype], good_name) ++ csum_compressed_name = '%s-%s.%s' % ( ++ db_compressed_sums[ftype], good_name, compress_type) + csum_result_compressed = os.path.join(repopath, + csum_compressed_name) + os.rename(result_compressed, csum_result_compressed) +@@ -1001,7 +1030,7 @@ class MetaDataGenerator: + data.location = (self.conf.baseurl, + os.path.join(self.conf.finaldir, compressed_name)) + data.checksum = (sumtype, db_compressed_sums[ftype]) +- data.timestamp = 
str(db_stat.st_mtime) ++ data.timestamp = str(int(db_stat.st_mtime)) + data.size = str(db_stat.st_size) + data.opensize = str(un_stat.st_size) + data.openchecksum = (sumtype, db_csums[ftype]) +@@ -1020,7 +1049,13 @@ class MetaDataGenerator: + data.openchecksum = (sumtype, uncsum) + + if self.conf.unique_md_filenames: +- res_file = '%s-%s.xml.gz' % (csum, ftype) ++ if ftype in ('primary', 'filelists', 'other'): ++ compress = 'gz' ++ else: ++ compress = self.conf.compress_type ++ ++ main_name = '.'.join(rpm_file.split('.')[:-1]) ++ res_file = '%s-%s.%s' % (csum, main_name, compress) + orig_file = os.path.join(repopath, rpm_file) + dest_file = os.path.join(repopath, res_file) + os.rename(orig_file, dest_file) +@@ -1046,7 +1081,7 @@ class MetaDataGenerator: + + + if self.conf.additional_metadata: +- for md_type, mdfile in self.conf.additional_metadata.items(): ++ for md_type, md_file in self.conf.additional_metadata.items(): + mdcontent = self._createRepoDataObject(md_file, md_type) + repomd.repoData[mdcontent.type] = mdcontent + +@@ -1110,23 +1145,43 @@ class MetaDataGenerator: + raise MDError, _( + 'Could not remove old metadata file: %s: %s') % (oldfile, e) + +- # Move everything else back from olddir (eg. 
repoview files) +- try: +- old_contents = os.listdir(output_old_dir) +- except (OSError, IOError), e: +- old_contents = [] +- ++ old_to_remove = [] ++ old_pr = [] ++ old_fl = [] ++ old_ot = [] ++ old_pr_db = [] ++ old_fl_db = [] ++ old_ot_db = [] + for f in os.listdir(output_old_dir): + oldfile = os.path.join(output_old_dir, f) + finalfile = os.path.join(output_final_dir, f) +- if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2', +- 'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2', +- 'other.xml.gz','filelists.xml.gz'): +- os.remove(oldfile) # kill off the old ones +- continue +- if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2', +- 'primary.sqlite.bz2'): +- os.remove(oldfile) ++ ++ for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr), ++ ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl), ++ ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)): ++ fn = '.'.join(f.split('.')[:-1]) ++ if fn.endswith(end): ++ lst.append(oldfile) ++ break ++ ++ # make a list of the old metadata files we don't want to remove. 
++ for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db): ++ sortlst = sorted(lst, key=lambda x: os.path.getmtime(x), ++ reverse=True) ++ for thisf in sortlst[self.conf.retain_old_md:]: ++ old_to_remove.append(thisf) ++ ++ for f in os.listdir(output_old_dir): ++ oldfile = os.path.join(output_old_dir, f) ++ finalfile = os.path.join(output_final_dir, f) ++ fn = '.'.join(f.split('.')[:-1]) ++ if fn in ('filelists.sqlite', 'other.sqlite', ++ 'primary.sqlite') or oldfile in old_to_remove: ++ try: ++ os.remove(oldfile) ++ except (OSError, IOError), e: ++ raise MDError, _( ++ 'Could not remove old metadata file: %s: %s') % (oldfile, e) + continue + + if os.path.exists(finalfile): +@@ -1147,14 +1202,19 @@ class MetaDataGenerator: + msg += _('Error was %s') % e + raise MDError, msg + +- try: +- os.rmdir(output_old_dir) +- except OSError, e: +- self.errorlog(_('Could not remove old metadata dir: %s') +- % self.conf.olddir) +- self.errorlog(_('Error was %s') % e) +- self.errorlog(_('Please clean up this directory manually.')) ++ self._cleanup_tmp_repodata_dir() ++ self._cleanup_update_tmp_dir() ++ self._write_out_read_pkgs_list() ++ + ++ def _cleanup_update_tmp_dir(self): ++ if not self.conf.update: ++ return ++ ++ shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True) ++ shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True) ++ ++ def _write_out_read_pkgs_list(self): + # write out the read_pkgs_list file with self.read_pkgs + if self.conf.read_pkgs_list: + try: +@@ -1167,6 +1227,23 @@ class MetaDataGenerator: + % self.conf.read_pkgs_list) + self.errorlog(_('Error was %s') % e) + ++ def _cleanup_tmp_repodata_dir(self): ++ output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir) ++ output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir) ++ for dirbase in (self.conf.olddir, self.conf.tempdir): ++ dirpath = os.path.join(self.conf.outputdir, dirbase) ++ if os.path.exists(dirpath): ++ try: ++ os.rmdir(dirpath) ++ 
except OSError, e: ++ self.errorlog(_('Could not remove temp metadata dir: %s') ++ % dirbase) ++ self.errorlog(_('Error was %s') % e) ++ self.errorlog(_('Please clean up this directory manually.')) ++ # our worker tmp path ++ if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path): ++ shutil.rmtree(self._worker_tmp_path, ignore_errors=True) ++ + def setup_sqlite_dbs(self, initdb=True): + """sets up the sqlite dbs w/table schemas and db_infos""" + destdir = os.path.join(self.conf.outputdir, self.conf.tempdir) +@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): + (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url) + return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment))) + +- def getFileList(self, directory, ext): +- +- extlen = len(ext) +- +- def extension_visitor(arg, dirname, names): +- for fn in names: +- if os.path.isdir(fn): +- continue +- elif fn[-extlen:].lower() == '%s' % (ext): +- reldir = os.path.basename(dirname) +- if reldir == os.path.basename(directory): +- reldir = "" +- arg.append(os.path.join(reldir, fn)) +- +- rpmlist = [] +- os.path.walk(directory, extension_visitor, rpmlist) +- return rpmlist +- + def doPkgMetadata(self): + """all the heavy lifting for the package metadata""" + if len(self.conf.directories) == 1: +@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator): + thisdir = os.path.join(self.conf.basedir, mydir) + + filematrix[mydir] = self.getFileList(thisdir, '.rpm') ++ ++ # pkglist is a bit different for split media, as we have to know ++ # which dir. it belongs to. So we walk the dir. and then filter. ++ # We could be faster by not walking the dir. ... but meh. 
++ if self.conf.pkglist: ++ pkglist = set(self.conf.pkglist) ++ pkgs = [] ++ for fname in filematrix[mydir]: ++ if fname not in pkglist: ++ continue ++ pkgs.append(fname) ++ filematrix[mydir] = pkgs ++ + self.trimRpms(filematrix[mydir]) + self.pkgcount += len(filematrix[mydir]) + +@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): + self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) + try: + self.openMetadataDocs() +- original_basedir = self.conf.basedir + for mydir in self.conf.directories: + self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) + self.writeMetadataDocs(filematrix[mydir], mydir) +diff --git a/createrepo/merge.py b/createrepo/merge.py +index b3b2ea1..1ac43bb 100644 +--- a/createrepo/merge.py ++++ b/createrepo/merge.py +@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir + import yum.update_md + import rpmUtils.arch + import operator ++from utils import MDError + import createrepo + import tempfile + +@@ -84,6 +85,8 @@ class RepoMergeBase: + # in the repolist + count = 0 + for r in self.repolist: ++ if r[0] == '/': ++ r = 'file://' + r # just fix the file repos, this is silly. 
+ count +=1 + rid = 'repo%s' % count + n = self.yumbase.add_enable_repo(rid, baseurls=[r], +@@ -92,7 +95,10 @@ class RepoMergeBase: + n._merge_rank = count + + #setup our sacks +- self.yumbase._getSacks(archlist=self.archlist) ++ try: ++ self.yumbase._getSacks(archlist=self.archlist) ++ except yum.Errors.RepoError, e: ++ raise MDError, "Could not setup merge repo pkgsack: %s" % e + + myrepos = self.yumbase.repos.listEnabled() + +@@ -102,11 +108,16 @@ class RepoMergeBase: + def write_metadata(self, outputdir=None): + mytempdir = tempfile.mkdtemp() + if self.groups: +- comps_fn = mytempdir + '/groups.xml' +- compsfile = open(comps_fn, 'w') +- compsfile.write(self.yumbase.comps.xml()) +- compsfile.close() +- self.mdconf.groupfile=comps_fn ++ try: ++ comps_fn = mytempdir + '/groups.xml' ++ compsfile = open(comps_fn, 'w') ++ compsfile.write(self.yumbase.comps.xml()) ++ compsfile.close() ++ except yum.Errors.GroupsError, e: ++ # groups not being available shouldn't be a fatal error ++ pass ++ else: ++ self.mdconf.groupfile=comps_fn + + if self.updateinfo: + ui_fn = mytempdir + '/updateinfo.xml' +diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py +index 27d3690..54863cb 100644 +--- a/createrepo/readMetadata.py ++++ b/createrepo/readMetadata.py +@@ -16,11 +16,25 @@ + # Copyright 2006 Red Hat + + import os +-import libxml2 + import stat + from utils import errorprint, _ + +-from yum import repoMDObject ++import yum ++from yum import misc ++from yum.Errors import YumBaseError ++import tempfile ++class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite): ++ # special for special people like us. 
++ def _return_remote_location(self): ++ ++ if self.basepath: ++ msg = """\n""" % ( ++ misc.to_xml(self.basepath, attrib=True), ++ misc.to_xml(self.relativepath, attrib=True)) ++ else: ++ msg = """\n""" % misc.to_xml(self.relativepath, attrib=True) ++ ++ return msg + + + class MetadataIndex(object): +@@ -30,178 +44,72 @@ class MetadataIndex(object): + opts = {} + self.opts = opts + self.outputdir = outputdir ++ realpath = os.path.realpath(outputdir) + repodatadir = self.outputdir + '/repodata' +- myrepomdxml = repodatadir + '/repomd.xml' +- if os.path.exists(myrepomdxml): +- repomd = repoMDObject.RepoMD('garbageid', myrepomdxml) +- b = repomd.getData('primary').location[1] +- f = repomd.getData('filelists').location[1] +- o = repomd.getData('other').location[1] +- basefile = os.path.join(self.outputdir, b) +- filelistfile = os.path.join(self.outputdir, f) +- otherfile = os.path.join(self.outputdir, o) +- else: +- basefile = filelistfile = otherfile = "" +- +- self.files = {'base' : basefile, +- 'filelist' : filelistfile, +- 'other' : otherfile} +- self.scan() ++ self._repo = yum.yumRepo.YumRepository('garbageid') ++ self._repo.baseurl = 'file://' + realpath ++ self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo") ++ self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p") ++ self._repo.metadata_expire = 1 ++ self._repo.gpgcheck = 0 ++ self._repo.repo_gpgcheck = 0 ++ self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld) ++ self.pkg_tups_by_path = {} ++ try: ++ self.scan() ++ except YumBaseError, e: ++ print "Could not find valid repo at: %s" % self.outputdir ++ + + def scan(self): +- """Read in and index old repo data""" +- self.basenodes = {} +- self.filesnodes = {} +- self.othernodes = {} +- self.pkg_ids = {} ++ """Read in old repodata""" + if self.opts.get('verbose'): + print _("Scanning old repo data") +- for fn in self.files.values(): +- if not os.path.exists(fn): +- #cannot scan +- 
errorprint(_("Warning: Old repodata file missing: %s") % fn) +- return +- root = libxml2.parseFile(self.files['base']).getRootElement() +- self._scanPackageNodes(root, self._handleBase) +- if self.opts.get('verbose'): +- print _("Indexed %i base nodes" % len(self.basenodes)) +- root = libxml2.parseFile(self.files['filelist']).getRootElement() +- self._scanPackageNodes(root, self._handleFiles) +- if self.opts.get('verbose'): +- print _("Indexed %i filelist nodes" % len(self.filesnodes)) +- root = libxml2.parseFile(self.files['other']).getRootElement() +- self._scanPackageNodes(root, self._handleOther) +- if self.opts.get('verbose'): +- print _("Indexed %i other nodes" % len(self.othernodes)) +- #reverse index pkg ids to track references +- self.pkgrefs = {} +- for relpath, pkgid in self.pkg_ids.iteritems(): +- self.pkgrefs.setdefault(pkgid,[]).append(relpath) +- +- def _scanPackageNodes(self, root, handler): +- node = root.children +- while node is not None: +- if node.type != "element": +- node = node.next ++ self._repo.sack.populate(self._repo, 'all', None, False) ++ for thispo in self._repo.sack: ++ mtime = thispo.filetime ++ size = thispo.size ++ relpath = thispo.relativepath ++ do_stat = self.opts.get('do_stat', True) ++ if mtime is None: ++ print _("mtime missing for %s") % relpath + continue +- if node.name == "package": +- handler(node) +- node = node.next +- +- def _handleBase(self, node): +- top = node +- node = node.children +- pkgid = None +- mtime = None +- size = None +- relpath = None +- do_stat = self.opts.get('do_stat', True) +- while node is not None: +- if node.type != "element": +- node = node.next ++ if size is None: ++ print _("size missing for %s") % relpath + continue +- if node.name == "checksum": +- pkgid = node.content +- elif node.name == "time": +- mtime = int(node.prop('file')) +- elif node.name == "size": +- size = int(node.prop('package')) +- elif node.name == "location": +- relpath = node.prop('href') +- node = node.next +- if 
relpath is None: +- print _("Incomplete data for node") +- return +- if pkgid is None: +- print _("pkgid missing for %s") % relpath +- return +- if mtime is None: +- print _("mtime missing for %s") % relpath +- return +- if size is None: +- print _("size missing for %s") % relpath +- return +- if do_stat: +- filepath = os.path.join(self.opts['pkgdir'], relpath) +- try: +- st = os.stat(filepath) +- except OSError: +- #file missing -- ignore +- return +- if not stat.S_ISREG(st.st_mode): +- #ignore non files +- return +- #check size and mtime +- if st.st_size != size: +- if self.opts.get('verbose'): +- print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) +- return +- if int(st.st_mtime) != mtime: +- if self.opts.get('verbose'): +- print _("Modification time changed for %s") % filepath +- return +- #otherwise we index +- self.basenodes[relpath] = top +- self.pkg_ids[relpath] = pkgid +- +- def _handleFiles(self, node): +- pkgid = node.prop('pkgid') +- if pkgid: +- self.filesnodes[pkgid] = node +- +- def _handleOther(self, node): +- pkgid = node.prop('pkgid') +- if pkgid: +- self.othernodes[pkgid] = node ++ if do_stat: ++ filepath = os.path.join(self.opts['pkgdir'], relpath) ++ try: ++ st = os.stat(filepath) ++ except OSError: ++ #file missing -- ignore ++ continue ++ if not stat.S_ISREG(st.st_mode): ++ #ignore non files ++ continue ++ #check size and mtime ++ if st.st_size != size: ++ if self.opts.get('verbose'): ++ print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) ++ continue ++ if int(st.st_mtime) != mtime: ++ if self.opts.get('verbose'): ++ print _("Modification time changed for %s") % filepath ++ continue ++ ++ self.pkg_tups_by_path[relpath] = thispo.pkgtup ++ + +- def getNodes(self, relpath): +- """Return base, filelist, and other nodes for file, if they exist + +- Returns a tuple of nodes, or None if not found ++ def getNodes(self, relpath): ++ """return a package object based on relative path of pkg + """ +- bnode 
= self.basenodes.get(relpath,None) +- if bnode is None: +- return None +- pkgid = self.pkg_ids.get(relpath,None) +- if pkgid is None: +- print _("No pkgid found for: %s") % relpath +- return None +- fnode = self.filesnodes.get(pkgid,None) +- if fnode is None: +- return None +- onode = self.othernodes.get(pkgid,None) +- if onode is None: +- return None +- return bnode, fnode, onode +- +- def freeNodes(self,relpath): +- #causing problems +- """Free up nodes corresponding to file, if possible""" +- bnode = self.basenodes.get(relpath,None) +- if bnode is None: +- print "Missing node for %s" % relpath +- return +- bnode.unlinkNode() +- bnode.freeNode() +- del self.basenodes[relpath] +- pkgid = self.pkg_ids.get(relpath,None) +- if pkgid is None: +- print _("No pkgid found for: %s") % relpath +- return None +- del self.pkg_ids[relpath] +- dups = self.pkgrefs.get(pkgid) +- dups.remove(relpath) +- if len(dups): +- #still referenced +- return +- del self.pkgrefs[pkgid] +- for nodes in self.filesnodes, self.othernodes: +- node = nodes.get(pkgid) +- if node is not None: +- node.unlinkNode() +- node.freeNode() +- del nodes[pkgid] ++ if relpath in self.pkg_tups_by_path: ++ pkgtup = self.pkg_tups_by_path[relpath] ++ return self._repo.sack.searchPkgTuple(pkgtup)[0] ++ return None + ++ + + if __name__ == "__main__": + cwd = os.getcwd() +@@ -209,9 +117,9 @@ if __name__ == "__main__": + 'pkgdir': cwd} + + idx = MetadataIndex(cwd, opts) +- for fn in idx.basenodes.keys(): +- a,b,c, = idx.getNodes(fn) +- a.serialize() +- b.serialize() +- c.serialize() +- idx.freeNodes(fn) ++ for fn in idx.pkg_tups_by_path: ++ po = idx.getNodes(fn) ++ print po.xml_dump_primary_metadata() ++ print po.xml_dump_filelists_metadata() ++ print po.xml_dump_other_metadata() ++ +diff --git a/createrepo/utils.py b/createrepo/utils.py +index 995c3b9..b0d92ec 100644 +--- a/createrepo/utils.py ++++ b/createrepo/utils.py +@@ -23,6 +23,12 @@ import bz2 + import gzip + from gzip import write32u, FNAME + from yum import 
misc ++_available_compression = ['gz', 'bz2'] ++try: ++ import lzma ++ _available_compression.append('xz') ++except ImportError: ++ lzma = None + + def errorprint(stuff): + print >> sys.stderr, stuff +@@ -34,22 +40,14 @@ def _(args): + + class GzipFile(gzip.GzipFile): + def _write_gzip_header(self): ++ # Generate a header that is easily reproduced with gzip -9 -n on ++ # an unix-like system + self.fileobj.write('\037\213') # magic header + self.fileobj.write('\010') # compression method +- if hasattr(self, 'name'): +- fname = self.name[:-3] +- else: +- fname = self.filename[:-3] +- flags = 0 +- if fname: +- flags = FNAME +- self.fileobj.write(chr(flags)) +- write32u(self.fileobj, long(0)) +- self.fileobj.write('\002') +- self.fileobj.write('\377') +- if fname: +- self.fileobj.write(fname + '\000') +- ++ self.fileobj.write('\000') # flags ++ write32u(self.fileobj, long(0)) # timestamp ++ self.fileobj.write('\002') # max compression ++ self.fileobj.write('\003') # UNIX + + def _gzipOpen(filename, mode="rb", compresslevel=9): + return GzipFile(filename, mode, compresslevel) +@@ -69,6 +67,75 @@ def bzipFile(source, dest): + s_fn.close() + + ++def xzFile(source, dest): ++ if not 'xz' in _available_compression: ++ raise MDError, "Cannot use xz for compression, library/module is not available" ++ ++ s_fn = open(source, 'rb') ++ destination = lzma.LZMAFile(dest, 'w') ++ ++ while True: ++ data = s_fn.read(1024000) ++ ++ if not data: break ++ destination.write(data) ++ ++ destination.close() ++ s_fn.close() ++ ++def gzFile(source, dest): ++ ++ s_fn = open(source, 'rb') ++ destination = GzipFile(dest, 'w') ++ ++ while True: ++ data = s_fn.read(1024000) ++ ++ if not data: break ++ destination.write(data) ++ ++ destination.close() ++ s_fn.close() ++ ++ ++class Duck: ++ def __init__(self, **attr): ++ self.__dict__ = attr ++ ++ ++def compressFile(source, dest, compress_type): ++ """Compress an existing file using any compression type from source to dest""" ++ ++ if compress_type 
== 'xz': ++ xzFile(source, dest) ++ elif compress_type == 'bz2': ++ bzipFile(source, dest) ++ elif compress_type == 'gz': ++ gzFile(source, dest) ++ else: ++ raise MDError, "Unknown compression type %s" % compress_type ++ ++def compressOpen(fn, mode='rb', compress_type=None): ++ ++ if not compress_type: ++ # we are readonly and we don't give a compress_type - then guess based on the file extension ++ compress_type = fn.split('.')[-1] ++ if compress_type not in _available_compression: ++ compress_type = 'gz' ++ ++ if compress_type == 'xz': ++ fh = lzma.LZMAFile(fn, mode) ++ if mode == 'w': ++ fh = Duck(write=lambda s, write=fh.write: s != '' and write(s), ++ close=fh.close) ++ return fh ++ elif compress_type == 'bz2': ++ return bz2.BZ2File(fn, mode) ++ elif compress_type == 'gz': ++ return _gzipOpen(fn, mode) ++ else: ++ raise MDError, "Unknown compression type %s" % compress_type ++ + def returnFD(filename): + try: + fdno = os.open(filename, os.O_RDONLY) +@@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist): + return result + + def split_list_into_equal_chunks(seq, num_chunks): +- avg = len(seq) / float(num_chunks) +- out = [] +- last = 0.0 +- while last < len(seq): +- out.append(seq[int(last):int(last + avg)]) +- last += avg +- ++ """it's used on sorted input which is then merged in order""" ++ out = [[] for i in range(num_chunks)] ++ for i, item in enumerate(seq): ++ out[i % num_chunks].append(item) + return out + ++def num_cpus_online(unknown=1): ++ if not hasattr(os, "sysconf"): ++ return unknown ++ ++ if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"): ++ return unknown ++ ++ ncpus = os.sysconf("SC_NPROCESSORS_ONLN") ++ try: ++ if int(ncpus) > 0: ++ return ncpus ++ except: ++ pass ++ ++ return unknown ++ + + class MDError(Exception): + def __init__(self, value=None): +diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py +index ac06196..f87ac6d 100644 +--- a/createrepo/yumbased.py ++++ b/createrepo/yumbased.py +@@ -16,6 +16,11 @@ + + + import 
os ++def _get_umask(): ++ oumask = os.umask(0) ++ os.umask(oumask) ++ return oumask ++_b4rpm_oumask = _get_umask() + import rpm + import types + +@@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage): + csumo = os.fdopen(csumo, 'w', -1) + csumo.write(checksum) + csumo.close() ++ # tempfile forces 002 ... we want to undo that, so that users ++ # can share the cache. BZ 833350. ++ os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask) + os.rename(tmpfilename, csumfile) + except: + pass +diff --git a/docs/createrepo.8 b/docs/createrepo.8 +index e3c4c3b..eefd4bf 100644 +--- a/docs/createrepo.8 ++++ b/docs/createrepo.8 +@@ -37,6 +37,10 @@ cache of checksums of packages in the repository. In consecutive runs of + createrepo over the same repository of files that do not have a complete + change out of all packages this decreases the processing time dramatically. + .br ++.IP "\fB\--basedir\fP" ++Basedir for path to directories in the repodata, default is the current working ++directory. ++.br + .IP "\fB\--update\fP" + If metadata already exists in the outputdir and an rpm is unchanged + (based on file size and mtime) since the metadata was generated, reuse +@@ -49,11 +53,15 @@ skip the stat() call on a --update, assumes if the filename is the same + then the file is still the same (only use this if you're fairly trusting or + gullible). + .br ++.IP "\fB\--update-md-path\fP" ++Use the existing repodata for --update, from this path. ++.br + .IP "\fB\-C --checkts\fP" + Don't generate repo metadata, if their timestamps are newer than its rpms. + This option decreases the processing time drastically again, if you happen + to run it on an unmodified repo, but it is (currently) mutual exclusive +-with the --split option. ++with the --split option. NOTE: This command will not notice when ++packages have been removed from repo. Use --update to handle that. + .br + .IP "\fB\--split\fP" + Run in split media mode. 
Rather than pass a single directory, take a set of +@@ -61,7 +69,7 @@ directories corresponding to different volumes in a media set. + .br + .IP "\fB\-p --pretty\fP" + Output xml files in pretty format. +-.IP "\fB\-V --version\fP" ++.IP "\fB\--version\fP" + Output version. + .IP "\fB\-h --help\fP" + Show help menu. +@@ -89,6 +97,10 @@ Include the file's checksum in the metadata filename, helps HTTP caching (defaul + .IP "\fB\--simple-md-filenames\fP" + Do not include the file's checksum in the metadata filename. + ++.IP "\fB\--retain-old-md\fP" ++Keep around the latest (by timestamp) N copies of the old repodata (so clients ++with older repomd.xml files can still access it). Default is 0. ++ + .IP "\fB\--distro\fP" + Specify distro tags. Can be specified more than once. Optional syntax specifying a + cpeid(http://cpe.mitre.org/) --distro=cpeid,distrotag +@@ -104,7 +116,16 @@ Tells createrepo to generate deltarpms and the delta metadata + paths to look for older pkgs to delta against. Can be specified multiple times + .IP "\fB\--num-deltas\fP int" + the number of older versions to make deltas against. Defaults to 1 +- ++.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST" ++output the paths to the pkgs actually read, useful with --update ++.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE" ++max size of an rpm to run deltarpm against (in bytes) ++.IP "\fB\--workers\fP WORKERS" ++number of workers to spawn to read rpms ++.IP "\fB\--compress-type\fP" ++specify which compression method to use: compat (default), ++xz (may not be available), gz, bz2. ++.IP + + .SH "EXAMPLES" + Here is an example of a repository with a groups file.
Note that the +diff --git a/genpkgmetadata.py b/genpkgmetadata.py +index 8c98191..4528bf2 100755 +--- a/genpkgmetadata.py ++++ b/genpkgmetadata.py +@@ -22,7 +22,7 @@ + import os + import sys + import re +-from optparse import OptionParser ++from optparse import OptionParser,SUPPRESS_HELP + import time + + import createrepo +@@ -37,6 +37,12 @@ def parse_args(args, conf): + Sanity check all the things being passed in. + """ + ++ def_workers = os.nice(0) ++ if def_workers > 0: ++ def_workers = 1 # We are niced, so just use a single worker. ++ else: ++ def_workers = 0 # zoooom.... ++ + _def = yum.misc._default_checksums[0] + _avail = yum.misc._available_checksums + parser = OptionParser(version = "createrepo %s" % createrepo.__version__) +@@ -95,11 +101,13 @@ def parse_args(args, conf): + parser.add_option("--changelog-limit", dest="changelog_limit", + default=None, help="only import the last N changelog entries") + parser.add_option("--unique-md-filenames", dest="unique_md_filenames", +- help="include the file's checksum in the filename, helps with proxies", ++ help="include the file's checksum in the filename, helps with proxies (default)", + default=True, action="store_true") +- parser.add_option("--simple-md-filenames", dest="simple_md_filenames", +- help="do not include the file's checksum in the filename, helps with proxies", +- default=False, action="store_true") ++ parser.add_option("--simple-md-filenames", dest="unique_md_filenames", ++ help="do not include the file's checksum in the filename", ++ action="store_false") ++ parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md', ++ help="keep around the latest (by timestamp) N copies of the old repodata") + parser.add_option("--distro", default=[], action="append", + help="distro tag and optional cpeid: --distro" "'cpeid,textname'") + parser.add_option("--content", default=[], dest='content_tags', +@@ -119,10 +127,15 @@ def parse_args(args, conf): + 
parser.add_option("--max-delta-rpm-size", default=100000000, + dest='max_delta_rpm_size', type='int', + help="max size of an rpm that to run deltarpm against (in bytes)") +- +- parser.add_option("--workers", default=1, ++ parser.add_option("--workers", default=def_workers, + dest='workers', type='int', + help="number of workers to spawn to read rpms") ++ parser.add_option("--xz", default=False, ++ action="store_true", ++ help=SUPPRESS_HELP) ++ parser.add_option("--compress-type", default='compat', dest="compress_type", ++ help="which compression type to use") ++ + + (opts, argsleft) = parser.parse_args(args) + if len(argsleft) > 1 and not opts.split: +@@ -138,6 +151,9 @@ def parse_args(args, conf): + else: + directories = argsleft + ++ if opts.workers >= 128: ++ errorprint(_('Warning: More than 128 workers is a lot. Limiting.')) ++ opts.workers = 128 + if opts.sumtype == 'sha1': + errorprint(_('Warning: It is more compatible to use sha instead of sha1')) + +@@ -150,11 +166,13 @@ def parse_args(args, conf): + errorprint(_('--split and --checkts options are mutually exclusive')) + sys.exit(1) + +- if opts.simple_md_filenames: +- opts.unique_md_filenames = False +- + if opts.nodatabase: + opts.database = False ++ ++ # xz is just a shorthand for compress_type ++ if opts.xz and opts.compress_type == 'compat': ++ opts.compress_type='xz' ++ + + # let's switch over to using the conf object - put all the opts into it + for opt in parser.option_list: +@@ -240,6 +258,7 @@ def main(args): + if mdgen.checkTimeStamps(): + if mdgen.conf.verbose: + print _('repo is up to date') ++ mdgen._cleanup_tmp_repodata_dir() + sys.exit(0) + + if conf.profile: +diff --git a/mergerepo.py b/mergerepo.py +index 05e5f5e..80cb1a8 100755 +--- a/mergerepo.py ++++ b/mergerepo.py +@@ -18,6 +18,7 @@ + + import sys + import createrepo.merge ++from createrepo.utils import MDError + from optparse import OptionParser + + #TODO: +@@ -47,6 +48,9 @@ def parse_args(args): + help="Do not merge group(comps) 
metadata") + parser.add_option("", "--noupdateinfo", default=False, action="store_true", + help="Do not merge updateinfo metadata") ++ parser.add_option("--compress-type", default=None, dest="compress_type", ++ help="which compression type to use") ++ + (opts, argsleft) = parser.parse_args(args) + + if len(opts.repos) < 2: +@@ -77,9 +81,14 @@ def main(args): + rmbase.groups = False + if opts.noupdateinfo: + rmbase.updateinfo = False +- +- rmbase.merge_repos() +- rmbase.write_metadata() +- ++ if opts.compress_type: ++ rmbase.mdconf.compress_type = opts.compress_type ++ try: ++ rmbase.merge_repos() ++ rmbase.write_metadata() ++ except MDError, e: ++ print >> sys.stderr, "Could not merge repos: %s" % e ++ sys.exit(1) ++ + if __name__ == "__main__": + main(sys.argv[1:]) +diff --git a/modifyrepo.py b/modifyrepo.py +index 17094a4..bffe99a 100755 +--- a/modifyrepo.py ++++ b/modifyrepo.py +@@ -1,11 +1,15 @@ + #!/usr/bin/python +-# This tools is used to insert arbitrary metadata into an RPM repository. ++# This tool is used to manipulate arbitrary metadata in a RPM repository. + # Example: + # ./modifyrepo.py updateinfo.xml myrepo/repodata ++# or ++# ./modifyrepo.py --remove updateinfo.xml myrepo/repodata + # or in Python: + # >>> from modifyrepo import RepoMetadata + # >>> repomd = RepoMetadata('myrepo/repodata') + # >>> repomd.add('updateinfo.xml') ++# or ++# >>> repomd.remove('updateinfo.xml') + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -20,11 +24,13 @@ + # (C) Copyright 2006 Red Hat, Inc. 
+ # Luke Macken + # modified by Seth Vidal 2008 ++# modified by Daniel Mach 2011 + + import os + import sys + from createrepo import __version__ +-from createrepo.utils import checksum_and_rename, GzipFile, MDError ++from createrepo.utils import checksum_and_rename, compressOpen, MDError ++from createrepo.utils import _available_compression + from yum.misc import checksum + + from yum.repoMDObject import RepoMD, RepoMDError, RepoData +@@ -38,7 +44,7 @@ class RepoMetadata: + """ Parses the repomd.xml file existing in the given repo directory. """ + self.repodir = os.path.abspath(repo) + self.repomdxml = os.path.join(self.repodir, 'repomd.xml') +- self.checksum_type = 'sha256' ++ self.compress_type = _available_compression[-1] # best available + + if not os.path.exists(self.repomdxml): + raise MDError, '%s not found' % self.repomdxml +@@ -49,6 +55,35 @@ class RepoMetadata: + except RepoMDError, e: + raise MDError, 'Could not parse %s' % self.repomdxml + ++ def _get_mdtype(self, mdname, mdtype=None): ++ """ Get mdtype from existing mdtype or from a mdname. """ ++ if mdtype: ++ return mdtype ++ return mdname.split('.')[0] ++ ++ def _print_repodata(self, repodata): ++ """ Print repodata details. """ ++ print " type =", repodata.type ++ print " location =", repodata.location[1] ++ print " checksum =", repodata.checksum[1] ++ print " timestamp =", repodata.timestamp ++ print " open-checksum =", repodata.openchecksum[1] ++ ++ def _write_repomd(self): ++ """ Write the updated repomd.xml. 
""" ++ outmd = file(self.repomdxml, 'w') ++ outmd.write(self.repoobj.dump_xml()) ++ outmd.close() ++ print "Wrote:", self.repomdxml ++ ++ def _remove_repodata_file(self, repodata): ++ """ Remove a file specified in repodata location """ ++ try: ++ os.remove(repodata.location[1]) ++ except OSError, ex: ++ if ex.errno != 2: ++ # continue on a missing file ++ raise MDError("could not remove file %s" % repodata.location[1]) + + def add(self, metadata, mdtype=None): + """ Insert arbitrary metadata into this repository. +@@ -63,8 +98,8 @@ class RepoMetadata: + mdname = 'updateinfo.xml' + elif isinstance(metadata, str): + if os.path.exists(metadata): +- if metadata.endswith('.gz'): +- oldmd = GzipFile(filename=metadata, mode='rb') ++ if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'): ++ oldmd = compressOpen(metadata, mode='rb') + else: + oldmd = file(metadata, 'r') + md = oldmd.read() +@@ -75,27 +110,32 @@ class RepoMetadata: + else: + raise MDError, 'invalid metadata type' + ++ do_compress = False + ## Compress the metadata and move it into the repodata +- if not mdname.endswith('.gz'): +- mdname += '.gz' +- if not mdtype: +- mdtype = mdname.split('.')[0] +- ++ if self.compress and mdname.split('.')[-1] not in ('gz', 'bz2', 'xz'): ++ do_compress = True ++ mdname += '.' 
+ self.compress_type ++ mdtype = self._get_mdtype(mdname, mdtype) ++ + destmd = os.path.join(self.repodir, mdname) +- newmd = GzipFile(filename=destmd, mode='wb') ++ if do_compress: ++ newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type) ++ else: ++ newmd = open(destmd, 'wb') ++ + newmd.write(md) + newmd.close() + print "Wrote:", destmd + + open_csum = checksum(self.checksum_type, metadata) +- csum, destmd = checksum_and_rename(destmd, self.checksum_type) ++ if self.unique_md_filenames: ++ csum, destmd = checksum_and_rename(destmd, self.checksum_type) ++ else: ++ csum = checksum(self.checksum_type, destmd) + base_destmd = os.path.basename(destmd) + +- +- ## Remove any stale metadata +- if mdtype in self.repoobj.repoData: +- del self.repoobj.repoData[mdtype] +- ++ # Remove any stale metadata ++ old_rd = self.repoobj.repoData.pop(mdtype, None) + + new_rd = RepoData() + new_rd.type = mdtype +@@ -103,20 +143,30 @@ class RepoMetadata: + new_rd.checksum = (self.checksum_type, csum) + new_rd.openchecksum = (self.checksum_type, open_csum) + new_rd.size = str(os.stat(destmd).st_size) +- new_rd.timestamp = str(os.stat(destmd).st_mtime) ++ new_rd.timestamp = str(int(os.stat(destmd).st_mtime)) + self.repoobj.repoData[new_rd.type] = new_rd +- +- print " type =", new_rd.type +- print " location =", new_rd.location[1] +- print " checksum =", new_rd.checksum[1] +- print " timestamp =", new_rd.timestamp +- print " open-checksum =", new_rd.openchecksum[1] +- +- ## Write the updated repomd.xml +- outmd = file(self.repomdxml, 'w') +- outmd.write(self.repoobj.dump_xml()) +- outmd.close() +- print "Wrote:", self.repomdxml ++ self._print_repodata(new_rd) ++ self._write_repomd() ++ ++ if old_rd is not None and old_rd.location[1] != new_rd.location[1]: ++ # remove the old file when overwriting metadata ++ # with the same mdtype but different location ++ self._remove_repodata_file(old_rd) ++ ++ def remove(self, metadata, mdtype=None): ++ """ Remove metadata from this 
repository. """ ++ mdname = metadata ++ mdtype = self._get_mdtype(mdname, mdtype) ++ ++ old_rd = self.repoobj.repoData.pop(mdtype, None) ++ if old_rd is None: ++ print "Metadata not found: %s" % mdtype ++ return ++ ++ self._remove_repodata_file(old_rd) ++ print "Removed:" ++ self._print_repodata(old_rd) ++ self._write_repomd() + + + def main(args): +@@ -124,7 +174,23 @@ def main(args): + # query options + parser.add_option("--mdtype", dest='mdtype', + help="specific datatype of the metadata, will be derived from the filename if not specified") +- parser.usage = "modifyrepo [options] " ++ parser.add_option("--remove", action="store_true", ++ help="remove specified file from repodata") ++ parser.add_option("--compress", action="store_true", default=True, ++ help="compress the new repodata before adding it to the repo (default)") ++ parser.add_option("--no-compress", action="store_false", dest="compress", ++ help="do not compress the new repodata before adding it to the repo") ++ parser.add_option("--compress-type", dest='compress_type', default='gz', ++ help="compression format to use") ++ parser.add_option("-s", "--checksum", default='sha256', dest='sumtype', ++ help="specify the checksum type to use (default: sha256)") ++ parser.add_option("--unique-md-filenames", dest="unique_md_filenames", ++ help="include the file's checksum in the filename, helps with proxies (default)", ++ default=True, action="store_true") ++ parser.add_option("--simple-md-filenames", dest="unique_md_filenames", ++ help="do not include the file's checksum in the filename", ++ action="store_false") ++ parser.usage = "modifyrepo [options] [--remove] " + + (opts, argsleft) = parser.parse_args(args) + if len(argsleft) != 2: +@@ -137,11 +203,32 @@ def main(args): + except MDError, e: + print "Could not access repository: %s" % str(e) + return 1 ++ ++ ++ repomd.checksum_type = opts.sumtype ++ repomd.unique_md_filenames = opts.unique_md_filenames ++ repomd.compress = opts.compress ++ if 
opts.compress_type not in _available_compression: ++ print "Compression %s not available: Please choose from: %s" % (opts.compress_type, ', '.join(_available_compression)) ++ return 1 ++ repomd.compress_type = opts.compress_type ++ ++ # remove ++ if opts.remove: ++ try: ++ repomd.remove(metadata, mdtype=opts.mdtype) ++ except MDError, ex: ++ print "Could not remove metadata %s: %s" % (metadata, str(ex)) ++ return 1 ++ return ++ ++ # add + try: + repomd.add(metadata, mdtype=opts.mdtype) + except MDError, e: + print "Could not add metadata from file %s: %s" % (metadata, str(e)) + return 1 ++ + + if __name__ == '__main__': + ret = main(sys.argv[1:]) +diff --git a/worker.py b/worker.py +index eb35ef7..b67b5bd 100755 +--- a/worker.py ++++ b/worker.py +@@ -5,6 +5,7 @@ import yum + import createrepo + import os + import rpmUtils ++import re + from optparse import OptionParser + + +@@ -23,6 +24,8 @@ def main(args): + parser = OptionParser() + parser.add_option('--tmpmdpath', default=None, + help="path where the outputs should be dumped for this worker") ++ parser.add_option('--pkglist', default=None, ++ help="file to read the pkglist from in lieu of all of them on the cli") + parser.add_option("--pkgoptions", default=[], action='append', + help="pkgoptions in the format of key=value") + parser.add_option("--quiet", default=False, action='store_true', +@@ -36,10 +39,6 @@ def main(args): + opts, pkgs = parser.parse_args(args) + external_data = {'_packagenumber': 1} + globalopts = {} +- if not opts.tmpmdpath: +- print >> sys.stderr, "tmpmdpath required for destination files" +- sys.exit(1) +- + + for strs in opts.pkgoptions: + k,v = strs.split('=') +@@ -61,18 +60,39 @@ def main(args): + v = None + globalopts[k] = v + ++ # turn off buffering on stdout ++ sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) + + reldir = external_data['_reldir'] + ts = rpmUtils.transaction.initReadOnlyTransaction() +- pri = open(opts.tmpmdpath + '/primary.xml' , 'w') +- fl = open(opts.tmpmdpath + '/filelists.xml' , 
'w') +- other = open(opts.tmpmdpath + '/other.xml' , 'w') +- +- ++ if opts.tmpmdpath: ++ files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w') ++ for i in ('primary', 'filelists', 'other')] ++ def output(*xml): ++ for fh, buf in zip(files, xml): ++ fh.write(buf) ++ else: ++ def output(*xml): ++ buf = ' '.join(str(len(i)) for i in xml) ++ sys.stdout.write('*** %s\n' % buf) ++ for buf in xml: ++ sys.stdout.write(buf) ++ ++ if opts.pkglist: ++ for line in open(opts.pkglist,'r').readlines(): ++ line = line.strip() ++ if re.match('^\s*\#.*', line) or re.match('^\s*$', line): ++ continue ++ pkgs.append(line) ++ ++ clog_limit=globalopts.get('clog_limit', None) ++ if clog_limit is not None: ++ clog_limit = int(clog_limit) + for pkgfile in pkgs: + pkgpath = reldir + '/' + pkgfile + if not os.path.exists(pkgpath): + print >> sys.stderr, "File not found: %s" % pkgpath ++ output() + continue + + try: +@@ -80,20 +100,17 @@ def main(args): + print "reading %s" % (pkgfile) + + pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, +- external_data=external_data) +- pri.write(pkg.xml_dump_primary_metadata()) +- fl.write(pkg.xml_dump_filelists_metadata()) +- other.write(pkg.xml_dump_other_metadata(clog_limit= +- globalopts.get('clog_limit', None))) ++ sumtype=globalopts.get('sumtype', None), ++ external_data=external_data) ++ output(pkg.xml_dump_primary_metadata(), ++ pkg.xml_dump_filelists_metadata(), ++ pkg.xml_dump_other_metadata(clog_limit=clog_limit)) + except yum.Errors.YumBaseError, e: + print >> sys.stderr, "Error: %s" % e ++ output() + continue + else: + external_data['_packagenumber']+=1 + +- pri.close() +- fl.close() +- other.close() +- + if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/SOURCES/ten-changelog-limit.patch b/SOURCES/ten-changelog-limit.patch new file mode 100644 index 0000000..07b6106 --- /dev/null +++ b/SOURCES/ten-changelog-limit.patch @@ -0,0 +1,11 @@ +--- createrepo/__init__.py~ 2011-01-26 12:14:30.000000000 -0500 ++++ 
createrepo/__init__.py 2011-01-26 12:34:17.291441719 -0500 +@@ -96,7 +96,7 @@ + self.mdtimestamp = 0 + self.directory = None + self.directories = [] +- self.changelog_limit = None # needs to be an int or None ++ self.changelog_limit = 10 # needs to be an int or None + self.unique_md_filenames = True + self.additional_metadata = {} # dict of 'type':'filename' + self.revision = str(int(time.time())) diff --git a/SPECS/createrepo.spec b/SPECS/createrepo.spec new file mode 100644 index 0000000..931328e --- /dev/null +++ b/SPECS/createrepo.spec @@ -0,0 +1,388 @@ +%{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")} + +%if ! 0%{?rhel} +# we don't have this in rhel yet... +BuildRequires: bash-completion +%endif + +# disable broken /usr/lib/rpm/brp-python-bytecompile +%define __os_install_post %{nil} +%define compdir %(pkg-config --variable=completionsdir bash-completion) +%if "%{compdir}" == "" +%define compdir "/etc/bash_completion.d" +%endif + +Summary: Creates a common metadata repository +Name: createrepo +Version: 0.9.9 +Release: 21%{?dist} +License: GPLv2 +Group: System Environment/Base +Source: %{name}-%{version}.tar.gz +Patch0: createrepo-head.patch +Patch1: ten-changelog-limit.patch +URL: http://createrepo.baseurl.org/ +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildArchitectures: noarch +Requires: python >= 2.1, rpm-python, rpm >= 4.1.1, libxml2-python +Requires: yum-metadata-parser, yum >= 3.4.3-4, python-deltarpm, deltarpm, pyliblzma +BuildRequires: python + +%description +This utility will generate a common metadata repository from a directory of rpm +packages. 
+ +%prep +%setup -q +%patch0 -p1 +%patch1 -p0 + +%build + +%install +rm -rf $RPM_BUILD_ROOT +make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install + +%clean +rm -rf $RPM_BUILD_ROOT + + +%files +%defattr(-, root, root,-) +%doc ChangeLog README COPYING COPYING.lib +%(dirname %{compdir}) +%{_datadir}/%{name}/ +%{_bindir}/createrepo +%{_bindir}/modifyrepo +%{_bindir}/mergerepo +%{_mandir}/*/* +%{python_sitelib}/createrepo + +%changelog +* Wed Apr 17 2013 Zdenek Pavlas - 0.9.9-21 +- update to latest HEAD +- don't BuildRequire bash-completion in rhel +- Fail for bad compress-type options to modifyrepo, like createrepo. BZ 886589 +- Fix options documentation. BZ 892657. +- modifyrepo: fix --compress option bug. BZ 950724 +- modifyrepo: add --checksum and --{unique,simple}-md-filenames options + +* Thu Mar 28 2013 Zdenek Pavlas - 0.9.9-20 +- package also %{compdir}'s parent + +* Wed Mar 20 2013 Zdenek Pavlas - 0.9.9-19 +- add BuildRequires: bash-completion + +* Wed Mar 20 2013 Zdenek Pavlas - 0.9.9-18 +- add bash-completion aliases, use pkg-config. + +* Tue Mar 19 2013 Zdenek Pavlas - 0.9.9-17 +- move bash-completion scripts to /usr/share/ BZ 923001 + +* Wed Mar 6 2013 Zdenek Pavlas - 0.9.9-16 +- update to latest HEAD +- turn off stdout buffering in worker to prevent a deadlock +- modifyrepo: use integer timestamps + +* Wed Feb 13 2013 Fedora Release Engineering - 0.9.9-15 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Fri Dec 21 2012 Zdenek Pavlas - 0.9.9-14 +- update to latest HEAD +- Fix the deadlock issue. BZ 856363 +- Manually set the permmissions for tempfile created cachefiles. BZ 833350 +- modifyrepo: use available compression only. BZ 865845 +- No baseurl means no baseurl. BZ 875029 +- Change the compress-type for modifyrepo to .gz for compat. BZ 874682. +- fix the --skip-symlinks option +- no repomd.xml && --checkts: skip .rpm timestamp checking. 
BZ 877301 +- new worker piping code (no tempfiles, should be faster) + +* Thu Sep 13 2012 James Antill - 0.9.9-13 +- update to latest head +- Fix for workers that output a lot. + +* Wed Jul 18 2012 Fedora Release Engineering - 0.9.9-12 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Thu Feb 16 2012 James Antill - 0.9.9-11 +- update to latest head +- fix for lots of workers and not many rpms. + +* Thu Jan 5 2012 Seth Vidal - 0.9.9-10 +- update to latest head +- fix for generating repos for rhel5 on fedora + +* Fri Oct 28 2011 Seth Vidal - 0.9.9-9 +- 3rd time is the charm +- fix it so prestodelta's get made with the right name and don't traceback + +* Wed Oct 26 2011 Seth Vidal - 0.9.9-8 +- change how compressOpen() defaults so mash doesn't break +- add requires for pyliblzma + +* Mon Oct 24 2011 Seth Vidal - 0.9.9-7 +- latest upstream +- --compress-type among other deals. + +* Fri Jul 29 2011 Seth Vidal - 0.9.9-6 +- latest upstream +- fixes bugs: 713747, 581632, 581628 + +* Wed Jul 20 2011 Seth Vidal - 0.9.9-5 +- new patch to fix us breaking certain pungi configs + +* Tue Jul 19 2011 Seth Vidal - 0.9.9-4 +- latest upstream head +- change --update to use sqlite for old repodata + +* Tue Feb 08 2011 Fedora Release Engineering - 0.9.9-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Thu Jan 27 2011 Seth Vidal - 0.9.9-2 +- make sure when a worker exits with a non-zero returncode we exit, too. 
+ +* Wed Jan 26 2011 Seth Vidal - 0.9.9-1 +- 0.9.9 +- change yum requires to 3.2.29 + +* Wed Jul 21 2010 David Malcolm - 0.9.8-5 +- Rebuilt for https://fedoraproject.org/wiki/Features/Python_2.7/MassRebuild + +* Thu Jan 7 2010 Seth Vidal - 0.9.8-4 +- latest head with fixes for --update w/o --skipstat + + +* Tue Dec 22 2009 Seth Vidal - 0.9.8-3 +- patch to latest HEAD from upstream + +* Thu Sep 3 2009 Seth Vidal - 0.9.8-2 +- add drpm patch from https://bugzilla.redhat.com/show_bug.cgi?id=518658 + + +* Fri Aug 28 2009 Seth Vidal - 0.9.8-1 +- bump yum requires version +- remove head patch +- bump to 0.9.8 upstream + +* Tue Aug 18 2009 Seth Vidal - 0.9.7-15 +- update HEAD patch to include fix from mbonnet for typo'd PRAGMA in the filelists setup + +* Tue Aug 4 2009 Seth Vidal - 0.9.7-14 +- minor fix for rh bug 512610 + +* Fri Jul 24 2009 Fedora Release Engineering - 0.9.7-13 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild + +* Wed Jun 17 2009 Seth Vidal - 0.9.7-11 +- more profile output for deltarpms + +* Tue Jun 16 2009 Seth Vidal - 0.9.7-8 +- more patches from head +- speed up generating prestodelta, massively + +* Tue May 5 2009 Seth Vidal +- more head fixes - theoretically solving ALL of the sha1/sha silliness + +* Wed Apr 15 2009 Seth Vidal - 0.9.7-2 +- fix 495845 and other presto issues + +* Tue Mar 24 2009 Seth Vidal - 0.9.7-1 +- 0.9.7 +- require yum 3.2.22 + +* Tue Feb 24 2009 Fedora Release Engineering - 0.9.6-12 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild + +* Tue Feb 10 2009 Seth Vidal - 0.9.6-11 +- change the order of deltarpms + +* Wed Feb 4 2009 Seth Vidal - 0.9.6-10 +- working mergerepo again + +* Tue Feb 3 2009 Seth Vidal - 0.9.6-9 +- fix normal createrepo'ing w/o the presto patches :( + +* Mon Feb 2 2009 Seth Vidal - 0.9.6-7 +- add deltarpm requirement for making presto metadata + +* Tue Jan 27 2009 Seth Vidal - 0.9.6-6 +- one more patch set to make sure modifyrepo works with sha256's, too + +* Mon Jan 26 
2009 Seth Vidal - 0.9.6-5 +- add patch from upstream head for sha256 support + +* Sat Nov 29 2008 Ignacio Vazquez-Abrams - 0.9.6-4 +- Rebuild for Python 2.6 + +* Tue Oct 28 2008 Seth Vidal - 0.9.6-1 +- 0.9.6-1 +- add mergerepo + +* Thu Oct 9 2008 James Antill - 0.9.5-5 +- Do atomic updates to the cachedir, for parallel runs +- Fix the patch + +* Fri Feb 22 2008 Seth Vidal - 0.9.5-2 +- patch for the mistake in the raise for an empty pkgid + +* Tue Feb 19 2008 Seth Vidal - 0.9.5-1 +- 0.9.5 +- ten-changelog-limit patch by default in fedora + +* Thu Jan 31 2008 Seth Vidal - 0.9.4-3 +- skip if no old metadata and --update was called. + +* Wed Jan 30 2008 Seth Vidal - 0.9.4-1 +- 0.9.4 + +* Tue Jan 22 2008 Seth Vidal - 0.9.3 +- 0.9.3 + +* Thu Jan 17 2008 Seth Vidal - 0.9.2-1 +- remove all other patches - 0.9.2 + +* Tue Jan 15 2008 Seth Vidal 0.9.1-3 +- more patches - almost 0.9.2 but not quite + +* Thu Jan 10 2008 Seth Vidal 0.9.1-2 +- patch to fix bug until 0.9.2 + +* Wed Jan 9 2008 Seth Vidal 0.9.1-1 +- 0.9.1 + +* Mon Jan 7 2008 Seth Vidal 0.9-1 +- 0.9 +- add yum dep + + +* Mon Nov 26 2007 Luke Macken - 0.4.11-1 +- Update to 0.4.11 +- Include COPYING file and change License to GPLv2 + +* Thu Jun 07 2007 Paul Nasrat - 0.4.10-1 +- Update to 0.4.10 + +* Wed May 16 2007 Paul Nasrat - 0.4.9-1 +- Update to 0.4.9 + +* Tue May 15 2007 Jeremy Katz - 0.4.8-4 +- fix the last patch + +* Tue May 15 2007 Jeremy Katz - 0.4.8-3 +- use dbversion given by yum-metadata-parser instead of hardcoded + value (#239938) + +* Wed Mar 14 2007 Paul Nasrat - 0.4.8-2 +- Remove requires (#227680) + +* Wed Feb 21 2007 Jeremy Katz - 0.4.8-1 +- update to 0.4.8 + +* Mon Feb 12 2007 Jesse Keating - 0.4.7-3 +- Require yum-metadata-parser. 
+ +* Thu Feb 8 2007 Jeremy Katz - 0.4.7-2 +- add modifyrepo to the file list + +* Thu Feb 8 2007 Jeremy Katz - 0.4.7-1 +- update to 0.4.7 + +* Mon Feb 05 2007 Paul Nasrat - 0.4.6-2 +- Packaging guidelines (#225661) + +* Thu Nov 09 2006 Paul Nasrat - 0.4.6-1 +- Upgrade to latest release +- Fix requires (#214388) + +* Wed Jul 19 2006 Paul Nasrat - 0.4.4-2 +- Fixup relative paths (#199228) + +* Wed Jul 12 2006 Jesse Keating - 0.4.4-1.1 +- rebuild + +* Mon Apr 17 2006 Paul Nasrat - 0.4.4-1 +- Update to latest upstream + +* Fri Dec 09 2005 Jesse Keating +- rebuilt + +* Fri Nov 18 2005 Paul Nasrat - 0.4.3-5 +- Fix split with normalised directories + +* Fri Nov 18 2005 Paul Nasrat - 0.4.3-4 +- Another typo fix +- Normalise directories + +* Thu Nov 17 2005 Paul Nasrat - 0.4.3-3.1 +- really fix them + +* Thu Nov 17 2005 Paul Nasrat - 0.4.3-3 +- Fix regressions for absolute/relative paths + +* Sun Nov 13 2005 Paul Nasrat - 0.4.3-2 +- Sync upto HEAD +- Split media support + +* Thu Jul 14 2005 Paul Nasrat - 0.4.3-1 +- New upstream version 0.4.3 (cachedir support) + +* Tue Jan 18 2005 Jeremy Katz - 0.4.2-2 +- add the manpage + +* Tue Jan 18 2005 Jeremy Katz - 0.4.2-1 +- 0.4.2 + +* Thu Oct 21 2004 Paul Nasrat +- 0.4.1, fixes #136613 +- matched ghosts not being added into primary.xml files + +* Mon Oct 18 2004 Bill Nottingham +- 0.4.0, fixes #134776 + +* Thu Sep 30 2004 Paul Nasrat +- Rebuild new upstream release - 0.3.9 + +* Thu Sep 30 2004 Seth Vidal +- 0.3.9 +- fix for groups checksum creation + +* Sat Sep 11 2004 Seth Vidal +- 0.3.8 + +* Wed Sep 1 2004 Seth Vidal +- 0.3.7 + +* Fri Jul 23 2004 Seth Vidal +- make filelists right + + +* Fri Jul 23 2004 Seth Vidal +- fix for broken filelists + +* Mon Jul 19 2004 Seth Vidal +- re-enable groups +- update num to 0.3.4 + +* Tue Jun 8 2004 Seth Vidal +- update to the format +- versioned deps +- package counts +- uncompressed checksum in repomd.xml + + +* Fri Apr 16 2004 Seth Vidal +- 0.3.2 - small addition of -p flag + +* Sun Jan 18 
2004 Seth Vidal +- I'm an idiot + +* Sun Jan 18 2004 Seth Vidal +- 0.3 + +* Tue Jan 13 2004 Seth Vidal +- 0.2 - + +* Sat Jan 10 2004 Seth Vidal +- first packaging +