Blob Blame History Raw
diff --git a/cli.py b/cli.py
index 5d9791bc..403a92f9 100755
--- a/cli.py
+++ b/cli.py
@@ -29,6 +29,7 @@ import math
 from optparse import OptionParser,OptionGroup,SUPPRESS_HELP
 import rpm
 import ctypes
+import glob
 
 from weakref import proxy as weakref
 
@@ -1748,26 +1749,10 @@ class YumBaseCli(yum.YumBase, output.YumOutput):
         msg = self.fmtKeyValFill(_('Cleaning repos: '), 
                         ' '.join([ x.id for x in self.repos.listEnabled()]))
         self.verbose_logger.log(yum.logginglevels.INFO_2, msg)
+        msg = (_('Operating on %s (see CLEAN OPTIONS in yum(8) for details)')
+               % self.conf.cachedir)
+        self.verbose_logger.log(yum.logginglevels.DEBUG_3, msg)
         if 'all' in userlist:
-            self.verbose_logger.log(yum.logginglevels.INFO_2,
-                _('Cleaning up everything'))
-
-            # Print a "maybe you want rm -rf" hint to compensate for the fact
-            # that yum clean all is often misunderstood.  Don't do that,
-            # however, if cachedir is non-default as we would have to replace
-            # arbitrary yum vars with * and that could produce a harmful
-            # command, e.g. for /mydata/$myvar we would say rm -rf /mydata/*
-            cachedir = self.conf.cachedir
-            if cachedir.startswith(('/var/cache/yum', '/var/tmp/yum-')):
-                # Take just the first 3 path components
-                rmdir = '/'.join(cachedir.split('/')[:4])
-                self.verbose_logger.log(
-                    yum.logginglevels.INFO_2,
-                    _('Maybe you want: rm -rf %s, to also free up space taken '
-                      'by orphaned data from disabled or removed repos'
-                      % rmdir),
-                )
-
             pkgcode, pkgresults = self.cleanPackages()
             hdrcode, hdrresults = self.cleanHeaders()
             xmlcode, xmlresults = self.cleanMetadata()
@@ -1780,6 +1765,67 @@ class YumBaseCli(yum.YumBase, output.YumOutput):
                        rpmresults)
             for msg in results:
                 self.logger.debug(msg)
+
+            # Walk the cachedir, look for any leftovers and categorize them
+            cacheglob = self.getCachedirGlob(['basearch', 'releasever'])
+            paths = glob.glob(cacheglob + '/*')
+            table = ([], [], [], [])  # (enabled, disabled, untracked, other)
+            repos = self.repos.repos
+            for path in paths:
+                base = os.path.basename(path)
+                if os.path.isdir(path):
+                    # Repodir
+                    if base not in repos:
+                        col = 2
+                    elif repos[base].enabled:
+                        col = 0
+                    else:
+                        col = 1
+                    # Recursively gather all files in this repodir
+                    files = yum.misc.getFileList(path, '', [])
+                else:
+                    # Ordinary file (such as timedhosts)
+                    col = 3
+                    files = [path]
+                usage = sum(map(os.path.getsize, files))
+                if usage > 0:
+                    table[col].append((usage, path))
+
+            # Print the table (verbose mode only)
+            lines = [_('Disk usage of %s after cleanup:') % cacheglob]
+            headers = ('enabled repos', 'disabled repos', 'untracked repos',
+                       'other data')
+            totals = [0, 0, 0, 0]
+            for col, header in enumerate(headers):
+                rows = []
+                total = 0
+                # Start with the biggest items
+                for usage, path in sorted(table[col], key=lambda x:x[0],
+                                          reverse=True):
+                    rows.append('  %-5s  %s'
+                                % (self.format_number(usage), path))
+                    total += usage
+                colon = ':' if rows else ''
+                lines += ['%-5s  %s%s'
+                          % (self.format_number(total), _(header), colon)]
+                lines += rows
+                totals[col] = total
+            lines += [_('%-5s  total') % self.format_number(sum(totals))]
+            msg = '\n'.join(lines)
+            self.verbose_logger.log(yum.logginglevels.DEBUG_3, msg)
+
+            # Print a short hint for leftover repos specifically (non-verbose
+            # mode only)
+            total = sum(totals[:3])
+            if self.conf.debuglevel == 6 or not total:
+                return code, []
+            total = self.format_number(total)
+            if total[-1] == ' ':
+                total = total[:-1] + 'bytes'
+            msg = (_('Other repos take up %s of disk space '
+                     '(use --verbose for details)') % total)
+            self.verbose_logger.log(yum.logginglevels.INFO_2, msg)
+
             return code, []
             
         if 'headers' in userlist:
diff --git a/docs/yum.8 b/docs/yum.8
index e3f1d32a..34a27f53 100644
--- a/docs/yum.8
+++ b/docs/yum.8
@@ -1031,11 +1031,37 @@ Or:          \fByum list available 'foo*'\fP
 .IP
 .PP 
 .SH "CLEAN OPTIONS"
-The following are the ways which you can invoke \fByum\fP in clean
-mode. Note that "all files" in the commands below means 
-"all files in currently enabled repositories". 
-If you want to also clean any (temporarily) disabled repositories you need to
-use \fB\-\-enablerepo='*'\fP option.
+The following are the ways which you can invoke \fByum\fP in clean mode.
+
+Note that these commands only operate on files in currently enabled
+repositories.
+If you use substitution variables (such as $releasever) in your \fBcachedir\fP
+configuration, the operation is further restricted to the current values of
+those variables.
+
+For fine-grained control over what is being cleaned, you can use
+\fB\-\-enablerepo\fP, \fB\-\-disablerepo\fP and \fB\-\-releasever\fP as
+desired.
+Note, however, that you cannot use \fB\-\-releasever='*'\fP to do the cleaning
+for all values previously used.
+Also note that untracked (no longer configured) repositories will not be
+automatically cleaned.
+
+To purge the entire cache in one go, the easiest way is to delete the files
+manually.
+Depending on your \fBcachedir\fP configuration, this usually means treating any
+variables as shell wildcards and recursively removing matching directories.
+For example, if your \fBcachedir\fP is /var/cache/yum/$basearch/$releasever,
+then the whole /var/cache/yum directory has to be removed.
+If you do this, \fByum\fP will rebuild the cache as required the next time it
+is run (this may take a while).
+
+As a convenience, when you run \fByum clean all\fP, a recursive lookup will be
+done to detect any repositories not cleaned due to the above restrictions.
+If some are found, a message will be printed stating how much disk space they
+occupy and thus how much you can reclaim by cleaning them.
+If you also supply \fB\-\-verbose\fP, a more detailed breakdown will be
+printed.
 
 .IP "\fByum clean expire-cache\fP"
 Eliminate the local data saying when the metadata and mirrorlists were downloaded for each repo. This means yum will revalidate the cache for each repo. next time it is used. However if the cache is still valid, nothing significant was deleted.
diff --git a/yum/__init__.py b/yum/__init__.py
index a156a6a6..c6ced7d5 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -2890,6 +2890,16 @@ much more problems).
             filelist = misc.getFileList(cachedir, '', [])
         return self._cleanFilelist('rpmdb', filelist)
 
+    def getCachedirGlob(self, dynvar):
+        """Return a glob matching all dirs where yum stores cache files, based
+        on cachedir and the given list of dynamic vars."""
+        yumvar = self.conf.yumvar.copy()
+        for d in dynvar:
+            yumvar[d] = '*'
+        instroot = config.varReplace(self.conf.installroot, self.conf.yumvar)
+        cachedir = config.varReplace(self.conf._pristine_cachedir, yumvar)
+        return (instroot + cachedir).replace('//', '/')
+
     def _cleanFiles(self, exts, pathattr, filetype):
         filelist = []
         for ext in exts:
diff --git a/yum/config.py b/yum/config.py
index 5eac325e..76128fa5 100644
--- a/yum/config.py
+++ b/yum/config.py
@@ -945,6 +945,9 @@ class YumConf(StartupConf):
 
     _reposlist = []
 
+    # cachedir before variable substitutions
+    _pristine_cachedir = None
+
     def dump(self):
         """Return a string representing the values of all the
         configuration options.
@@ -1147,6 +1150,9 @@ def readMainConfig(startupconf):
     yumconf = YumConf()
     yumconf.populate(startupconf._parser, 'main')
 
+    # Store the original cachedir (for later reference in clean commands)
+    yumconf._pristine_cachedir = yumconf.cachedir
+
     # Apply the installroot to directory options
     def _apply_installroot(yumconf, option):
         path = getattr(yumconf, option)