Blame SOURCES/sos-bz1624043-symlinks-not-copied.patch

b72d26
From 2e07f7c4778145d4366476ecc4383d491458b541 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 31 Aug 2018 12:50:24 +0100
b72d26
Subject: [PATCH 1/4] [sosreport] properly raise exceptions when --debug is
b72d26
 given
b72d26
b72d26
OSError and IOError exceptions were not raised to the terminal
b72d26
when --debug is in effect since they were silently caught in the
b72d26
generic exception handler.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/sosreport.py | 5 ++++-
b72d26
 1 file changed, 4 insertions(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/sosreport.py b/sos/sosreport.py
b72d26
index 00c3e811..80633966 100644
b72d26
--- a/sos/sosreport.py
b72d26
+++ b/sos/sosreport.py
b72d26
@@ -995,7 +995,8 @@ class SoSReport(object):
b72d26
                 print(" %s while setting up archive" % e.strerror)
b72d26
                 print("")
b72d26
             else:
b72d26
-                raise e
b72d26
+                print("Error setting up archive: %s" % e)
b72d26
+                raise
b72d26
         except Exception as e:
b72d26
             self.ui_log.error("")
b72d26
             self.ui_log.error(" Unexpected exception setting up archive:")
b72d26
@@ -1467,6 +1468,8 @@ class SoSReport(object):
b72d26
             return self.final_work()
b72d26
 
b72d26
         except (OSError):
b72d26
+            if self.opts.debug:
b72d26
+                raise
b72d26
             self._cleanup()
b72d26
         except (KeyboardInterrupt):
b72d26
             self.ui_log.error("\nExiting on user cancel")
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From c496d2bec8cae175faf986567e73d16d401d8564 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 31 Aug 2018 12:52:38 +0100
b72d26
Subject: [PATCH 2/4] [archive] simplify FileCacheArchive.makedirs()
b72d26
b72d26
Simplify the makedirs() method of FileCacheArchive and have it
b72d26
bypass _check_path() and directly call os.makedirs(): a subsequent
b72d26
patch will restrict the use of the method to setting up the sos_*
b72d26
directories in the archive root.
b72d26
b72d26
File, directory and other object type add_* methods will use a
b72d26
new method that correctly handles symbolic links in intermediate
b72d26
path components.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 8 ++++----
b72d26
 1 file changed, 4 insertions(+), 4 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 5d99170f..ffa54036 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -361,11 +361,11 @@ class FileCacheArchive(Archive):
b72d26
         return self._archive_root
b72d26
 
b72d26
     def makedirs(self, path, mode=0o700):
b72d26
-        dest = self._check_path(path, P_DIR)
b72d26
-        if not dest:
b72d26
-            return
b72d26
+        """Create path, including leading components.
b72d26
 
b72d26
-        self._makedirs(self.dest_path(path))
b72d26
+            Used by sos.sosreport to set up sos_* directories.
b72d26
+        """
b72d26
+        os.makedirs(os.path.join(self._archive_root, path), mode=mode)
b72d26
         self.log_debug("created directory at '%s' in FileCacheArchive '%s'"
b72d26
                        % (path, self._archive_root))
b72d26
 
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From ca422720b74181b2433473428e29e90af59b3cf8 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 31 Aug 2018 12:55:51 +0100
b72d26
Subject: [PATCH 3/4] [archive] normalise dest_dir in
b72d26
 FileCacheArchive._check_path()
b72d26
b72d26
Always set a valid dest_dir in _check_path() and do not assume
b72d26
that it can be obtained by splitting the path: in the case of
b72d26
a directory it is the unmodified 'dest' value.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 5 ++++-
b72d26
 1 file changed, 4 insertions(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index ffa54036..903cc672 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -191,7 +191,10 @@ class FileCacheArchive(Archive):
b72d26
                       copied now or `None` otherwise
b72d26
         """
b72d26
         dest = dest or self.dest_path(src)
b72d26
-        dest_dir = os.path.split(dest)[0]
b72d26
+        if path_type == P_DIR:
b72d26
+            dest_dir = dest
b72d26
+        else:
b72d26
+            dest_dir = os.path.split(dest)[0]
b72d26
         if not dest_dir:
b72d26
             return dest
b72d26
 
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From 75d759066e8ee0a469abc37f48f7bfcdfe8182b5 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 31 Aug 2018 12:58:01 +0100
b72d26
Subject: [PATCH 4/4] [archive] replace FileCacheArchive._makedirs()
b72d26
b72d26
The Python os.makedirs() implementation is inadequate for sos's
b72d26
needs: it will create leading directories given an intended path
b72d26
destination, but it is not able to reflect cases where some of
b72d26
the intermediate paths are actually symbolic links.
b72d26
b72d26
Replace the use of os.makedirs() with a method that walks over
b72d26
the path, and either creates directories, or symbolic links (and
b72d26
their directory target) to better correspond with the content of
b72d26
the host file system.
b72d26
b72d26
This fixes a situation where two plugins can race in the archive,
b72d26
leading to an exception in the plugin that runs last:
b72d26
b72d26
 - /foo/bar exists and is a link to /foo/bar.qux
b72d26
 - One plugin attempts to collect /foo/bar
b72d26
 - Another plugin attempts to collect a link /foo/qux -> /foo/bar/baz
b72d26
b72d26
If the 2nd plugin happens to run first it will create the path
b72d26
"/foo/bar" as a _directory_ (via _makedirs()). Since the archive
b72d26
now checks for matching object types when a path collision occurs,
b72d26
the first plugin will arrive at add_dir(), note that "/foo/bar" is
b72d26
present and is not a symbolic link, and will raise an exception.
b72d26
b72d26
Correct this by ensuring that whichever plugin executes first, the
b72d26
correct link/directory path structure will be set up.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------
b72d26
 1 file changed, 64 insertions(+), 8 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 903cc672..11afa7aa 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -159,6 +159,67 @@ class FileCacheArchive(Archive):
b72d26
             name = name.lstrip(os.sep)
b72d26
         return (os.path.join(self._archive_root, name))
b72d26
 
b72d26
+    def _make_leading_paths(self, src, mode=0o700):
b72d26
+        """Create leading path components
b72d26
+
b72d26
+            The standard python `os.makedirs` is insufficient for our
b72d26
+            needs: it will only create directories, and ignores the fact
b72d26
+            that some path components may be symbolic links.
b72d26
+        """
b72d26
+        self.log_debug("Making leading paths for %s" % src)
b72d26
+        root = self._archive_root
b72d26
+
b72d26
+        def in_archive(path):
b72d26
+            """Test whether path ``path`` is inside the archive.
b72d26
+            """
b72d26
+            return path.startswith(os.path.join(root, ""))
b72d26
+
b72d26
+        if not src.startswith("/"):
b72d26
+            # Sos archive path (sos_commands, sos_logs etc.)
b72d26
+            src_dir = src
b72d26
+        else:
b72d26
+            # Host file path
b72d26
+            src_dir = src if os.path.isdir(src) else os.path.split(src)[0]
b72d26
+
b72d26
+        # Build a list of path components in root-to-leaf order.
b72d26
+        path = src_dir
b72d26
+        path_comps = []
b72d26
+        while path != '/' and path != '':
b72d26
+            head, tail = os.path.split(path)
b72d26
+            path_comps.append(tail)
b72d26
+            path = head
b72d26
+        path_comps.reverse()
b72d26
+
b72d26
+        abs_path = root
b72d26
+        rel_path = ""
b72d26
+
b72d26
+        # Check and create components as needed
b72d26
+        for comp in path_comps:
b72d26
+            abs_path = os.path.join(abs_path, comp)
b72d26
+
b72d26
+            if not in_archive(abs_path):
b72d26
+                continue
b72d26
+
b72d26
+            rel_path = os.path.join(rel_path, comp)
b72d26
+            src_path = os.path.join("/", rel_path)
b72d26
+
b72d26
+            if not os.path.exists(abs_path):
b72d26
+                self.log_debug("Making path %s" % abs_path)
b72d26
+                if os.path.islink(src_path) and os.path.isdir(src_path):
b72d26
+                    target = os.readlink(src_path)
b72d26
+                    abs_target = os.path.join(root, target)
b72d26
+
b72d26
+                    # Recursively create leading components of target
b72d26
+                    self._make_leading_paths(abs_target, mode=mode)
b72d26
+
b72d26
+                    self.log_debug("Making symlink '%s' -> '%s'" %
b72d26
+                                   (abs_path, target))
b72d26
+                    target = os.path.relpath(target)
b72d26
+                    os.symlink(target, abs_path)
b72d26
+                else:
b72d26
+                    self.log_debug("Making directory %s" % abs_path)
b72d26
+                    os.mkdir(abs_path, mode)
b72d26
+
b72d26
     def _check_path(self, src, path_type, dest=None, force=False):
b72d26
         """Check a new destination path in the archive.
b72d26
 
b72d26
@@ -203,7 +264,8 @@ class FileCacheArchive(Archive):
b72d26
             raise ValueError("path '%s' exists and is not a directory" %
b72d26
                              dest_dir)
b72d26
         elif not os.path.exists(dest_dir):
b72d26
-            self._makedirs(dest_dir)
b72d26
+            src_dir = src if path_type == P_DIR else os.path.split(src)[0]
b72d26
+            self._make_leading_paths(src_dir)
b72d26
 
b72d26
         def is_special(mode):
b72d26
             return any([
b72d26
@@ -326,10 +388,7 @@ class FileCacheArchive(Archive):
b72d26
 
b72d26
     def add_dir(self, path):
b72d26
         with self._path_lock:
b72d26
-            dest = self._check_path(path, P_DIR)
b72d26
-            if not dest:
b72d26
-                return
b72d26
-            self.makedirs(path)
b72d26
+            self._check_path(path, P_DIR)
b72d26
 
b72d26
     def add_node(self, path, mode, device):
b72d26
         dest = self._check_path(path, P_NODE)
b72d26
@@ -347,9 +406,6 @@ class FileCacheArchive(Archive):
b72d26
                 raise e
b72d26
             shutil.copystat(path, dest)
b72d26
 
b72d26
-    def _makedirs(self, path, mode=0o700):
b72d26
-        os.makedirs(path, mode)
b72d26
-
b72d26
     def name_max(self):
b72d26
         if 'PC_NAME_MAX' in os.pathconf_names:
b72d26
             pc_name_max = os.pathconf_names['PC_NAME_MAX']
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
From 5d6228b85e174dee8abcc4c206a1e9034242c6c6 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 7 Sep 2018 12:06:34 -0400
b72d26
Subject: [PATCH 1/6] [sosreport] ensure ThreadPool exceptions are raised
b72d26
b72d26
The ThreadPoolExecutor does not raise exceptions to the parent
b72d26
thread immediately: it stores them in-line in the pool's results
b72d26
list, and raises them to the caller on acccess to that slot in
b72d26
the results iterator.
b72d26
b72d26
Make sure that these exceptions are handled by iterating over all
b72d26
results and asserting that they are non-None (in practice, this
b72d26
code is never executed since the resulting raise will trap to an
b72d26
exception handler, but it is less confusing than a bare 'pass').
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/sosreport.py | 8 ++++++--
b72d26
 1 file changed, 6 insertions(+), 2 deletions(-)
b72d26
b72d26
diff --git a/sos/sosreport.py b/sos/sosreport.py
b72d26
index 80633966..44be75a1 100644
b72d26
--- a/sos/sosreport.py
b72d26
+++ b/sos/sosreport.py
b72d26
@@ -1065,9 +1065,13 @@ class SoSReport(object):
b72d26
         try:
b72d26
             self.plugpool = ThreadPoolExecutor(self.opts.threads)
b72d26
             # Pass the plugpool its own private copy of self.pluglist
b72d26
-            self.plugpool.map(self._collect_plugin, list(self.pluglist),
b72d26
-                              chunksize=1)
b72d26
+            results = self.plugpool.map(self._collect_plugin,
b72d26
+                                        list(self.pluglist), chunksize=1)
b72d26
             self.plugpool.shutdown(wait=True)
b72d26
+            for res in results:
b72d26
+                if not res:
b72d26
+                    self.soslog.debug("Unexpected plugin task result: %s" %
b72d26
+                                      res)
b72d26
             self.ui_log.info("")
b72d26
         except KeyboardInterrupt:
b72d26
             # We may not be at a newline when the user issues Ctrl-C
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From 9aaba972bf6a42c33ea9bca80f07bfb880ba45a1 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 7 Sep 2018 12:15:10 -0400
b72d26
Subject: [PATCH 2/6] [sosreport] trap directly to PDB in handle_exception()
b72d26
b72d26
Now that plugins are run in a threadpool, it is not possible to
b72d26
defer the call to pdb.post_mortem() to the top-level exception
b72d26
handler in the main thread: this is due to the fact that in a pool,
b72d26
exceptions are caught and saved to be re-raised to thread calling
b72d26
the pool when results are returned. When the saved exception is
b72d26
raised to the top-level handler the execution context it relates
b72d26
to is gone: the backtrace and stack frame have been torn down and
b72d26
only very limited information is available from the exception
b72d26
frame.
b72d26
b72d26
Instead, catch these exceptions _inside_ the thread pool context,
b72d26
and directly trap to the Python debugger. This allows plugin code
b72d26
to be debugged interactively with the full backtrace and with all
b72d26
access to local variables and the execution stack. In addition,
b72d26
this means that after the debugger has handled the exception it is
b72d26
possible to return to the run and continue until report completion.
b72d26
b72d26
One side effect of this change is that the *-plugin-errors.txt
b72d26
file containng the backtrace is now written into the archive
b72d26
whether or not --debug is given.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/sosreport.py | 9 ++++++++-
b72d26
 1 file changed, 8 insertions(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/sosreport.py b/sos/sosreport.py
b72d26
index 44be75a1..77ae7161 100644
b72d26
--- a/sos/sosreport.py
b72d26
+++ b/sos/sosreport.py
b72d26
@@ -30,6 +30,7 @@ from shutil import rmtree
b72d26
 import tempfile
b72d26
 import hashlib
b72d26
 from concurrent.futures import ThreadPoolExecutor, TimeoutError
b72d26
+import pdb
b72d26
 
b72d26
 from sos import _sos as _
b72d26
 from sos import __version__
b72d26
@@ -504,7 +505,13 @@ class SoSReport(object):
b72d26
 
b72d26
     def handle_exception(self, plugname=None, func=None):
b72d26
         if self.raise_plugins or self.exit_process:
b72d26
-            raise
b72d26
+            # retrieve exception info for the current thread and stack.
b72d26
+            (etype, val, tb) = sys.exc_info()
b72d26
+            # we are NOT in interactive mode, print the exception...
b72d26
+            traceback.print_exception(etype, val, tb, file=sys.stdout)
b72d26
+            print_()
b72d26
+            # ...then start the debugger in post-mortem mode.
b72d26
+            pdb.post_mortem(tb)
b72d26
         if plugname and func:
b72d26
             self._log_plugin_exception(plugname, func)
b72d26
 
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From 0ea62d1ea57f41c1b75ccb83e69fdda386a7d280 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 7 Sep 2018 13:00:52 -0400
b72d26
Subject: [PATCH 3/6] [Plugin] fix exception raise in Plugin._copy_dir()
b72d26
b72d26
Use a naked 'raise' statement rather than raising the already caught
b72d26
exception in _copy_dir(), so that the original stack and backtrace
b72d26
are avaialable.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/plugins/__init__.py | 2 +-
b72d26
 1 file changed, 1 insertion(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
b72d26
index 252de4d0..ac2c0bc8 100644
b72d26
--- a/sos/plugins/__init__.py
b72d26
+++ b/sos/plugins/__init__.py
b72d26
@@ -401,7 +401,7 @@ class Plugin(object):
b72d26
                 msg = "Too many levels of symbolic links copying"
b72d26
                 self._log_error("_copy_dir: %s '%s'" % (msg, srcpath))
b72d26
                 return
b72d26
-            raise e
b72d26
+            raise
b72d26
 
b72d26
     def _get_dest_for_srcpath(self, srcpath):
b72d26
         if self.use_sysroot():
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From d84c1cd6dedf51a8ed7b1a511585c0ac2db0f083 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Wed, 5 Sep 2018 12:46:16 +0100
b72d26
Subject: [PATCH 4/6] [archive] fix leading path creation
b72d26
b72d26
Fix the creation of leading path components for both paths that
b72d26
contain intermediate components that are symbolic links (with both
b72d26
absolute and relative targets), and those that contain only
b72d26
directory components.
b72d26
b72d26
Since symlinks may link to other files, and other symlinks, it is
b72d26
necessary to handle these paths recursively and to include any
b72d26
intermediate symlinked directories, or symlink targets in the set
b72d26
of paths added to the archive.
b72d26
b72d26
Related: #1404
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 41 ++++++++++++++++++++++++++++++++++-------
b72d26
 1 file changed, 34 insertions(+), 7 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 11afa7aa..c256a01f 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -165,9 +165,24 @@ class FileCacheArchive(Archive):
b72d26
             The standard python `os.makedirs` is insufficient for our
b72d26
             needs: it will only create directories, and ignores the fact
b72d26
             that some path components may be symbolic links.
b72d26
+
b72d26
+            :param src: The source path in the host file system for which
b72d26
+                        leading components should be created, or the path
b72d26
+                        to an sos_* virtual directory inside the archive.
b72d26
+
b72d26
+                        Host paths must be absolute (initial '/'), and
b72d26
+                        sos_* directory paths must be a path relative to
b72d26
+                        the root of the archive.
b72d26
+
b72d26
+            :param mode: An optional mode to be used when creating path
b72d26
+                         components.
b72d26
+            :returns: A rewritten destination path in the case that one
b72d26
+                      or more symbolic links in intermediate components
b72d26
+                      of the path have altered the path destination.
b72d26
         """
b72d26
         self.log_debug("Making leading paths for %s" % src)
b72d26
         root = self._archive_root
b72d26
+        dest = src
b72d26
 
b72d26
         def in_archive(path):
b72d26
             """Test whether path ``path`` is inside the archive.
b72d26
@@ -191,34 +206,42 @@ class FileCacheArchive(Archive):
b72d26
         path_comps.reverse()
b72d26
 
b72d26
         abs_path = root
b72d26
-        rel_path = ""
b72d26
+        src_path = "/"
b72d26
 
b72d26
         # Check and create components as needed
b72d26
         for comp in path_comps:
b72d26
             abs_path = os.path.join(abs_path, comp)
b72d26
 
b72d26
+            # Do not create components that are above the archive root.
b72d26
             if not in_archive(abs_path):
b72d26
                 continue
b72d26
 
b72d26
-            rel_path = os.path.join(rel_path, comp)
b72d26
-            src_path = os.path.join("/", rel_path)
b72d26
+            src_path = os.path.join(src_path, comp)
b72d26
 
b72d26
             if not os.path.exists(abs_path):
b72d26
                 self.log_debug("Making path %s" % abs_path)
b72d26
                 if os.path.islink(src_path) and os.path.isdir(src_path):
b72d26
                     target = os.readlink(src_path)
b72d26
-                    abs_target = os.path.join(root, target)
b72d26
+
b72d26
+                    # The directory containing the source in the host fs,
b72d26
+                    # adjusted for the current level of path creation.
b72d26
+                    target_dir = os.path.split(src_path)[0]
b72d26
+
b72d26
+                    # The source path of the target in the host fs to be
b72d26
+                    # recursively copied.
b72d26
+                    target_src = os.path.join(target_dir, target)
b72d26
 
b72d26
                     # Recursively create leading components of target
b72d26
-                    self._make_leading_paths(abs_target, mode=mode)
b72d26
+                    dest = self._make_leading_paths(target_src, mode=mode)
b72d26
+                    dest = os.path.normpath(dest)
b72d26
 
b72d26
                     self.log_debug("Making symlink '%s' -> '%s'" %
b72d26
                                    (abs_path, target))
b72d26
-                    target = os.path.relpath(target)
b72d26
                     os.symlink(target, abs_path)
b72d26
                 else:
b72d26
                     self.log_debug("Making directory %s" % abs_path)
b72d26
                     os.mkdir(abs_path, mode)
b72d26
+        return dest
b72d26
 
b72d26
     def _check_path(self, src, path_type, dest=None, force=False):
b72d26
         """Check a new destination path in the archive.
b72d26
@@ -259,13 +282,17 @@ class FileCacheArchive(Archive):
b72d26
         if not dest_dir:
b72d26
             return dest
b72d26
 
b72d26
+        # Preserve destination basename for rewritten dest_dir
b72d26
+        dest_name = os.path.split(src)[1]
b72d26
+
b72d26
         # Check containing directory presence and path type
b72d26
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
b72d26
             raise ValueError("path '%s' exists and is not a directory" %
b72d26
                              dest_dir)
b72d26
         elif not os.path.exists(dest_dir):
b72d26
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
b72d26
-            self._make_leading_paths(src_dir)
b72d26
+            src_dir = self._make_leading_paths(src_dir)
b72d26
+            dest = self.dest_path(os.path.join(src_dir, dest_name))
b72d26
 
b72d26
         def is_special(mode):
b72d26
             return any([
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From 322f4a517ae336cc1443f9a399a0d15d45ec48b9 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Fri, 7 Sep 2018 13:11:03 -0400
b72d26
Subject: [PATCH 5/6] [archive] add link follow-up to
b72d26
 FileCacheArchive.add_link()
b72d26
b72d26
Creating a link may trigger further actions in the archive: if the
b72d26
link target is a regular file, we must copy that file into the
b72d26
archive, and if the target is a symbolic link, then we must create
b72d26
that link, and copy in the link target.
b72d26
b72d26
Handle this by calling add_file() or (recursively) add_link() in
b72d26
order to create the missing pieces of the symlink chain.
b72d26
b72d26
These operations must take place outside of the path lock since
b72d26
they do not modify the archive namespace and will call methods of
b72d26
the Archive object that will attempt to re-acquire this lock.
b72d26
b72d26
Resolves: #1404
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 38 +++++++++++++++++++++++++++++++++++---
b72d26
 1 file changed, 35 insertions(+), 3 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index c256a01f..6db398fc 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -403,6 +403,7 @@ class FileCacheArchive(Archive):
b72d26
                            % (dest, self._archive_root))
b72d26
 
b72d26
     def add_link(self, source, link_name):
b72d26
+        self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
b72d26
         with self._path_lock:
b72d26
             dest = self._check_path(link_name, P_LINK)
b72d26
             if not dest:
b72d26
@@ -410,10 +411,41 @@ class FileCacheArchive(Archive):
b72d26
 
b72d26
             if not os.path.lexists(dest):
b72d26
                 os.symlink(source, dest)
b72d26
-            self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
b72d26
-                           % (dest, source, self._archive_root))
b72d26
+                self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
b72d26
+                               % (dest, source, self._archive_root))
b72d26
+
b72d26
+        # Follow-up must be outside the path lock: we recurse into
b72d26
+        # other monitor methods that will attempt to reacquire it.
b72d26
+
b72d26
+        source_dir = os.path.dirname(link_name)
b72d26
+        host_source = os.path.join(source_dir, source)
b72d26
+        if not os.path.exists(self.dest_path(host_source)):
b72d26
+            if os.path.islink(host_source):
b72d26
+                link_dir = os.path.dirname(link_name)
b72d26
+                link_name = os.path.normpath(os.path.join(link_dir, source))
b72d26
+                dest_dir = os.path.dirname(link_name)
b72d26
+                source = os.path.join(dest_dir, os.readlink(link_name))
b72d26
+                source = os.path.relpath(source)
b72d26
+                self.log_debug("Adding link %s -> %s for link follow up" %
b72d26
+                               (link_name, source))
b72d26
+                self.add_link(source, link_name)
b72d26
+            elif os.path.isdir(host_source):
b72d26
+                self.log_debug("Adding dir %s for link follow up" % source)
b72d26
+                self.add_dir(host_source)
b72d26
+            elif os.path.isfile(host_source):
b72d26
+                self.log_debug("Adding file %s for link follow up" % source)
b72d26
+                self.add_file(host_source)
b72d26
+            else:
b72d26
+                self.log_debug("No link follow up: source=%s link_name=%s" %
b72d26
+                               (source, link_name))
b72d26
 
b72d26
-    def add_dir(self, path):
b72d26
+
b72d26
+    def add_dir(self, path, copy=False):
b72d26
+        """Create a directory in the archive.
b72d26
+
b72d26
+            :param path: the path in the host file system to add
b72d26
+        """
b72d26
+        # Establish path structure
b72d26
         with self._path_lock:
b72d26
             self._check_path(path, P_DIR)
b72d26
 
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From 6e79c4b4a4f32fa549708dbb8c8b9af73ab8ff61 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Mon, 10 Sep 2018 16:33:33 +0100
b72d26
Subject: [PATCH 6/6] [archive] remove unused 'copy' arg from
b72d26
 FileCacheArchive.add_dir()
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 3 +--
b72d26
 1 file changed, 1 insertion(+), 2 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 6db398fc..4b30630b 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -439,8 +439,7 @@ class FileCacheArchive(Archive):
b72d26
                 self.log_debug("No link follow up: source=%s link_name=%s" %
b72d26
                                (source, link_name))
b72d26
 
b72d26
-
b72d26
-    def add_dir(self, path, copy=False):
b72d26
+    def add_dir(self, path):
b72d26
         """Create a directory in the archive.
b72d26
 
b72d26
             :param path: the path in the host file system to add
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
From 919e8671a6ab9684d59525eb7f3607b3aab08ee1 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Tue, 11 Sep 2018 12:16:57 -0400
b72d26
Subject: [PATCH] [archive] fix link rewriting logic in
b72d26
 FileCacheArchive.add_link()
b72d26
b72d26
When processing link follow up for an original symbolic link, the
b72d26
add_link() logic incorrectly used the _original_ host link name,
b72d26
rather than the to-be-created name when calculating relative path
b72d26
structures. If the prior link is at a greater or lesser level of
b72d26
directory nesting this will lead to broken relative links in the
b72d26
archive (one level too high or too low).
b72d26
b72d26
In some cases (systemd) this behaviour was masked due to the fact
b72d26
that identically named links exist at multiple levels of the path
b72d26
hierarchy.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 30 +++++++++++++++++++-----------
b72d26
 1 file changed, 19 insertions(+), 11 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 528cfa576..7a7717de7 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -417,27 +417,35 @@ def add_link(self, source, link_name):
b72d26
         # Follow-up must be outside the path lock: we recurse into
b72d26
         # other monitor methods that will attempt to reacquire it.
b72d26
 
b72d26
+        self.log_debug("Link follow up: source=%s link_name=%s dest=%s" %
b72d26
+                       (source, link_name, dest))
b72d26
+
b72d26
         source_dir = os.path.dirname(link_name)
b72d26
-        host_source = os.path.join(source_dir, source)
b72d26
-        if not os.path.exists(self.dest_path(host_source)):
b72d26
-            if os.path.islink(host_source):
b72d26
-                link_dir = os.path.dirname(link_name)
b72d26
-                link_name = os.path.normpath(os.path.join(link_dir, source))
b72d26
+        host_path_name = os.path.normpath(os.path.join(source_dir, source))
b72d26
+        dest_path_name = self.dest_path(host_path_name)
b72d26
+
b72d26
+        if not os.path.exists(dest_path_name):
b72d26
+            if os.path.islink(host_path_name):
b72d26
+                # Normalised path for the new link_name
b72d26
+                link_name = host_path_name
b72d26
+                # Containing directory for the new link
b72d26
                 dest_dir = os.path.dirname(link_name)
b72d26
-                source = os.path.join(dest_dir, os.readlink(link_name))
b72d26
-                source = os.path.relpath(source)
b72d26
+                # Relative source path of the new link
b72d26
+                source = os.path.join(dest_dir, os.readlink(host_path_name))
b72d26
+                source = os.path.relpath(source, dest_dir)
b72d26
                 self.log_debug("Adding link %s -> %s for link follow up" %
b72d26
                                (link_name, source))
b72d26
                 self.add_link(source, link_name)
b72d26
-            elif os.path.isdir(host_source):
b72d26
+            elif os.path.isdir(host_path_name):
b72d26
                 self.log_debug("Adding dir %s for link follow up" % source)
b72d26
-                self.add_dir(host_source)
b72d26
-            elif os.path.isfile(host_source):
b72d26
+                self.add_dir(host_path_name)
b72d26
+            elif os.path.isfile(host_path_name):
b72d26
                 self.log_debug("Adding file %s for link follow up" % source)
b72d26
-                self.add_file(host_source)
b72d26
+                self.add_file(host_path_name)
b72d26
             else:
b72d26
                 self.log_debug("No link follow up: source=%s link_name=%s" %
b72d26
                                (source, link_name))
b72d26
+        self.log_debug("leaving add_link()")
b72d26
 
b72d26
     def add_dir(self, path):
b72d26
         """Create a directory in the archive.
b72d26
From c065be9715dc845b6411a9a0b2d6171bbeb1c390 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Wed, 12 Sep 2018 12:02:33 +0100
b72d26
Subject: [PATCH] [plugin] canonicalize link target path in
b72d26
 Plugin._copy_symlink()
b72d26
b72d26
Since we may be dealing with paths that contain intermediate
b72d26
symlinked directories, it is necessary to canonicalize the path
b72d26
for the link target in order to eliminate additional levels of
b72d26
symbolic links, and to calculate the correct relative path to
b72d26
use within the archive.
b72d26
b72d26
Related: #1404
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/plugins/__init__.py | 5 ++++-
b72d26
 1 file changed, 4 insertions(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
b72d26
index ac2c0bc8c..7d011a02c 100644
b72d26
--- a/sos/plugins/__init__.py
b72d26
+++ b/sos/plugins/__init__.py
b72d26
@@ -353,7 +353,10 @@ def _copy_symlink(self, srcpath):
b72d26
         absdest = os.path.normpath(dest)
b72d26
         # adjust the target used inside the report to always be relative
b72d26
         if os.path.isabs(linkdest):
b72d26
-            reldest = os.path.relpath(linkdest, os.path.dirname(srcpath))
b72d26
+            # Canonicalize the link target path to avoid additional levels
b72d26
+            # of symbolic links (that would affect the path nesting level).
b72d26
+            realdir = os.path.realpath(os.path.dirname(srcpath))
b72d26
+            reldest = os.path.relpath(linkdest, start=realdir)
b72d26
             # trim leading /sysroot
b72d26
             if self.use_sysroot():
b72d26
                 reldest = reldest[len(os.sep + os.pardir):]
b72d26
From 868966cd9dbb96ce3635d884e67e738b18658140 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Wed, 12 Sep 2018 16:11:07 +0100
b72d26
Subject: [PATCH] [archive] canonicalise paths for link follow up
b72d26
b72d26
Ensure that the canonical path is used when processing link follow
b72d26
up actions: the actual link path may contain one or more levels of
b72d26
symbolic links, leading to broken links if the link target path is
b72d26
assumed to be relative to the containing directory.
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 2 +-
b72d26
 1 file changed, 1 insertion(+), 1 deletion(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 7a7717de7..483d66f4f 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -421,7 +421,7 @@ def add_link(self, source, link_name):
b72d26
                        (source, link_name, dest))
b72d26
 
b72d26
         source_dir = os.path.dirname(link_name)
b72d26
-        host_path_name = os.path.normpath(os.path.join(source_dir, source))
b72d26
+        host_path_name = os.path.realpath(os.path.join(source_dir, source))
b72d26
         dest_path_name = self.dest_path(host_path_name)
b72d26
 
b72d26
         if not os.path.exists(dest_path_name):
b72d26
From 8e60e299cdfb0027d6b6ea845234ef54ae785186 Mon Sep 17 00:00:00 2001
b72d26
From: "Bryn M. Reeves" <bmr@redhat.com>
b72d26
Date: Thu, 13 Sep 2018 16:14:12 +0100
b72d26
Subject: [PATCH 1/2] [archive, plugin] avoid recursing on symbolic link loops
b72d26
b72d26
It's possible that symlink loops exist in the host file system,
b72d26
either 'simple' ('a'->'a'), or indirect ('a'->'b'->'a'). We need
b72d26
to avoid recursing on these loops, to avoid exceeding the maximum
b72d26
link or recursion depths, but we should still represent these
b72d26
inodes as accurately as possible in the resulting archive.
b72d26
b72d26
Detect loops in both the Plugin link handling code and in the new
b72d26
Archive link follow-up code by creating the first requested level
b72d26
of loop, and then skipping the recursive follow-up. This means
b72d26
that the looping links are still created in the archive so long
b72d26
as they are referenced in a copy spec but that we do not attempt
b72d26
to indefinitely recurse while collecting them.
b72d26
b72d26
Resolves: #1430
b72d26
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py          | 27 +++++++++++++++++++++++++++
b72d26
 sos/plugins/__init__.py | 20 +++++++++++++++-----
b72d26
 2 files changed, 42 insertions(+), 5 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index 483d66f4..e5819432 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -424,6 +424,29 @@ class FileCacheArchive(Archive):
b72d26
         host_path_name = os.path.realpath(os.path.join(source_dir, source))
b72d26
         dest_path_name = self.dest_path(host_path_name)
b72d26
 
b72d26
+        def is_loop(link_name, source):
b72d26
+            """Return ``True`` if the symbolic link ``link_name`` is part
b72d26
+                of a file system loop, or ``False`` otherwise.
b72d26
+            """
b72d26
+            link_dir = os.path.dirname(link_name)
b72d26
+            if not os.path.isabs(source):
b72d26
+                source = os.path.realpath(os.path.join(link_dir, source))
b72d26
+            link_name = os.path.realpath(link_name)
b72d26
+
b72d26
+            # Simple a -> a loop
b72d26
+            if link_name == source:
b72d26
+                return True
b72d26
+
b72d26
+            # Find indirect loops (a->b-a) by stat()ing the first step
b72d26
+            # in the symlink chain
b72d26
+            try:
b72d26
+                os.stat(link_name)
b72d26
+            except OSError as e:
b72d26
+                if e.errno == 40:
b72d26
+                    return True
b72d26
+                raise
b72d26
+            return False
b72d26
+
b72d26
         if not os.path.exists(dest_path_name):
b72d26
             if os.path.islink(host_path_name):
b72d26
                 # Normalised path for the new link_name
b72d26
@@ -433,6 +456,10 @@ class FileCacheArchive(Archive):
b72d26
                 # Relative source path of the new link
b72d26
                 source = os.path.join(dest_dir, os.readlink(host_path_name))
b72d26
                 source = os.path.relpath(source, dest_dir)
b72d26
+                if is_loop(link_name, source):
b72d26
+                    self.log_debug("Link '%s' - '%s' loops: skipping..." %
b72d26
+                                   (link_name, source))
b72d26
+                    return
b72d26
                 self.log_debug("Adding link %s -> %s for link follow up" %
b72d26
                                (link_name, source))
b72d26
                 self.add_link(source, link_name)
b72d26
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
b72d26
index 7d011a02..7d2a8b2d 100644
b72d26
--- a/sos/plugins/__init__.py
b72d26
+++ b/sos/plugins/__init__.py
b72d26
@@ -376,6 +376,21 @@ class Plugin(object):
b72d26
             self._log_debug("link '%s' is a directory, skipping..." % linkdest)
b72d26
             return
b72d26
 
b72d26
+        self.copied_files.append({'srcpath': srcpath,
b72d26
+                                  'dstpath': dstpath,
b72d26
+                                  'symlink': "yes",
b72d26
+                                  'pointsto': linkdest})
b72d26
+
b72d26
+        # Check for indirect symlink loops by stat()ing the next step
b72d26
+        # in the link chain.
b72d26
+        try:
b72d26
+            os.stat(absdest)
b72d26
+        except OSError as e:
b72d26
+            if e.errno == 40:
b72d26
+                self._log_debug("link '%s' is part of a file system "
b72d26
+                                "loop, skipping target..." % dstpath)
b72d26
+                return
b72d26
+
b72d26
         # copy the symlink target translating relative targets
b72d26
         # to absolute paths to pass to _do_copy_path.
b72d26
         self._log_debug("normalized link target '%s' as '%s'"
b72d26
@@ -388,11 +403,6 @@ class Plugin(object):
b72d26
             self._log_debug("link '%s' points to itself, skipping target..."
b72d26
                             % linkdest)
b72d26
 
b72d26
-        self.copied_files.append({'srcpath': srcpath,
b72d26
-                                  'dstpath': dstpath,
b72d26
-                                  'symlink': "yes",
b72d26
-                                  'pointsto': linkdest})
b72d26
-
b72d26
     def _copy_dir(self, srcpath):
b72d26
         try:
b72d26
             for afile in os.listdir(srcpath):
b72d26
-- 
b72d26
2.17.1
b72d26
b72d26
b72d26
From e108d7c03834446f8dac66ad69f5eade4f2c5fce Mon Sep 17 00:00:00 2001
b72d26
From: Pavel Moravec <pmoravec@redhat.com>
b72d26
Date: Fri, 14 Sep 2018 10:42:07 +0200
b72d26
Subject: [PATCH 2/2] [archive] fix and simplify directory destination
b72d26
 rewriting
b72d26
b72d26
Rewriting of the destination path by _make_leading_paths() only
b72d26
applies when creating intermediate path components that are a
b72d26
symbolic link. The final level of path creation must always be
b72d26
a directory, and the destination is always the absolute path to
b72d26
that directory.
b72d26
b72d26
Always return the directory path when creating a new directory,
b72d26
and do not attempt to rewrite the destination at the top level
b72d26
in FileCacheArchive._check_path() since all intermediate links
b72d26
have already been handled inside _make_leading_paths() (i.e.
b72d26
the returned/rewritten destination is always equal to the path
b72d26
that was passed into the function).
b72d26
b72d26
Resolves: #1432
b72d26
b72d26
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
b72d26
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
b72d26
---
b72d26
 sos/archive.py | 8 +++-----
b72d26
 1 file changed, 3 insertions(+), 5 deletions(-)
b72d26
b72d26
diff --git a/sos/archive.py b/sos/archive.py
b72d26
index e5819432..b02b75f7 100644
b72d26
--- a/sos/archive.py
b72d26
+++ b/sos/archive.py
b72d26
@@ -241,6 +241,8 @@ class FileCacheArchive(Archive):
b72d26
                 else:
b72d26
                     self.log_debug("Making directory %s" % abs_path)
b72d26
                     os.mkdir(abs_path, mode)
b72d26
+                    dest = src_path
b72d26
+
b72d26
         return dest
b72d26
 
b72d26
     def _check_path(self, src, path_type, dest=None, force=False):
b72d26
@@ -282,17 +284,13 @@ class FileCacheArchive(Archive):
b72d26
         if not dest_dir:
b72d26
             return dest
b72d26
 
b72d26
-        # Preserve destination basename for rewritten dest_dir
b72d26
-        dest_name = os.path.split(src)[1]
b72d26
-
b72d26
         # Check containing directory presence and path type
b72d26
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
b72d26
             raise ValueError("path '%s' exists and is not a directory" %
b72d26
                              dest_dir)
b72d26
         elif not os.path.exists(dest_dir):
b72d26
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
b72d26
-            src_dir = self._make_leading_paths(src_dir)
b72d26
-            dest = self.dest_path(os.path.join(src_dir, dest_name))
b72d26
+            self._make_leading_paths(src_dir)
b72d26
 
b72d26
         def is_special(mode):
b72d26
             return any([
b72d26
-- 
b72d26
2.17.1
b72d26