Blob Blame History Raw
From 2e07f7c4778145d4366476ecc4383d491458b541 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 31 Aug 2018 12:50:24 +0100
Subject: [PATCH 1/4] [sosreport] properly raise exceptions when --debug is
 given

OSError and IOError exceptions were not raised to the terminal
when --debug is in effect since they were silently caught in the
generic exception handler.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/sosreport.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sos/sosreport.py b/sos/sosreport.py
index 00c3e811..80633966 100644
--- a/sos/sosreport.py
+++ b/sos/sosreport.py
@@ -995,7 +995,8 @@ class SoSReport(object):
                 print(" %s while setting up archive" % e.strerror)
                 print("")
             else:
-                raise e
+                print("Error setting up archive: %s" % e)
+                raise
         except Exception as e:
             self.ui_log.error("")
             self.ui_log.error(" Unexpected exception setting up archive:")
@@ -1467,6 +1468,8 @@ class SoSReport(object):
             return self.final_work()
 
         except (OSError):
+            if self.opts.debug:
+                raise
             self._cleanup()
         except (KeyboardInterrupt):
             self.ui_log.error("\nExiting on user cancel")
-- 
2.17.1


From c496d2bec8cae175faf986567e73d16d401d8564 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 31 Aug 2018 12:52:38 +0100
Subject: [PATCH 2/4] [archive] simplify FileCacheArchive.makedirs()

Simplify the makedirs() method of FileCacheArchive and have it
bypass _check_path() and directly call os.makedirs(): a subsequent
patch will restrict the use of the method to setting up the sos_*
directories in the archive root.

File, directory and other object type add_* methods will use a
new method that correctly handles symbolic links in intermediate
path components.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 5d99170f..ffa54036 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -361,11 +361,11 @@ class FileCacheArchive(Archive):
         return self._archive_root
 
     def makedirs(self, path, mode=0o700):
-        dest = self._check_path(path, P_DIR)
-        if not dest:
-            return
+        """Create path, including leading components.
 
-        self._makedirs(self.dest_path(path))
+            Used by sos.sosreport to set up sos_* directories.
+        """
+        os.makedirs(os.path.join(self._archive_root, path), mode=mode)
         self.log_debug("created directory at '%s' in FileCacheArchive '%s'"
                        % (path, self._archive_root))
 
-- 
2.17.1


From ca422720b74181b2433473428e29e90af59b3cf8 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 31 Aug 2018 12:55:51 +0100
Subject: [PATCH 3/4] [archive] normalise dest_dir in
 FileCacheArchive._check_path()

Always set a valid dest_dir in _check_path() and do not assume
that it can be obtained by splitting the path: in the case of
a directory it is the unmodified 'dest' value.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sos/archive.py b/sos/archive.py
index ffa54036..903cc672 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -191,7 +191,10 @@ class FileCacheArchive(Archive):
                       copied now or `None` otherwise
         """
         dest = dest or self.dest_path(src)
-        dest_dir = os.path.split(dest)[0]
+        if path_type == P_DIR:
+            dest_dir = dest
+        else:
+            dest_dir = os.path.split(dest)[0]
         if not dest_dir:
             return dest
 
-- 
2.17.1


From 75d759066e8ee0a469abc37f48f7bfcdfe8182b5 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 31 Aug 2018 12:58:01 +0100
Subject: [PATCH 4/4] [archive] replace FileCacheArchive._makedirs()

The Python os.makedirs() implementation is inadequate for sos's
needs: it will create leading directories given an intended path
destination, but it is not able to reflect cases where some of
the intermediate paths are actually symbolic links.

Replace the use of os.makedirs() with a method that walks over
the path, and either creates directories, or symbolic links (and
their directory target) to better correspond with the content of
the host file system.

This fixes a situation where two plugins can race in the archive,
leading to an exception in the plugin that runs last:

 - /foo/bar exists and is a link to /foo/bar.qux
 - One plugin attempts to collect /foo/bar
 - Another plugin attempts to collect a link /foo/qux -> /foo/bar/baz

If the 2nd plugin happens to run first it will create the path
"/foo/bar" as a _directory_ (via _makedirs()). Since the archive
now checks for matching object types when a path collision occurs,
the first plugin will arrive at add_dir(), note that "/foo/bar" is
present and is not a symbolic link, and will raise an exception.

Correct this by ensuring that whichever plugin executes first, the
correct link/directory path structure will be set up.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 64 insertions(+), 8 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 903cc672..11afa7aa 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -159,6 +159,67 @@ class FileCacheArchive(Archive):
             name = name.lstrip(os.sep)
         return (os.path.join(self._archive_root, name))
 
+    def _make_leading_paths(self, src, mode=0o700):
+        """Create leading path components
+
+            The standard python `os.makedirs` is insufficient for our
+            needs: it will only create directories, and ignores the fact
+            that some path components may be symbolic links.
+        """
+        self.log_debug("Making leading paths for %s" % src)
+        root = self._archive_root
+
+        def in_archive(path):
+            """Test whether path ``path`` is inside the archive.
+            """
+            return path.startswith(os.path.join(root, ""))
+
+        if not src.startswith("/"):
+            # Sos archive path (sos_commands, sos_logs etc.)
+            src_dir = src
+        else:
+            # Host file path
+            src_dir = src if os.path.isdir(src) else os.path.split(src)[0]
+
+        # Build a list of path components in root-to-leaf order.
+        path = src_dir
+        path_comps = []
+        while path != '/' and path != '':
+            head, tail = os.path.split(path)
+            path_comps.append(tail)
+            path = head
+        path_comps.reverse()
+
+        abs_path = root
+        rel_path = ""
+
+        # Check and create components as needed
+        for comp in path_comps:
+            abs_path = os.path.join(abs_path, comp)
+
+            if not in_archive(abs_path):
+                continue
+
+            rel_path = os.path.join(rel_path, comp)
+            src_path = os.path.join("/", rel_path)
+
+            if not os.path.exists(abs_path):
+                self.log_debug("Making path %s" % abs_path)
+                if os.path.islink(src_path) and os.path.isdir(src_path):
+                    target = os.readlink(src_path)
+                    abs_target = os.path.join(root, target)
+
+                    # Recursively create leading components of target
+                    self._make_leading_paths(abs_target, mode=mode)
+
+                    self.log_debug("Making symlink '%s' -> '%s'" %
+                                   (abs_path, target))
+                    target = os.path.relpath(target)
+                    os.symlink(target, abs_path)
+                else:
+                    self.log_debug("Making directory %s" % abs_path)
+                    os.mkdir(abs_path, mode)
+
     def _check_path(self, src, path_type, dest=None, force=False):
         """Check a new destination path in the archive.
 
@@ -203,7 +264,8 @@ class FileCacheArchive(Archive):
             raise ValueError("path '%s' exists and is not a directory" %
                              dest_dir)
         elif not os.path.exists(dest_dir):
-            self._makedirs(dest_dir)
+            src_dir = src if path_type == P_DIR else os.path.split(src)[0]
+            self._make_leading_paths(src_dir)
 
         def is_special(mode):
             return any([
@@ -326,10 +388,7 @@ class FileCacheArchive(Archive):
 
     def add_dir(self, path):
         with self._path_lock:
-            dest = self._check_path(path, P_DIR)
-            if not dest:
-                return
-            self.makedirs(path)
+            self._check_path(path, P_DIR)
 
     def add_node(self, path, mode, device):
         dest = self._check_path(path, P_NODE)
@@ -347,9 +406,6 @@ class FileCacheArchive(Archive):
                 raise e
             shutil.copystat(path, dest)
 
-    def _makedirs(self, path, mode=0o700):
-        os.makedirs(path, mode)
-
     def name_max(self):
         if 'PC_NAME_MAX' in os.pathconf_names:
             pc_name_max = os.pathconf_names['PC_NAME_MAX']
-- 
2.17.1

From 5d6228b85e174dee8abcc4c206a1e9034242c6c6 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 7 Sep 2018 12:06:34 -0400
Subject: [PATCH 1/6] [sosreport] ensure ThreadPool exceptions are raised

The ThreadPoolExecutor does not raise exceptions to the parent
thread immediately: it stores them in-line in the pool's results
list, and raises them to the caller on acccess to that slot in
the results iterator.

Make sure that these exceptions are handled by iterating over all
results and asserting that they are non-None (in practice, this
code is never executed since the resulting raise will trap to an
exception handler, but it is less confusing than a bare 'pass').

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/sosreport.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sos/sosreport.py b/sos/sosreport.py
index 80633966..44be75a1 100644
--- a/sos/sosreport.py
+++ b/sos/sosreport.py
@@ -1065,9 +1065,13 @@ class SoSReport(object):
         try:
             self.plugpool = ThreadPoolExecutor(self.opts.threads)
             # Pass the plugpool its own private copy of self.pluglist
-            self.plugpool.map(self._collect_plugin, list(self.pluglist),
-                              chunksize=1)
+            results = self.plugpool.map(self._collect_plugin,
+                                        list(self.pluglist), chunksize=1)
             self.plugpool.shutdown(wait=True)
+            for res in results:
+                if not res:
+                    self.soslog.debug("Unexpected plugin task result: %s" %
+                                      res)
             self.ui_log.info("")
         except KeyboardInterrupt:
             # We may not be at a newline when the user issues Ctrl-C
-- 
2.17.1


From 9aaba972bf6a42c33ea9bca80f07bfb880ba45a1 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 7 Sep 2018 12:15:10 -0400
Subject: [PATCH 2/6] [sosreport] trap directly to PDB in handle_exception()

Now that plugins are run in a threadpool, it is not possible to
defer the call to pdb.post_mortem() to the top-level exception
handler in the main thread: this is due to the fact that in a pool,
exceptions are caught and saved to be re-raised to thread calling
the pool when results are returned. When the saved exception is
raised to the top-level handler the execution context it relates
to is gone: the backtrace and stack frame have been torn down and
only very limited information is available from the exception
frame.

Instead, catch these exceptions _inside_ the thread pool context,
and directly trap to the Python debugger. This allows plugin code
to be debugged interactively with the full backtrace and with all
access to local variables and the execution stack. In addition,
this means that after the debugger has handled the exception it is
possible to return to the run and continue until report completion.

One side effect of this change is that the *-plugin-errors.txt
file containng the backtrace is now written into the archive
whether or not --debug is given.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/sosreport.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sos/sosreport.py b/sos/sosreport.py
index 44be75a1..77ae7161 100644
--- a/sos/sosreport.py
+++ b/sos/sosreport.py
@@ -30,6 +30,7 @@ from shutil import rmtree
 import tempfile
 import hashlib
 from concurrent.futures import ThreadPoolExecutor, TimeoutError
+import pdb
 
 from sos import _sos as _
 from sos import __version__
@@ -504,7 +505,13 @@ class SoSReport(object):
 
     def handle_exception(self, plugname=None, func=None):
         if self.raise_plugins or self.exit_process:
-            raise
+            # retrieve exception info for the current thread and stack.
+            (etype, val, tb) = sys.exc_info()
+            # we are NOT in interactive mode, print the exception...
+            traceback.print_exception(etype, val, tb, file=sys.stdout)
+            print_()
+            # ...then start the debugger in post-mortem mode.
+            pdb.post_mortem(tb)
         if plugname and func:
             self._log_plugin_exception(plugname, func)
 
-- 
2.17.1


From 0ea62d1ea57f41c1b75ccb83e69fdda386a7d280 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 7 Sep 2018 13:00:52 -0400
Subject: [PATCH 3/6] [Plugin] fix exception raise in Plugin._copy_dir()

Use a naked 'raise' statement rather than raising the already caught
exception in _copy_dir(), so that the original stack and backtrace
are avaialable.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/plugins/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
index 252de4d0..ac2c0bc8 100644
--- a/sos/plugins/__init__.py
+++ b/sos/plugins/__init__.py
@@ -401,7 +401,7 @@ class Plugin(object):
                 msg = "Too many levels of symbolic links copying"
                 self._log_error("_copy_dir: %s '%s'" % (msg, srcpath))
                 return
-            raise e
+            raise
 
     def _get_dest_for_srcpath(self, srcpath):
         if self.use_sysroot():
-- 
2.17.1


From d84c1cd6dedf51a8ed7b1a511585c0ac2db0f083 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Wed, 5 Sep 2018 12:46:16 +0100
Subject: [PATCH 4/6] [archive] fix leading path creation

Fix the creation of leading path components for both paths that
contain intermediate components that are symbolic links (with both
absolute and relative targets), and those that contain only
directory components.

Since symlinks may link to other files, and other symlinks, it is
necessary to handle these paths recursively and to include any
intermediate symlinked directories, or symlink targets in the set
of paths added to the archive.

Related: #1404

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 11afa7aa..c256a01f 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -165,9 +165,24 @@ class FileCacheArchive(Archive):
             The standard python `os.makedirs` is insufficient for our
             needs: it will only create directories, and ignores the fact
             that some path components may be symbolic links.
+
+            :param src: The source path in the host file system for which
+                        leading components should be created, or the path
+                        to an sos_* virtual directory inside the archive.
+
+                        Host paths must be absolute (initial '/'), and
+                        sos_* directory paths must be a path relative to
+                        the root of the archive.
+
+            :param mode: An optional mode to be used when creating path
+                         components.
+            :returns: A rewritten destination path in the case that one
+                      or more symbolic links in intermediate components
+                      of the path have altered the path destination.
         """
         self.log_debug("Making leading paths for %s" % src)
         root = self._archive_root
+        dest = src
 
         def in_archive(path):
             """Test whether path ``path`` is inside the archive.
@@ -191,34 +206,42 @@ class FileCacheArchive(Archive):
         path_comps.reverse()
 
         abs_path = root
-        rel_path = ""
+        src_path = "/"
 
         # Check and create components as needed
         for comp in path_comps:
             abs_path = os.path.join(abs_path, comp)
 
+            # Do not create components that are above the archive root.
             if not in_archive(abs_path):
                 continue
 
-            rel_path = os.path.join(rel_path, comp)
-            src_path = os.path.join("/", rel_path)
+            src_path = os.path.join(src_path, comp)
 
             if not os.path.exists(abs_path):
                 self.log_debug("Making path %s" % abs_path)
                 if os.path.islink(src_path) and os.path.isdir(src_path):
                     target = os.readlink(src_path)
-                    abs_target = os.path.join(root, target)
+
+                    # The directory containing the source in the host fs,
+                    # adjusted for the current level of path creation.
+                    target_dir = os.path.split(src_path)[0]
+
+                    # The source path of the target in the host fs to be
+                    # recursively copied.
+                    target_src = os.path.join(target_dir, target)
 
                     # Recursively create leading components of target
-                    self._make_leading_paths(abs_target, mode=mode)
+                    dest = self._make_leading_paths(target_src, mode=mode)
+                    dest = os.path.normpath(dest)
 
                     self.log_debug("Making symlink '%s' -> '%s'" %
                                    (abs_path, target))
-                    target = os.path.relpath(target)
                     os.symlink(target, abs_path)
                 else:
                     self.log_debug("Making directory %s" % abs_path)
                     os.mkdir(abs_path, mode)
+        return dest
 
     def _check_path(self, src, path_type, dest=None, force=False):
         """Check a new destination path in the archive.
@@ -259,13 +282,17 @@ class FileCacheArchive(Archive):
         if not dest_dir:
             return dest
 
+        # Preserve destination basename for rewritten dest_dir
+        dest_name = os.path.split(src)[1]
+
         # Check containing directory presence and path type
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
             raise ValueError("path '%s' exists and is not a directory" %
                              dest_dir)
         elif not os.path.exists(dest_dir):
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
-            self._make_leading_paths(src_dir)
+            src_dir = self._make_leading_paths(src_dir)
+            dest = self.dest_path(os.path.join(src_dir, dest_name))
 
         def is_special(mode):
             return any([
-- 
2.17.1


From 322f4a517ae336cc1443f9a399a0d15d45ec48b9 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Fri, 7 Sep 2018 13:11:03 -0400
Subject: [PATCH 5/6] [archive] add link follow-up to
 FileCacheArchive.add_link()

Creating a link may trigger further actions in the archive: if the
link target is a regular file, we must copy that file into the
archive, and if the target is a symbolic link, then we must create
that link, and copy in the link target.

Handle this by calling add_file() or (recursively) add_link() in
order to create the missing pieces of the symlink chain.

These operations must take place outside of the path lock since
they do not modify the archive namespace and will call methods of
the Archive object that will attempt to re-acquire this lock.

Resolves: #1404

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index c256a01f..6db398fc 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -403,6 +403,7 @@ class FileCacheArchive(Archive):
                            % (dest, self._archive_root))
 
     def add_link(self, source, link_name):
+        self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
         with self._path_lock:
             dest = self._check_path(link_name, P_LINK)
             if not dest:
@@ -410,10 +411,41 @@ class FileCacheArchive(Archive):
 
             if not os.path.lexists(dest):
                 os.symlink(source, dest)
-            self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
-                           % (dest, source, self._archive_root))
+                self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
+                               % (dest, source, self._archive_root))
+
+        # Follow-up must be outside the path lock: we recurse into
+        # other monitor methods that will attempt to reacquire it.
+
+        source_dir = os.path.dirname(link_name)
+        host_source = os.path.join(source_dir, source)
+        if not os.path.exists(self.dest_path(host_source)):
+            if os.path.islink(host_source):
+                link_dir = os.path.dirname(link_name)
+                link_name = os.path.normpath(os.path.join(link_dir, source))
+                dest_dir = os.path.dirname(link_name)
+                source = os.path.join(dest_dir, os.readlink(link_name))
+                source = os.path.relpath(source)
+                self.log_debug("Adding link %s -> %s for link follow up" %
+                               (link_name, source))
+                self.add_link(source, link_name)
+            elif os.path.isdir(host_source):
+                self.log_debug("Adding dir %s for link follow up" % source)
+                self.add_dir(host_source)
+            elif os.path.isfile(host_source):
+                self.log_debug("Adding file %s for link follow up" % source)
+                self.add_file(host_source)
+            else:
+                self.log_debug("No link follow up: source=%s link_name=%s" %
+                               (source, link_name))
 
-    def add_dir(self, path):
+
+    def add_dir(self, path, copy=False):
+        """Create a directory in the archive.
+
+            :param path: the path in the host file system to add
+        """
+        # Establish path structure
         with self._path_lock:
             self._check_path(path, P_DIR)
 
-- 
2.17.1


From 6e79c4b4a4f32fa549708dbb8c8b9af73ab8ff61 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Mon, 10 Sep 2018 16:33:33 +0100
Subject: [PATCH 6/6] [archive] remove unused 'copy' arg from
 FileCacheArchive.add_dir()

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 6db398fc..4b30630b 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -439,8 +439,7 @@ class FileCacheArchive(Archive):
                 self.log_debug("No link follow up: source=%s link_name=%s" %
                                (source, link_name))
 
-
-    def add_dir(self, path, copy=False):
+    def add_dir(self, path):
         """Create a directory in the archive.
 
             :param path: the path in the host file system to add
-- 
2.17.1

From 919e8671a6ab9684d59525eb7f3607b3aab08ee1 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Tue, 11 Sep 2018 12:16:57 -0400
Subject: [PATCH] [archive] fix link rewriting logic in
 FileCacheArchive.add_link()

When processing link follow up for an original symbolic link, the
add_link() logic incorrectly used the _original_ host link name,
rather than the to-be-created name when calculating relative path
structures. If the prior link is at a greater or lesser level of
directory nesting this will lead to broken relative links in the
archive (one level too high or too low).

In some cases (systemd) this behaviour was masked due to the fact
that identically named links exist at multiple levels of the path
hierarchy.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 528cfa576..7a7717de7 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -417,27 +417,35 @@ def add_link(self, source, link_name):
         # Follow-up must be outside the path lock: we recurse into
         # other monitor methods that will attempt to reacquire it.
 
+        self.log_debug("Link follow up: source=%s link_name=%s dest=%s" %
+                       (source, link_name, dest))
+
         source_dir = os.path.dirname(link_name)
-        host_source = os.path.join(source_dir, source)
-        if not os.path.exists(self.dest_path(host_source)):
-            if os.path.islink(host_source):
-                link_dir = os.path.dirname(link_name)
-                link_name = os.path.normpath(os.path.join(link_dir, source))
+        host_path_name = os.path.normpath(os.path.join(source_dir, source))
+        dest_path_name = self.dest_path(host_path_name)
+
+        if not os.path.exists(dest_path_name):
+            if os.path.islink(host_path_name):
+                # Normalised path for the new link_name
+                link_name = host_path_name
+                # Containing directory for the new link
                 dest_dir = os.path.dirname(link_name)
-                source = os.path.join(dest_dir, os.readlink(link_name))
-                source = os.path.relpath(source)
+                # Relative source path of the new link
+                source = os.path.join(dest_dir, os.readlink(host_path_name))
+                source = os.path.relpath(source, dest_dir)
                 self.log_debug("Adding link %s -> %s for link follow up" %
                                (link_name, source))
                 self.add_link(source, link_name)
-            elif os.path.isdir(host_source):
+            elif os.path.isdir(host_path_name):
                 self.log_debug("Adding dir %s for link follow up" % source)
-                self.add_dir(host_source)
-            elif os.path.isfile(host_source):
+                self.add_dir(host_path_name)
+            elif os.path.isfile(host_path_name):
                 self.log_debug("Adding file %s for link follow up" % source)
-                self.add_file(host_source)
+                self.add_file(host_path_name)
             else:
                 self.log_debug("No link follow up: source=%s link_name=%s" %
                                (source, link_name))
+        self.log_debug("leaving add_link()")
 
     def add_dir(self, path):
         """Create a directory in the archive.
From c065be9715dc845b6411a9a0b2d6171bbeb1c390 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Wed, 12 Sep 2018 12:02:33 +0100
Subject: [PATCH] [plugin] canonicalize link target path in
 Plugin._copy_symlink()

Since we may be dealing with paths that contain intermediate
symlinked directories, it is necessary to canonicalize the path
for the link target in order to eliminate additional levels of
symbolic links, and to calculate the correct relative path to
use within the archive.

Related: #1404

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/plugins/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
index ac2c0bc8c..7d011a02c 100644
--- a/sos/plugins/__init__.py
+++ b/sos/plugins/__init__.py
@@ -353,7 +353,10 @@ def _copy_symlink(self, srcpath):
         absdest = os.path.normpath(dest)
         # adjust the target used inside the report to always be relative
         if os.path.isabs(linkdest):
-            reldest = os.path.relpath(linkdest, os.path.dirname(srcpath))
+            # Canonicalize the link target path to avoid additional levels
+            # of symbolic links (that would affect the path nesting level).
+            realdir = os.path.realpath(os.path.dirname(srcpath))
+            reldest = os.path.relpath(linkdest, start=realdir)
             # trim leading /sysroot
             if self.use_sysroot():
                 reldest = reldest[len(os.sep + os.pardir):]
From 868966cd9dbb96ce3635d884e67e738b18658140 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Wed, 12 Sep 2018 16:11:07 +0100
Subject: [PATCH] [archive] canonicalise paths for link follow up

Ensure that the canonical path is used when processing link follow
up actions: the actual link path may contain one or more levels of
symbolic links, leading to broken links if the link target path is
assumed to be relative to the containing directory.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sos/archive.py b/sos/archive.py
index 7a7717de7..483d66f4f 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -421,7 +421,7 @@ def add_link(self, source, link_name):
                        (source, link_name, dest))
 
         source_dir = os.path.dirname(link_name)
-        host_path_name = os.path.normpath(os.path.join(source_dir, source))
+        host_path_name = os.path.realpath(os.path.join(source_dir, source))
         dest_path_name = self.dest_path(host_path_name)
 
         if not os.path.exists(dest_path_name):
From 8e60e299cdfb0027d6b6ea845234ef54ae785186 Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Thu, 13 Sep 2018 16:14:12 +0100
Subject: [PATCH 1/2] [archive, plugin] avoid recursing on symbolic link loops

It's possible that symlink loops exist in the host file system,
either 'simple' ('a'->'a'), or indirect ('a'->'b'->'a'). We need
to avoid recursing on these loops, to avoid exceeding the maximum
link or recursion depths, but we should still represent these
inodes as accurately as possible in the resulting archive.

Detect loops in both the Plugin link handling code and in the new
Archive link follow-up code by creating the first requested level
of loop, and then skipping the recursive follow-up. This means
that the looping links are still created in the archive so long
as they are referenced in a copy spec but that we do not attempt
to indefinitely recurse while collecting them.

Resolves: #1430

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py          | 27 +++++++++++++++++++++++++++
 sos/plugins/__init__.py | 20 +++++++++++++++-----
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index 483d66f4..e5819432 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -424,6 +424,29 @@ class FileCacheArchive(Archive):
         host_path_name = os.path.realpath(os.path.join(source_dir, source))
         dest_path_name = self.dest_path(host_path_name)
 
+        def is_loop(link_name, source):
+            """Return ``True`` if the symbolic link ``link_name`` is part
+                of a file system loop, or ``False`` otherwise.
+            """
+            link_dir = os.path.dirname(link_name)
+            if not os.path.isabs(source):
+                source = os.path.realpath(os.path.join(link_dir, source))
+            link_name = os.path.realpath(link_name)
+
+            # Simple a -> a loop
+            if link_name == source:
+                return True
+
+            # Find indirect loops (a->b-a) by stat()ing the first step
+            # in the symlink chain
+            try:
+                os.stat(link_name)
+            except OSError as e:
+                if e.errno == 40:
+                    return True
+                raise
+            return False
+
         if not os.path.exists(dest_path_name):
             if os.path.islink(host_path_name):
                 # Normalised path for the new link_name
@@ -433,6 +456,10 @@ class FileCacheArchive(Archive):
                 # Relative source path of the new link
                 source = os.path.join(dest_dir, os.readlink(host_path_name))
                 source = os.path.relpath(source, dest_dir)
+                if is_loop(link_name, source):
+                    self.log_debug("Link '%s' - '%s' loops: skipping..." %
+                                   (link_name, source))
+                    return
                 self.log_debug("Adding link %s -> %s for link follow up" %
                                (link_name, source))
                 self.add_link(source, link_name)
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
index 7d011a02..7d2a8b2d 100644
--- a/sos/plugins/__init__.py
+++ b/sos/plugins/__init__.py
@@ -376,6 +376,21 @@ class Plugin(object):
             self._log_debug("link '%s' is a directory, skipping..." % linkdest)
             return
 
+        self.copied_files.append({'srcpath': srcpath,
+                                  'dstpath': dstpath,
+                                  'symlink': "yes",
+                                  'pointsto': linkdest})
+
+        # Check for indirect symlink loops by stat()ing the next step
+        # in the link chain.
+        try:
+            os.stat(absdest)
+        except OSError as e:
+            if e.errno == 40:
+                self._log_debug("link '%s' is part of a file system "
+                                "loop, skipping target..." % dstpath)
+                return
+
         # copy the symlink target translating relative targets
         # to absolute paths to pass to _do_copy_path.
         self._log_debug("normalized link target '%s' as '%s'"
@@ -388,11 +403,6 @@ class Plugin(object):
             self._log_debug("link '%s' points to itself, skipping target..."
                             % linkdest)
 
-        self.copied_files.append({'srcpath': srcpath,
-                                  'dstpath': dstpath,
-                                  'symlink': "yes",
-                                  'pointsto': linkdest})
-
     def _copy_dir(self, srcpath):
         try:
             for afile in os.listdir(srcpath):
-- 
2.17.1


From e108d7c03834446f8dac66ad69f5eade4f2c5fce Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Fri, 14 Sep 2018 10:42:07 +0200
Subject: [PATCH 2/2] [archive] fix and simplify directory destination
 rewriting

Rewriting of the destination path by _make_leading_paths() only
applies when creating intermediate path components that are a
symbolic link. The final level of path creation must always be
a directory, and the destination is always the absolute path to
that directory.

Always return the directory path when creating a new directory,
and do not attempt to rewrite the destination at the top level
in FileCacheArchive._check_path() since all intermediate links
have already been handled inside _make_leading_paths() (i.e.
the returned/rewritten destination is always equal to the path
that was passed into the function).

Resolves: #1432

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
---
 sos/archive.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/sos/archive.py b/sos/archive.py
index e5819432..b02b75f7 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -241,6 +241,8 @@ class FileCacheArchive(Archive):
                 else:
                     self.log_debug("Making directory %s" % abs_path)
                     os.mkdir(abs_path, mode)
+                    dest = src_path
+
         return dest
 
     def _check_path(self, src, path_type, dest=None, force=False):
@@ -282,17 +284,13 @@ class FileCacheArchive(Archive):
         if not dest_dir:
             return dest
 
-        # Preserve destination basename for rewritten dest_dir
-        dest_name = os.path.split(src)[1]
-
         # Check containing directory presence and path type
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
             raise ValueError("path '%s' exists and is not a directory" %
                              dest_dir)
         elif not os.path.exists(dest_dir):
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
-            src_dir = self._make_leading_paths(src_dir)
-            dest = self.dest_path(os.path.join(src_dir, dest_name))
+            self._make_leading_paths(src_dir)
 
         def is_special(mode):
             return any([
-- 
2.17.1