Blame SOURCES/sos-bz1627543-symlinks-not-copied.patch

fce3c4
From 2e07f7c4778145d4366476ecc4383d491458b541 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 31 Aug 2018 12:50:24 +0100
fce3c4
Subject: [PATCH 1/4] [sosreport] properly raise exceptions when --debug is
fce3c4
 given
fce3c4
fce3c4
OSError and IOError exceptions were not raised to the terminal
fce3c4
when --debug is in effect since they were silently caught in the
fce3c4
generic exception handler.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/sosreport.py | 5 ++++-
fce3c4
 1 file changed, 4 insertions(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/sosreport.py b/sos/sosreport.py
fce3c4
index 00c3e811..80633966 100644
fce3c4
--- a/sos/sosreport.py
fce3c4
+++ b/sos/sosreport.py
fce3c4
@@ -995,7 +995,8 @@ class SoSReport(object):
fce3c4
                 print(" %s while setting up archive" % e.strerror)
fce3c4
                 print("")
fce3c4
             else:
fce3c4
-                raise e
fce3c4
+                print("Error setting up archive: %s" % e)
fce3c4
+                raise
fce3c4
         except Exception as e:
fce3c4
             self.ui_log.error("")
fce3c4
             self.ui_log.error(" Unexpected exception setting up archive:")
fce3c4
@@ -1467,6 +1468,8 @@ class SoSReport(object):
fce3c4
             return self.final_work()
fce3c4
 
fce3c4
         except (OSError):
fce3c4
+            if self.opts.debug:
fce3c4
+                raise
fce3c4
             self._cleanup()
fce3c4
         except (KeyboardInterrupt):
fce3c4
             self.ui_log.error("\nExiting on user cancel")
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From c496d2bec8cae175faf986567e73d16d401d8564 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 31 Aug 2018 12:52:38 +0100
fce3c4
Subject: [PATCH 2/4] [archive] simplify FileCacheArchive.makedirs()
fce3c4
fce3c4
Simplify the makedirs() method of FileCacheArchive and have it
fce3c4
bypass _check_path() and directly call os.makedirs(): a subsequent
fce3c4
patch will restrict the use of the method to setting up the sos_*
fce3c4
directories in the archive root.
fce3c4
fce3c4
File, directory and other object type add_* methods will use a
fce3c4
new method that correctly handles symbolic links in intermediate
fce3c4
path components.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 8 ++++----
fce3c4
 1 file changed, 4 insertions(+), 4 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 5d99170f..ffa54036 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -361,11 +361,11 @@ class FileCacheArchive(Archive):
fce3c4
         return self._archive_root
fce3c4
 
fce3c4
     def makedirs(self, path, mode=0o700):
fce3c4
-        dest = self._check_path(path, P_DIR)
fce3c4
-        if not dest:
fce3c4
-            return
fce3c4
+        """Create path, including leading components.
fce3c4
 
fce3c4
-        self._makedirs(self.dest_path(path))
fce3c4
+            Used by sos.sosreport to set up sos_* directories.
fce3c4
+        """
fce3c4
+        os.makedirs(os.path.join(self._archive_root, path), mode=mode)
fce3c4
         self.log_debug("created directory at '%s' in FileCacheArchive '%s'"
fce3c4
                        % (path, self._archive_root))
fce3c4
 
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From ca422720b74181b2433473428e29e90af59b3cf8 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 31 Aug 2018 12:55:51 +0100
fce3c4
Subject: [PATCH 3/4] [archive] normalise dest_dir in
fce3c4
 FileCacheArchive._check_path()
fce3c4
fce3c4
Always set a valid dest_dir in _check_path() and do not assume
fce3c4
that it can be obtained by splitting the path: in the case of
fce3c4
a directory it is the unmodified 'dest' value.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 5 ++++-
fce3c4
 1 file changed, 4 insertions(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index ffa54036..903cc672 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -191,7 +191,10 @@ class FileCacheArchive(Archive):
fce3c4
                       copied now or `None` otherwise
fce3c4
         """
fce3c4
         dest = dest or self.dest_path(src)
fce3c4
-        dest_dir = os.path.split(dest)[0]
fce3c4
+        if path_type == P_DIR:
fce3c4
+            dest_dir = dest
fce3c4
+        else:
fce3c4
+            dest_dir = os.path.split(dest)[0]
fce3c4
         if not dest_dir:
fce3c4
             return dest
fce3c4
 
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From 75d759066e8ee0a469abc37f48f7bfcdfe8182b5 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 31 Aug 2018 12:58:01 +0100
fce3c4
Subject: [PATCH 4/4] [archive] replace FileCacheArchive._makedirs()
fce3c4
fce3c4
The Python os.makedirs() implementation is inadequate for sos's
fce3c4
needs: it will create leading directories given an intended path
fce3c4
destination, but it is not able to reflect cases where some of
fce3c4
the intermediate paths are actually symbolic links.
fce3c4
fce3c4
Replace the use of os.makedirs() with a method that walks over
fce3c4
the path, and either creates directories, or symbolic links (and
fce3c4
their directory target) to better correspond with the content of
fce3c4
the host file system.
fce3c4
fce3c4
This fixes a situation where two plugins can race in the archive,
fce3c4
leading to an exception in the plugin that runs last:
fce3c4
fce3c4
 - /foo/bar exists and is a link to /foo/bar.qux
fce3c4
 - One plugin attempts to collect /foo/bar
fce3c4
 - Another plugin attempts to collect a link /foo/qux -> /foo/bar/baz
fce3c4
fce3c4
If the 2nd plugin happens to run first it will create the path
fce3c4
"/foo/bar" as a _directory_ (via _makedirs()). Since the archive
fce3c4
now checks for matching object types when a path collision occurs,
fce3c4
the first plugin will arrive at add_dir(), note that "/foo/bar" is
fce3c4
present and is not a symbolic link, and will raise an exception.
fce3c4
fce3c4
Correct this by ensuring that whichever plugin executes first, the
fce3c4
correct link/directory path structure will be set up.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------
fce3c4
 1 file changed, 64 insertions(+), 8 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 903cc672..11afa7aa 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -159,6 +159,67 @@ class FileCacheArchive(Archive):
fce3c4
             name = name.lstrip(os.sep)
fce3c4
         return (os.path.join(self._archive_root, name))
fce3c4
 
fce3c4
+    def _make_leading_paths(self, src, mode=0o700):
fce3c4
+        """Create leading path components
fce3c4
+
fce3c4
+            The standard python `os.makedirs` is insufficient for our
fce3c4
+            needs: it will only create directories, and ignores the fact
fce3c4
+            that some path components may be symbolic links.
fce3c4
+        """
fce3c4
+        self.log_debug("Making leading paths for %s" % src)
fce3c4
+        root = self._archive_root
fce3c4
+
fce3c4
+        def in_archive(path):
fce3c4
+            """Test whether path ``path`` is inside the archive.
fce3c4
+            """
fce3c4
+            return path.startswith(os.path.join(root, ""))
fce3c4
+
fce3c4
+        if not src.startswith("/"):
fce3c4
+            # Sos archive path (sos_commands, sos_logs etc.)
fce3c4
+            src_dir = src
fce3c4
+        else:
fce3c4
+            # Host file path
fce3c4
+            src_dir = src if os.path.isdir(src) else os.path.split(src)[0]
fce3c4
+
fce3c4
+        # Build a list of path components in root-to-leaf order.
fce3c4
+        path = src_dir
fce3c4
+        path_comps = []
fce3c4
+        while path != '/' and path != '':
fce3c4
+            head, tail = os.path.split(path)
fce3c4
+            path_comps.append(tail)
fce3c4
+            path = head
fce3c4
+        path_comps.reverse()
fce3c4
+
fce3c4
+        abs_path = root
fce3c4
+        rel_path = ""
fce3c4
+
fce3c4
+        # Check and create components as needed
fce3c4
+        for comp in path_comps:
fce3c4
+            abs_path = os.path.join(abs_path, comp)
fce3c4
+
fce3c4
+            if not in_archive(abs_path):
fce3c4
+                continue
fce3c4
+
fce3c4
+            rel_path = os.path.join(rel_path, comp)
fce3c4
+            src_path = os.path.join("/", rel_path)
fce3c4
+
fce3c4
+            if not os.path.exists(abs_path):
fce3c4
+                self.log_debug("Making path %s" % abs_path)
fce3c4
+                if os.path.islink(src_path) and os.path.isdir(src_path):
fce3c4
+                    target = os.readlink(src_path)
fce3c4
+                    abs_target = os.path.join(root, target)
fce3c4
+
fce3c4
+                    # Recursively create leading components of target
fce3c4
+                    self._make_leading_paths(abs_target, mode=mode)
fce3c4
+
fce3c4
+                    self.log_debug("Making symlink '%s' -> '%s'" %
fce3c4
+                                   (abs_path, target))
fce3c4
+                    target = os.path.relpath(target)
fce3c4
+                    os.symlink(target, abs_path)
fce3c4
+                else:
fce3c4
+                    self.log_debug("Making directory %s" % abs_path)
fce3c4
+                    os.mkdir(abs_path, mode)
fce3c4
+
fce3c4
     def _check_path(self, src, path_type, dest=None, force=False):
fce3c4
         """Check a new destination path in the archive.
fce3c4
 
fce3c4
@@ -203,7 +264,8 @@ class FileCacheArchive(Archive):
fce3c4
             raise ValueError("path '%s' exists and is not a directory" %
fce3c4
                              dest_dir)
fce3c4
         elif not os.path.exists(dest_dir):
fce3c4
-            self._makedirs(dest_dir)
fce3c4
+            src_dir = src if path_type == P_DIR else os.path.split(src)[0]
fce3c4
+            self._make_leading_paths(src_dir)
fce3c4
 
fce3c4
         def is_special(mode):
fce3c4
             return any([
fce3c4
@@ -326,10 +388,7 @@ class FileCacheArchive(Archive):
fce3c4
 
fce3c4
     def add_dir(self, path):
fce3c4
         with self._path_lock:
fce3c4
-            dest = self._check_path(path, P_DIR)
fce3c4
-            if not dest:
fce3c4
-                return
fce3c4
-            self.makedirs(path)
fce3c4
+            self._check_path(path, P_DIR)
fce3c4
 
fce3c4
     def add_node(self, path, mode, device):
fce3c4
         dest = self._check_path(path, P_NODE)
fce3c4
@@ -347,9 +406,6 @@ class FileCacheArchive(Archive):
fce3c4
                 raise e
fce3c4
             shutil.copystat(path, dest)
fce3c4
 
fce3c4
-    def _makedirs(self, path, mode=0o700):
fce3c4
-        os.makedirs(path, mode)
fce3c4
-
fce3c4
     def name_max(self):
fce3c4
         if 'PC_NAME_MAX' in os.pathconf_names:
fce3c4
             pc_name_max = os.pathconf_names['PC_NAME_MAX']
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
From 5d6228b85e174dee8abcc4c206a1e9034242c6c6 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 7 Sep 2018 12:06:34 -0400
fce3c4
Subject: [PATCH 1/6] [sosreport] ensure ThreadPool exceptions are raised
fce3c4
fce3c4
The ThreadPoolExecutor does not raise exceptions to the parent
fce3c4
thread immediately: it stores them in-line in the pool's results
fce3c4
list, and raises them to the caller on acccess to that slot in
fce3c4
the results iterator.
fce3c4
fce3c4
Make sure that these exceptions are handled by iterating over all
fce3c4
results and asserting that they are non-None (in practice, this
fce3c4
code is never executed since the resulting raise will trap to an
fce3c4
exception handler, but it is less confusing than a bare 'pass').
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/sosreport.py | 8 ++++++--
fce3c4
 1 file changed, 6 insertions(+), 2 deletions(-)
fce3c4
fce3c4
diff --git a/sos/sosreport.py b/sos/sosreport.py
fce3c4
index 80633966..44be75a1 100644
fce3c4
--- a/sos/sosreport.py
fce3c4
+++ b/sos/sosreport.py
fce3c4
@@ -1065,9 +1065,13 @@ class SoSReport(object):
fce3c4
         try:
fce3c4
             self.plugpool = ThreadPoolExecutor(self.opts.threads)
fce3c4
             # Pass the plugpool its own private copy of self.pluglist
fce3c4
-            self.plugpool.map(self._collect_plugin, list(self.pluglist),
fce3c4
-                              chunksize=1)
fce3c4
+            results = self.plugpool.map(self._collect_plugin,
fce3c4
+                                        list(self.pluglist), chunksize=1)
fce3c4
             self.plugpool.shutdown(wait=True)
fce3c4
+            for res in results:
fce3c4
+                if not res:
fce3c4
+                    self.soslog.debug("Unexpected plugin task result: %s" %
fce3c4
+                                      res)
fce3c4
             self.ui_log.info("")
fce3c4
         except KeyboardInterrupt:
fce3c4
             # We may not be at a newline when the user issues Ctrl-C
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From 9aaba972bf6a42c33ea9bca80f07bfb880ba45a1 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 7 Sep 2018 12:15:10 -0400
fce3c4
Subject: [PATCH 2/6] [sosreport] trap directly to PDB in handle_exception()
fce3c4
fce3c4
Now that plugins are run in a threadpool, it is not possible to
fce3c4
defer the call to pdb.post_mortem() to the top-level exception
fce3c4
handler in the main thread: this is due to the fact that in a pool,
fce3c4
exceptions are caught and saved to be re-raised to thread calling
fce3c4
the pool when results are returned. When the saved exception is
fce3c4
raised to the top-level handler the execution context it relates
fce3c4
to is gone: the backtrace and stack frame have been torn down and
fce3c4
only very limited information is available from the exception
fce3c4
frame.
fce3c4
fce3c4
Instead, catch these exceptions _inside_ the thread pool context,
fce3c4
and directly trap to the Python debugger. This allows plugin code
fce3c4
to be debugged interactively with the full backtrace and with all
fce3c4
access to local variables and the execution stack. In addition,
fce3c4
this means that after the debugger has handled the exception it is
fce3c4
possible to return to the run and continue until report completion.
fce3c4
fce3c4
One side effect of this change is that the *-plugin-errors.txt
fce3c4
file containng the backtrace is now written into the archive
fce3c4
whether or not --debug is given.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/sosreport.py | 9 ++++++++-
fce3c4
 1 file changed, 8 insertions(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/sosreport.py b/sos/sosreport.py
fce3c4
index 44be75a1..77ae7161 100644
fce3c4
--- a/sos/sosreport.py
fce3c4
+++ b/sos/sosreport.py
fce3c4
@@ -30,6 +30,7 @@ from shutil import rmtree
fce3c4
 import tempfile
fce3c4
 import hashlib
fce3c4
 from concurrent.futures import ThreadPoolExecutor, TimeoutError
fce3c4
+import pdb
fce3c4
 
fce3c4
 from sos import _sos as _
fce3c4
 from sos import __version__
fce3c4
@@ -504,7 +505,13 @@ class SoSReport(object):
fce3c4
 
fce3c4
     def handle_exception(self, plugname=None, func=None):
fce3c4
         if self.raise_plugins or self.exit_process:
fce3c4
-            raise
fce3c4
+            # retrieve exception info for the current thread and stack.
fce3c4
+            (etype, val, tb) = sys.exc_info()
fce3c4
+            # we are NOT in interactive mode, print the exception...
fce3c4
+            traceback.print_exception(etype, val, tb, file=sys.stdout)
fce3c4
+            print_()
fce3c4
+            # ...then start the debugger in post-mortem mode.
fce3c4
+            pdb.post_mortem(tb)
fce3c4
         if plugname and func:
fce3c4
             self._log_plugin_exception(plugname, func)
fce3c4
 
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From 0ea62d1ea57f41c1b75ccb83e69fdda386a7d280 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 7 Sep 2018 13:00:52 -0400
fce3c4
Subject: [PATCH 3/6] [Plugin] fix exception raise in Plugin._copy_dir()
fce3c4
fce3c4
Use a naked 'raise' statement rather than raising the already caught
fce3c4
exception in _copy_dir(), so that the original stack and backtrace
fce3c4
are avaialable.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/plugins/__init__.py | 2 +-
fce3c4
 1 file changed, 1 insertion(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
fce3c4
index 252de4d0..ac2c0bc8 100644
fce3c4
--- a/sos/plugins/__init__.py
fce3c4
+++ b/sos/plugins/__init__.py
fce3c4
@@ -401,7 +401,7 @@ class Plugin(object):
fce3c4
                 msg = "Too many levels of symbolic links copying"
fce3c4
                 self._log_error("_copy_dir: %s '%s'" % (msg, srcpath))
fce3c4
                 return
fce3c4
-            raise e
fce3c4
+            raise
fce3c4
 
fce3c4
     def _get_dest_for_srcpath(self, srcpath):
fce3c4
         if self.use_sysroot():
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From d84c1cd6dedf51a8ed7b1a511585c0ac2db0f083 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Wed, 5 Sep 2018 12:46:16 +0100
fce3c4
Subject: [PATCH 4/6] [archive] fix leading path creation
fce3c4
fce3c4
Fix the creation of leading path components for both paths that
fce3c4
contain intermediate components that are symbolic links (with both
fce3c4
absolute and relative targets), and those that contain only
fce3c4
directory components.
fce3c4
fce3c4
Since symlinks may link to other files, and other symlinks, it is
fce3c4
necessary to handle these paths recursively and to include any
fce3c4
intermediate symlinked directories, or symlink targets in the set
fce3c4
of paths added to the archive.
fce3c4
fce3c4
Related: #1404
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 41 ++++++++++++++++++++++++++++++++++-------
fce3c4
 1 file changed, 34 insertions(+), 7 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 11afa7aa..c256a01f 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -165,9 +165,24 @@ class FileCacheArchive(Archive):
fce3c4
             The standard python `os.makedirs` is insufficient for our
fce3c4
             needs: it will only create directories, and ignores the fact
fce3c4
             that some path components may be symbolic links.
fce3c4
+
fce3c4
+            :param src: The source path in the host file system for which
fce3c4
+                        leading components should be created, or the path
fce3c4
+                        to an sos_* virtual directory inside the archive.
fce3c4
+
fce3c4
+                        Host paths must be absolute (initial '/'), and
fce3c4
+                        sos_* directory paths must be a path relative to
fce3c4
+                        the root of the archive.
fce3c4
+
fce3c4
+            :param mode: An optional mode to be used when creating path
fce3c4
+                         components.
fce3c4
+            :returns: A rewritten destination path in the case that one
fce3c4
+                      or more symbolic links in intermediate components
fce3c4
+                      of the path have altered the path destination.
fce3c4
         """
fce3c4
         self.log_debug("Making leading paths for %s" % src)
fce3c4
         root = self._archive_root
fce3c4
+        dest = src
fce3c4
 
fce3c4
         def in_archive(path):
fce3c4
             """Test whether path ``path`` is inside the archive.
fce3c4
@@ -191,34 +206,42 @@ class FileCacheArchive(Archive):
fce3c4
         path_comps.reverse()
fce3c4
 
fce3c4
         abs_path = root
fce3c4
-        rel_path = ""
fce3c4
+        src_path = "/"
fce3c4
 
fce3c4
         # Check and create components as needed
fce3c4
         for comp in path_comps:
fce3c4
             abs_path = os.path.join(abs_path, comp)
fce3c4
 
fce3c4
+            # Do not create components that are above the archive root.
fce3c4
             if not in_archive(abs_path):
fce3c4
                 continue
fce3c4
 
fce3c4
-            rel_path = os.path.join(rel_path, comp)
fce3c4
-            src_path = os.path.join("/", rel_path)
fce3c4
+            src_path = os.path.join(src_path, comp)
fce3c4
 
fce3c4
             if not os.path.exists(abs_path):
fce3c4
                 self.log_debug("Making path %s" % abs_path)
fce3c4
                 if os.path.islink(src_path) and os.path.isdir(src_path):
fce3c4
                     target = os.readlink(src_path)
fce3c4
-                    abs_target = os.path.join(root, target)
fce3c4
+
fce3c4
+                    # The directory containing the source in the host fs,
fce3c4
+                    # adjusted for the current level of path creation.
fce3c4
+                    target_dir = os.path.split(src_path)[0]
fce3c4
+
fce3c4
+                    # The source path of the target in the host fs to be
fce3c4
+                    # recursively copied.
fce3c4
+                    target_src = os.path.join(target_dir, target)
fce3c4
 
fce3c4
                     # Recursively create leading components of target
fce3c4
-                    self._make_leading_paths(abs_target, mode=mode)
fce3c4
+                    dest = self._make_leading_paths(target_src, mode=mode)
fce3c4
+                    dest = os.path.normpath(dest)
fce3c4
 
fce3c4
                     self.log_debug("Making symlink '%s' -> '%s'" %
fce3c4
                                    (abs_path, target))
fce3c4
-                    target = os.path.relpath(target)
fce3c4
                     os.symlink(target, abs_path)
fce3c4
                 else:
fce3c4
                     self.log_debug("Making directory %s" % abs_path)
fce3c4
                     os.mkdir(abs_path, mode)
fce3c4
+        return dest
fce3c4
 
fce3c4
     def _check_path(self, src, path_type, dest=None, force=False):
fce3c4
         """Check a new destination path in the archive.
fce3c4
@@ -259,13 +282,17 @@ class FileCacheArchive(Archive):
fce3c4
         if not dest_dir:
fce3c4
             return dest
fce3c4
 
fce3c4
+        # Preserve destination basename for rewritten dest_dir
fce3c4
+        dest_name = os.path.split(src)[1]
fce3c4
+
fce3c4
         # Check containing directory presence and path type
fce3c4
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
fce3c4
             raise ValueError("path '%s' exists and is not a directory" %
fce3c4
                              dest_dir)
fce3c4
         elif not os.path.exists(dest_dir):
fce3c4
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
fce3c4
-            self._make_leading_paths(src_dir)
fce3c4
+            src_dir = self._make_leading_paths(src_dir)
fce3c4
+            dest = self.dest_path(os.path.join(src_dir, dest_name))
fce3c4
 
fce3c4
         def is_special(mode):
fce3c4
             return any([
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From 322f4a517ae336cc1443f9a399a0d15d45ec48b9 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Fri, 7 Sep 2018 13:11:03 -0400
fce3c4
Subject: [PATCH 5/6] [archive] add link follow-up to
fce3c4
 FileCacheArchive.add_link()
fce3c4
fce3c4
Creating a link may trigger further actions in the archive: if the
fce3c4
link target is a regular file, we must copy that file into the
fce3c4
archive, and if the target is a symbolic link, then we must create
fce3c4
that link, and copy in the link target.
fce3c4
fce3c4
Handle this by calling add_file() or (recursively) add_link() in
fce3c4
order to create the missing pieces of the symlink chain.
fce3c4
fce3c4
These operations must take place outside of the path lock since
fce3c4
they do not modify the archive namespace and will call methods of
fce3c4
the Archive object that will attempt to re-acquire this lock.
fce3c4
fce3c4
Resolves: #1404
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 38 +++++++++++++++++++++++++++++++++++---
fce3c4
 1 file changed, 35 insertions(+), 3 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index c256a01f..6db398fc 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -403,6 +403,7 @@ class FileCacheArchive(Archive):
fce3c4
                            % (dest, self._archive_root))
fce3c4
 
fce3c4
     def add_link(self, source, link_name):
fce3c4
+        self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
fce3c4
         with self._path_lock:
fce3c4
             dest = self._check_path(link_name, P_LINK)
fce3c4
             if not dest:
fce3c4
@@ -410,10 +411,41 @@ class FileCacheArchive(Archive):
fce3c4
 
fce3c4
             if not os.path.lexists(dest):
fce3c4
                 os.symlink(source, dest)
fce3c4
-            self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
fce3c4
-                           % (dest, source, self._archive_root))
fce3c4
+                self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
fce3c4
+                               % (dest, source, self._archive_root))
fce3c4
+
fce3c4
+        # Follow-up must be outside the path lock: we recurse into
fce3c4
+        # other monitor methods that will attempt to reacquire it.
fce3c4
+
fce3c4
+        source_dir = os.path.dirname(link_name)
fce3c4
+        host_source = os.path.join(source_dir, source)
fce3c4
+        if not os.path.exists(self.dest_path(host_source)):
fce3c4
+            if os.path.islink(host_source):
fce3c4
+                link_dir = os.path.dirname(link_name)
fce3c4
+                link_name = os.path.normpath(os.path.join(link_dir, source))
fce3c4
+                dest_dir = os.path.dirname(link_name)
fce3c4
+                source = os.path.join(dest_dir, os.readlink(link_name))
fce3c4
+                source = os.path.relpath(source)
fce3c4
+                self.log_debug("Adding link %s -> %s for link follow up" %
fce3c4
+                               (link_name, source))
fce3c4
+                self.add_link(source, link_name)
fce3c4
+            elif os.path.isdir(host_source):
fce3c4
+                self.log_debug("Adding dir %s for link follow up" % source)
fce3c4
+                self.add_dir(host_source)
fce3c4
+            elif os.path.isfile(host_source):
fce3c4
+                self.log_debug("Adding file %s for link follow up" % source)
fce3c4
+                self.add_file(host_source)
fce3c4
+            else:
fce3c4
+                self.log_debug("No link follow up: source=%s link_name=%s" %
fce3c4
+                               (source, link_name))
fce3c4
 
fce3c4
-    def add_dir(self, path):
fce3c4
+
fce3c4
+    def add_dir(self, path, copy=False):
fce3c4
+        """Create a directory in the archive.
fce3c4
+
fce3c4
+            :param path: the path in the host file system to add
fce3c4
+        """
fce3c4
+        # Establish path structure
fce3c4
         with self._path_lock:
fce3c4
             self._check_path(path, P_DIR)
fce3c4
 
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From 6e79c4b4a4f32fa549708dbb8c8b9af73ab8ff61 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Mon, 10 Sep 2018 16:33:33 +0100
fce3c4
Subject: [PATCH 6/6] [archive] remove unused 'copy' arg from
fce3c4
 FileCacheArchive.add_dir()
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 3 +--
fce3c4
 1 file changed, 1 insertion(+), 2 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 6db398fc..4b30630b 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -439,8 +439,7 @@ class FileCacheArchive(Archive):
fce3c4
                 self.log_debug("No link follow up: source=%s link_name=%s" %
fce3c4
                                (source, link_name))
fce3c4
 
fce3c4
-
fce3c4
-    def add_dir(self, path, copy=False):
fce3c4
+    def add_dir(self, path):
fce3c4
         """Create a directory in the archive.
fce3c4
 
fce3c4
             :param path: the path in the host file system to add
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
From 919e8671a6ab9684d59525eb7f3607b3aab08ee1 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Tue, 11 Sep 2018 12:16:57 -0400
fce3c4
Subject: [PATCH] [archive] fix link rewriting logic in
fce3c4
 FileCacheArchive.add_link()
fce3c4
fce3c4
When processing link follow up for an original symbolic link, the
fce3c4
add_link() logic incorrectly used the _original_ host link name,
fce3c4
rather than the to-be-created name when calculating relative path
fce3c4
structures. If the prior link is at a greater or lesser level of
fce3c4
directory nesting this will lead to broken relative links in the
fce3c4
archive (one level too high or too low).
fce3c4
fce3c4
In some cases (systemd) this behaviour was masked due to the fact
fce3c4
that identically named links exist at multiple levels of the path
fce3c4
hierarchy.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 30 +++++++++++++++++++-----------
fce3c4
 1 file changed, 19 insertions(+), 11 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 528cfa576..7a7717de7 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -417,27 +417,35 @@ def add_link(self, source, link_name):
fce3c4
         # Follow-up must be outside the path lock: we recurse into
fce3c4
         # other monitor methods that will attempt to reacquire it.
fce3c4
 
fce3c4
+        self.log_debug("Link follow up: source=%s link_name=%s dest=%s" %
fce3c4
+                       (source, link_name, dest))
fce3c4
+
fce3c4
         source_dir = os.path.dirname(link_name)
fce3c4
-        host_source = os.path.join(source_dir, source)
fce3c4
-        if not os.path.exists(self.dest_path(host_source)):
fce3c4
-            if os.path.islink(host_source):
fce3c4
-                link_dir = os.path.dirname(link_name)
fce3c4
-                link_name = os.path.normpath(os.path.join(link_dir, source))
fce3c4
+        host_path_name = os.path.normpath(os.path.join(source_dir, source))
fce3c4
+        dest_path_name = self.dest_path(host_path_name)
fce3c4
+
fce3c4
+        if not os.path.exists(dest_path_name):
fce3c4
+            if os.path.islink(host_path_name):
fce3c4
+                # Normalised path for the new link_name
fce3c4
+                link_name = host_path_name
fce3c4
+                # Containing directory for the new link
fce3c4
                 dest_dir = os.path.dirname(link_name)
fce3c4
-                source = os.path.join(dest_dir, os.readlink(link_name))
fce3c4
-                source = os.path.relpath(source)
fce3c4
+                # Relative source path of the new link
fce3c4
+                source = os.path.join(dest_dir, os.readlink(host_path_name))
fce3c4
+                source = os.path.relpath(source, dest_dir)
fce3c4
                 self.log_debug("Adding link %s -> %s for link follow up" %
fce3c4
                                (link_name, source))
fce3c4
                 self.add_link(source, link_name)
fce3c4
-            elif os.path.isdir(host_source):
fce3c4
+            elif os.path.isdir(host_path_name):
fce3c4
                 self.log_debug("Adding dir %s for link follow up" % source)
fce3c4
-                self.add_dir(host_source)
fce3c4
-            elif os.path.isfile(host_source):
fce3c4
+                self.add_dir(host_path_name)
fce3c4
+            elif os.path.isfile(host_path_name):
fce3c4
                 self.log_debug("Adding file %s for link follow up" % source)
fce3c4
-                self.add_file(host_source)
fce3c4
+                self.add_file(host_path_name)
fce3c4
             else:
fce3c4
                 self.log_debug("No link follow up: source=%s link_name=%s" %
fce3c4
                                (source, link_name))
fce3c4
+        self.log_debug("leaving add_link()")
fce3c4
 
fce3c4
     def add_dir(self, path):
fce3c4
         """Create a directory in the archive.
fce3c4
From c065be9715dc845b6411a9a0b2d6171bbeb1c390 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Wed, 12 Sep 2018 12:02:33 +0100
fce3c4
Subject: [PATCH] [plugin] canonicalize link target path in
fce3c4
 Plugin._copy_symlink()
fce3c4
fce3c4
Since we may be dealing with paths that contain intermediate
fce3c4
symlinked directories, it is necessary to canonicalize the path
fce3c4
for the link target in order to eliminate additional levels of
fce3c4
symbolic links, and to calculate the correct relative path to
fce3c4
use within the archive.
fce3c4
fce3c4
Related: #1404
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/plugins/__init__.py | 5 ++++-
fce3c4
 1 file changed, 4 insertions(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
fce3c4
index ac2c0bc8c..7d011a02c 100644
fce3c4
--- a/sos/plugins/__init__.py
fce3c4
+++ b/sos/plugins/__init__.py
fce3c4
@@ -353,7 +353,10 @@ def _copy_symlink(self, srcpath):
fce3c4
         absdest = os.path.normpath(dest)
fce3c4
         # adjust the target used inside the report to always be relative
fce3c4
         if os.path.isabs(linkdest):
fce3c4
-            reldest = os.path.relpath(linkdest, os.path.dirname(srcpath))
fce3c4
+            # Canonicalize the link target path to avoid additional levels
fce3c4
+            # of symbolic links (that would affect the path nesting level).
fce3c4
+            realdir = os.path.realpath(os.path.dirname(srcpath))
fce3c4
+            reldest = os.path.relpath(linkdest, start=realdir)
fce3c4
             # trim leading /sysroot
fce3c4
             if self.use_sysroot():
fce3c4
                 reldest = reldest[len(os.sep + os.pardir):]
fce3c4
From 868966cd9dbb96ce3635d884e67e738b18658140 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Wed, 12 Sep 2018 16:11:07 +0100
fce3c4
Subject: [PATCH] [archive] canonicalise paths for link follow up
fce3c4
fce3c4
Ensure that the canonical path is used when processing link follow
fce3c4
up actions: the actual link path may contain one or more levels of
fce3c4
symbolic links, leading to broken links if the link target path is
fce3c4
assumed to be relative to the containing directory.
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 2 +-
fce3c4
 1 file changed, 1 insertion(+), 1 deletion(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 7a7717de7..483d66f4f 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -421,7 +421,7 @@ def add_link(self, source, link_name):
fce3c4
                        (source, link_name, dest))
fce3c4
 
fce3c4
         source_dir = os.path.dirname(link_name)
fce3c4
-        host_path_name = os.path.normpath(os.path.join(source_dir, source))
fce3c4
+        host_path_name = os.path.realpath(os.path.join(source_dir, source))
fce3c4
         dest_path_name = self.dest_path(host_path_name)
fce3c4
 
fce3c4
         if not os.path.exists(dest_path_name):
fce3c4
From 8e60e299cdfb0027d6b6ea845234ef54ae785186 Mon Sep 17 00:00:00 2001
fce3c4
From: "Bryn M. Reeves" <bmr@redhat.com>
fce3c4
Date: Thu, 13 Sep 2018 16:14:12 +0100
fce3c4
Subject: [PATCH 1/2] [archive, plugin] avoid recursing on symbolic link loops
fce3c4
fce3c4
It's possible that symlink loops exist in the host file system,
fce3c4
either 'simple' ('a'->'a'), or indirect ('a'->'b'->'a'). We need
fce3c4
to avoid recursing on these loops, to avoid exceeding the maximum
fce3c4
link or recursion depths, but we should still represent these
fce3c4
inodes as accurately as possible in the resulting archive.
fce3c4
fce3c4
Detect loops in both the Plugin link handling code and in the new
fce3c4
Archive link follow-up code by creating the first requested level
fce3c4
of loop, and then skipping the recursive follow-up. This means
fce3c4
that the looping links are still created in the archive so long
fce3c4
as they are referenced in a copy spec but that we do not attempt
fce3c4
to indefinitely recurse while collecting them.
fce3c4
fce3c4
Resolves: #1430
fce3c4
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py          | 27 +++++++++++++++++++++++++++
fce3c4
 sos/plugins/__init__.py | 20 +++++++++++++++-----
fce3c4
 2 files changed, 42 insertions(+), 5 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index 483d66f4..e5819432 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -424,6 +424,29 @@ class FileCacheArchive(Archive):
fce3c4
         host_path_name = os.path.realpath(os.path.join(source_dir, source))
fce3c4
         dest_path_name = self.dest_path(host_path_name)
fce3c4
 
fce3c4
+        def is_loop(link_name, source):
fce3c4
+            """Return ``True`` if the symbolic link ``link_name`` is part
fce3c4
+                of a file system loop, or ``False`` otherwise.
fce3c4
+            """
fce3c4
+            link_dir = os.path.dirname(link_name)
fce3c4
+            if not os.path.isabs(source):
fce3c4
+                source = os.path.realpath(os.path.join(link_dir, source))
fce3c4
+            link_name = os.path.realpath(link_name)
fce3c4
+
fce3c4
+            # Simple a -> a loop
fce3c4
+            if link_name == source:
fce3c4
+                return True
fce3c4
+
fce3c4
+            # Find indirect loops (a->b-a) by stat()ing the first step
fce3c4
+            # in the symlink chain
fce3c4
+            try:
fce3c4
+                os.stat(link_name)
fce3c4
+            except OSError as e:
fce3c4
+                if e.errno == 40:
fce3c4
+                    return True
fce3c4
+                raise
fce3c4
+            return False
fce3c4
+
fce3c4
         if not os.path.exists(dest_path_name):
fce3c4
             if os.path.islink(host_path_name):
fce3c4
                 # Normalised path for the new link_name
fce3c4
@@ -433,6 +456,10 @@ class FileCacheArchive(Archive):
fce3c4
                 # Relative source path of the new link
fce3c4
                 source = os.path.join(dest_dir, os.readlink(host_path_name))
fce3c4
                 source = os.path.relpath(source, dest_dir)
fce3c4
+                if is_loop(link_name, source):
fce3c4
+                    self.log_debug("Link '%s' - '%s' loops: skipping..." %
fce3c4
+                                   (link_name, source))
fce3c4
+                    return
fce3c4
                 self.log_debug("Adding link %s -> %s for link follow up" %
fce3c4
                                (link_name, source))
fce3c4
                 self.add_link(source, link_name)
fce3c4
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
fce3c4
index 7d011a02..7d2a8b2d 100644
fce3c4
--- a/sos/plugins/__init__.py
fce3c4
+++ b/sos/plugins/__init__.py
fce3c4
@@ -376,6 +376,21 @@ class Plugin(object):
fce3c4
             self._log_debug("link '%s' is a directory, skipping..." % linkdest)
fce3c4
             return
fce3c4
 
fce3c4
+        self.copied_files.append({'srcpath': srcpath,
fce3c4
+                                  'dstpath': dstpath,
fce3c4
+                                  'symlink': "yes",
fce3c4
+                                  'pointsto': linkdest})
fce3c4
+
fce3c4
+        # Check for indirect symlink loops by stat()ing the next step
fce3c4
+        # in the link chain.
fce3c4
+        try:
fce3c4
+            os.stat(absdest)
fce3c4
+        except OSError as e:
fce3c4
+            if e.errno == 40:
fce3c4
+                self._log_debug("link '%s' is part of a file system "
fce3c4
+                                "loop, skipping target..." % dstpath)
fce3c4
+                return
fce3c4
+
fce3c4
         # copy the symlink target translating relative targets
fce3c4
         # to absolute paths to pass to _do_copy_path.
fce3c4
         self._log_debug("normalized link target '%s' as '%s'"
fce3c4
@@ -388,11 +403,6 @@ class Plugin(object):
fce3c4
             self._log_debug("link '%s' points to itself, skipping target..."
fce3c4
                             % linkdest)
fce3c4
 
fce3c4
-        self.copied_files.append({'srcpath': srcpath,
fce3c4
-                                  'dstpath': dstpath,
fce3c4
-                                  'symlink': "yes",
fce3c4
-                                  'pointsto': linkdest})
fce3c4
-
fce3c4
     def _copy_dir(self, srcpath):
fce3c4
         try:
fce3c4
             for afile in os.listdir(srcpath):
fce3c4
-- 
fce3c4
2.17.1
fce3c4
fce3c4
fce3c4
From e108d7c03834446f8dac66ad69f5eade4f2c5fce Mon Sep 17 00:00:00 2001
fce3c4
From: Pavel Moravec <pmoravec@redhat.com>
fce3c4
Date: Fri, 14 Sep 2018 10:42:07 +0200
fce3c4
Subject: [PATCH 2/2] [archive] fix and simplify directory destination
fce3c4
 rewriting
fce3c4
fce3c4
Rewriting of the destination path by _make_leading_paths() only
fce3c4
applies when creating intermediate path components that are a
fce3c4
symbolic link. The final level of path creation must always be
fce3c4
a directory, and the destination is always the absolute path to
fce3c4
that directory.
fce3c4
fce3c4
Always return the directory path when creating a new directory,
fce3c4
and do not attempt to rewrite the destination at the top level
fce3c4
in FileCacheArchive._check_path() since all intermediate links
fce3c4
have already been handled inside _make_leading_paths() (i.e.
fce3c4
the returned/rewritten destination is always equal to the path
fce3c4
that was passed into the function).
fce3c4
fce3c4
Resolves: #1432
fce3c4
fce3c4
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
fce3c4
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
fce3c4
---
fce3c4
 sos/archive.py | 8 +++-----
fce3c4
 1 file changed, 3 insertions(+), 5 deletions(-)
fce3c4
fce3c4
diff --git a/sos/archive.py b/sos/archive.py
fce3c4
index e5819432..b02b75f7 100644
fce3c4
--- a/sos/archive.py
fce3c4
+++ b/sos/archive.py
fce3c4
@@ -241,6 +241,8 @@ class FileCacheArchive(Archive):
fce3c4
                 else:
fce3c4
                     self.log_debug("Making directory %s" % abs_path)
fce3c4
                     os.mkdir(abs_path, mode)
fce3c4
+                    dest = src_path
fce3c4
+
fce3c4
         return dest
fce3c4
 
fce3c4
     def _check_path(self, src, path_type, dest=None, force=False):
fce3c4
@@ -282,17 +284,13 @@ class FileCacheArchive(Archive):
fce3c4
         if not dest_dir:
fce3c4
             return dest
fce3c4
 
fce3c4
-        # Preserve destination basename for rewritten dest_dir
fce3c4
-        dest_name = os.path.split(src)[1]
fce3c4
-
fce3c4
         # Check containing directory presence and path type
fce3c4
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
fce3c4
             raise ValueError("path '%s' exists and is not a directory" %
fce3c4
                              dest_dir)
fce3c4
         elif not os.path.exists(dest_dir):
fce3c4
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
fce3c4
-            src_dir = self._make_leading_paths(src_dir)
fce3c4
-            dest = self.dest_path(os.path.join(src_dir, dest_name))
fce3c4
+            self._make_leading_paths(src_dir)
fce3c4
 
fce3c4
         def is_special(mode):
fce3c4
             return any([
fce3c4
-- 
fce3c4
2.17.1
fce3c4