Blame SOURCES/sos-bz1624043-symlinks-not-copied.patch

c33036
From 2e07f7c4778145d4366476ecc4383d491458b541 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 31 Aug 2018 12:50:24 +0100
c33036
Subject: [PATCH 1/4] [sosreport] properly raise exceptions when --debug is
c33036
 given
c33036
c33036
OSError and IOError exceptions were not raised to the terminal
c33036
when --debug is in effect since they were silently caught in the
c33036
generic exception handler.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/sosreport.py | 5 ++++-
c33036
 1 file changed, 4 insertions(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/sosreport.py b/sos/sosreport.py
c33036
index 00c3e811..80633966 100644
c33036
--- a/sos/sosreport.py
c33036
+++ b/sos/sosreport.py
c33036
@@ -995,7 +995,8 @@ class SoSReport(object):
c33036
                 print(" %s while setting up archive" % e.strerror)
c33036
                 print("")
c33036
             else:
c33036
-                raise e
c33036
+                print("Error setting up archive: %s" % e)
c33036
+                raise
c33036
         except Exception as e:
c33036
             self.ui_log.error("")
c33036
             self.ui_log.error(" Unexpected exception setting up archive:")
c33036
@@ -1467,6 +1468,8 @@ class SoSReport(object):
c33036
             return self.final_work()
c33036
 
c33036
         except (OSError):
c33036
+            if self.opts.debug:
c33036
+                raise
c33036
             self._cleanup()
c33036
         except (KeyboardInterrupt):
c33036
             self.ui_log.error("\nExiting on user cancel")
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From c496d2bec8cae175faf986567e73d16d401d8564 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 31 Aug 2018 12:52:38 +0100
c33036
Subject: [PATCH 2/4] [archive] simplify FileCacheArchive.makedirs()
c33036
c33036
Simplify the makedirs() method of FileCacheArchive and have it
c33036
bypass _check_path() and directly call os.makedirs(): a subsequent
c33036
patch will restrict the use of the method to setting up the sos_*
c33036
directories in the archive root.
c33036
c33036
File, directory and other object type add_* methods will use a
c33036
new method that correctly handles symbolic links in intermediate
c33036
path components.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 8 ++++----
c33036
 1 file changed, 4 insertions(+), 4 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 5d99170f..ffa54036 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -361,11 +361,11 @@ class FileCacheArchive(Archive):
c33036
         return self._archive_root
c33036
 
c33036
     def makedirs(self, path, mode=0o700):
c33036
-        dest = self._check_path(path, P_DIR)
c33036
-        if not dest:
c33036
-            return
c33036
+        """Create path, including leading components.
c33036
 
c33036
-        self._makedirs(self.dest_path(path))
c33036
+            Used by sos.sosreport to set up sos_* directories.
c33036
+        """
c33036
+        os.makedirs(os.path.join(self._archive_root, path), mode=mode)
c33036
         self.log_debug("created directory at '%s' in FileCacheArchive '%s'"
c33036
                        % (path, self._archive_root))
c33036
 
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From ca422720b74181b2433473428e29e90af59b3cf8 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 31 Aug 2018 12:55:51 +0100
c33036
Subject: [PATCH 3/4] [archive] normalise dest_dir in
c33036
 FileCacheArchive._check_path()
c33036
c33036
Always set a valid dest_dir in _check_path() and do not assume
c33036
that it can be obtained by splitting the path: in the case of
c33036
a directory it is the unmodified 'dest' value.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 5 ++++-
c33036
 1 file changed, 4 insertions(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index ffa54036..903cc672 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -191,7 +191,10 @@ class FileCacheArchive(Archive):
c33036
                       copied now or `None` otherwise
c33036
         """
c33036
         dest = dest or self.dest_path(src)
c33036
-        dest_dir = os.path.split(dest)[0]
c33036
+        if path_type == P_DIR:
c33036
+            dest_dir = dest
c33036
+        else:
c33036
+            dest_dir = os.path.split(dest)[0]
c33036
         if not dest_dir:
c33036
             return dest
c33036
 
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From 75d759066e8ee0a469abc37f48f7bfcdfe8182b5 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 31 Aug 2018 12:58:01 +0100
c33036
Subject: [PATCH 4/4] [archive] replace FileCacheArchive._makedirs()
c33036
c33036
The Python os.makedirs() implementation is inadequate for sos's
c33036
needs: it will create leading directories given an intended path
c33036
destination, but it is not able to reflect cases where some of
c33036
the intermediate paths are actually symbolic links.
c33036
c33036
Replace the use of os.makedirs() with a method that walks over
c33036
the path, and either creates directories, or symbolic links (and
c33036
their directory target) to better correspond with the content of
c33036
the host file system.
c33036
c33036
This fixes a situation where two plugins can race in the archive,
c33036
leading to an exception in the plugin that runs last:
c33036
c33036
 - /foo/bar exists and is a link to /foo/bar.qux
c33036
 - One plugin attempts to collect /foo/bar
c33036
 - Another plugin attempts to collect a link /foo/qux -> /foo/bar/baz
c33036
c33036
If the 2nd plugin happens to run first it will create the path
c33036
"/foo/bar" as a _directory_ (via _makedirs()). Since the archive
c33036
now checks for matching object types when a path collision occurs,
c33036
the first plugin will arrive at add_dir(), note that "/foo/bar" is
c33036
present and is not a symbolic link, and will raise an exception.
c33036
c33036
Correct this by ensuring that whichever plugin executes first, the
c33036
correct link/directory path structure will be set up.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------
c33036
 1 file changed, 64 insertions(+), 8 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 903cc672..11afa7aa 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -159,6 +159,67 @@ class FileCacheArchive(Archive):
c33036
             name = name.lstrip(os.sep)
c33036
         return (os.path.join(self._archive_root, name))
c33036
 
c33036
+    def _make_leading_paths(self, src, mode=0o700):
c33036
+        """Create leading path components
c33036
+
c33036
+            The standard python `os.makedirs` is insufficient for our
c33036
+            needs: it will only create directories, and ignores the fact
c33036
+            that some path components may be symbolic links.
c33036
+        """
c33036
+        self.log_debug("Making leading paths for %s" % src)
c33036
+        root = self._archive_root
c33036
+
c33036
+        def in_archive(path):
c33036
+            """Test whether path ``path`` is inside the archive.
c33036
+            """
c33036
+            return path.startswith(os.path.join(root, ""))
c33036
+
c33036
+        if not src.startswith("/"):
c33036
+            # Sos archive path (sos_commands, sos_logs etc.)
c33036
+            src_dir = src
c33036
+        else:
c33036
+            # Host file path
c33036
+            src_dir = src if os.path.isdir(src) else os.path.split(src)[0]
c33036
+
c33036
+        # Build a list of path components in root-to-leaf order.
c33036
+        path = src_dir
c33036
+        path_comps = []
c33036
+        while path != '/' and path != '':
c33036
+            head, tail = os.path.split(path)
c33036
+            path_comps.append(tail)
c33036
+            path = head
c33036
+        path_comps.reverse()
c33036
+
c33036
+        abs_path = root
c33036
+        rel_path = ""
c33036
+
c33036
+        # Check and create components as needed
c33036
+        for comp in path_comps:
c33036
+            abs_path = os.path.join(abs_path, comp)
c33036
+
c33036
+            if not in_archive(abs_path):
c33036
+                continue
c33036
+
c33036
+            rel_path = os.path.join(rel_path, comp)
c33036
+            src_path = os.path.join("/", rel_path)
c33036
+
c33036
+            if not os.path.exists(abs_path):
c33036
+                self.log_debug("Making path %s" % abs_path)
c33036
+                if os.path.islink(src_path) and os.path.isdir(src_path):
c33036
+                    target = os.readlink(src_path)
c33036
+                    abs_target = os.path.join(root, target)
c33036
+
c33036
+                    # Recursively create leading components of target
c33036
+                    self._make_leading_paths(abs_target, mode=mode)
c33036
+
c33036
+                    self.log_debug("Making symlink '%s' -> '%s'" %
c33036
+                                   (abs_path, target))
c33036
+                    target = os.path.relpath(target)
c33036
+                    os.symlink(target, abs_path)
c33036
+                else:
c33036
+                    self.log_debug("Making directory %s" % abs_path)
c33036
+                    os.mkdir(abs_path, mode)
c33036
+
c33036
     def _check_path(self, src, path_type, dest=None, force=False):
c33036
         """Check a new destination path in the archive.
c33036
 
c33036
@@ -203,7 +264,8 @@ class FileCacheArchive(Archive):
c33036
             raise ValueError("path '%s' exists and is not a directory" %
c33036
                              dest_dir)
c33036
         elif not os.path.exists(dest_dir):
c33036
-            self._makedirs(dest_dir)
c33036
+            src_dir = src if path_type == P_DIR else os.path.split(src)[0]
c33036
+            self._make_leading_paths(src_dir)
c33036
 
c33036
         def is_special(mode):
c33036
             return any([
c33036
@@ -326,10 +388,7 @@ class FileCacheArchive(Archive):
c33036
 
c33036
     def add_dir(self, path):
c33036
         with self._path_lock:
c33036
-            dest = self._check_path(path, P_DIR)
c33036
-            if not dest:
c33036
-                return
c33036
-            self.makedirs(path)
c33036
+            self._check_path(path, P_DIR)
c33036
 
c33036
     def add_node(self, path, mode, device):
c33036
         dest = self._check_path(path, P_NODE)
c33036
@@ -347,9 +406,6 @@ class FileCacheArchive(Archive):
c33036
                 raise e
c33036
             shutil.copystat(path, dest)
c33036
 
c33036
-    def _makedirs(self, path, mode=0o700):
c33036
-        os.makedirs(path, mode)
c33036
-
c33036
     def name_max(self):
c33036
         if 'PC_NAME_MAX' in os.pathconf_names:
c33036
             pc_name_max = os.pathconf_names['PC_NAME_MAX']
c33036
-- 
c33036
2.17.1
c33036
c33036
From 5d6228b85e174dee8abcc4c206a1e9034242c6c6 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 7 Sep 2018 12:06:34 -0400
c33036
Subject: [PATCH 1/6] [sosreport] ensure ThreadPool exceptions are raised
c33036
c33036
The ThreadPoolExecutor does not raise exceptions to the parent
c33036
thread immediately: it stores them in-line in the pool's results
c33036
list, and raises them to the caller on acccess to that slot in
c33036
the results iterator.
c33036
c33036
Make sure that these exceptions are handled by iterating over all
c33036
results and asserting that they are non-None (in practice, this
c33036
code is never executed since the resulting raise will trap to an
c33036
exception handler, but it is less confusing than a bare 'pass').
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/sosreport.py | 8 ++++++--
c33036
 1 file changed, 6 insertions(+), 2 deletions(-)
c33036
c33036
diff --git a/sos/sosreport.py b/sos/sosreport.py
c33036
index 80633966..44be75a1 100644
c33036
--- a/sos/sosreport.py
c33036
+++ b/sos/sosreport.py
c33036
@@ -1065,9 +1065,13 @@ class SoSReport(object):
c33036
         try:
c33036
             self.plugpool = ThreadPoolExecutor(self.opts.threads)
c33036
             # Pass the plugpool its own private copy of self.pluglist
c33036
-            self.plugpool.map(self._collect_plugin, list(self.pluglist),
c33036
-                              chunksize=1)
c33036
+            results = self.plugpool.map(self._collect_plugin,
c33036
+                                        list(self.pluglist), chunksize=1)
c33036
             self.plugpool.shutdown(wait=True)
c33036
+            for res in results:
c33036
+                if not res:
c33036
+                    self.soslog.debug("Unexpected plugin task result: %s" %
c33036
+                                      res)
c33036
             self.ui_log.info("")
c33036
         except KeyboardInterrupt:
c33036
             # We may not be at a newline when the user issues Ctrl-C
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From 9aaba972bf6a42c33ea9bca80f07bfb880ba45a1 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 7 Sep 2018 12:15:10 -0400
c33036
Subject: [PATCH 2/6] [sosreport] trap directly to PDB in handle_exception()
c33036
c33036
Now that plugins are run in a threadpool, it is not possible to
c33036
defer the call to pdb.post_mortem() to the top-level exception
c33036
handler in the main thread: this is due to the fact that in a pool,
c33036
exceptions are caught and saved to be re-raised to thread calling
c33036
the pool when results are returned. When the saved exception is
c33036
raised to the top-level handler the execution context it relates
c33036
to is gone: the backtrace and stack frame have been torn down and
c33036
only very limited information is available from the exception
c33036
frame.
c33036
c33036
Instead, catch these exceptions _inside_ the thread pool context,
c33036
and directly trap to the Python debugger. This allows plugin code
c33036
to be debugged interactively with the full backtrace and with all
c33036
access to local variables and the execution stack. In addition,
c33036
this means that after the debugger has handled the exception it is
c33036
possible to return to the run and continue until report completion.
c33036
c33036
One side effect of this change is that the *-plugin-errors.txt
c33036
file containng the backtrace is now written into the archive
c33036
whether or not --debug is given.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/sosreport.py | 9 ++++++++-
c33036
 1 file changed, 8 insertions(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/sosreport.py b/sos/sosreport.py
c33036
index 44be75a1..77ae7161 100644
c33036
--- a/sos/sosreport.py
c33036
+++ b/sos/sosreport.py
c33036
@@ -30,6 +30,7 @@ from shutil import rmtree
c33036
 import tempfile
c33036
 import hashlib
c33036
 from concurrent.futures import ThreadPoolExecutor, TimeoutError
c33036
+import pdb
c33036
 
c33036
 from sos import _sos as _
c33036
 from sos import __version__
c33036
@@ -504,7 +505,13 @@ class SoSReport(object):
c33036
 
c33036
     def handle_exception(self, plugname=None, func=None):
c33036
         if self.raise_plugins or self.exit_process:
c33036
-            raise
c33036
+            # retrieve exception info for the current thread and stack.
c33036
+            (etype, val, tb) = sys.exc_info()
c33036
+            # we are NOT in interactive mode, print the exception...
c33036
+            traceback.print_exception(etype, val, tb, file=sys.stdout)
c33036
+            print_()
c33036
+            # ...then start the debugger in post-mortem mode.
c33036
+            pdb.post_mortem(tb)
c33036
         if plugname and func:
c33036
             self._log_plugin_exception(plugname, func)
c33036
 
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From 0ea62d1ea57f41c1b75ccb83e69fdda386a7d280 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 7 Sep 2018 13:00:52 -0400
c33036
Subject: [PATCH 3/6] [Plugin] fix exception raise in Plugin._copy_dir()
c33036
c33036
Use a naked 'raise' statement rather than raising the already caught
c33036
exception in _copy_dir(), so that the original stack and backtrace
c33036
are avaialable.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/plugins/__init__.py | 2 +-
c33036
 1 file changed, 1 insertion(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
c33036
index 252de4d0..ac2c0bc8 100644
c33036
--- a/sos/plugins/__init__.py
c33036
+++ b/sos/plugins/__init__.py
c33036
@@ -401,7 +401,7 @@ class Plugin(object):
c33036
                 msg = "Too many levels of symbolic links copying"
c33036
                 self._log_error("_copy_dir: %s '%s'" % (msg, srcpath))
c33036
                 return
c33036
-            raise e
c33036
+            raise
c33036
 
c33036
     def _get_dest_for_srcpath(self, srcpath):
c33036
         if self.use_sysroot():
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From d84c1cd6dedf51a8ed7b1a511585c0ac2db0f083 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Wed, 5 Sep 2018 12:46:16 +0100
c33036
Subject: [PATCH 4/6] [archive] fix leading path creation
c33036
c33036
Fix the creation of leading path components for both paths that
c33036
contain intermediate components that are symbolic links (with both
c33036
absolute and relative targets), and those that contain only
c33036
directory components.
c33036
c33036
Since symlinks may link to other files, and other symlinks, it is
c33036
necessary to handle these paths recursively and to include any
c33036
intermediate symlinked directories, or symlink targets in the set
c33036
of paths added to the archive.
c33036
c33036
Related: #1404
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 41 ++++++++++++++++++++++++++++++++++-------
c33036
 1 file changed, 34 insertions(+), 7 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 11afa7aa..c256a01f 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -165,9 +165,24 @@ class FileCacheArchive(Archive):
c33036
             The standard python `os.makedirs` is insufficient for our
c33036
             needs: it will only create directories, and ignores the fact
c33036
             that some path components may be symbolic links.
c33036
+
c33036
+            :param src: The source path in the host file system for which
c33036
+                        leading components should be created, or the path
c33036
+                        to an sos_* virtual directory inside the archive.
c33036
+
c33036
+                        Host paths must be absolute (initial '/'), and
c33036
+                        sos_* directory paths must be a path relative to
c33036
+                        the root of the archive.
c33036
+
c33036
+            :param mode: An optional mode to be used when creating path
c33036
+                         components.
c33036
+            :returns: A rewritten destination path in the case that one
c33036
+                      or more symbolic links in intermediate components
c33036
+                      of the path have altered the path destination.
c33036
         """
c33036
         self.log_debug("Making leading paths for %s" % src)
c33036
         root = self._archive_root
c33036
+        dest = src
c33036
 
c33036
         def in_archive(path):
c33036
             """Test whether path ``path`` is inside the archive.
c33036
@@ -191,34 +206,42 @@ class FileCacheArchive(Archive):
c33036
         path_comps.reverse()
c33036
 
c33036
         abs_path = root
c33036
-        rel_path = ""
c33036
+        src_path = "/"
c33036
 
c33036
         # Check and create components as needed
c33036
         for comp in path_comps:
c33036
             abs_path = os.path.join(abs_path, comp)
c33036
 
c33036
+            # Do not create components that are above the archive root.
c33036
             if not in_archive(abs_path):
c33036
                 continue
c33036
 
c33036
-            rel_path = os.path.join(rel_path, comp)
c33036
-            src_path = os.path.join("/", rel_path)
c33036
+            src_path = os.path.join(src_path, comp)
c33036
 
c33036
             if not os.path.exists(abs_path):
c33036
                 self.log_debug("Making path %s" % abs_path)
c33036
                 if os.path.islink(src_path) and os.path.isdir(src_path):
c33036
                     target = os.readlink(src_path)
c33036
-                    abs_target = os.path.join(root, target)
c33036
+
c33036
+                    # The directory containing the source in the host fs,
c33036
+                    # adjusted for the current level of path creation.
c33036
+                    target_dir = os.path.split(src_path)[0]
c33036
+
c33036
+                    # The source path of the target in the host fs to be
c33036
+                    # recursively copied.
c33036
+                    target_src = os.path.join(target_dir, target)
c33036
 
c33036
                     # Recursively create leading components of target
c33036
-                    self._make_leading_paths(abs_target, mode=mode)
c33036
+                    dest = self._make_leading_paths(target_src, mode=mode)
c33036
+                    dest = os.path.normpath(dest)
c33036
 
c33036
                     self.log_debug("Making symlink '%s' -> '%s'" %
c33036
                                    (abs_path, target))
c33036
-                    target = os.path.relpath(target)
c33036
                     os.symlink(target, abs_path)
c33036
                 else:
c33036
                     self.log_debug("Making directory %s" % abs_path)
c33036
                     os.mkdir(abs_path, mode)
c33036
+        return dest
c33036
 
c33036
     def _check_path(self, src, path_type, dest=None, force=False):
c33036
         """Check a new destination path in the archive.
c33036
@@ -259,13 +282,17 @@ class FileCacheArchive(Archive):
c33036
         if not dest_dir:
c33036
             return dest
c33036
 
c33036
+        # Preserve destination basename for rewritten dest_dir
c33036
+        dest_name = os.path.split(src)[1]
c33036
+
c33036
         # Check containing directory presence and path type
c33036
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
c33036
             raise ValueError("path '%s' exists and is not a directory" %
c33036
                              dest_dir)
c33036
         elif not os.path.exists(dest_dir):
c33036
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
c33036
-            self._make_leading_paths(src_dir)
c33036
+            src_dir = self._make_leading_paths(src_dir)
c33036
+            dest = self.dest_path(os.path.join(src_dir, dest_name))
c33036
 
c33036
         def is_special(mode):
c33036
             return any([
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From 322f4a517ae336cc1443f9a399a0d15d45ec48b9 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Fri, 7 Sep 2018 13:11:03 -0400
c33036
Subject: [PATCH 5/6] [archive] add link follow-up to
c33036
 FileCacheArchive.add_link()
c33036
c33036
Creating a link may trigger further actions in the archive: if the
c33036
link target is a regular file, we must copy that file into the
c33036
archive, and if the target is a symbolic link, then we must create
c33036
that link, and copy in the link target.
c33036
c33036
Handle this by calling add_file() or (recursively) add_link() in
c33036
order to create the missing pieces of the symlink chain.
c33036
c33036
These operations must take place outside of the path lock since
c33036
they do not modify the archive namespace and will call methods of
c33036
the Archive object that will attempt to re-acquire this lock.
c33036
c33036
Resolves: #1404
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 38 +++++++++++++++++++++++++++++++++++---
c33036
 1 file changed, 35 insertions(+), 3 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index c256a01f..6db398fc 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -403,6 +403,7 @@ class FileCacheArchive(Archive):
c33036
                            % (dest, self._archive_root))
c33036
 
c33036
     def add_link(self, source, link_name):
c33036
+        self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
c33036
         with self._path_lock:
c33036
             dest = self._check_path(link_name, P_LINK)
c33036
             if not dest:
c33036
@@ -410,10 +411,41 @@ class FileCacheArchive(Archive):
c33036
 
c33036
             if not os.path.lexists(dest):
c33036
                 os.symlink(source, dest)
c33036
-            self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
c33036
-                           % (dest, source, self._archive_root))
c33036
+                self.log_debug("added symlink at '%s' to '%s' in archive '%s'"
c33036
+                               % (dest, source, self._archive_root))
c33036
+
c33036
+        # Follow-up must be outside the path lock: we recurse into
c33036
+        # other monitor methods that will attempt to reacquire it.
c33036
+
c33036
+        source_dir = os.path.dirname(link_name)
c33036
+        host_source = os.path.join(source_dir, source)
c33036
+        if not os.path.exists(self.dest_path(host_source)):
c33036
+            if os.path.islink(host_source):
c33036
+                link_dir = os.path.dirname(link_name)
c33036
+                link_name = os.path.normpath(os.path.join(link_dir, source))
c33036
+                dest_dir = os.path.dirname(link_name)
c33036
+                source = os.path.join(dest_dir, os.readlink(link_name))
c33036
+                source = os.path.relpath(source)
c33036
+                self.log_debug("Adding link %s -> %s for link follow up" %
c33036
+                               (link_name, source))
c33036
+                self.add_link(source, link_name)
c33036
+            elif os.path.isdir(host_source):
c33036
+                self.log_debug("Adding dir %s for link follow up" % source)
c33036
+                self.add_dir(host_source)
c33036
+            elif os.path.isfile(host_source):
c33036
+                self.log_debug("Adding file %s for link follow up" % source)
c33036
+                self.add_file(host_source)
c33036
+            else:
c33036
+                self.log_debug("No link follow up: source=%s link_name=%s" %
c33036
+                               (source, link_name))
c33036
 
c33036
-    def add_dir(self, path):
c33036
+
c33036
+    def add_dir(self, path, copy=False):
c33036
+        """Create a directory in the archive.
c33036
+
c33036
+            :param path: the path in the host file system to add
c33036
+        """
c33036
+        # Establish path structure
c33036
         with self._path_lock:
c33036
             self._check_path(path, P_DIR)
c33036
 
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From 6e79c4b4a4f32fa549708dbb8c8b9af73ab8ff61 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Mon, 10 Sep 2018 16:33:33 +0100
c33036
Subject: [PATCH 6/6] [archive] remove unused 'copy' arg from
c33036
 FileCacheArchive.add_dir()
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 3 +--
c33036
 1 file changed, 1 insertion(+), 2 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 6db398fc..4b30630b 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -439,8 +439,7 @@ class FileCacheArchive(Archive):
c33036
                 self.log_debug("No link follow up: source=%s link_name=%s" %
c33036
                                (source, link_name))
c33036
 
c33036
-
c33036
-    def add_dir(self, path, copy=False):
c33036
+    def add_dir(self, path):
c33036
         """Create a directory in the archive.
c33036
 
c33036
             :param path: the path in the host file system to add
c33036
-- 
c33036
2.17.1
c33036
c33036
From 919e8671a6ab9684d59525eb7f3607b3aab08ee1 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Tue, 11 Sep 2018 12:16:57 -0400
c33036
Subject: [PATCH] [archive] fix link rewriting logic in
c33036
 FileCacheArchive.add_link()
c33036
c33036
When processing link follow up for an original symbolic link, the
c33036
add_link() logic incorrectly used the _original_ host link name,
c33036
rather than the to-be-created name when calculating relative path
c33036
structures. If the prior link is at a greater or lesser level of
c33036
directory nesting this will lead to broken relative links in the
c33036
archive (one level too high or too low).
c33036
c33036
In some cases (systemd) this behaviour was masked due to the fact
c33036
that identically named links exist at multiple levels of the path
c33036
hierarchy.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 30 +++++++++++++++++++-----------
c33036
 1 file changed, 19 insertions(+), 11 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 528cfa576..7a7717de7 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -417,27 +417,35 @@ def add_link(self, source, link_name):
c33036
         # Follow-up must be outside the path lock: we recurse into
c33036
         # other monitor methods that will attempt to reacquire it.
c33036
 
c33036
+        self.log_debug("Link follow up: source=%s link_name=%s dest=%s" %
c33036
+                       (source, link_name, dest))
c33036
+
c33036
         source_dir = os.path.dirname(link_name)
c33036
-        host_source = os.path.join(source_dir, source)
c33036
-        if not os.path.exists(self.dest_path(host_source)):
c33036
-            if os.path.islink(host_source):
c33036
-                link_dir = os.path.dirname(link_name)
c33036
-                link_name = os.path.normpath(os.path.join(link_dir, source))
c33036
+        host_path_name = os.path.normpath(os.path.join(source_dir, source))
c33036
+        dest_path_name = self.dest_path(host_path_name)
c33036
+
c33036
+        if not os.path.exists(dest_path_name):
c33036
+            if os.path.islink(host_path_name):
c33036
+                # Normalised path for the new link_name
c33036
+                link_name = host_path_name
c33036
+                # Containing directory for the new link
c33036
                 dest_dir = os.path.dirname(link_name)
c33036
-                source = os.path.join(dest_dir, os.readlink(link_name))
c33036
-                source = os.path.relpath(source)
c33036
+                # Relative source path of the new link
c33036
+                source = os.path.join(dest_dir, os.readlink(host_path_name))
c33036
+                source = os.path.relpath(source, dest_dir)
c33036
                 self.log_debug("Adding link %s -> %s for link follow up" %
c33036
                                (link_name, source))
c33036
                 self.add_link(source, link_name)
c33036
-            elif os.path.isdir(host_source):
c33036
+            elif os.path.isdir(host_path_name):
c33036
                 self.log_debug("Adding dir %s for link follow up" % source)
c33036
-                self.add_dir(host_source)
c33036
-            elif os.path.isfile(host_source):
c33036
+                self.add_dir(host_path_name)
c33036
+            elif os.path.isfile(host_path_name):
c33036
                 self.log_debug("Adding file %s for link follow up" % source)
c33036
-                self.add_file(host_source)
c33036
+                self.add_file(host_path_name)
c33036
             else:
c33036
                 self.log_debug("No link follow up: source=%s link_name=%s" %
c33036
                                (source, link_name))
c33036
+        self.log_debug("leaving add_link()")
c33036
 
c33036
     def add_dir(self, path):
c33036
         """Create a directory in the archive.
c33036
From c065be9715dc845b6411a9a0b2d6171bbeb1c390 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Wed, 12 Sep 2018 12:02:33 +0100
c33036
Subject: [PATCH] [plugin] canonicalize link target path in
c33036
 Plugin._copy_symlink()
c33036
c33036
Since we may be dealing with paths that contain intermediate
c33036
symlinked directories, it is necessary to canonicalize the path
c33036
for the link target in order to eliminate additional levels of
c33036
symbolic links, and to calculate the correct relative path to
c33036
use within the archive.
c33036
c33036
Related: #1404
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/plugins/__init__.py | 5 ++++-
c33036
 1 file changed, 4 insertions(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
c33036
index ac2c0bc8c..7d011a02c 100644
c33036
--- a/sos/plugins/__init__.py
c33036
+++ b/sos/plugins/__init__.py
c33036
@@ -353,7 +353,10 @@ def _copy_symlink(self, srcpath):
c33036
         absdest = os.path.normpath(dest)
c33036
         # adjust the target used inside the report to always be relative
c33036
         if os.path.isabs(linkdest):
c33036
-            reldest = os.path.relpath(linkdest, os.path.dirname(srcpath))
c33036
+            # Canonicalize the link target path to avoid additional levels
c33036
+            # of symbolic links (that would affect the path nesting level).
c33036
+            realdir = os.path.realpath(os.path.dirname(srcpath))
c33036
+            reldest = os.path.relpath(linkdest, start=realdir)
c33036
             # trim leading /sysroot
c33036
             if self.use_sysroot():
c33036
                 reldest = reldest[len(os.sep + os.pardir):]
c33036
From 868966cd9dbb96ce3635d884e67e738b18658140 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Wed, 12 Sep 2018 16:11:07 +0100
c33036
Subject: [PATCH] [archive] canonicalise paths for link follow up
c33036
c33036
Ensure that the canonical path is used when processing link follow
c33036
up actions: the actual link path may contain one or more levels of
c33036
symbolic links, leading to broken links if the link target path is
c33036
assumed to be relative to the containing directory.
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 2 +-
c33036
 1 file changed, 1 insertion(+), 1 deletion(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 7a7717de7..483d66f4f 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -421,7 +421,7 @@ def add_link(self, source, link_name):
c33036
                        (source, link_name, dest))
c33036
 
c33036
         source_dir = os.path.dirname(link_name)
c33036
-        host_path_name = os.path.normpath(os.path.join(source_dir, source))
c33036
+        host_path_name = os.path.realpath(os.path.join(source_dir, source))
c33036
         dest_path_name = self.dest_path(host_path_name)
c33036
 
c33036
         if not os.path.exists(dest_path_name):
c33036
From 8e60e299cdfb0027d6b6ea845234ef54ae785186 Mon Sep 17 00:00:00 2001
c33036
From: "Bryn M. Reeves" <bmr@redhat.com>
c33036
Date: Thu, 13 Sep 2018 16:14:12 +0100
c33036
Subject: [PATCH 1/2] [archive, plugin] avoid recursing on symbolic link loops
c33036
c33036
It's possible that symlink loops exist in the host file system,
c33036
either 'simple' ('a'->'a'), or indirect ('a'->'b'->'a'). We need
c33036
to avoid recursing on these loops, to avoid exceeding the maximum
c33036
link or recursion depths, but we should still represent these
c33036
inodes as accurately as possible in the resulting archive.
c33036
c33036
Detect loops in both the Plugin link handling code and in the new
c33036
Archive link follow-up code by creating the first requested level
c33036
of loop, and then skipping the recursive follow-up. This means
c33036
that the looping links are still created in the archive so long
c33036
as they are referenced in a copy spec but that we do not attempt
c33036
to indefinitely recurse while collecting them.
c33036
c33036
Resolves: #1430
c33036
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py          | 27 +++++++++++++++++++++++++++
c33036
 sos/plugins/__init__.py | 20 +++++++++++++++-----
c33036
 2 files changed, 42 insertions(+), 5 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index 483d66f4..e5819432 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -424,6 +424,29 @@ class FileCacheArchive(Archive):
c33036
         host_path_name = os.path.realpath(os.path.join(source_dir, source))
c33036
         dest_path_name = self.dest_path(host_path_name)
c33036
 
c33036
+        def is_loop(link_name, source):
c33036
+            """Return ``True`` if the symbolic link ``link_name`` is part
c33036
+                of a file system loop, or ``False`` otherwise.
c33036
+            """
c33036
+            link_dir = os.path.dirname(link_name)
c33036
+            if not os.path.isabs(source):
c33036
+                source = os.path.realpath(os.path.join(link_dir, source))
c33036
+            link_name = os.path.realpath(link_name)
c33036
+
c33036
+            # Simple a -> a loop
c33036
+            if link_name == source:
c33036
+                return True
c33036
+
c33036
+            # Find indirect loops (a->b-a) by stat()ing the first step
c33036
+            # in the symlink chain
c33036
+            try:
c33036
+                os.stat(link_name)
c33036
+            except OSError as e:
c33036
+                if e.errno == 40:
c33036
+                    return True
c33036
+                raise
c33036
+            return False
c33036
+
c33036
         if not os.path.exists(dest_path_name):
c33036
             if os.path.islink(host_path_name):
c33036
                 # Normalised path for the new link_name
c33036
@@ -433,6 +456,10 @@ class FileCacheArchive(Archive):
c33036
                 # Relative source path of the new link
c33036
                 source = os.path.join(dest_dir, os.readlink(host_path_name))
c33036
                 source = os.path.relpath(source, dest_dir)
c33036
+                if is_loop(link_name, source):
c33036
+                    self.log_debug("Link '%s' - '%s' loops: skipping..." %
c33036
+                                   (link_name, source))
c33036
+                    return
c33036
                 self.log_debug("Adding link %s -> %s for link follow up" %
c33036
                                (link_name, source))
c33036
                 self.add_link(source, link_name)
c33036
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
c33036
index 7d011a02..7d2a8b2d 100644
c33036
--- a/sos/plugins/__init__.py
c33036
+++ b/sos/plugins/__init__.py
c33036
@@ -376,6 +376,21 @@ class Plugin(object):
c33036
             self._log_debug("link '%s' is a directory, skipping..." % linkdest)
c33036
             return
c33036
 
c33036
+        self.copied_files.append({'srcpath': srcpath,
c33036
+                                  'dstpath': dstpath,
c33036
+                                  'symlink': "yes",
c33036
+                                  'pointsto': linkdest})
c33036
+
c33036
+        # Check for indirect symlink loops by stat()ing the next step
c33036
+        # in the link chain.
c33036
+        try:
c33036
+            os.stat(absdest)
c33036
+        except OSError as e:
c33036
+            if e.errno == 40:
c33036
+                self._log_debug("link '%s' is part of a file system "
c33036
+                                "loop, skipping target..." % dstpath)
c33036
+                return
c33036
+
c33036
         # copy the symlink target translating relative targets
c33036
         # to absolute paths to pass to _do_copy_path.
c33036
         self._log_debug("normalized link target '%s' as '%s'"
c33036
@@ -388,11 +403,6 @@ class Plugin(object):
c33036
             self._log_debug("link '%s' points to itself, skipping target..."
c33036
                             % linkdest)
c33036
 
c33036
-        self.copied_files.append({'srcpath': srcpath,
c33036
-                                  'dstpath': dstpath,
c33036
-                                  'symlink': "yes",
c33036
-                                  'pointsto': linkdest})
c33036
-
c33036
     def _copy_dir(self, srcpath):
c33036
         try:
c33036
             for afile in os.listdir(srcpath):
c33036
-- 
c33036
2.17.1
c33036
c33036
c33036
From e108d7c03834446f8dac66ad69f5eade4f2c5fce Mon Sep 17 00:00:00 2001
c33036
From: Pavel Moravec <pmoravec@redhat.com>
c33036
Date: Fri, 14 Sep 2018 10:42:07 +0200
c33036
Subject: [PATCH 2/2] [archive] fix and simplify directory destination
c33036
 rewriting
c33036
c33036
Rewriting of the destination path by _make_leading_paths() only
c33036
applies when creating intermediate path components that are a
c33036
symbolic link. The final level of path creation must always be
c33036
a directory, and the destination is always the absolute path to
c33036
that directory.
c33036
c33036
Always return the directory path when creating a new directory,
c33036
and do not attempt to rewrite the destination at the top level
c33036
in FileCacheArchive._check_path() since all intermediate links
c33036
have already been handled inside _make_leading_paths() (i.e.
c33036
the returned/rewritten destination is always equal to the path
c33036
that was passed into the function).
c33036
c33036
Resolves: #1432
c33036
c33036
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
c33036
Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
c33036
---
c33036
 sos/archive.py | 8 +++-----
c33036
 1 file changed, 3 insertions(+), 5 deletions(-)
c33036
c33036
diff --git a/sos/archive.py b/sos/archive.py
c33036
index e5819432..b02b75f7 100644
c33036
--- a/sos/archive.py
c33036
+++ b/sos/archive.py
c33036
@@ -241,6 +241,8 @@ class FileCacheArchive(Archive):
c33036
                 else:
c33036
                     self.log_debug("Making directory %s" % abs_path)
c33036
                     os.mkdir(abs_path, mode)
c33036
+                    dest = src_path
c33036
+
c33036
         return dest
c33036
 
c33036
     def _check_path(self, src, path_type, dest=None, force=False):
c33036
@@ -282,17 +284,13 @@ class FileCacheArchive(Archive):
c33036
         if not dest_dir:
c33036
             return dest
c33036
 
c33036
-        # Preserve destination basename for rewritten dest_dir
c33036
-        dest_name = os.path.split(src)[1]
c33036
-
c33036
         # Check containing directory presence and path type
c33036
         if os.path.exists(dest_dir) and not os.path.isdir(dest_dir):
c33036
             raise ValueError("path '%s' exists and is not a directory" %
c33036
                              dest_dir)
c33036
         elif not os.path.exists(dest_dir):
c33036
             src_dir = src if path_type == P_DIR else os.path.split(src)[0]
c33036
-            src_dir = self._make_leading_paths(src_dir)
c33036
-            dest = self.dest_path(os.path.join(src_dir, dest_name))
c33036
+            self._make_leading_paths(src_dir)
c33036
 
c33036
         def is_special(mode):
c33036
             return any([
c33036
-- 
c33036
2.17.1
c33036