From 2e07f7c4778145d4366476ecc4383d491458b541 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 31 Aug 2018 12:50:24 +0100 Subject: [PATCH 1/4] [sosreport] properly raise exceptions when --debug is given OSError and IOError exceptions were not raised to the terminal when --debug is in effect since they were silently caught in the generic exception handler. Signed-off-by: Bryn M. Reeves --- sos/sosreport.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sos/sosreport.py b/sos/sosreport.py index 00c3e811..80633966 100644 --- a/sos/sosreport.py +++ b/sos/sosreport.py @@ -995,7 +995,8 @@ class SoSReport(object): print(" %s while setting up archive" % e.strerror) print("") else: - raise e + print("Error setting up archive: %s" % e) + raise except Exception as e: self.ui_log.error("") self.ui_log.error(" Unexpected exception setting up archive:") @@ -1467,6 +1468,8 @@ class SoSReport(object): return self.final_work() except (OSError): + if self.opts.debug: + raise self._cleanup() except (KeyboardInterrupt): self.ui_log.error("\nExiting on user cancel") -- 2.17.1 From c496d2bec8cae175faf986567e73d16d401d8564 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 31 Aug 2018 12:52:38 +0100 Subject: [PATCH 2/4] [archive] simplify FileCacheArchive.makedirs() Simplify the makedirs() method of FileCacheArchive and have it bypass _check_path() and directly call os.makedirs(): a subsequent patch will restrict the use of the method to setting up the sos_* directories in the archive root. File, directory and other object type add_* methods will use a new method that correctly handles symbolic links in intermediate path components. Signed-off-by: Bryn M. Reeves --- sos/archive.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 5d99170f..ffa54036 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -361,11 +361,11 @@ class FileCacheArchive(Archive): return self._archive_root def makedirs(self, path, mode=0o700): - dest = self._check_path(path, P_DIR) - if not dest: - return + """Create path, including leading components. - self._makedirs(self.dest_path(path)) + Used by sos.sosreport to set up sos_* directories. + """ + os.makedirs(os.path.join(self._archive_root, path), mode=mode) self.log_debug("created directory at '%s' in FileCacheArchive '%s'" % (path, self._archive_root)) -- 2.17.1 From ca422720b74181b2433473428e29e90af59b3cf8 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 31 Aug 2018 12:55:51 +0100 Subject: [PATCH 3/4] [archive] normalise dest_dir in FileCacheArchive._check_path() Always set a valid dest_dir in _check_path() and do not assume that it can be obtained by splitting the path: in the case of a directory it is the unmodified 'dest' value. Signed-off-by: Bryn M. Reeves --- sos/archive.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sos/archive.py b/sos/archive.py index ffa54036..903cc672 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -191,7 +191,10 @@ class FileCacheArchive(Archive): copied now or `None` otherwise """ dest = dest or self.dest_path(src) - dest_dir = os.path.split(dest)[0] + if path_type == P_DIR: + dest_dir = dest + else: + dest_dir = os.path.split(dest)[0] if not dest_dir: return dest -- 2.17.1 From 75d759066e8ee0a469abc37f48f7bfcdfe8182b5 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 31 Aug 2018 12:58:01 +0100 Subject: [PATCH 4/4] [archive] replace FileCacheArchive._makedirs() The Python os.makedirs() implementation is inadequate for sos's needs: it will create leading directories given an intended path destination, but it is not able to reflect cases where some of the intermediate paths are actually symbolic links. Replace the use of os.makedirs() with a method that walks over the path, and either creates directories, or symbolic links (and their directory target) to better correspond with the content of the host file system. This fixes a situation where two plugins can race in the archive, leading to an exception in the plugin that runs last: - /foo/bar exists and is a link to /foo/bar.qux - One plugin attempts to collect /foo/bar - Another plugin attempts to collect a link /foo/qux -> /foo/bar/baz If the 2nd plugin happens to run first it will create the path "/foo/bar" as a _directory_ (via _makedirs()). Since the archive now checks for matching object types when a path collision occurs, the first plugin will arrive at add_dir(), note that "/foo/bar" is present and is not a symbolic link, and will raise an exception. Correct this by ensuring that whichever plugin executes first, the correct link/directory path structure will be set up. Signed-off-by: Bryn M. Reeves --- sos/archive.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 903cc672..11afa7aa 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -159,6 +159,67 @@ class FileCacheArchive(Archive): name = name.lstrip(os.sep) return (os.path.join(self._archive_root, name)) + def _make_leading_paths(self, src, mode=0o700): + """Create leading path components + + The standard python `os.makedirs` is insufficient for our + needs: it will only create directories, and ignores the fact + that some path components may be symbolic links. + """ + self.log_debug("Making leading paths for %s" % src) + root = self._archive_root + + def in_archive(path): + """Test whether path ``path`` is inside the archive. + """ + return path.startswith(os.path.join(root, "")) + + if not src.startswith("/"): + # Sos archive path (sos_commands, sos_logs etc.) + src_dir = src + else: + # Host file path + src_dir = src if os.path.isdir(src) else os.path.split(src)[0] + + # Build a list of path components in root-to-leaf order. + path = src_dir + path_comps = [] + while path != '/' and path != '': + head, tail = os.path.split(path) + path_comps.append(tail) + path = head + path_comps.reverse() + + abs_path = root + rel_path = "" + + # Check and create components as needed + for comp in path_comps: + abs_path = os.path.join(abs_path, comp) + + if not in_archive(abs_path): + continue + + rel_path = os.path.join(rel_path, comp) + src_path = os.path.join("/", rel_path) + + if not os.path.exists(abs_path): + self.log_debug("Making path %s" % abs_path) + if os.path.islink(src_path) and os.path.isdir(src_path): + target = os.readlink(src_path) + abs_target = os.path.join(root, target) + + # Recursively create leading components of target + self._make_leading_paths(abs_target, mode=mode) + + self.log_debug("Making symlink '%s' -> '%s'" % + (abs_path, target)) + target = os.path.relpath(target) + os.symlink(target, abs_path) + else: + self.log_debug("Making directory %s" % abs_path) + os.mkdir(abs_path, mode) + def _check_path(self, src, path_type, dest=None, force=False): """Check a new destination path in the archive. @@ -203,7 +264,8 @@ class FileCacheArchive(Archive): raise ValueError("path '%s' exists and is not a directory" % dest_dir) elif not os.path.exists(dest_dir): - self._makedirs(dest_dir) + src_dir = src if path_type == P_DIR else os.path.split(src)[0] + self._make_leading_paths(src_dir) def is_special(mode): return any([ @@ -326,10 +388,7 @@ class FileCacheArchive(Archive): def add_dir(self, path): with self._path_lock: - dest = self._check_path(path, P_DIR) - if not dest: - return - self.makedirs(path) + self._check_path(path, P_DIR) def add_node(self, path, mode, device): dest = self._check_path(path, P_NODE) @@ -347,9 +406,6 @@ class FileCacheArchive(Archive): raise e shutil.copystat(path, dest) - def _makedirs(self, path, mode=0o700): - os.makedirs(path, mode) - def name_max(self): if 'PC_NAME_MAX' in os.pathconf_names: pc_name_max = os.pathconf_names['PC_NAME_MAX'] -- 2.17.1 From 5d6228b85e174dee8abcc4c206a1e9034242c6c6 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 7 Sep 2018 12:06:34 -0400 Subject: [PATCH 1/6] [sosreport] ensure ThreadPool exceptions are raised The ThreadPoolExecutor does not raise exceptions to the parent thread immediately: it stores them in-line in the pool's results list, and raises them to the caller on acccess to that slot in the results iterator. Make sure that these exceptions are handled by iterating over all results and asserting that they are non-None (in practice, this code is never executed since the resulting raise will trap to an exception handler, but it is less confusing than a bare 'pass'). Signed-off-by: Bryn M. Reeves --- sos/sosreport.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sos/sosreport.py b/sos/sosreport.py index 80633966..44be75a1 100644 --- a/sos/sosreport.py +++ b/sos/sosreport.py @@ -1065,9 +1065,13 @@ class SoSReport(object): try: self.plugpool = ThreadPoolExecutor(self.opts.threads) # Pass the plugpool its own private copy of self.pluglist - self.plugpool.map(self._collect_plugin, list(self.pluglist), - chunksize=1) + results = self.plugpool.map(self._collect_plugin, + list(self.pluglist), chunksize=1) self.plugpool.shutdown(wait=True) + for res in results: + if not res: + self.soslog.debug("Unexpected plugin task result: %s" % + res) self.ui_log.info("") except KeyboardInterrupt: # We may not be at a newline when the user issues Ctrl-C -- 2.17.1 From 9aaba972bf6a42c33ea9bca80f07bfb880ba45a1 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 7 Sep 2018 12:15:10 -0400 Subject: [PATCH 2/6] [sosreport] trap directly to PDB in handle_exception() Now that plugins are run in a threadpool, it is not possible to defer the call to pdb.post_mortem() to the top-level exception handler in the main thread: this is due to the fact that in a pool, exceptions are caught and saved to be re-raised to thread calling the pool when results are returned. When the saved exception is raised to the top-level handler the execution context it relates to is gone: the backtrace and stack frame have been torn down and only very limited information is available from the exception frame. Instead, catch these exceptions _inside_ the thread pool context, and directly trap to the Python debugger. This allows plugin code to be debugged interactively with the full backtrace and with all access to local variables and the execution stack. In addition, this means that after the debugger has handled the exception it is possible to return to the run and continue until report completion. One side effect of this change is that the *-plugin-errors.txt file containng the backtrace is now written into the archive whether or not --debug is given. Signed-off-by: Bryn M. Reeves --- sos/sosreport.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sos/sosreport.py b/sos/sosreport.py index 44be75a1..77ae7161 100644 --- a/sos/sosreport.py +++ b/sos/sosreport.py @@ -30,6 +30,7 @@ from shutil import rmtree import tempfile import hashlib from concurrent.futures import ThreadPoolExecutor, TimeoutError +import pdb from sos import _sos as _ from sos import __version__ @@ -504,7 +505,13 @@ class SoSReport(object): def handle_exception(self, plugname=None, func=None): if self.raise_plugins or self.exit_process: - raise + # retrieve exception info for the current thread and stack. + (etype, val, tb) = sys.exc_info() + # we are NOT in interactive mode, print the exception... + traceback.print_exception(etype, val, tb, file=sys.stdout) + print_() + # ...then start the debugger in post-mortem mode. + pdb.post_mortem(tb) if plugname and func: self._log_plugin_exception(plugname, func) -- 2.17.1 From 0ea62d1ea57f41c1b75ccb83e69fdda386a7d280 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 7 Sep 2018 13:00:52 -0400 Subject: [PATCH 3/6] [Plugin] fix exception raise in Plugin._copy_dir() Use a naked 'raise' statement rather than raising the already caught exception in _copy_dir(), so that the original stack and backtrace are avaialable. Signed-off-by: Bryn M. Reeves --- sos/plugins/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py index 252de4d0..ac2c0bc8 100644 --- a/sos/plugins/__init__.py +++ b/sos/plugins/__init__.py @@ -401,7 +401,7 @@ class Plugin(object): msg = "Too many levels of symbolic links copying" self._log_error("_copy_dir: %s '%s'" % (msg, srcpath)) return - raise e + raise def _get_dest_for_srcpath(self, srcpath): if self.use_sysroot(): -- 2.17.1 From d84c1cd6dedf51a8ed7b1a511585c0ac2db0f083 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Wed, 5 Sep 2018 12:46:16 +0100 Subject: [PATCH 4/6] [archive] fix leading path creation Fix the creation of leading path components for both paths that contain intermediate components that are symbolic links (with both absolute and relative targets), and those that contain only directory components. Since symlinks may link to other files, and other symlinks, it is necessary to handle these paths recursively and to include any intermediate symlinked directories, or symlink targets in the set of paths added to the archive. Related: #1404 Signed-off-by: Bryn M. Reeves --- sos/archive.py | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 11afa7aa..c256a01f 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -165,9 +165,24 @@ class FileCacheArchive(Archive): The standard python `os.makedirs` is insufficient for our needs: it will only create directories, and ignores the fact that some path components may be symbolic links. + + :param src: The source path in the host file system for which + leading components should be created, or the path + to an sos_* virtual directory inside the archive. + + Host paths must be absolute (initial '/'), and + sos_* directory paths must be a path relative to + the root of the archive. + + :param mode: An optional mode to be used when creating path + components. + :returns: A rewritten destination path in the case that one + or more symbolic links in intermediate components + of the path have altered the path destination. """ self.log_debug("Making leading paths for %s" % src) root = self._archive_root + dest = src def in_archive(path): """Test whether path ``path`` is inside the archive. @@ -191,34 +206,42 @@ class FileCacheArchive(Archive): path_comps.reverse() abs_path = root - rel_path = "" + src_path = "/" # Check and create components as needed for comp in path_comps: abs_path = os.path.join(abs_path, comp) + # Do not create components that are above the archive root. if not in_archive(abs_path): continue - rel_path = os.path.join(rel_path, comp) - src_path = os.path.join("/", rel_path) + src_path = os.path.join(src_path, comp) if not os.path.exists(abs_path): self.log_debug("Making path %s" % abs_path) if os.path.islink(src_path) and os.path.isdir(src_path): target = os.readlink(src_path) - abs_target = os.path.join(root, target) + + # The directory containing the source in the host fs, + # adjusted for the current level of path creation. + target_dir = os.path.split(src_path)[0] + + # The source path of the target in the host fs to be + # recursively copied. + target_src = os.path.join(target_dir, target) # Recursively create leading components of target - self._make_leading_paths(abs_target, mode=mode) + dest = self._make_leading_paths(target_src, mode=mode) + dest = os.path.normpath(dest) self.log_debug("Making symlink '%s' -> '%s'" % (abs_path, target)) - target = os.path.relpath(target) os.symlink(target, abs_path) else: self.log_debug("Making directory %s" % abs_path) os.mkdir(abs_path, mode) + return dest def _check_path(self, src, path_type, dest=None, force=False): """Check a new destination path in the archive. @@ -259,13 +282,17 @@ class FileCacheArchive(Archive): if not dest_dir: return dest + # Preserve destination basename for rewritten dest_dir + dest_name = os.path.split(src)[1] + # Check containing directory presence and path type if os.path.exists(dest_dir) and not os.path.isdir(dest_dir): raise ValueError("path '%s' exists and is not a directory" % dest_dir) elif not os.path.exists(dest_dir): src_dir = src if path_type == P_DIR else os.path.split(src)[0] - self._make_leading_paths(src_dir) + src_dir = self._make_leading_paths(src_dir) + dest = self.dest_path(os.path.join(src_dir, dest_name)) def is_special(mode): return any([ -- 2.17.1 From 322f4a517ae336cc1443f9a399a0d15d45ec48b9 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Fri, 7 Sep 2018 13:11:03 -0400 Subject: [PATCH 5/6] [archive] add link follow-up to FileCacheArchive.add_link() Creating a link may trigger further actions in the archive: if the link target is a regular file, we must copy that file into the archive, and if the target is a symbolic link, then we must create that link, and copy in the link target. Handle this by calling add_file() or (recursively) add_link() in order to create the missing pieces of the symlink chain. These operations must take place outside of the path lock since they do not modify the archive namespace and will call methods of the Archive object that will attempt to re-acquire this lock. Resolves: #1404 Signed-off-by: Bryn M. Reeves --- sos/archive.py | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index c256a01f..6db398fc 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -403,6 +403,7 @@ class FileCacheArchive(Archive): % (dest, self._archive_root)) def add_link(self, source, link_name): + self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source)) with self._path_lock: dest = self._check_path(link_name, P_LINK) if not dest: @@ -410,10 +411,41 @@ class FileCacheArchive(Archive): if not os.path.lexists(dest): os.symlink(source, dest) - self.log_debug("added symlink at '%s' to '%s' in archive '%s'" - % (dest, source, self._archive_root)) + self.log_debug("added symlink at '%s' to '%s' in archive '%s'" + % (dest, source, self._archive_root)) + + # Follow-up must be outside the path lock: we recurse into + # other monitor methods that will attempt to reacquire it. + + source_dir = os.path.dirname(link_name) + host_source = os.path.join(source_dir, source) + if not os.path.exists(self.dest_path(host_source)): + if os.path.islink(host_source): + link_dir = os.path.dirname(link_name) + link_name = os.path.normpath(os.path.join(link_dir, source)) + dest_dir = os.path.dirname(link_name) + source = os.path.join(dest_dir, os.readlink(link_name)) + source = os.path.relpath(source) + self.log_debug("Adding link %s -> %s for link follow up" % + (link_name, source)) + self.add_link(source, link_name) + elif os.path.isdir(host_source): + self.log_debug("Adding dir %s for link follow up" % source) + self.add_dir(host_source) + elif os.path.isfile(host_source): + self.log_debug("Adding file %s for link follow up" % source) + self.add_file(host_source) + else: + self.log_debug("No link follow up: source=%s link_name=%s" % + (source, link_name)) - def add_dir(self, path): + + def add_dir(self, path, copy=False): + """Create a directory in the archive. + + :param path: the path in the host file system to add + """ + # Establish path structure with self._path_lock: self._check_path(path, P_DIR) -- 2.17.1 From 6e79c4b4a4f32fa549708dbb8c8b9af73ab8ff61 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Mon, 10 Sep 2018 16:33:33 +0100 Subject: [PATCH 6/6] [archive] remove unused 'copy' arg from FileCacheArchive.add_dir() Signed-off-by: Bryn M. Reeves --- sos/archive.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 6db398fc..4b30630b 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -439,8 +439,7 @@ class FileCacheArchive(Archive): self.log_debug("No link follow up: source=%s link_name=%s" % (source, link_name)) - - def add_dir(self, path, copy=False): + def add_dir(self, path): """Create a directory in the archive. :param path: the path in the host file system to add -- 2.17.1 From 919e8671a6ab9684d59525eb7f3607b3aab08ee1 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Tue, 11 Sep 2018 12:16:57 -0400 Subject: [PATCH] [archive] fix link rewriting logic in FileCacheArchive.add_link() When processing link follow up for an original symbolic link, the add_link() logic incorrectly used the _original_ host link name, rather than the to-be-created name when calculating relative path structures. If the prior link is at a greater or lesser level of directory nesting this will lead to broken relative links in the archive (one level too high or too low). In some cases (systemd) this behaviour was masked due to the fact that identically named links exist at multiple levels of the path hierarchy. Signed-off-by: Bryn M. Reeves --- sos/archive.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 528cfa576..7a7717de7 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -417,27 +417,35 @@ def add_link(self, source, link_name): # Follow-up must be outside the path lock: we recurse into # other monitor methods that will attempt to reacquire it. + self.log_debug("Link follow up: source=%s link_name=%s dest=%s" % + (source, link_name, dest)) + source_dir = os.path.dirname(link_name) - host_source = os.path.join(source_dir, source) - if not os.path.exists(self.dest_path(host_source)): - if os.path.islink(host_source): - link_dir = os.path.dirname(link_name) - link_name = os.path.normpath(os.path.join(link_dir, source)) + host_path_name = os.path.normpath(os.path.join(source_dir, source)) + dest_path_name = self.dest_path(host_path_name) + + if not os.path.exists(dest_path_name): + if os.path.islink(host_path_name): + # Normalised path for the new link_name + link_name = host_path_name + # Containing directory for the new link dest_dir = os.path.dirname(link_name) - source = os.path.join(dest_dir, os.readlink(link_name)) - source = os.path.relpath(source) + # Relative source path of the new link + source = os.path.join(dest_dir, os.readlink(host_path_name)) + source = os.path.relpath(source, dest_dir) self.log_debug("Adding link %s -> %s for link follow up" % (link_name, source)) self.add_link(source, link_name) - elif os.path.isdir(host_source): + elif os.path.isdir(host_path_name): self.log_debug("Adding dir %s for link follow up" % source) - self.add_dir(host_source) - elif os.path.isfile(host_source): + self.add_dir(host_path_name) + elif os.path.isfile(host_path_name): self.log_debug("Adding file %s for link follow up" % source) - self.add_file(host_source) + self.add_file(host_path_name) else: self.log_debug("No link follow up: source=%s link_name=%s" % (source, link_name)) + self.log_debug("leaving add_link()") def add_dir(self, path): """Create a directory in the archive. From c065be9715dc845b6411a9a0b2d6171bbeb1c390 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Wed, 12 Sep 2018 12:02:33 +0100 Subject: [PATCH] [plugin] canonicalize link target path in Plugin._copy_symlink() Since we may be dealing with paths that contain intermediate symlinked directories, it is necessary to canonicalize the path for the link target in order to eliminate additional levels of symbolic links, and to calculate the correct relative path to use within the archive. Related: #1404 Signed-off-by: Bryn M. Reeves --- sos/plugins/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py index ac2c0bc8c..7d011a02c 100644 --- a/sos/plugins/__init__.py +++ b/sos/plugins/__init__.py @@ -353,7 +353,10 @@ def _copy_symlink(self, srcpath): absdest = os.path.normpath(dest) # adjust the target used inside the report to always be relative if os.path.isabs(linkdest): - reldest = os.path.relpath(linkdest, os.path.dirname(srcpath)) + # Canonicalize the link target path to avoid additional levels + # of symbolic links (that would affect the path nesting level). + realdir = os.path.realpath(os.path.dirname(srcpath)) + reldest = os.path.relpath(linkdest, start=realdir) # trim leading /sysroot if self.use_sysroot(): reldest = reldest[len(os.sep + os.pardir):] From 868966cd9dbb96ce3635d884e67e738b18658140 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Wed, 12 Sep 2018 16:11:07 +0100 Subject: [PATCH] [archive] canonicalise paths for link follow up Ensure that the canonical path is used when processing link follow up actions: the actual link path may contain one or more levels of symbolic links, leading to broken links if the link target path is assumed to be relative to the containing directory. Signed-off-by: Bryn M. Reeves --- sos/archive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sos/archive.py b/sos/archive.py index 7a7717de7..483d66f4f 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -421,7 +421,7 @@ def add_link(self, source, link_name): (source, link_name, dest)) source_dir = os.path.dirname(link_name) - host_path_name = os.path.normpath(os.path.join(source_dir, source)) + host_path_name = os.path.realpath(os.path.join(source_dir, source)) dest_path_name = self.dest_path(host_path_name) if not os.path.exists(dest_path_name): From 8e60e299cdfb0027d6b6ea845234ef54ae785186 Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Thu, 13 Sep 2018 16:14:12 +0100 Subject: [PATCH 1/2] [archive, plugin] avoid recursing on symbolic link loops It's possible that symlink loops exist in the host file system, either 'simple' ('a'->'a'), or indirect ('a'->'b'->'a'). We need to avoid recursing on these loops, to avoid exceeding the maximum link or recursion depths, but we should still represent these inodes as accurately as possible in the resulting archive. Detect loops in both the Plugin link handling code and in the new Archive link follow-up code by creating the first requested level of loop, and then skipping the recursive follow-up. This means that the looping links are still created in the archive so long as they are referenced in a copy spec but that we do not attempt to indefinitely recurse while collecting them. Resolves: #1430 Signed-off-by: Bryn M. Reeves --- sos/archive.py | 27 +++++++++++++++++++++++++++ sos/plugins/__init__.py | 20 +++++++++++++++----- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index 483d66f4..e5819432 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -424,6 +424,29 @@ class FileCacheArchive(Archive): host_path_name = os.path.realpath(os.path.join(source_dir, source)) dest_path_name = self.dest_path(host_path_name) + def is_loop(link_name, source): + """Return ``True`` if the symbolic link ``link_name`` is part + of a file system loop, or ``False`` otherwise. + """ + link_dir = os.path.dirname(link_name) + if not os.path.isabs(source): + source = os.path.realpath(os.path.join(link_dir, source)) + link_name = os.path.realpath(link_name) + + # Simple a -> a loop + if link_name == source: + return True + + # Find indirect loops (a->b-a) by stat()ing the first step + # in the symlink chain + try: + os.stat(link_name) + except OSError as e: + if e.errno == 40: + return True + raise + return False + if not os.path.exists(dest_path_name): if os.path.islink(host_path_name): # Normalised path for the new link_name @@ -433,6 +456,10 @@ class FileCacheArchive(Archive): # Relative source path of the new link source = os.path.join(dest_dir, os.readlink(host_path_name)) source = os.path.relpath(source, dest_dir) + if is_loop(link_name, source): + self.log_debug("Link '%s' - '%s' loops: skipping..." % + (link_name, source)) + return self.log_debug("Adding link %s -> %s for link follow up" % (link_name, source)) self.add_link(source, link_name) diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py index 7d011a02..7d2a8b2d 100644 --- a/sos/plugins/__init__.py +++ b/sos/plugins/__init__.py @@ -376,6 +376,21 @@ class Plugin(object): self._log_debug("link '%s' is a directory, skipping..." % linkdest) return + self.copied_files.append({'srcpath': srcpath, + 'dstpath': dstpath, + 'symlink': "yes", + 'pointsto': linkdest}) + + # Check for indirect symlink loops by stat()ing the next step + # in the link chain. + try: + os.stat(absdest) + except OSError as e: + if e.errno == 40: + self._log_debug("link '%s' is part of a file system " + "loop, skipping target..." % dstpath) + return + # copy the symlink target translating relative targets # to absolute paths to pass to _do_copy_path. self._log_debug("normalized link target '%s' as '%s'" @@ -388,11 +403,6 @@ class Plugin(object): self._log_debug("link '%s' points to itself, skipping target..." % linkdest) - self.copied_files.append({'srcpath': srcpath, - 'dstpath': dstpath, - 'symlink': "yes", - 'pointsto': linkdest}) - def _copy_dir(self, srcpath): try: for afile in os.listdir(srcpath): -- 2.17.1 From e108d7c03834446f8dac66ad69f5eade4f2c5fce Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Fri, 14 Sep 2018 10:42:07 +0200 Subject: [PATCH 2/2] [archive] fix and simplify directory destination rewriting Rewriting of the destination path by _make_leading_paths() only applies when creating intermediate path components that are a symbolic link. The final level of path creation must always be a directory, and the destination is always the absolute path to that directory. Always return the directory path when creating a new directory, and do not attempt to rewrite the destination at the top level in FileCacheArchive._check_path() since all intermediate links have already been handled inside _make_leading_paths() (i.e. the returned/rewritten destination is always equal to the path that was passed into the function). Resolves: #1432 Signed-off-by: Pavel Moravec Signed-off-by: Bryn M. Reeves --- sos/archive.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sos/archive.py b/sos/archive.py index e5819432..b02b75f7 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -241,6 +241,8 @@ class FileCacheArchive(Archive): else: self.log_debug("Making directory %s" % abs_path) os.mkdir(abs_path, mode) + dest = src_path + return dest def _check_path(self, src, path_type, dest=None, force=False): @@ -282,17 +284,13 @@ class FileCacheArchive(Archive): if not dest_dir: return dest - # Preserve destination basename for rewritten dest_dir - dest_name = os.path.split(src)[1] - # Check containing directory presence and path type if os.path.exists(dest_dir) and not os.path.isdir(dest_dir): raise ValueError("path '%s' exists and is not a directory" % dest_dir) elif not os.path.exists(dest_dir): src_dir = src if path_type == P_DIR else os.path.split(src)[0] - src_dir = self._make_leading_paths(src_dir) - dest = self.dest_path(os.path.join(src_dir, dest_name)) + self._make_leading_paths(src_dir) def is_special(mode): return any([ -- 2.17.1