Blame SOURCES/sos-bz2122354-forbidden-path-efficient.patch

4b82b4
From 1dc3625fabea7331570f713fd1c87ac812d72d92 Mon Sep 17 00:00:00 2001
4b82b4
From: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
Date: Wed, 18 May 2022 13:39:38 -0400
4b82b4
Subject: [PATCH] [Plugin] Make forbidden path checks more efficient
4b82b4
4b82b4
Forbidden path checks have up until now worked by taking a given file
4b82b4
path (potentially with globs), expanding that against all discovered
4b82b4
files that actually exist on the system, and then comparing a potential
4b82b4
collection path against that list.
4b82b4
4b82b4
While this works, and works reasonably fast for most scenarios, it isn't
4b82b4
very efficient and causes significant slowdowns when a non-standard
4b82b4
configuration is in play - e.g. thousands of block devices which sos
4b82b4
would individually have to compare against tens of thousands of paths
4b82b4
for every path the `block` plugin wants to collect.
4b82b4
4b82b4
Improve this by first not expanding the forbidden path globs, but taking
4b82b4
them as distinct patterns, translating from shell-style (to maintain
4b82b4
historical precedent of using globs to specify paths to be skipped) to
4b82b4
python regex patterns as needed. Second, use `re` to handle our pattern
4b82b4
matching for comparison against the distinct patterns provided by a
4b82b4
plugin to skip.
4b82b4
4b82b4
Closes: #2938
4b82b4
4b82b4
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
---
4b82b4
 sos/report/plugins/__init__.py | 20 +++++++++-----------
4b82b4
 sos/report/plugins/cgroups.py  |  6 ++----
4b82b4
 sos/report/plugins/pulpcore.py |  2 +-
4b82b4
 sos/report/plugins/rhui.py     |  2 +-
4b82b4
 4 files changed, 13 insertions(+), 17 deletions(-)
4b82b4
4b82b4
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
4b82b4
index 2a42e6b0a..ba1397a8a 100644
4b82b4
--- a/sos/report/plugins/__init__.py
4b82b4
+++ b/sos/report/plugins/__init__.py
4b82b4
@@ -46,11 +46,6 @@ def _mangle_command(command, name_max):
4b82b4
     return mangledname
4b82b4
 
4b82b4
 
4b82b4
-def _path_in_path_list(path, path_list):
4b82b4
-    return any((p == path or path.startswith(os.path.abspath(p)+os.sep)
4b82b4
-                for p in path_list))
4b82b4
-
4b82b4
-
4b82b4
 def _node_type(st):
4b82b4
     """ return a string indicating the type of special node represented by
4b82b4
     the stat buffer st (block, character, fifo, socket).
4b82b4
@@ -1407,7 +1402,9 @@ def _get_dest_for_srcpath(self, srcpath):
4b82b4
         return None
4b82b4
 
4b82b4
     def _is_forbidden_path(self, path):
4b82b4
-        return _path_in_path_list(path, self.forbidden_paths)
4b82b4
+        return any(
4b82b4
+            re.match(forbid, path) for forbid in self.forbidden_paths
4b82b4
+        )
4b82b4
 
4b82b4
     def _is_policy_forbidden_path(self, path):
4b82b4
         return any([
4b82b4
@@ -1495,14 +1492,12 @@ def _do_copy_path(self, srcpath, dest=None):
4b82b4
             'symlink': "no"
4b82b4
         })
4b82b4
 
4b82b4
-    def add_forbidden_path(self, forbidden, recursive=False):
4b82b4
+    def add_forbidden_path(self, forbidden):
4b82b4
         """Specify a path, or list of paths, to not copy, even if it's part of
4b82b4
         an ``add_copy_spec()`` call
4b82b4
 
4b82b4
         :param forbidden: A filepath to forbid collection from
4b82b4
         :type forbidden: ``str`` or a ``list`` of strings
4b82b4
-
4b82b4
-        :param recursive: Should forbidden glob be applied recursively
4b82b4
         """
4b82b4
         if isinstance(forbidden, str):
4b82b4
             forbidden = [forbidden]
4b82b4
@@ -1512,8 +1507,11 @@ def add_forbidden_path(self, forbidden, recursive=False):
4b82b4
 
4b82b4
         for forbid in forbidden:
4b82b4
             self._log_info("adding forbidden path '%s'" % forbid)
4b82b4
-            for path in glob.glob(forbid, recursive=recursive):
4b82b4
-                self.forbidden_paths.append(path)
4b82b4
+            if "*" in forbid:
4b82b4
+                # calling translate() here on a dir-level path will break the
4b82b4
+                # re.match() call during path comparison
4b82b4
+                forbid = fnmatch.translate(forbid)
4b82b4
+            self.forbidden_paths.append(forbid)
4b82b4
 
4b82b4
     def set_option(self, optionname, value):
4b82b4
         """Set the named option to value. Ensure the original type of the
4b82b4
diff --git a/sos/report/plugins/pulpcore.py b/sos/report/plugins/pulpcore.py
4b82b4
index 6c4237cae..f6bc194c7 100644
4b82b4
--- a/sos/report/plugins/pulpcore.py
4b82b4
+++ b/sos/report/plugins/pulpcore.py
4b82b4
@@ -89,7 +89,7 @@ class PulpCore(Plugin, IndependentPlugin
4b82b4
             "/etc/pki/pulp/*"
4b82b4
         ])
4b82b4
         # skip collecting certificate keys
4b82b4
-        self.add_forbidden_path("/etc/pki/pulp/**/*.key", recursive=True)
4b82b4
+        self.add_forbidden_path("/etc/pki/pulp/**/*.key")
4b82b4
 
4b82b4
         self.add_cmd_output("rq info -u redis://localhost:6379/8",
4b82b4
                             env={"LC_ALL": "en_US.UTF-8"},
4b82b4
diff --git a/sos/report/plugins/rhui.py b/sos/report/plugins/rhui.py
4b82b4
index add024613..8063fd51c 100644
4b82b4
--- a/sos/report/plugins/rhui.py
4b82b4
+++ b/sos/report/plugins/rhui.py
4b82b4
@@ -30,7 +30,7 @@ def setup(self):
4b82b4
             "/var/log/rhui/*",
4b82b4
         ])
4b82b4
         # skip collecting certificate keys
4b82b4
-        self.add_forbidden_path("/etc/pki/rhui/**/*.key", recursive=True)
4b82b4
+        self.add_forbidden_path("/etc/pki/rhui/**/*.key")
4b82b4
 
4b82b4
         # call rhui-manager commands with 1m timeout and
4b82b4
         # with an env. variable ensuring that "RHUI Username:"