Blame SOURCES/sos-bz1985037-cleaner-AD-users-obfuscation.patch

ba407d
From 7e471676fe41dab155a939c60446cc7b7dab773b Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Tue, 20 Jul 2021 11:09:29 -0400
ba407d
Subject: [PATCH] [username parser] Load usernames from `last` for LDAP users
ba407d
ba407d
AD/LDAP users are generally not reported in `lastlog`; however, they
ba407d
are reported in `last`. Conversely, `last` does not report local users
ba407d
who have not logged in but still exist.
ba407d
ba407d
In order to obfuscate both kinds of users, we need to look at both
ba407d
sources.
ba407d
ba407d
For this, first allow parsers to specify multiple prep files. Second,
ba407d
update the username parser to search through all `lastlog` collections
ba407d
as well as the `last` collection.
ba407d
ba407d
Also includes a small update to the username parser's prep loading logic
ba407d
to ensure we are iterating over each username discovered only once.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/__init__.py                | 38 ++++++++++++++------------
ba407d
 sos/cleaner/parsers/__init__.py        |  2 +-
ba407d
 sos/cleaner/parsers/username_parser.py | 24 +++++++++++++---
ba407d
 3 files changed, 42 insertions(+), 22 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index ca5f93e5..6aadfe79 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -518,23 +518,27 @@ third party.
ba407d
             for _parser in self.parsers:
ba407d
                 if not _parser.prep_map_file:
ba407d
                     continue
ba407d
-                _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
ba407d
-                try:
ba407d
-                    if is_dir:
ba407d
-                        _pfile = open(_arc_path, 'r')
ba407d
-                        content = _pfile.read()
ba407d
-                    else:
ba407d
-                        _pfile = archive.extractfile(_arc_path)
ba407d
-                        content = _pfile.read().decode('utf-8')
ba407d
-                    _pfile.close()
ba407d
-                    if isinstance(_parser, SoSUsernameParser):
ba407d
-                        _parser.load_usernames_into_map(content)
ba407d
-                    for line in content.splitlines():
ba407d
-                        if isinstance(_parser, SoSHostnameParser):
ba407d
-                            _parser.load_hostname_into_map(line)
ba407d
-                        self.obfuscate_line(line)
ba407d
-                except Exception as err:
ba407d
-                    self.log_debug("Could not prep %s: %s" % (_arc_path, err))
ba407d
+                if isinstance(_parser.prep_map_file, str):
ba407d
+                    _parser.prep_map_file = [_parser.prep_map_file]
ba407d
+                for parse_file in _parser.prep_map_file:
ba407d
+                    _arc_path = os.path.join(_arc_name, parse_file)
ba407d
+                    try:
ba407d
+                        if is_dir:
ba407d
+                            _pfile = open(_arc_path, 'r')
ba407d
+                            content = _pfile.read()
ba407d
+                        else:
ba407d
+                            _pfile = archive.extractfile(_arc_path)
ba407d
+                            content = _pfile.read().decode('utf-8')
ba407d
+                        _pfile.close()
ba407d
+                        if isinstance(_parser, SoSUsernameParser):
ba407d
+                            _parser.load_usernames_into_map(content)
ba407d
+                        for line in content.splitlines():
ba407d
+                            if isinstance(_parser, SoSHostnameParser):
ba407d
+                                _parser.load_hostname_into_map(line)
ba407d
+                            self.obfuscate_line(line)
ba407d
+                    except Exception as err:
ba407d
+                        self.log_debug("Could not prep %s: %s"
ba407d
+                                       % (_arc_path, err))
ba407d
 
ba407d
     def obfuscate_report(self, report):
ba407d
         """Individually handle each archive or directory we've discovered by
ba407d
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ba407d
index 3076db39..af6e375e 100644
ba407d
--- a/sos/cleaner/parsers/__init__.py
ba407d
+++ b/sos/cleaner/parsers/__init__.py
ba407d
@@ -50,7 +50,7 @@ class SoSCleanerParser():
ba407d
     skip_line_patterns = []
ba407d
     skip_files = []
ba407d
     map_file_key = 'unset'
ba407d
-    prep_map_file = 'unset'
ba407d
+    prep_map_file = []
ba407d
 
ba407d
     def __init__(self, conf_file=None):
ba407d
         # attempt to load previous run data into the mapping for the parser
ba407d
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
ba407d
index 96ce5f0c..b142e371 100644
ba407d
--- a/sos/cleaner/parsers/username_parser.py
ba407d
+++ b/sos/cleaner/parsers/username_parser.py
ba407d
@@ -25,13 +25,24 @@ class SoSUsernameParser(SoSCleanerParser
ba407d
 
ba407d
     name = 'Username Parser'
ba407d
     map_file_key = 'username_map'
ba407d
-    prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
ba407d
+    prep_map_file = [
ba407d
+        'sos_commands/login/lastlog_-u_1000-60000',
ba407d
+        'sos_commands/login/lastlog_-u_60001-65536',
ba407d
+        'sos_commands/login/lastlog_-u_65537-4294967295',
ba407d
+        # AD users will be reported here, but favor the lastlog files since
ba407d
+        # those will include local users who have not logged in
ba407d
+        'sos_commands/login/last'
ba407d
+    ]
ba407d
     regex_patterns = []
ba407d
     skip_list = [
ba407d
         'core',
ba407d
         'nobody',
ba407d
         'nfsnobody',
ba407d
-        'root'
ba407d
+        'shutdown',
ba407d
+        'reboot',
ba407d
+        'root',
ba407d
+        'ubuntu',
ba407d
+        'wtmp'
ba407d
     ]
ba407d
 
ba407d
     def __init__(self, conf_file=None, opt_names=None):
ba407d
@@ -44,11 +54,17 @@ class SoSUsernameParser(SoSCleanerParser):
ba407d
         """Since we don't get the list of usernames from a straight regex for
ba407d
         this parser, we need to override the initial parser prepping here.
ba407d
         """
ba407d
+        users = set()
ba407d
         for line in content.splitlines()[1:]:
ba407d
-            user = line.split()[0]
ba407d
+            try:
ba407d
+                user = line.split()[0]
ba407d
+            except Exception:
ba407d
+                continue
ba407d
             if user in self.skip_list:
ba407d
                 continue
ba407d
-            self.mapping.get(user)
ba407d
+            users.add(user)
ba407d
+        for each in users:
ba407d
+            self.mapping.get(each)
ba407d
 
ba407d
     def parse_line(self, line):
ba407d
         count = 0
ba407d
-- 
ba407d
2.31.1
ba407d