Blob Blame History Raw
From 21101d80610c43a7c00de3dfaa5ff043d1f8324a Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Thu, 27 Oct 2022 18:00:28 +0200
Subject: [PATCH] [cleaner] Apply compile_regexes after a regular parse line

The hostname parser mishandles strings like 'host.domain.com' when the
domain 'domain.com' has a precompiled regex. It obfuscates the domain
first, and the subsequent _parse_line call then skips obfuscating the host.

Calling _parse_line before _parse_line_with_compiled_regexes obfuscates
both the host name and the domain name correctly.

Also add a unit test that reproduces the issue.

Resolves: #3054

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
---
 sos/cleaner/parsers/hostname_parser.py | 19 +++++++++++++++++++
 tests/unittests/cleaner_tests.py       |  7 +++++++
 2 files changed, 26 insertions(+)

diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index debdf182..07eb40f6 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -8,6 +8,7 @@
 #
 # See the LICENSE file in the source distribution for further information.
 
+import re
 from sos.cleaner.parsers import SoSCleanerParser
 from sos.cleaner.mappings.hostname_map import SoSHostnameMap
 
@@ -29,6 +30,24 @@ class SoSHostnameParser(SoSCleanerParser):
         self.load_short_names_from_mapping()
         self.mapping.set_initial_counts()
 
+    def parse_line(self, line):
+        """Scrub one line; called for every line of every file we process.
+
+        Overrides the parent method to swap the call order: _parse_line runs
+        first, then (if self.compile_regexes is set)
+        _parse_line_with_compiled_regexes. Returns (line, summed count).
+        """
+        count = 0
+        for skip_pattern in self.skip_line_patterns:
+            if re.match(skip_pattern, line, re.I):
+                return line, count
+        line, _count = self._parse_line(line)
+        count += _count
+        if self.compile_regexes:
+            line, _rcount = self._parse_line_with_compiled_regexes(line)
+            count += _rcount
+        return line, count
+
     def load_short_names_from_mapping(self):
         """When we load the mapping file into the hostname map, we have to do
         some dancing to get those loaded properly into the "intermediate" dicts
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index d27481c1..9759b38a 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -171,6 +171,13 @@ class CleanerParserTests(unittest.TestCa
         _test = self.host_parser.parse_line(line)[0]
         self.assertNotEqual(line, _test)
 
+    def test_obfuscate_whole_fqdn_for_given_domainname(self):
+        self.host_parser.load_hostname_into_map('sostestdomain.domain')
+        line = 'let obfuscate soshost.sostestdomain.domain'
+        _test = self.host_parser.parse_line(line)[0]
+        self.assertNotIn('soshost', _test)  # short host name is scrubbed
+        self.assertNotIn('sostestdomain', _test)  # preloaded domain too
+
     def test_keyword_parser_valid_line(self):
         line = 'this is my foobar test line'
         _test = self.kw_parser.parse_line(line)[0]
-- 
2.37.3