diff --git a/SOURCES/sos-bz2023867-cleaner-hostnames-improvements.patch b/SOURCES/sos-bz2023867-cleaner-hostnames-improvements.patch
new file mode 100644
index 0000000..148adb6
--- /dev/null
+++ b/SOURCES/sos-bz2023867-cleaner-hostnames-improvements.patch
@@ -0,0 +1,1389 @@
+From decd39b7799a0579ea085b0da0728b6eabd49b38 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 1 Sep 2021 00:28:58 -0400
+Subject: [PATCH] [clean] Provide archive abstractions to obfuscate more than
+ sos archives
+
+This commit removes the restriction imposed on `sos clean` since its
+introduction in sos-4.0 to only work against known sos report archives
+or build directories. This is because there has been interest in using
+the obfuscation bits of sos in other data-collector projects.
+
+The `SoSObfuscationArchive()` class has been revamped to now be an
+abstraction for different types of archives, and the cleaner logic has
+been updated to leverage this new abstraction rather than assuming we're
+working on an sos archive.
+
+Abstractions are added for our own native use cases - that being `sos
+report` and `sos collect` for at-runtime obfuscation, as well as
+standalone archives previously generated. Further generic abstractions
+are available for plain directories and tarballs however these will not
+provide the same level of coverage as fully supported archive types, as
+is noted in the manpage for sos-clean.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ man/en/sos-clean.1                            |  25 ++
+ sos/cleaner/__init__.py                       | 308 +++++++++---------
+ .../__init__.py}                              |  80 ++++-
+ sos/cleaner/archives/generic.py               |  52 +++
+ sos/cleaner/archives/sos.py                   | 106 ++++++
+ sos/cleaner/parsers/__init__.py               |   6 -
+ sos/cleaner/parsers/hostname_parser.py        |   1 -
+ sos/cleaner/parsers/ip_parser.py              |   1 -
+ sos/cleaner/parsers/keyword_parser.py         |   1 -
+ sos/cleaner/parsers/mac_parser.py             |   1 -
+ sos/cleaner/parsers/username_parser.py        |   8 -
+ tests/cleaner_tests/existing_archive.py       |   7 +
+ tests/cleaner_tests/full_report_run.py        |   3 +
+ tests/cleaner_tests/report_with_mask.py       |   3 +
+ 14 files changed, 423 insertions(+), 179 deletions(-)
+ rename sos/cleaner/{obfuscation_archive.py => archives/__init__.py} (81%)
+ create mode 100644 sos/cleaner/archives/generic.py
+ create mode 100644 sos/cleaner/archives/sos.py
+
+diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
+index b77bc63c..54026713 100644
+--- a/man/en/sos-clean.1
++++ b/man/en/sos-clean.1
+@@ -10,6 +10,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
+     [\-\-jobs]
+     [\-\-no-update]
+     [\-\-keep-binary-files]
++    [\-\-archive-type]
+ 
+ .SH DESCRIPTION
+ \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
+@@ -88,6 +89,30 @@ Users should review any archive that keeps binary files in place before sending
+ a third party.
+ 
+ Default: False (remove encountered binary files)
++.TP
++.B \-\-archive-type TYPE
++Specify the type of archive that TARGET was generated as.
++When sos inspects a TARGET archive, it tries to identify what type of archive it is.
++For example, it may be a report generated by \fBsos report\fR, or a collection of those
++reports generated by \fBsos collect\fR, which require separate approaches.
++
++This option may be useful if a given TARGET archive is known to be of a specific type,
++but due to unknown reasons or some malformed/missing information in the archive directly,
++that is not properly identified by sos.
++
++The following are accepted values for this option:
++
++    \fBauto\fR          Automatically detect the archive type
++    \fBreport\fR        An archive generated by \fBsos report\fR
++    \fBcollect\fR       An archive generated by \fBsos collect\fR
++
++The following may also be used, however note that these do not attempt to pre-load
++any information from the archives into the parsers. This means that, among other limitations,
++items like host and domain names may not be obfuscated unless an obfuscated mapping already exists
++on the system from a previous execution.
++
++    \fBdata-dir\fR      A plain directory on the filesystem.
++    \fBtarball\fR       A generic tar archive not associated with any known tool
+ 
+ .SH SEE ALSO
+ .BR sos (1)
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index 6aadfe79..6d2eb483 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -12,9 +12,7 @@ import hashlib
+ import json
+ import logging
+ import os
+-import re
+ import shutil
+-import tarfile
+ import tempfile
+ 
+ from concurrent.futures import ThreadPoolExecutor
+@@ -27,7 +25,10 @@ from sos.cleaner.parsers.mac_parser import SoSMacParser
+ from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
+ from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
+ from sos.cleaner.parsers.username_parser import SoSUsernameParser
+-from sos.cleaner.obfuscation_archive import SoSObfuscationArchive
++from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
++                                      SoSCollectorArchive,
++                                      SoSCollectorDirectory)
++from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
+ from sos.utilities import get_human_readable
+ from textwrap import fill
+ 
+@@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent):
+     desc = "Obfuscate sensitive networking information in a report"
+ 
+     arg_defaults = {
++        'archive_type': 'auto',
+         'domains': [],
+         'jobs': 4,
+         'keywords': [],
+@@ -70,6 +72,7 @@ class SoSCleaner(SoSComponent):
+             self.from_cmdline = False
+             if not hasattr(self.opts, 'jobs'):
+                 self.opts.jobs = 4
++            self.opts.archive_type = 'auto'
+             self.soslog = logging.getLogger('sos')
+             self.ui_log = logging.getLogger('sos_ui')
+             # create the tmp subdir here to avoid a potential race condition
+@@ -92,6 +95,17 @@ class SoSCleaner(SoSComponent):
+             SoSUsernameParser(self.cleaner_mapping, self.opts.usernames)
+         ]
+ 
++        self.archive_types = [
++            SoSReportDirectory,
++            SoSReportArchive,
++            SoSCollectorDirectory,
++            SoSCollectorArchive,
++            # make sure these two are always last as they are fallbacks
++            DataDirArchive,
++            TarballArchive
++        ]
++        self.nested_archive = None
++
+         self.log_info("Cleaner initialized. From cmdline: %s"
+                       % self.from_cmdline)
+ 
+@@ -178,6 +192,11 @@ third party.
+         )
+         clean_grp.add_argument('target', metavar='TARGET',
+                                help='The directory or archive to obfuscate')
++        clean_grp.add_argument('--archive-type', default='auto',
++                               choices=['auto', 'report', 'collect',
++                                        'data-dir', 'tarball'],
++                               help=('Specify what kind of archive the target '
++                                     'was generated as'))
+         clean_grp.add_argument('--domains', action='extend', default=[],
+                                help='List of domain names to obfuscate')
+         clean_grp.add_argument('-j', '--jobs', default=4, type=int,
+@@ -218,59 +237,28 @@ third party.
+ 
+         In the event the target path is not an archive, abort.
+         """
+-        if not tarfile.is_tarfile(self.opts.target):
+-            self.ui_log.error(
+-                "Invalid target: must be directory or tar archive"
+-            )
+-            self._exit(1)
+-
+-        archive = tarfile.open(self.opts.target)
+-        self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0]
+-
+-        try:
+-            archive.getmember(os.path.join(self.arc_name, 'sos_logs'))
+-        except Exception:
+-            # this is not an sos archive
+-            self.ui_log.error("Invalid target: not an sos archive")
+-            self._exit(1)
+-
+-        # see if there are archives within this archive
+-        nested_archives = []
+-        for _file in archive.getmembers():
+-            if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
+-                    (_file.name.endswith(('.md5', '.sha256')))):
+-                nested_archives.append(_file.name.split('/')[-1])
+-
+-        if nested_archives:
+-            self.log_info("Found nested archive(s), extracting top level")
+-            nested_path = self.extract_archive(archive)
+-            for arc_file in os.listdir(nested_path):
+-                if re.match('sosreport.*.tar.*', arc_file):
+-                    if arc_file.endswith(('.md5', '.sha256')):
+-                        continue
+-                    self.report_paths.append(os.path.join(nested_path,
+-                                                          arc_file))
+-            # add the toplevel extracted archive
+-            self.report_paths.append(nested_path)
++        _arc = None
++        if self.opts.archive_type != 'auto':
++            check_type = self.opts.archive_type.replace('-', '_')
++            for archive in self.archive_types:
++                if archive.type_name == check_type:
++                    _arc = archive(self.opts.target, self.tmpdir)
+         else:
+-            self.report_paths.append(self.opts.target)
+-
+-        archive.close()
+-
+-    def extract_archive(self, archive):
+-        """Extract an archive into our tmpdir so that we may inspect it or
+-        iterate through its contents for obfuscation
+-
+-        Positional arguments:
+-
+-            :param archive:     An open TarFile object for the archive
+-
+-        """
+-        if not isinstance(archive, tarfile.TarFile):
+-            archive = tarfile.open(archive)
+-        path = os.path.join(self.tmpdir, 'cleaner')
+-        archive.extractall(path)
+-        return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
++            for arc in self.archive_types:
++                if arc.check_is_type(self.opts.target):
++                    _arc = arc(self.opts.target, self.tmpdir)
++                    break
++        if not _arc:
++            return
++        self.report_paths.append(_arc)
++        if _arc.is_nested:
++            self.report_paths.extend(_arc.get_nested_archives())
++            # We need to preserve the top level archive until all
++            # nested archives are processed
++            self.report_paths.remove(_arc)
++            self.nested_archive = _arc
++        if self.nested_archive:
++            self.nested_archive.ui_name = self.nested_archive.description
+ 
+     def execute(self):
+         """SoSCleaner will begin by inspecting the TARGET option to determine
+@@ -283,6 +271,7 @@ third party.
+         be unpacked, cleaned, and repacked and the final top-level archive will
+         then be repacked as well.
+         """
++        self.arc_name = self.opts.target.split('/')[-1].split('.tar')[0]
+         if self.from_cmdline:
+             self.print_disclaimer()
+         self.report_paths = []
+@@ -290,23 +279,11 @@ third party.
+             self.ui_log.error("Invalid target: no such file or directory %s"
+                               % self.opts.target)
+             self._exit(1)
+-        if os.path.isdir(self.opts.target):
+-            self.arc_name = self.opts.target.split('/')[-1]
+-            for _file in os.listdir(self.opts.target):
+-                if _file == 'sos_logs':
+-                    self.report_paths.append(self.opts.target)
+-                if (_file.startswith('sosreport') and
+-                   (_file.endswith(".tar.gz") or _file.endswith(".tar.xz"))):
+-                    self.report_paths.append(os.path.join(self.opts.target,
+-                                                          _file))
+-            if not self.report_paths:
+-                self.ui_log.error("Invalid target: not an sos directory")
+-                self._exit(1)
+-        else:
+-            self.inspect_target_archive()
++
++        self.inspect_target_archive()
+ 
+         if not self.report_paths:
+-            self.ui_log.error("No valid sos archives or directories found\n")
++            self.ui_log.error("No valid archives or directories found\n")
+             self._exit(1)
+ 
+         # we have at least one valid target to obfuscate
+@@ -334,33 +311,7 @@ third party.
+ 
+         final_path = None
+         if len(self.completed_reports) > 1:
+-            # we have an archive of archives, so repack the obfuscated tarball
+-            arc_name = self.arc_name + '-obfuscated'
+-            self.setup_archive(name=arc_name)
+-            for arc in self.completed_reports:
+-                if arc.is_tarfile:
+-                    arc_dest = self.obfuscate_string(
+-                        arc.final_archive_path.split('/')[-1]
+-                    )
+-                    self.archive.add_file(arc.final_archive_path,
+-                                          dest=arc_dest)
+-                    checksum = self.get_new_checksum(arc.final_archive_path)
+-                    if checksum is not None:
+-                        dname = self.obfuscate_string(
+-                            "checksums/%s.%s" % (arc_dest, self.hash_name)
+-                        )
+-                        self.archive.add_string(checksum, dest=dname)
+-                else:
+-                    for dirname, dirs, files in os.walk(arc.archive_path):
+-                        for filename in files:
+-                            if filename.startswith('sosreport'):
+-                                continue
+-                            fname = os.path.join(dirname, filename)
+-                            dnm = self.obfuscate_string(
+-                                fname.split(arc.archive_name)[-1].lstrip('/')
+-                            )
+-                            self.archive.add_file(fname, dest=dnm)
+-            arc_path = self.archive.finalize(self.opts.compression_type)
++            arc_path = self.rebuild_nested_archive()
+         else:
+             arc = self.completed_reports[0]
+             arc_path = arc.final_archive_path
+@@ -371,8 +322,7 @@ third party.
+                 )
+                 with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf:
+                     cf.write(checksum)
+-
+-        self.write_cleaner_log()
++            self.write_cleaner_log()
+ 
+         final_path = self.obfuscate_string(
+             os.path.join(self.sys_tmp, arc_path.split('/')[-1])
+@@ -393,6 +343,30 @@ third party.
+ 
+         self.cleanup()
+ 
++    def rebuild_nested_archive(self):
++        """Handles repacking the nested tarball, now containing only obfuscated
++        copies of the reports, log files, manifest, etc...
++        """
++        # we have an archive of archives, so repack the obfuscated tarball
++        arc_name = self.arc_name + '-obfuscated'
++        self.setup_archive(name=arc_name)
++        for archive in self.completed_reports:
++            arc_dest = archive.final_archive_path.split('/')[-1]
++            checksum = self.get_new_checksum(archive.final_archive_path)
++            if checksum is not None:
++                dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
++                self.archive.add_string(checksum, dest=dname)
++        for dirn, dirs, files in os.walk(self.nested_archive.extracted_path):
++            for filename in files:
++                fname = os.path.join(dirn, filename)
++                dname = fname.split(self.nested_archive.extracted_path)[-1]
++                dname = dname.lstrip('/')
++                self.archive.add_file(fname, dest=dname)
++                # remove it now so we don't balloon our fs space needs
++                os.remove(fname)
++        self.write_cleaner_log(archive=True)
++        return self.archive.finalize(self.opts.compression_type)
++
+     def compile_mapping_dict(self):
+         """Build a dict that contains each parser's map as a key, with the
+         contents as that key's value. This will then be written to disk in the
+@@ -441,7 +415,7 @@ third party.
+                 self.log_error("Could not update mapping config file: %s"
+                                % err)
+ 
+-    def write_cleaner_log(self):
++    def write_cleaner_log(self, archive=False):
+         """When invoked via the command line, the logging from SoSCleaner will
+         not be added to the archive(s) it processes, so we need to write it
+         separately to disk
+@@ -454,6 +428,10 @@ third party.
+             for line in self.sos_log_file.readlines():
+                 logfile.write(line)
+ 
++        if archive:
++            self.obfuscate_file(log_name)
++            self.archive.add_file(log_name, dest="sos_logs/cleaner.log")
++
+     def get_new_checksum(self, archive_path):
+         """Calculate a new checksum for the obfuscated archive, as the previous
+         checksum will no longer be valid
+@@ -481,11 +459,11 @@ third party.
+         be obfuscated concurrently.
+         """
+         try:
+-            if len(self.report_paths) > 1:
+-                msg = ("Found %s total reports to obfuscate, processing up to "
+-                       "%s concurrently\n"
+-                       % (len(self.report_paths), self.opts.jobs))
+-                self.ui_log.info(msg)
++            msg = (
++                "Found %s total reports to obfuscate, processing up to %s "
++                "concurrently\n" % (len(self.report_paths), self.opts.jobs)
++            )
++            self.ui_log.info(msg)
+             if self.opts.keep_binary_files:
+                 self.ui_log.warning(
+                     "WARNING: binary files that potentially contain sensitive "
+@@ -494,53 +472,67 @@ third party.
+             pool = ThreadPoolExecutor(self.opts.jobs)
+             pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
+             pool.shutdown(wait=True)
++            # finally, obfuscate the nested archive if one exists
++            if self.nested_archive:
++                self._replace_obfuscated_archives()
++                self.obfuscate_report(self.nested_archive)
+         except KeyboardInterrupt:
+             self.ui_log.info("Exiting on user cancel")
+             os._exit(130)
+ 
++    def _replace_obfuscated_archives(self):
++        """When we have a nested archive, we need to rebuild the original
++        archive, which entails replacing the existing archives with their
++        obfuscated counterparts
++        """
++        for archive in self.completed_reports:
++            os.remove(archive.archive_path)
++            dest = self.nested_archive.extracted_path
++            tarball = archive.final_archive_path.split('/')[-1]
++            dest_name = os.path.join(dest, tarball)
++            shutil.move(archive.final_archive_path, dest)
++            archive.final_archive_path = dest_name
++
+     def preload_all_archives_into_maps(self):
+         """Before doing the actual obfuscation, if we have multiple archives
+         to obfuscate then we need to preload each of them into the mappings
+         to ensure that node1 is obfuscated in node2 as well as node2 being
+         obfuscated in node1's archive.
+         """
+-        self.log_info("Pre-loading multiple archives into obfuscation maps")
++        self.log_info("Pre-loading all archives into obfuscation maps")
+         for _arc in self.report_paths:
+-            is_dir = os.path.isdir(_arc)
+-            if is_dir:
+-                _arc_name = _arc
+-            else:
+-                archive = tarfile.open(_arc)
+-                _arc_name = _arc.split('/')[-1].split('.tar')[0]
+-            # for each parser, load the map_prep_file into memory, and then
+-            # send that for obfuscation. We don't actually obfuscate the file
+-            # here, do that in the normal archive loop
+             for _parser in self.parsers:
+-                if not _parser.prep_map_file:
++                try:
++                    pfile = _arc.prep_files[_parser.name.lower().split()[0]]
++                    if not pfile:
++                        continue
++                except (IndexError, KeyError):
+                     continue
+-                if isinstance(_parser.prep_map_file, str):
+-                    _parser.prep_map_file = [_parser.prep_map_file]
+-                for parse_file in _parser.prep_map_file:
+-                    _arc_path = os.path.join(_arc_name, parse_file)
++                if isinstance(pfile, str):
++                    pfile = [pfile]
++                for parse_file in pfile:
++                    self.log_debug("Attempting to load %s" % parse_file)
+                     try:
+-                        if is_dir:
+-                            _pfile = open(_arc_path, 'r')
+-                            content = _pfile.read()
+-                        else:
+-                            _pfile = archive.extractfile(_arc_path)
+-                            content = _pfile.read().decode('utf-8')
+-                        _pfile.close()
++                        content = _arc.get_file_content(parse_file)
++                        if not content:
++                            continue
+                         if isinstance(_parser, SoSUsernameParser):
+                             _parser.load_usernames_into_map(content)
+-                        for line in content.splitlines():
+-                            if isinstance(_parser, SoSHostnameParser):
+-                                _parser.load_hostname_into_map(line)
+-                            self.obfuscate_line(line)
++                        elif isinstance(_parser, SoSHostnameParser):
++                            _parser.load_hostname_into_map(
++                                content.splitlines()[0]
++                            )
++                        else:
++                            for line in content.splitlines():
++                                self.obfuscate_line(line)
+                     except Exception as err:
+-                        self.log_debug("Could not prep %s: %s"
+-                                       % (_arc_path, err))
++                        self.log_info(
++                            "Could not prepare %s from %s (archive: %s): %s"
++                            % (_parser.name, parse_file, _arc.archive_name,
++                               err)
++                        )
+ 
+-    def obfuscate_report(self, report):
++    def obfuscate_report(self, archive):
+         """Individually handle each archive or directory we've discovered by
+         running through each file therein.
+ 
+@@ -549,17 +541,12 @@ third party.
+             :param report str:      Filepath to the directory or archive
+         """
+         try:
+-            if not os.access(report, os.W_OK):
+-                msg = "Insufficient permissions on %s" % report
+-                self.log_info(msg)
+-                self.ui_log.error(msg)
+-                return
+-
+-            archive = SoSObfuscationArchive(report, self.tmpdir)
+             arc_md = self.cleaner_md.add_section(archive.archive_name)
+             start_time = datetime.now()
+             arc_md.add_field('start_time', start_time)
+-            archive.extract()
++            # don't double extract nested archives
++            if not archive.is_extracted:
++                archive.extract()
+             archive.report_msg("Beginning obfuscation...")
+ 
+             file_list = archive.get_file_list()
+@@ -586,27 +573,28 @@ third party.
+                               caller=archive.archive_name)
+ 
+             # if the archive was already a tarball, repack it
+-            method = archive.get_compression()
+-            if method:
+-                archive.report_msg("Re-compressing...")
+-                try:
+-                    archive.rename_top_dir(
+-                        self.obfuscate_string(archive.archive_name)
+-                    )
+-                    archive.compress(method)
+-                except Exception as err:
+-                    self.log_debug("Archive %s failed to compress: %s"
+-                                   % (archive.archive_name, err))
+-                    archive.report_msg("Failed to re-compress archive: %s"
+-                                       % err)
+-                    return
++            if not archive.is_nested:
++                method = archive.get_compression()
++                if method:
++                    archive.report_msg("Re-compressing...")
++                    try:
++                        archive.rename_top_dir(
++                            self.obfuscate_string(archive.archive_name)
++                        )
++                        archive.compress(method)
++                    except Exception as err:
++                        self.log_debug("Archive %s failed to compress: %s"
++                                       % (archive.archive_name, err))
++                        archive.report_msg("Failed to re-compress archive: %s"
++                                           % err)
++                        return
++                self.completed_reports.append(archive)
+ 
+             end_time = datetime.now()
+             arc_md.add_field('end_time', end_time)
+             arc_md.add_field('run_time', end_time - start_time)
+             arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
+             arc_md.add_field('total_substitutions', archive.total_sub_count)
+-            self.completed_reports.append(archive)
+             rmsg = ''
+             if archive.removed_file_count:
+                 rmsg = " [removed %s unprocessable files]"
+@@ -615,7 +603,7 @@ third party.
+ 
+         except Exception as err:
+             self.ui_log.info("Exception while processing %s: %s"
+-                             % (report, err))
++                             % (archive.archive_name, err))
+ 
+     def obfuscate_file(self, filename, short_name=None, arc_name=None):
+         """Obfuscate and individual file, line by line.
+@@ -635,6 +623,8 @@ third party.
+             # the requested file doesn't exist in the archive
+             return
+         subs = 0
++        if not short_name:
++            short_name = filename.split('/')[-1]
+         if not os.path.islink(filename):
+             # don't run the obfuscation on the link, but on the actual file
+             # at some other point.
+@@ -745,3 +735,5 @@ third party.
+         for parser in self.parsers:
+             _sec = parse_sec.add_section(parser.name.replace(' ', '_').lower())
+             _sec.add_field('entries', len(parser.mapping.dataset.keys()))
++
++# vim: set et ts=4 sw=4 :
+diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/archives/__init__.py
+similarity index 81%
+rename from sos/cleaner/obfuscation_archive.py
+rename to sos/cleaner/archives/__init__.py
+index ea0b7012..795c5a78 100644
+--- a/sos/cleaner/obfuscation_archive.py
++++ b/sos/cleaner/archives/__init__.py
+@@ -40,6 +40,10 @@ class SoSObfuscationArchive():
+     file_sub_list = []
+     total_sub_count = 0
+     removed_file_count = 0
++    type_name = 'undetermined'
++    description = 'undetermined'
++    is_nested = False
++    prep_files = {}
+ 
+     def __init__(self, archive_path, tmpdir):
+         self.archive_path = archive_path
+@@ -50,7 +54,43 @@ class SoSObfuscationArchive():
+         self.soslog = logging.getLogger('sos')
+         self.ui_log = logging.getLogger('sos_ui')
+         self.skip_list = self._load_skip_list()
+-        self.log_info("Loaded %s as an archive" % self.archive_path)
++        self.is_extracted = False
++        self._load_self()
++        self.archive_root = ''
++        self.log_info(
++            "Loaded %s as type %s"
++            % (self.archive_path, self.description)
++        )
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        """Check if the archive is a well-known type we directly support"""
++        return False
++
++    def _load_self(self):
++        if self.is_tarfile:
++            self.tarobj = tarfile.open(self.archive_path)
++
++    def get_nested_archives(self):
++        """Return a list of ObfuscationArchives that represent additional
++        archives found within the target archive. For example, an archive from
++        `sos collect` will return a list of ``SoSReportArchive`` objects.
++
++        This should be overridden by individual types of ObfuscationArchive's
++        """
++        return []
++
++    def get_archive_root(self):
++        """Set the root path for the archive that should be prepended to any
++        filenames given to methods in this class.
++        """
++        if self.is_tarfile:
++            toplevel = self.tarobj.firstmember
++            if toplevel.isdir():
++                return toplevel.name
++            else:
++                return os.sep
++        return os.path.abspath(self.archive_path)
+ 
+     def report_msg(self, msg):
+         """Helper to easily format ui messages on a per-report basis"""
+@@ -96,10 +136,42 @@ class SoSObfuscationArchive():
+             os.remove(full_fname)
+             self.removed_file_count += 1
+ 
+-    def extract(self):
++    def format_file_name(self, fname):
++        """Based on the type of archive we're dealing with, do whatever that
++        archive requires to a provided **relative** filepath to be able to
++        access it within the archive
++        """
++        if not self.is_extracted:
++            if not self.archive_root:
++                self.archive_root = self.get_archive_root()
++            return os.path.join(self.archive_root, fname)
++        else:
++            return os.path.join(self.extracted_path, fname)
++
++    def get_file_content(self, fname):
++        """Return the content from the specified fname. Particularly useful for
++        tarball-type archives so we can retrieve prep file contents prior to
++        extracting the entire archive
++        """
++        if self.is_extracted is False and self.is_tarfile:
++            filename = self.format_file_name(fname)
++            try:
++                return self.tarobj.extractfile(filename).read().decode('utf-8')
++            except KeyError:
++                self.log_debug(
++                    "Unable to retrieve %s: no such file in archive" % fname
++                )
++                return ''
++        else:
++            with open(self.format_file_name(fname), 'r') as to_read:
++                return to_read.read()
++
++    def extract(self, quiet=False):
+         if self.is_tarfile:
+-            self.report_msg("Extracting...")
++            if not quiet:
++                self.report_msg("Extracting...")
+             self.extracted_path = self.extract_self()
++            self.is_extracted = True
+         else:
+             self.extracted_path = self.archive_path
+         # if we're running as non-root (e.g. collector), then we can have a
+@@ -317,3 +389,5 @@ class SoSObfuscationArchive():
+                 return False
+             except UnicodeDecodeError:
+                 return True
++
++# vim: set et ts=4 sw=4 :
+diff --git a/sos/cleaner/archives/generic.py b/sos/cleaner/archives/generic.py
+new file mode 100644
+index 00000000..2ce6f09b
+--- /dev/null
++++ b/sos/cleaner/archives/generic.py
+@@ -0,0 +1,52 @@
++# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++
++from sos.cleaner.archives import SoSObfuscationArchive
++
++import os
++import tarfile
++
++
++class DataDirArchive(SoSObfuscationArchive):
++    """A plain directory on the filesystem that is not directly associated with
++    any known or supported collection utility
++    """
++
++    type_name = 'data_dir'
++    description = 'unassociated directory'
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        return os.path.isdir(arc_path)
++
++    def set_archive_root(self):
++        return os.path.abspath(self.archive_path)
++
++
++class TarballArchive(SoSObfuscationArchive):
++    """A generic tar archive that is not associated with any known or supported
++    collection utility
++    """
++
++    type_name = 'tarball'
++    description = 'unassociated tarball'
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        try:
++            return tarfile.is_tarfile(arc_path)
++        except Exception:
++            return False
++
++    def set_archive_root(self):
++        if self.tarobj.firstmember.isdir():
++            return self.tarobj.firstmember.name
++        return ''
+diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
+new file mode 100644
+index 00000000..4401d710
+--- /dev/null
++++ b/sos/cleaner/archives/sos.py
+@@ -0,0 +1,106 @@
++# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++
++from sos.cleaner.archives import SoSObfuscationArchive
++
++import os
++import tarfile
++
++
++class SoSReportArchive(SoSObfuscationArchive):
++    """This is the class representing an sos report, or in other words the
++    type the archive the SoS project natively generates
++    """
++
++    type_name = 'report'
++    description = 'sos report archive'
++    prep_files = {
++        'hostname': 'sos_commands/host/hostname',
++        'ip': 'sos_commands/networking/ip_-o_addr',
++        'mac': 'sos_commands/networking/ip_-d_address',
++        'username': [
++            'sos_commands/login/lastlog_-u_1000-60000',
++            'sos_commands/login/lastlog_-u_60001-65536',
++            'sos_commands/login/lastlog_-u_65537-4294967295',
++            # AD users will be reported here, but favor the lastlog files since
++            # those will include local users who have not logged in
++            'sos_commands/login/last'
++        ]
++    }
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        try:
++            return tarfile.is_tarfile(arc_path) and 'sosreport-' in arc_path
++        except Exception:
++            return False
++
++
++class SoSReportDirectory(SoSReportArchive):
++    """This is the archive class representing a build directory, or in other
++    words what `sos report --clean` will end up using for in-line obfuscation
++    """
++
++    type_name = 'report_dir'
++    description = 'sos report directory'
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        if os.path.isdir(arc_path):
++            return 'sos_logs' in os.listdir(arc_path)
++        return False
++
++
++class SoSCollectorArchive(SoSObfuscationArchive):
++    """Archive class representing the tarball created by ``sos collect``. It
++    will not provide prep files on its own, however it will provide a list
++    of SoSReportArchive's which will then be used to prep the parsers
++    """
++
++    type_name = 'collect'
++    description = 'sos collect tarball'
++    is_nested = True
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        try:
++            return (tarfile.is_tarfile(arc_path) and 'sos-collect' in arc_path)
++        except Exception:
++            return False
++
++    def get_nested_archives(self):
++        self.extract(quiet=True)
++        _path = self.extracted_path
++        archives = []
++        for fname in os.listdir(_path):
++            arc_name = os.path.join(_path, fname)
++            if 'sosreport-' in fname and tarfile.is_tarfile(arc_name):
++                archives.append(SoSReportArchive(arc_name, self.tmpdir))
++        return archives
++
++
++class SoSCollectorDirectory(SoSCollectorArchive):
++    """The archive class representing the temp directory used by ``sos
++    collect`` when ``--clean`` is used during runtime.
++    """
++
++    type_name = 'collect_dir'
++    description = 'sos collect directory'
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        if os.path.isdir(arc_path):
++            for fname in os.listdir(arc_path):
++                if 'sos-collector-' in fname:
++                    return True
++        return False
++
++# vim: set et ts=4 sw=4 :
+diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
+index af6e375e..e62fd938 100644
+--- a/sos/cleaner/parsers/__init__.py
++++ b/sos/cleaner/parsers/__init__.py
+@@ -37,11 +37,6 @@ class SoSCleanerParser():
+     :cvar map_file_key: The key in the ``map_file`` to read when loading
+                         previous obfuscation matches
+     :vartype map_file_key: ``str``
+-
+-
+-    :cvar prep_map_file: File to read from an archive to pre-seed the map with
+-                         matches. E.G. ip_addr for loading IP addresses
+-    :vartype prep_map_fie: ``str``
+     """
+ 
+     name = 'Undefined Parser'
+@@ -49,7 +44,6 @@ class SoSCleanerParser():
+     skip_line_patterns = []
+     skip_files = []
+     map_file_key = 'unset'
+-    prep_map_file = []
+ 
+     def __init__(self, config={}):
+         if self.map_file_key in config:
+diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
+index 71e13d3f..daa76a62 100644
+--- a/sos/cleaner/parsers/hostname_parser.py
++++ b/sos/cleaner/parsers/hostname_parser.py
+@@ -16,7 +16,6 @@ class SoSHostnameParser(SoSCleanerParser):
+ 
+     name = 'Hostname Parser'
+     map_file_key = 'hostname_map'
+-    prep_map_file = 'sos_commands/host/hostname'
+     regex_patterns = [
+         r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
+     ]
+diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
+index 525139e8..71d38be8 100644
+--- a/sos/cleaner/parsers/ip_parser.py
++++ b/sos/cleaner/parsers/ip_parser.py
+@@ -41,7 +41,6 @@ class SoSIPParser(SoSCleanerParser):
+     ]
+ 
+     map_file_key = 'ip_map'
+-    prep_map_file = 'sos_commands/networking/ip_-o_addr'
+ 
+     def __init__(self, config):
+         self.mapping = SoSIPMap()
+diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
+index 68de3727..694c6073 100644
+--- a/sos/cleaner/parsers/keyword_parser.py
++++ b/sos/cleaner/parsers/keyword_parser.py
+@@ -20,7 +20,6 @@ class SoSKeywordParser(SoSCleanerParser):
+ 
+     name = 'Keyword Parser'
+     map_file_key = 'keyword_map'
+-    prep_map_file = ''
+ 
+     def __init__(self, config, keywords=None, keyword_file=None):
+         self.mapping = SoSKeywordMap()
+diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py
+index 7ca80b8d..c74288cf 100644
+--- a/sos/cleaner/parsers/mac_parser.py
++++ b/sos/cleaner/parsers/mac_parser.py
+@@ -30,7 +30,6 @@ class SoSMacParser(SoSCleanerParser):
+         '534f:53'
+     )
+     map_file_key = 'mac_map'
+-    prep_map_file = 'sos_commands/networking/ip_-d_address'
+ 
+     def __init__(self, config):
+         self.mapping = SoSMacMap()
+diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
+index b142e371..35377a31 100644
+--- a/sos/cleaner/parsers/username_parser.py
++++ b/sos/cleaner/parsers/username_parser.py
+@@ -25,14 +25,6 @@ class SoSUsernameParser(SoSCleanerParser):
+ 
+     name = 'Username Parser'
+     map_file_key = 'username_map'
+-    prep_map_file = [
+-        'sos_commands/login/lastlog_-u_1000-60000',
+-        'sos_commands/login/lastlog_-u_60001-65536',
+-        'sos_commands/login/lastlog_-u_65537-4294967295',
+-        # AD users will be reported here, but favor the lastlog files since
+-        # those will include local users who have not logged in
+-        'sos_commands/login/last'
+-    ]
+     regex_patterns = []
+     skip_list = [
+         'core',
+diff --git a/tests/cleaner_tests/existing_archive.py b/tests/cleaner_tests/existing_archive.py
+index 0eaf6c8d..e13d1cae 100644
+--- a/tests/cleaner_tests/existing_archive.py
++++ b/tests/cleaner_tests/existing_archive.py
+@@ -28,6 +28,13 @@ class ExistingArchiveCleanTest(StageTwoReportTest):
+     def test_obfuscation_log_created(self):
+         self.assertFileExists(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE))
+ 
++    def test_archive_type_correct(self):
++        with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
++            for line in log:
++                if "Loaded %s" % ARCHIVE in line:
++                    assert 'as type sos report archive' in line, "Incorrect archive type detected: %s" % line
++                    break
++
+     def test_from_cmdline_logged(self):
+         with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
+             for line in log:
+diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
+index 3b28e7a2..2de54946 100644
+--- a/tests/cleaner_tests/full_report_run.py
++++ b/tests/cleaner_tests/full_report_run.py
+@@ -35,6 +35,9 @@ class FullCleanTest(StageTwoReportTest):
+     def test_tarball_named_obfuscated(self):
+         self.assertTrue('obfuscated' in self.archive)
+ 
++    def test_archive_type_correct(self):
++        self.assertSosLogContains('Loaded .* as type sos report directory')
++
+     def test_hostname_not_in_any_file(self):
+         host = self.sysinfo['pre']['networking']['hostname']
+         # much faster to just use grep here
+diff --git a/tests/cleaner_tests/report_with_mask.py b/tests/cleaner_tests/report_with_mask.py
+index 4f94ba33..08e873d4 100644
+--- a/tests/cleaner_tests/report_with_mask.py
++++ b/tests/cleaner_tests/report_with_mask.py
+@@ -31,6 +31,9 @@ class ReportWithMask(StageOneReportTest):
+     def test_tarball_named_obfuscated(self):
+         self.assertTrue('obfuscated' in self.archive)
+ 
++    def test_archive_type_correct(self):
++        self.assertSosLogContains('Loaded .* as type sos report directory')
++
+     def test_localhost_was_obfuscated(self):
+         self.assertFileHasContent('/etc/hostname', 'host0')
+ 
+-- 
+2.31.1
+
+From 9b119f860eaec089f7ef884ff39c42589a662994 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 1 Sep 2021 00:34:04 -0400
+Subject: [PATCH] [hostname_map] Add a catch for single-character hostnames
+
+If a log file was truncated at a specific boundary in a string of the
+FQDN of the host such that we only get a couple characters before the
+rest of the domain, we would previously boldly replace all instances of
+that character with the obfuscated short name; not very helpful.
+
+Instead, don't sanitize the short name if this happens and instead
+obfuscate the whole FQDN as 'unknown.example.com'.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/mappings/hostname_map.py | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
+index d4b2c88e..e70a5530 100644
+--- a/sos/cleaner/mappings/hostname_map.py
++++ b/sos/cleaner/mappings/hostname_map.py
+@@ -184,7 +184,14 @@ class SoSHostnameMap(SoSMap):
+             hostname = host[0]
+             domain = host[1:]
+             # obfuscate the short name
+-            ob_hostname = self.sanitize_short_name(hostname)
++            if len(hostname) > 2:
++                ob_hostname = self.sanitize_short_name(hostname)
++            else:
++                # by best practice it appears the host part of the fqdn was cut
++                # off due to some form of truncating, as such don't obfuscate
++                # short strings that are likely to throw off obfuscation of
++                # unrelated bits and paths
++                ob_hostname = 'unknown'
+             ob_domain = self.sanitize_domain(domain)
+             self.dataset[item] = ob_domain
+             return '.'.join([ob_hostname, ob_domain])
+-- 
+2.31.1
+
+From f3f3e763d7c31b7b7cafdf8dd4dab87056fb7696 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 1 Sep 2021 15:54:55 -0400
+Subject: [PATCH] [cleaner] Add support for Insights client archives
+
+Adds a new type of `SoSObfuscationArchive` to add support for
+obfuscating archives generated by the Insights project.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ man/en/sos-clean.1               |  1 +
+ sos/cleaner/__init__.py          |  4 ++-
+ sos/cleaner/archives/insights.py | 42 ++++++++++++++++++++++++++++++++
+ 3 files changed, 46 insertions(+), 1 deletion(-)
+ create mode 100644 sos/cleaner/archives/insights.py
+
+diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
+index 54026713..358ec0cb 100644
+--- a/man/en/sos-clean.1
++++ b/man/en/sos-clean.1
+@@ -105,6 +105,7 @@ The following are accepted values for this option:
+     \fBauto\fR          Automatically detect the archive type
+     \fBreport\fR        An archive generated by \fBsos report\fR
+     \fBcollect\fR       An archive generated by \fBsos collect\fR
++    \fBinsights\fR      An archive generated by the \fBinsights-client\fR package
+ 
+ The following may also be used, however note that these do not attempt to pre-load
+ any information from the archives into the parsers. This means that, among other limitations,
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index 6d2eb483..3e08aa28 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -29,6 +29,7 @@ from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
+                                       SoSCollectorArchive,
+                                       SoSCollectorDirectory)
+ from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
++from sos.cleaner.archives.insights import InsightsArchive
+ from sos.utilities import get_human_readable
+ from textwrap import fill
+ 
+@@ -100,6 +101,7 @@ class SoSCleaner(SoSComponent):
+             SoSReportArchive,
+             SoSCollectorDirectory,
+             SoSCollectorArchive,
++            InsightsArchive,
+             # make sure these two are always last as they are fallbacks
+             DataDirArchive,
+             TarballArchive
+@@ -194,7 +196,7 @@ third party.
+                                help='The directory or archive to obfuscate')
+         clean_grp.add_argument('--archive-type', default='auto',
+                                choices=['auto', 'report', 'collect',
+-                                        'data-dir', 'tarball'],
++                                        'insights', 'data-dir', 'tarball'],
+                                help=('Specify what kind of archive the target '
+                                      'was generated as'))
+         clean_grp.add_argument('--domains', action='extend', default=[],
+diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py
+new file mode 100644
+index 00000000..dab48b16
+--- /dev/null
++++ b/sos/cleaner/archives/insights.py
+@@ -0,0 +1,42 @@
++# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++
++from sos.cleaner.archives import SoSObfuscationArchive
++
++import tarfile
++
++
++class InsightsArchive(SoSObfuscationArchive):
++    """This class represents archives generated by the insights-client utility
++    for RHEL systems.
++    """
++
++    type_name = 'insights'
++    description = 'insights-client archive'
++
++    prep_files = {
++        'hostname': 'data/insights_commands/hostname_-f',
++        'ip': 'data/insights_commands/ip_addr',
++        'mac': 'data/insights_commands/ip_addr'
++    }
++
++    @classmethod
++    def check_is_type(cls, arc_path):
++        try:
++            return tarfile.is_tarfile(arc_path) and 'insights-' in arc_path
++        except Exception:
++            return False
++
++    def get_archive_root(self):
++        top = self.archive_path.split('/')[-1].split('.tar')[0]
++        if self.tarobj.firstmember.name == '.':
++            top = './' + top
++        return top
+-- 
+2.31.1
+
+From 9639dc3d240076b55f2a1d04b43ea42bebd09215 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Tue, 16 Nov 2021 17:50:42 -0500
+Subject: [PATCH] [clean,hostname_parser] Source /etc/hosts for obfuscation
+
+Up until now, our sourcing of hostnames/domains for obfuscation has been
+dependent upon the output of the `hostname` command. However, some
+scenarios have come up where sourcing `/etc/hosts` is advantageous for
+several reasons:
+
+First, if `hostname` output is unavailable, this provides a fallback
+measure.
+
+Second, `/etc/hosts` is a common place to have short names defined which
+would otherwise not be detected (or at the very least would result in a
+race condition based on where/if the short name was elsewhere able to be
+gleaned from an FQDN), thus leaving the potential for unobfuscated data
+in an archive.
+
+Due to both the nature of hostname obfuscation and the malleable syntax
+of `/etc/hosts`, the parsing of this file needs special handling not
+covered by our more generic parsing and obfuscation methods.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/__init__.py                | 11 ++++++++---
+ sos/cleaner/archives/sos.py            |  5 ++++-
+ sos/cleaner/parsers/hostname_parser.py | 19 +++++++++++++++++++
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index ed461a8f..3f530d44 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -523,9 +523,14 @@ third party.
+                         if isinstance(_parser, SoSUsernameParser):
+                             _parser.load_usernames_into_map(content)
+                         elif isinstance(_parser, SoSHostnameParser):
+-                            _parser.load_hostname_into_map(
+-                                content.splitlines()[0]
+-                            )
++                            if 'hostname' in parse_file:
++                                _parser.load_hostname_into_map(
++                                    content.splitlines()[0]
++                                )
++                            elif 'etc/hosts' in parse_file:
++                                _parser.load_hostname_from_etc_hosts(
++                                    content
++                                )
+                         else:
+                             for line in content.splitlines():
+                                 self.obfuscate_line(line)
+diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
+index 4401d710..f8720c88 100644
+--- a/sos/cleaner/archives/sos.py
++++ b/sos/cleaner/archives/sos.py
+@@ -23,7 +23,10 @@ class SoSReportArchive(SoSObfuscationArchive):
+     type_name = 'report'
+     description = 'sos report archive'
+     prep_files = {
+-        'hostname': 'sos_commands/host/hostname',
++        'hostname': [
++            'sos_commands/host/hostname',
++            'etc/hosts'
++        ],
+         'ip': 'sos_commands/networking/ip_-o_addr',
+         'mac': 'sos_commands/networking/ip_-d_address',
+         'username': [
+diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
+index daa76a62..0a733bee 100644
+--- a/sos/cleaner/parsers/hostname_parser.py
++++ b/sos/cleaner/parsers/hostname_parser.py
+@@ -61,6 +61,25 @@ class SoSHostnameParser(SoSCleanerParser):
+             self.mapping.add(high_domain)
+         self.mapping.add(hostname_string)
+ 
++    def load_hostname_from_etc_hosts(self, content):
++        """Parse an archive's copy of /etc/hosts, which requires handling that
++        is separate from the output of the `hostname` command. Just like
++        load_hostname_into_map(), this has to be done explicitly and we
++        cannot rely upon the more generic methods to do this reliably.
++        """
++        lines = content.splitlines()
++        for line in lines:
++            if line.startswith('#') or 'localhost' in line:
++                continue
++            hostln = line.split()[1:]
++            for host in hostln:
++                if len(host.split('.')) == 1:
++                    # only generate a mapping for fqdns but still record the
++                    # short name here for later obfuscation with parse_line()
++                    self.short_names.append(host)
++                else:
++                    self.mapping.add(host)
++
+     def parse_line(self, line):
+         """Override the default parse_line() method to also check for the
+         shortname of the host derived from the hostname.
+-- 
+2.31.1
+
+From c1680226b53452b18f27f2e76c3e0e03e521f935 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 17 Nov 2021 13:11:33 -0500
+Subject: [PATCH] [clean, hostname] Fix unintentionally case sensitive
+ shortname handling
+
+It was discovered that our extra handling for shortnames was
+unintentionally case sensitive. Fix this to ensure that shortnames are
+obfuscated regardless of case in all collected text.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/mappings/hostname_map.py   |  6 +++---
+ sos/cleaner/parsers/hostname_parser.py |  8 +++++---
+ tests/cleaner_tests/full_report_run.py | 21 ++++++++++++++++++++-
+ 3 files changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
+index e70a5530..0fe78fb1 100644
+--- a/sos/cleaner/mappings/hostname_map.py
++++ b/sos/cleaner/mappings/hostname_map.py
+@@ -169,13 +169,13 @@ class SoSHostnameMap(SoSMap):
+ 
+     def sanitize_item(self, item):
+         host = item.split('.')
+-        if all([h.isupper() for h in host]):
++        if len(host) > 1 and all([h.isupper() for h in host]):
+             # by convention we have just a domain
+             _host = [h.lower() for h in host]
+             return self.sanitize_domain(_host).upper()
+         if len(host) == 1:
+             # we have a shortname for a host
+-            return self.sanitize_short_name(host[0])
++            return self.sanitize_short_name(host[0].lower())
+         if len(host) == 2:
+             # we have just a domain name, e.g. example.com
+             return self.sanitize_domain(host)
+@@ -185,7 +185,7 @@ class SoSHostnameMap(SoSMap):
+             domain = host[1:]
+             # obfuscate the short name
+             if len(hostname) > 2:
+-                ob_hostname = self.sanitize_short_name(hostname)
++                ob_hostname = self.sanitize_short_name(hostname.lower())
+             else:
+                 # by best practice it appears the host part of the fqdn was cut
+                 # off due to some form of truncating, as such don't obfuscate
+diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
+index 0a733bee..7fd0e698 100644
+--- a/sos/cleaner/parsers/hostname_parser.py
++++ b/sos/cleaner/parsers/hostname_parser.py
+@@ -8,6 +8,8 @@
+ #
+ # See the LICENSE file in the source distribution for further information.
+ 
++import re
++
+ from sos.cleaner.parsers import SoSCleanerParser
+ from sos.cleaner.mappings.hostname_map import SoSHostnameMap
+ 
+@@ -91,9 +93,9 @@ class SoSHostnameParser(SoSCleanerParser):
+             """
+             if search in self.mapping.skip_keys:
+                 return ln, count
+-            if search in ln:
+-                count += ln.count(search)
+-                ln = ln.replace(search, self.mapping.get(repl or search))
++            _reg = re.compile(search, re.I)
++            if _reg.search(ln):
++                return _reg.subn(self.mapping.get(repl or search), ln)
+             return ln, count
+ 
+         count = 0
+diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
+index 2de54946..0b23acaf 100644
+--- a/tests/cleaner_tests/full_report_run.py
++++ b/tests/cleaner_tests/full_report_run.py
+@@ -26,6 +26,24 @@ class FullCleanTest(StageTwoReportTest):
+     # replace with an empty placeholder, make sure that this test case is not
+     # influenced by previous clean runs
+     files = ['/etc/sos/cleaner/default_mapping']
++    packages = {
++        'rhel': ['python3-systemd'],
++        'ubuntu': ['python3-systemd']
++    }
++
++    def pre_sos_setup(self):
++        # ensure that case-insensitive matching of FQDNs and shortnames work
++        from systemd import journal
++        from socket import gethostname
++        host = gethostname()
++        short = host.split('.')[0]
++        sosfd = journal.stream('sos-testing')
++        sosfd.write(
++            "This is a test line from sos clean testing. The hostname %s "
++            "should not appear, nor should %s in an obfuscated archive. The "
++            "shortnames of %s and %s should also not appear."
++            % (host.lower(), host.upper(), short.lower(), short.upper())
++        )
+ 
+     def test_private_map_was_generated(self):
+         self.assertOutputContains('A mapping of obfuscated elements is available at')
+@@ -40,8 +58,9 @@ class FullCleanTest(StageTwoReportTest):
+ 
+     def test_hostname_not_in_any_file(self):
+         host = self.sysinfo['pre']['networking']['hostname']
++        short = host.split('.')[0]
+         # much faster to just use grep here
+-        content = self.grep_for_content(host)
++        content = self.grep_for_content(host) + self.grep_for_content(short)
+         if not content:
+             assert True
+         else:
+-- 
+2.31.1
+
+From aaeb8cb57ed55598ab744b96d4f127aedebcb292 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Tue, 21 Sep 2021 15:23:20 -0400
+Subject: [PATCH] [build] Add archives to setup.py packages
+
+Adds the newly abstracted `sos.cleaner.archives` package to `setup.py`
+so that manual builds will properly include it.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ setup.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/setup.py b/setup.py
+index 1e8d8e2dc5..7653b59de3 100644
+--- a/setup.py
++++ b/setup.py
+@@ -102,7 +102,7 @@ def copy_file (self, filename, dirname):
+         'sos.policies.package_managers', 'sos.policies.init_systems',
+         'sos.report', 'sos.report.plugins', 'sos.collector',
+         'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings',
+-        'sos.cleaner.parsers'
++        'sos.cleaner.parsers', 'sos.cleaner.archives'
+     ],
+     cmdclass=cmdclass,
+     command_options=command_options,
diff --git a/SOURCES/sos-bz2025610-RHTS-api-change.patch b/SOURCES/sos-bz2025610-RHTS-api-change.patch
new file mode 100644
index 0000000..580117f
--- /dev/null
+++ b/SOURCES/sos-bz2025610-RHTS-api-change.patch
@@ -0,0 +1,224 @@
+From 2e8b5e2d4f30854cce93d149fc7d24b9d9cfd02c Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Fri, 19 Nov 2021 16:16:07 +0100
+Subject: [PATCH 1/3] [policies] strip path from SFTP upload filename
+
+When case_id is not supplied, we ask SFTP server to store the uploaded
+file under name /var/tmp/<tarball>, which is confusing.
+
+Let remove the path from it also in case_id not supplied.
+
+Related to: #2764
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/policies/distros/redhat.py | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
+index 3476e21fb..8817fc785 100644
+--- a/sos/policies/distros/redhat.py
++++ b/sos/policies/distros/redhat.py
+@@ -269,10 +269,10 @@ def _get_sftp_upload_name(self):
+         """The RH SFTP server will only automatically connect file uploads to
+         cases if the filename _starts_ with the case number
+         """
++        fname = self.upload_archive_name.split('/')[-1]
+         if self.case_id:
+-            return "%s_%s" % (self.case_id,
+-                              self.upload_archive_name.split('/')[-1])
+-        return self.upload_archive_name
++            return "%s_%s" % (self.case_id, fname)
++        return fname
+ 
+     def upload_sftp(self):
+         """Override the base upload_sftp to allow for setting an on-demand
+
+From 61023b29a656dd7afaa4a0643368b0a53f1a3779 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Fri, 19 Nov 2021 17:31:31 +0100
+Subject: [PATCH 2/3] [redhat] update SFTP API version to v2
+
+Change API version from v1 to v2, which includes:
+- change of URL
+- different URI
+- POST method for token generation instead of GET
+
+Resolves: #2764
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/policies/distros/redhat.py | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
+index 8817fc785..e4e2b8835 100644
+--- a/sos/policies/distros/redhat.py
++++ b/sos/policies/distros/redhat.py
+@@ -175,7 +175,7 @@ def get_tmp_dir(self, opt_tmp_dir):
+ No changes will be made to system configuration.
+ """
+ 
+-RH_API_HOST = "https://access.redhat.com"
++RH_API_HOST = "https://api.access.redhat.com"
+ RH_SFTP_HOST = "sftp://sftp.access.redhat.com"
+ 
+ 
+@@ -287,12 +287,12 @@ def upload_sftp(self):
+                             " for obtaining SFTP auth token.")
+         _token = None
+         _user = None
++        url = RH_API_HOST + '/support/v2/sftp/token'
+         # we have a username and password, but we need to reset the password
+         # to be the token returned from the auth endpoint
+         if self.get_upload_user() and self.get_upload_password():
+-            url = RH_API_HOST + '/hydra/rest/v1/sftp/token'
+             auth = self.get_upload_https_auth()
+-            ret = requests.get(url, auth=auth, timeout=10)
++            ret = requests.post(url, auth=auth, timeout=10)
+             if ret.status_code == 200:
+                 # credentials are valid
+                 _user = self.get_upload_user()
+@@ -302,8 +302,8 @@ def upload_sftp(self):
+                       "credentials. Will try anonymous.")
+         # we either do not have a username or password/token, or both
+         if not _token:
+-            aurl = RH_API_HOST + '/hydra/rest/v1/sftp/token?isAnonymous=true'
+-            anon = requests.get(aurl, timeout=10)
++            adata = {"isAnonymous": True}
++            anon = requests.post(url, data=json.dumps(adata), timeout=10)
+             if anon.status_code == 200:
+                 resp = json.loads(anon.text)
+                 _user = resp['username']
+
+From 267da2156ec61f526dd28e760ff6528408a76c3f Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Mon, 22 Nov 2021 15:22:32 +0100
+Subject: [PATCH 3/3] [policies] Deal 200 return code as success
+
+Return code 200 of POST method request must be dealt as success.
+
+Newly required due to the SFTP API change using POST.
+
+Related to: #2764
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/policies/distros/__init__.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
+index 0906fa779..6f257fdce 100644
+--- a/sos/policies/distros/__init__.py
++++ b/sos/policies/distros/__init__.py
+@@ -551,7 +551,7 @@ def upload_https(self):
+                 r = self._upload_https_put(arc, verify)
+             else:
+                 r = self._upload_https_post(arc, verify)
+-            if r.status_code != 201:
++            if r.status_code != 200 and r.status_code != 201:
+                 if r.status_code == 401:
+                     raise Exception(
+                         "Authentication failed: invalid user credentials"
+From 8da1b14246226792c160dd04e5c7c75dd4e8d44b Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Mon, 22 Nov 2021 10:44:09 +0100
+Subject: [PATCH] [collect] fix moved get_upload_url under Policy class
+
+SoSCollector does not further declare get_upload_url method
+as that was moved under Policy class(es).
+
+Resolves: #2766
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/collector/__init__.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index 50183e873..42a7731d6 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -1219,7 +1219,7 @@ this utility or remote systems that it c
+             msg = 'No sosreports were collected, nothing to archive...'
+             self.exit(msg, 1)
+ 
+-        if self.opts.upload and self.get_upload_url():
++        if self.opts.upload and self.policy.get_upload_url():
+             try:
+                 self.policy.upload_archive(arc_name)
+                 self.ui_log.info("Uploaded archive successfully")
+From abb2fc65bd14760021c61699ad3113cab3bd4c64 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Tue, 30 Nov 2021 11:37:02 +0100
+Subject: [PATCH 1/2] [redhat] Fix broken URI to upload to customer portal
+
+Revert back the unwanted change in URI of uploading tarball to the
+Red Hat Customer portal.
+
+Related: #2772
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/policies/distros/redhat.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
+index e4e2b883..eb442407 100644
+--- a/sos/policies/distros/redhat.py
++++ b/sos/policies/distros/redhat.py
+@@ -250,7 +250,7 @@ support representative.
+         elif self.commons['cmdlineopts'].upload_protocol == 'sftp':
+             return RH_SFTP_HOST
+         else:
+-            rh_case_api = "/hydra/rest/cases/%s/attachments"
++            rh_case_api = "/support/v1/cases/%s/attachments"
+             return RH_API_HOST + rh_case_api % self.case_id
+ 
+     def _get_upload_headers(self):
+-- 
+2.31.1
+
+
+From ea4f9e88a412c80a4791396e1bb78ac1e24ece14 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Tue, 30 Nov 2021 13:00:26 +0100
+Subject: [PATCH 2/2] [policy] Add error message when FTP upload write failure
+
+When (S)FTP upload fails to write the destination file,
+our "expect" code should detect it sooner than after timeout happens
+and write appropriate error message.
+
+Resolves: #2772
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/policies/distros/__init__.py | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
+index 6f257fdc..7bdc81b8 100644
+--- a/sos/policies/distros/__init__.py
++++ b/sos/policies/distros/__init__.py
+@@ -473,7 +473,8 @@ class LinuxPolicy(Policy):
+         put_expects = [
+             u'100%',
+             pexpect.TIMEOUT,
+-            pexpect.EOF
++            pexpect.EOF,
++            u'No such file or directory'
+         ]
+ 
+         put_success = ret.expect(put_expects, timeout=180)
+@@ -485,6 +486,8 @@ class LinuxPolicy(Policy):
+             raise Exception("Timeout expired while uploading")
+         elif put_success == 2:
+             raise Exception("Unknown error during upload: %s" % ret.before)
++        elif put_success == 3:
++            raise Exception("Unable to write archive to destination")
+         else:
+             raise Exception("Unexpected response from server: %s" % ret.before)
+ 
+-- 
+2.31.1
+
diff --git a/SPECS/sos.spec b/SPECS/sos.spec
index 8498505..a23b105 100644
--- a/SPECS/sos.spec
+++ b/SPECS/sos.spec
@@ -5,13 +5,13 @@
 Summary: A set of tools to gather troubleshooting information from a system
 Name: sos
 Version: 4.2
-Release: 4%{?dist}
+Release: 6%{?dist}
 Group: Applications/System
 Source0: https://github.com/sosreport/sos/archive/%{version}/sos-%{version}.tar.gz
 Source1: sos-audit-%{auditversion}.tgz
 License: GPLv2+
 BuildArch: noarch
-Url: http://github.com/sosreport/sos
+Url: https://github.com/sosreport/sos
 BuildRequires: python3-devel
 BuildRequires: gettext
 Requires: libxml2-python3
@@ -34,6 +34,8 @@ Patch10: sos-bz2004929-openvswitch-offline-analysis.patch
 Patch11: sos-bz2012857-plugin-timeout-unhandled-exception.patch
 Patch12: sos-bz2018033-plugin-timeouts-proper-handling.patch
 Patch13: sos-bz2020777-filter-namespace-per-pattern.patch
+Patch14: sos-bz2023867-cleaner-hostnames-improvements.patch
+Patch15: sos-bz2025610-RHTS-api-change.patch
 
 
 %description
@@ -58,6 +60,8 @@ support technicians and developers.
 %patch11 -p1
 %patch12 -p1
 %patch13 -p1
+%patch14 -p1
+%patch15 -p1
 
 %build
 %py3_build
@@ -124,6 +128,18 @@ of the system. Currently storage and filesystem commands are audited.
 %ghost /etc/audit/rules.d/40-sos-storage.rules
 
 %changelog
+* Tue Nov 30 2021 Pavel Moravec <pmoravec@redhat.com> = 4.2-6
+- [redhat] Fix broken URI to upload to customer portal
+  Resolves: bz2025610
+
+* Mon Nov 22 2021 Pavel Moravec <pmoravec@redhat.com> = 4.2-5
+- [clean,hostname_parser] Source /etc/hosts for obfuscation
+  Resolves: bz2023867
+- [clean, hostname] Fix unintentionally case sensitive
+  Resolves: bz2023863
+- [redhat] update SFTP API version to v2
+  Resolves: bz2025610
+
 * Tue Nov 16 2021 Pavel Moravec <pmoravec@redhat.com> = 4.2-4
 - [report] Calculate sizes of dirs, symlinks and manifest in
   Resolves: bz1873185