Blob Blame History Raw
From 1abc4e96638e819d3fbee74396b36a6ccaf0ab29 Mon Sep 17 00:00:00 2001
From: Matej Tyc <matyc@redhat.com>
Date: Tue, 3 Aug 2021 11:01:59 +0200
Subject: [PATCH] Refactor content identification

Don't use the multiprocessing pool - it sometimes creates problems during
its initialization:
https://bugzilla.redhat.com/show_bug.cgi?id=1989441
---
 org_fedora_oscap/content_handling.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/org_fedora_oscap/content_handling.py b/org_fedora_oscap/content_handling.py
index f2af22f..65d5a28 100644
--- a/org_fedora_oscap/content_handling.py
+++ b/org_fedora_oscap/content_handling.py
@@ -111,9 +111,8 @@ def parse_HTML_from_content(content):
 
 
 def identify_files(fpaths):
-    with multiprocessing.Pool(os.cpu_count()) as p:
-        labels = p.map(get_doc_type, fpaths)
-    return {path: label for (path, label) in zip(fpaths, labels)}
+    result = {path: get_doc_type(path) for path in fpaths}
+    return result
 
 
 def get_doc_type(file_path):
@@ -131,7 +130,9 @@ def get_doc_type(file_path):
     except UnicodeDecodeError:
         # 'oscap info' supplied weird output, which happens when it tries
         # to explain why it can't examine e.g. a JPG.
-        return None
+        pass
+    except Exception as e:
+        log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}")
     log.info("OSCAP addon: Identified {file_path} as {content_type}"
              .format(file_path=file_path, content_type=content_type))
     return content_type