From 1abc4e96638e819d3fbee74396b36a6ccaf0ab29 Mon Sep 17 00:00:00 2001
From: Matej Tyc <matyc@redhat.com>
Date: Tue, 3 Aug 2021 11:01:59 +0200
Subject: [PATCH] Refactor content identification
Don't use the multiprocessing pool - it sometimes creates probems during
its initialization:
https://bugzilla.redhat.com/show_bug.cgi?id=1989441
---
org_fedora_oscap/content_handling.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/org_fedora_oscap/content_handling.py b/org_fedora_oscap/content_handling.py
index f2af22f..65d5a28 100644
--- a/org_fedora_oscap/content_handling.py
+++ b/org_fedora_oscap/content_handling.py
@@ -111,9 +111,8 @@ def parse_HTML_from_content(content):
def identify_files(fpaths):
- with multiprocessing.Pool(os.cpu_count()) as p:
- labels = p.map(get_doc_type, fpaths)
- return {path: label for (path, label) in zip(fpaths, labels)}
+ result = {path: get_doc_type(path) for path in fpaths}
+ return result
def get_doc_type(file_path):
@@ -131,7 +130,9 @@ def get_doc_type(file_path):
except UnicodeDecodeError:
# 'oscap info' supplied weird output, which happens when it tries
# to explain why it can't examine e.g. a JPG.
- return None
+ pass
+ except Exception as e:
+ log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}")
log.info("OSCAP addon: Identified {file_path} as {content_type}"
.format(file_path=file_path, content_type=content_type))
return content_type