|
|
11e9fe |
From 1abc4e96638e819d3fbee74396b36a6ccaf0ab29 Mon Sep 17 00:00:00 2001
|
|
|
11e9fe |
From: Matej Tyc <matyc@redhat.com>
|
|
|
11e9fe |
Date: Tue, 3 Aug 2021 11:01:59 +0200
|
|
|
11e9fe |
Subject: [PATCH] Refactor content identification
|
|
|
11e9fe |
|
|
|
11e9fe |
Don't use the multiprocessing pool - it sometimes creates problems during
|
|
|
11e9fe |
its initialization:
|
|
|
11e9fe |
https://bugzilla.redhat.com/show_bug.cgi?id=1989441
|
|
|
11e9fe |
---
|
|
|
11e9fe |
org_fedora_oscap/content_handling.py | 9 +++++----
|
|
|
11e9fe |
1 file changed, 5 insertions(+), 4 deletions(-)
|
|
|
11e9fe |
|
|
|
11e9fe |
diff --git a/org_fedora_oscap/content_handling.py b/org_fedora_oscap/content_handling.py
|
|
|
11e9fe |
index f2af22f..65d5a28 100644
|
|
|
11e9fe |
--- a/org_fedora_oscap/content_handling.py
|
|
|
11e9fe |
+++ b/org_fedora_oscap/content_handling.py
|
|
|
11e9fe |
@@ -111,9 +111,8 @@ def parse_HTML_from_content(content):
|
|
|
11e9fe |
|
|
|
11e9fe |
|
|
|
11e9fe |
def identify_files(fpaths):
|
|
|
11e9fe |
- with multiprocessing.Pool(os.cpu_count()) as p:
|
|
|
11e9fe |
- labels = p.map(get_doc_type, fpaths)
|
|
|
11e9fe |
- return {path: label for (path, label) in zip(fpaths, labels)}
|
|
|
11e9fe |
+ result = {path: get_doc_type(path) for path in fpaths}
|
|
|
11e9fe |
+ return result
|
|
|
11e9fe |
|
|
|
11e9fe |
|
|
|
11e9fe |
def get_doc_type(file_path):
|
|
|
11e9fe |
@@ -131,7 +130,9 @@ def get_doc_type(file_path):
|
|
|
11e9fe |
except UnicodeDecodeError:
|
|
|
11e9fe |
# 'oscap info' supplied weird output, which happens when it tries
|
|
|
11e9fe |
# to explain why it can't examine e.g. a JPG.
|
|
|
11e9fe |
- return None
|
|
|
11e9fe |
+ pass
|
|
|
11e9fe |
+ except Exception as e:
|
|
|
11e9fe |
+ log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}")
|
|
|
11e9fe |
log.info("OSCAP addon: Identified {file_path} as {content_type}"
|
|
|
11e9fe |
.format(file_path=file_path, content_type=content_type))
|
|
|
11e9fe |
return content_type
|