From 62262cca798dd63d44b11f3b332173b184642773 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: May 02 2022 07:50:30 +0000 Subject: import rh-python38-python-lxml-4.4.1-8.el7 --- diff --git a/SOURCES/CVE-2021-43818.patch b/SOURCES/CVE-2021-43818.patch new file mode 100644 index 0000000..18d9393 --- /dev/null +++ b/SOURCES/CVE-2021-43818.patch @@ -0,0 +1,127 @@ +diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py +index 15298b5..ee2f0f8 100644 +--- a/src/lxml/html/clean.py ++++ b/src/lxml/html/clean.py +@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile( + + # All kinds of schemes besides just javascript: that can cause + # execution: +-_is_image_dataurl = re.compile( +- r'^data:image/.+;base64', re.I).search ++_find_image_dataurls = re.compile( ++ r'^data:image/(.+);base64,', re.I).findall + _is_possibly_malicious_scheme = re.compile( +- r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):', +- re.I).search ++ r'(javascript|jscript|livescript|vbscript|data|about|mocha):', ++ re.I).findall ++# SVG images can contain script content ++_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall ++ + def _is_javascript_scheme(s): +- if _is_image_dataurl(s): +- return None +- return _is_possibly_malicious_scheme(s) ++ is_image_url = False ++ for image_type in _find_image_dataurls(s): ++ is_image_url = True ++ if _is_unsafe_image_type(image_type): ++ return True ++ if is_image_url: ++ return False ++ return bool(_is_possibly_malicious_scheme(s)) + + _substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub +-# FIXME: should data: be blocked? + + # FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx + _conditional_comment_re = re.compile( +@@ -512,6 +519,8 @@ class Cleaner(object): + return True + if 'expression(' in style: + return True ++ if '@import' in style: ++ return True + if '', + lxml.html.tostring(clean_html(s))) + ++ def test_sneaky_import_in_style(self): ++ # Prevent "@@importimport" -> "@import" replacement. ++ style_codes = [ ++ "@@importimport(extstyle.css)", ++ "@ @ import import(extstyle.css)", ++ "@ @ importimport(extstyle.css)", ++ "@@ import import(extstyle.css)", ++ "@ @import import(extstyle.css)", ++ "@@importimport()", ++ ] ++ for style_code in style_codes: ++ html = '' % style_code ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (style_code, cleaned)) ++ ++ def test_svg_data_links(self): ++ # Remove SVG images with potentially insecure content. ++ svg = b'' ++ svgz = gzip.compress(svg) ++ svg_b64 = base64.b64encode(svg).decode('ASCII') ++ svgz_b64 = base64.b64encode(svgz).decode('ASCII') ++ urls = [ ++ "data:image/svg+xml;base64," + svg_b64, ++ "data:image/svg+xml-compressed;base64," + svgz_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ ++ def test_image_data_links(self): ++ data = b'123' ++ data_b64 = base64.b64encode(data).decode('ASCII') ++ urls = [ ++ "data:image/jpeg;base64," + data_b64, ++ "data:image/apng;base64," + data_b64, ++ "data:image/png;base64," + data_b64, ++ "data:image/gif;base64," + data_b64, ++ "data:image/webp;base64," + data_b64, ++ "data:image/bmp;base64," + data_b64, ++ "data:image/tiff;base64," + data_b64, ++ "data:image/x-icon;base64," + data_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ html.encode("UTF-8"), ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ + def test_formaction_attribute_in_button_input(self): + # The formaction attribute overrides the form's action and should be + # treated as a malicious link attribute diff --git a/SPECS/python-lxml.spec b/SPECS/python-lxml.spec index 5bb9083..9a11157 100644 --- a/SPECS/python-lxml.spec +++ b/SPECS/python-lxml.spec @@ -9,7 +9,7 @@ Name: %{?scl_prefix}python-%{modname} Version: 4.4.1 -Release: 7%{?dist} +Release: 8%{?dist} Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD @@ -28,6 +28,13 @@ Patch0: CVE-2020-27783.patch # Fixed upstream: https://github.com/lxml/lxml/commit/2d01a1ba8984e0483ce6619b972832377f208a0d Patch1: CVE-2021-28957.patch +# Fix for CVE-2021-43818: HTML Cleaner allows crafted +# and SVG embedded scripts to pass through +# Fixed upstream: +# https://github.com/lxml/lxml/commit/12fa9669007180a7bb87d990c375cf91ca5b664a +# https://github.com/lxml/lxml/commit/f2330237440df7e8f39c3ad1b1aa8852be3b27c0 +Patch2: CVE-2021-43818.patch + %{?scl:Requires: %{scl}-runtime} %{?scl:BuildRequires: %{scl}-runtime} @@ -88,6 +95,10 @@ set -ex %changelog +* Wed Mar 16 2022 Charalampos Stratakis - 4.4.1-8 +- Security fix for CVE-2021-43818 +Resolves: rhbz#2032569 + * Mon Jul 19 2021 Charalampos Stratakis - 4.4.1-7 - Security fix for CVE-2021-28957 Resolves: rhbz#1941534