From 3fea81ea26b09d4f04d5228b91d12779a86089e0 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: May 10 2022 07:11:06 +0000 Subject: import python-lxml-4.2.3-4.el8 --- diff --git a/SOURCES/CVE-2021-43818.patch b/SOURCES/CVE-2021-43818.patch new file mode 100644 index 0000000..7413abc --- /dev/null +++ b/SOURCES/CVE-2021-43818.patch @@ -0,0 +1,127 @@ +diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py +index 6f3f7de..da5af16 100644 +--- a/src/lxml/html/clean.py ++++ b/src/lxml/html/clean.py +@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile( + + # All kinds of schemes besides just javascript: that can cause + # execution: +-_is_image_dataurl = re.compile( +- r'^data:image/.+;base64', re.I).search ++_find_image_dataurls = re.compile( ++ r'^data:image/(.+);base64,', re.I).findall + _is_possibly_malicious_scheme = re.compile( +- r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):', +- re.I).search ++ r'(javascript|jscript|livescript|vbscript|data|about|mocha):', ++ re.I).findall ++# SVG images can contain script content ++_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall ++ + def _is_javascript_scheme(s): +- if _is_image_dataurl(s): +- return None +- return _is_possibly_malicious_scheme(s) ++ is_image_url = False ++ for image_type in _find_image_dataurls(s): ++ is_image_url = True ++ if _is_unsafe_image_type(image_type): ++ return True ++ if is_image_url: ++ return False ++ return bool(_is_possibly_malicious_scheme(s)) + + _substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub +-# FIXME: should data: be blocked? + + # FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx + _conditional_comment_re = re.compile( +@@ -512,6 +519,8 @@ class Cleaner(object): + return True + if 'expression(' in style: + return True ++ if '@import' in style: ++ return True + if '', + lxml.html.tostring(clean_html(s))) + ++ def test_sneaky_import_in_style(self): ++ # Prevent "@@importimport" -> "@import" replacement. ++ style_codes = [ ++ "@@importimport(extstyle.css)", ++ "@ @ import import(extstyle.css)", ++ "@ @ importimport(extstyle.css)", ++ "@@ import import(extstyle.css)", ++ "@ @import import(extstyle.css)", ++ "@@importimport()", ++ ] ++ for style_code in style_codes: ++ html = '' % style_code ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (style_code, cleaned)) ++ ++ def test_svg_data_links(self): ++ # Remove SVG images with potentially insecure content. ++ svg = b'' ++ svgz = gzip.compress(svg) ++ svg_b64 = base64.b64encode(svg).decode('ASCII') ++ svgz_b64 = base64.b64encode(svgz).decode('ASCII') ++ urls = [ ++ "data:image/svg+xml;base64," + svg_b64, ++ "data:image/svg+xml-compressed;base64," + svgz_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ ++ def test_image_data_links(self): ++ data = b'123' ++ data_b64 = base64.b64encode(data).decode('ASCII') ++ urls = [ ++ "data:image/jpeg;base64," + data_b64, ++ "data:image/apng;base64," + data_b64, ++ "data:image/png;base64," + data_b64, ++ "data:image/gif;base64," + data_b64, ++ "data:image/webp;base64," + data_b64, ++ "data:image/bmp;base64," + data_b64, ++ "data:image/tiff;base64," + data_b64, ++ "data:image/x-icon;base64," + data_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ html.encode("UTF-8"), ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ + def test_formaction_attribute_in_button_input(self): + # The formaction attribute overrides the form's action and should be + # treated as a malicious link attribute diff --git a/SOURCES/fix-threading-tests.patch b/SOURCES/fix-threading-tests.patch new file mode 100644 index 0000000..27a5052 --- /dev/null +++ b/SOURCES/fix-threading-tests.patch @@ -0,0 +1,26 @@ +diff --git a/src/lxml/tests/test_threading.py b/src/lxml/tests/test_threading.py +index 8948c3e..5ede3f8 100644 +--- a/src/lxml/tests/test_threading.py ++++ b/src/lxml/tests/test_threading.py +@@ -130,7 +130,7 @@ class ThreadingTestCase(HelperTestCase): + + + ''' + '\n'.join('' % i for i in range(200)) + ''' +- ++ + ''') + self.assertRaises(etree.XSLTParseError, + etree.XSLT, style) +@@ -153,9 +153,10 @@ class ThreadingTestCase(HelperTestCase): + self.assertTrue(len(log)) + if last_log is not None: + self.assertEqual(len(last_log), len(log)) +- self.assertEqual(4, len(log)) ++ self.assertTrue(len(log) >= 2, len(log)) + for error in log: +- self.assertTrue(':ERROR:XSLT:' in str(error)) ++ self.assertTrue(':ERROR:XSLT:' in str(error), str(error)) ++ self.assertTrue(any('UnExpectedElement' in str(error) for error in log), log) + last_log = log + + def test_thread_xslt_apply_error_log(self): diff --git a/SPECS/python-lxml.spec b/SPECS/python-lxml.spec index cc5da76..3247621 100644 --- a/SPECS/python-lxml.spec +++ b/SPECS/python-lxml.spec @@ -1,15 +1,8 @@ -%if 0%{?rhel} > 7 -# Disable python2 build by default -%bcond_with python2 -%else -%bcond_without python2 -%endif - %global modname lxml Name: python-%{modname} Version: 4.2.3 -Release: 3%{?dist} +Release: 4%{?dist} Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD @@ -28,6 +21,17 @@ Patch0: CVE-2020-27783.patch # Fixed upstream: https://github.com/lxml/lxml/commit/2d01a1ba8984e0483ce6619b972832377f208a0d Patch1: CVE-2021-28957.patch +# Fix for CVE-2021-43818: HTML Cleaner allows crafted +# and SVG embedded scripts to pass through +# Fixed upstream: +# https://github.com/lxml/lxml/commit/12fa9669007180a7bb87d990c375cf91ca5b664a +# https://github.com/lxml/lxml/commit/f2330237440df7e8f39c3ad1b1aa8852be3b27c0 +Patch2: CVE-2021-43818.patch + +# Make test more resilient against changes in latest libxslt releases +# Fixed upstream: https://github.com/lxml/lxml/commit/acef361ca80ff9afd828d91c98ea91c92f9d09af +Patch3: fix-threading-tests.patch + BuildRequires: gcc BuildRequires: libxml2-devel BuildRequires: libxslt-devel @@ -41,22 +45,6 @@ home page < or see our bug tracker at case you want to use the current ... %description %{_description} -%if %{with python2} -%package -n python2-%{modname} -Summary: %{summary} -BuildRequires: python2-devel -BuildRequires: python2-setuptools -BuildRequires: python2-Cython -Recommends: python2-cssselect -Recommends: python2-html5lib -Recommends: python2-beautifulsoup4 -%{?python_provide:%python_provide python2-%{modname}} - -%description -n python2-%{modname} %{_description} - -Python 2 version. -%endif # with python2 - %package -n python3-%{modname} Summary: %{summary} BuildRequires: python3-devel @@ -76,30 +64,17 @@ Python 3 version. %build export WITH_CYTHON=true -%if %{with python2} -%py2_build -%endif # with python2 %py3_build %install -%if %{with python2} -%py2_install -%endif # with python2 %py3_install %check -%if %{with python2} -%{__python2} setup.py test -%endif # with python2 -%{__python3} setup.py test - -%if %{with python2} -%files -n python2-%{modname} -%license doc/licenses/ZopePublicLicense.txt LICENSES.txt -%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt -%{python2_sitearch}/%{modname}/ -%{python2_sitearch}/%{modname}-*.egg-info/ -%endif # with python2 +# The tests assume inplace build, so we copy the built library to source-dir. +# If not done that, Python can either import the tests or the extension modules, but not both. +cp -a build/lib.%{python3_platform}-%{python3_version}/* src/ +# The options are: verbose, unit, functional +%{python3} test.py -vuf %files -n python3-%{modname} %license doc/licenses/ZopePublicLicense.txt LICENSES.txt @@ -108,6 +83,10 @@ export WITH_CYTHON=true %{python3_sitearch}/%{modname}-*.egg-info/ %changelog +* Thu Jan 06 2022 Charalampos Stratakis - 4.2.3-4 +- Security fix for CVE-2021-43818 +Resolves: rhbz#2032569 + * Wed Mar 24 2021 Charalampos Stratakis - 4.2.3-3 - Security fix for CVE-2021-28957 Resolves: rhbz#1941534