diff --git a/SOURCES/CVE-2021-43818.patch b/SOURCES/CVE-2021-43818.patch new file mode 100644 index 0000000..18d9393 --- /dev/null +++ b/SOURCES/CVE-2021-43818.patch @@ -0,0 +1,127 @@ +diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py +index 15298b5..ee2f0f8 100644 +--- a/src/lxml/html/clean.py ++++ b/src/lxml/html/clean.py +@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile( + + # All kinds of schemes besides just javascript: that can cause + # execution: +-_is_image_dataurl = re.compile( +- r'^data:image/.+;base64', re.I).search ++_find_image_dataurls = re.compile( ++ r'^data:image/(.+);base64,', re.I).findall + _is_possibly_malicious_scheme = re.compile( +- r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):', +- re.I).search ++ r'(javascript|jscript|livescript|vbscript|data|about|mocha):', ++ re.I).findall ++# SVG images can contain script content ++_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall ++ + def _is_javascript_scheme(s): +- if _is_image_dataurl(s): +- return None +- return _is_possibly_malicious_scheme(s) ++ is_image_url = False ++ for image_type in _find_image_dataurls(s): ++ is_image_url = True ++ if _is_unsafe_image_type(image_type): ++ return True ++ if is_image_url: ++ return False ++ return bool(_is_possibly_malicious_scheme(s)) + + _substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub +-# FIXME: should data: be blocked? + + # FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx + _conditional_comment_re = re.compile( +@@ -512,6 +519,8 @@ class Cleaner(object): + return True + if 'expression(' in style: + return True ++ if '@import' in style: ++ return True + if '', + lxml.html.tostring(clean_html(s))) + ++ def test_sneaky_import_in_style(self): ++ # Prevent "@@importimport" -> "@import" replacement. ++ style_codes = [ ++ "@@importimport(extstyle.css)", ++ "@ @ import import(extstyle.css)", ++ "@ @ importimport(extstyle.css)", ++ "@@ import import(extstyle.css)", ++ "@ @import import(extstyle.css)", ++ "@@importimport()", ++ ] ++ for style_code in style_codes: ++ html = '' % style_code ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (style_code, cleaned)) ++ ++ def test_svg_data_links(self): ++ # Remove SVG images with potentially insecure content. ++ svg = b'' ++ svgz = gzip.compress(svg) ++ svg_b64 = base64.b64encode(svg).decode('ASCII') ++ svgz_b64 = base64.b64encode(svgz).decode('ASCII') ++ urls = [ ++ "data:image/svg+xml;base64," + svg_b64, ++ "data:image/svg+xml-compressed;base64," + svgz_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ ++ def test_image_data_links(self): ++ data = b'123' ++ data_b64 = base64.b64encode(data).decode('ASCII') ++ urls = [ ++ "data:image/jpeg;base64," + data_b64, ++ "data:image/apng;base64," + data_b64, ++ "data:image/png;base64," + data_b64, ++ "data:image/gif;base64," + data_b64, ++ "data:image/webp;base64," + data_b64, ++ "data:image/bmp;base64," + data_b64, ++ "data:image/tiff;base64," + data_b64, ++ "data:image/x-icon;base64," + data_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ html.encode("UTF-8"), ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ + def test_formaction_attribute_in_button_input(self): + # The formaction attribute overrides the form's action and should be + # treated as a malicious link attribute diff --git a/SPECS/python-lxml.spec b/SPECS/python-lxml.spec index b380b7a..5ff52c5 100644 --- a/SPECS/python-lxml.spec +++ b/SPECS/python-lxml.spec @@ -4,7 +4,7 @@ Name: python-%{modname} Version: 4.4.1 -Release: 6%{?dist} +Release: 7%{?dist} Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD @@ -23,6 +23,13 @@ Patch0: CVE-2020-27783.patch # Fixed upstream: https://github.com/lxml/lxml/commit/2d01a1ba8984e0483ce6619b972832377f208a0d Patch1: CVE-2021-28957.patch +# Fix for CVE-2021-43818: HTML Cleaner allows crafted +# and SVG embedded scripts to pass through +# Fixed upstream: +# https://github.com/lxml/lxml/commit/12fa9669007180a7bb87d990c375cf91ca5b664a +# https://github.com/lxml/lxml/commit/f2330237440df7e8f39c3ad1b1aa8852be3b27c0 +Patch2: CVE-2021-43818.patch + # Exclude i686 arch. Due to a modularity issue it's being added to the # x86_64 compose of CRB, but we don't want to ship it at all. # See: https://projects.engineering.redhat.com/browse/RCM-72605 @@ -41,21 +48,6 @@ home page < or see our bug tracker at case you want to use the current ... %description %{_description} -%if %{with python2} -%package -n python2-%{modname} -Summary: %{summary} -BuildRequires: python2-devel -BuildRequires: python2-setuptools -Suggests: python%{python2_version}dist(cssselect) >= 0.7 -Suggests: python%{python2_version}dist(html5lib) -Suggests: python%{python2_version}dist(beautifulsoup4) -%{?python_provide:%python_provide python2-%{modname}} - -%description -n python2-%{modname} %{_description} - -Python 2 version. -%endif - %package -n python%{python3_pkgversion}-%{modname} Summary: %{summary} BuildRequires: python%{python3_pkgversion}-devel @@ -77,30 +69,18 @@ Python 3 version. find -type f -name '*.c' -print -delete %build -env WITH_CYTHON=true %py3_build -%if %{with python2} -%py2_build -%endif +export WITH_CYTHON=true +%py3_build %install -%if %{with python2} -%py2_install -%endif %py3_install %check -%if %{with python2} -%{__python2} setup.py test -%endif -%{__python3} setup.py test - -%if %{with python2} -%files -n python2-%{modname} -%license doc/licenses/ZopePublicLicense.txt LICENSES.txt -%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt -%{python2_sitearch}/%{modname}/ -%{python2_sitearch}/%{modname}-*.egg-info/ -%endif +# The tests assume inplace build, so we copy the built library to source-dir. +# If not done that, Python can either import the tests or the extension modules, but not both. +cp -a build/lib.%{python3_platform}-%{python3_version}/* src/ +# The options are: verbose, unit, functional +%{python3} test.py -vuf %files -n python%{python3_pkgversion}-%{modname} %license doc/licenses/ZopePublicLicense.txt LICENSES.txt @@ -109,6 +89,10 @@ env WITH_CYTHON=true %py3_build %{python3_sitearch}/%{modname}-*.egg-info/ %changelog +* Thu Jan 06 2022 Charalampos Stratakis - 4.4.1-7 +- Security fix for CVE-2021-43818 +Resolves: rhbz#2032569 + * Wed Mar 24 2021 Charalampos Stratakis - 4.4.1-6 - Security fix for CVE-2021-28957 Resolves: rhbz#1941534