Tree - rpms/rh-python38-python-lxml

rpms / rh-python38-python-lxml

Blame SOURCES/CVE-2021-43818.patch

Blob History Raw

		62262c	`diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py`
		62262c	`index 15298b5..ee2f0f8 100644`
		62262c	`--- a/src/lxml/html/clean.py`
		62262c	`+++ b/src/lxml/html/clean.py`
		62262c	`@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile(`
		62262c
		62262c	`# All kinds of schemes besides just javascript: that can cause`
		62262c	`# execution:`
		62262c	`-_is_image_dataurl = re.compile(`
		62262c	`- r'^data:image/.+;base64', re.I).search`
		62262c	`+_find_image_dataurls = re.compile(`
		62262c	`+ r'^data:image/(.+);base64,', re.I).findall`
		62262c	`_is_possibly_malicious_scheme = re.compile(`
		62262c	`- r'(?:javascript\|jscript\|livescript\|vbscript\|data\|about\|mocha):',`
		62262c	`- re.I).search`
		62262c	`+ r'(javascript\|jscript\|livescript\|vbscript\|data\|about\|mocha):',`
		62262c	`+ re.I).findall`
		62262c	`+# SVG images can contain script content`
		62262c	`+_is_unsafe_image_type = re.compile(r"(xml\|svg)", re.I).findall`
		62262c	`+`
		62262c	`def _is_javascript_scheme(s):`
		62262c	`- if _is_image_dataurl(s):`
		62262c	`- return None`
		62262c	`- return _is_possibly_malicious_scheme(s)`
		62262c	`+ is_image_url = False`
		62262c	`+ for image_type in _find_image_dataurls(s):`
		62262c	`+ is_image_url = True`
		62262c	`+ if _is_unsafe_image_type(image_type):`
		62262c	`+ return True`
		62262c	`+ if is_image_url:`
		62262c	`+ return False`
		62262c	`+ return bool(_is_possibly_malicious_scheme(s))`
		62262c
		62262c	`_substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub`
		62262c	`-# FIXME: should data: be blocked?`
		62262c
		62262c	`# FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx`
		62262c	`_conditional_comment_re = re.compile(`
		62262c	`@@ -512,6 +519,8 @@ class Cleaner(object):`
		62262c	`return True`
		62262c	`if 'expression(' in style:`
		62262c	`return True`
		62262c	`+ if '@import' in style:`
		62262c	`+ return True`
		62262c	`if '</noscript' in style:`
		62262c	`# e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'`
		62262c	`return True`
		62262c	`diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py`
		62262c	`index d8df527..7021e48 100644`
		62262c	`--- a/src/lxml/html/tests/test_clean.py`
		62262c	`+++ b/src/lxml/html/tests/test_clean.py`
		62262c	`@@ -1,3 +1,5 @@`
		62262c	`+import base64`
		62262c	`+import gzip`
		62262c	`import unittest`
		62262c	`from lxml.tests.common_imports import make_doctest`
		62262c
		62262c	`@@ -89,6 +91,69 @@ class CleanerTest(unittest.TestCase):`
		62262c	`b'<math><style>/* deleted */</style></math>',`
		62262c	`lxml.html.tostring(clean_html(s)))`
		62262c
		62262c	`+ def test_sneaky_import_in_style(self):`
		62262c	`+ # Prevent "@@importimport" -> "@import" replacement.`
		62262c	`+ style_codes = [`
		62262c	`+ "@@importimport(extstyle.css)",`
		62262c	`+ "@ @ import import(extstyle.css)",`
		62262c	`+ "@ @ importimport(extstyle.css)",`
		62262c	`+ "@@ import import(extstyle.css)",`
		62262c	`+ "@ @import import(extstyle.css)",`
		62262c	`+ "@@importimport()",`
		62262c	`+ ]`
		62262c	`+ for style_code in style_codes:`
		62262c	`+ html = '<style>%s</style>' % style_code`
		62262c	`+ s = lxml.html.fragment_fromstring(html)`
		62262c	`+`
		62262c	`+ cleaned = lxml.html.tostring(clean_html(s))`
		62262c	`+ self.assertEqual(`
		62262c	`+ b'<style>/* deleted */</style>',`
		62262c	`+ cleaned,`
		62262c	`+ "%s -> %s" % (style_code, cleaned))`
		62262c	`+`
		62262c	`+ def test_svg_data_links(self):`
		62262c	`+ # Remove SVG images with potentially insecure content.`
		62262c	`+ svg = b'<svg onload="alert(123)" />'`
		62262c	`+ svgz = gzip.compress(svg)`
		62262c	`+ svg_b64 = base64.b64encode(svg).decode('ASCII')`
		62262c	`+ svgz_b64 = base64.b64encode(svgz).decode('ASCII')`
		62262c	`+ urls = [`
		62262c	`+ "data:image/svg+xml;base64," + svg_b64,`
		62262c	`+ "data:image/svg+xml-compressed;base64," + svgz_b64,`
		62262c	`+ ]`
		62262c	`+ for url in urls:`
		62262c	`+ html = '<img src="%s">' % url`
		62262c	`+ s = lxml.html.fragment_fromstring(html)`
		62262c	`+`
		62262c	`+ cleaned = lxml.html.tostring(clean_html(s))`
		62262c	`+ self.assertEqual(`
		62262c	`+ b'<img src="">',`
		62262c	`+ cleaned,`
		62262c	`+ "%s -> %s" % (url, cleaned))`
		62262c	`+`
		62262c	`+ def test_image_data_links(self):`
		62262c	`+ data = b'123'`
		62262c	`+ data_b64 = base64.b64encode(data).decode('ASCII')`
		62262c	`+ urls = [`
		62262c	`+ "data:image/jpeg;base64," + data_b64,`
		62262c	`+ "data:image/apng;base64," + data_b64,`
		62262c	`+ "data:image/png;base64," + data_b64,`
		62262c	`+ "data:image/gif;base64," + data_b64,`
		62262c	`+ "data:image/webp;base64," + data_b64,`
		62262c	`+ "data:image/bmp;base64," + data_b64,`
		62262c	`+ "data:image/tiff;base64," + data_b64,`
		62262c	`+ "data:image/x-icon;base64," + data_b64,`
		62262c	`+ ]`
		62262c	`+ for url in urls:`
		62262c	`+ html = '<img src="%s">' % url`
		62262c	`+ s = lxml.html.fragment_fromstring(html)`
		62262c	`+`
		62262c	`+ cleaned = lxml.html.tostring(clean_html(s))`
		62262c	`+ self.assertEqual(`
		62262c	`+ html.encode("UTF-8"),`
		62262c	`+ cleaned,`
		62262c	`+ "%s -> %s" % (url, cleaned))`
		62262c	`+`
		62262c	`def test_formaction_attribute_in_button_input(self):`
		62262c	`# The formaction attribute overrides the form's action and should be`
		62262c	`# treated as a malicious link attribute`

rpms / rh-python38-python-lxml

Source Code

Blame SOURCES/CVE-2021-43818.patch