Tree - rpms/python-lxml - CentOS Git server

rpms / python-lxml

Blame SOURCES/CVE-2021-43818.patch

Blob History Raw

		1c4fe0	`diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py`
		1c4fe0	`index 15298b5..ee2f0f8 100644`
		1c4fe0	`--- a/src/lxml/html/clean.py`
		1c4fe0	`+++ b/src/lxml/html/clean.py`
		1c4fe0	`@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile(`
		1c4fe0
		1c4fe0	`# All kinds of schemes besides just javascript: that can cause`
		1c4fe0	`# execution:`
		1c4fe0	`-_is_image_dataurl = re.compile(`
		1c4fe0	`- r'^data:image/.+;base64', re.I).search`
		1c4fe0	`+_find_image_dataurls = re.compile(`
		1c4fe0	`+ r'^data:image/(.+);base64,', re.I).findall`
		1c4fe0	`_is_possibly_malicious_scheme = re.compile(`
		1c4fe0	`- r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):',`
		1c4fe0	`- re.I).search`
		1c4fe0	`+ r'(javascript|jscript|livescript|vbscript|data|about|mocha):',`
		1c4fe0	`+ re.I).findall`
		1c4fe0	`+# SVG images can contain script content`
		1c4fe0	`+_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall`
		1c4fe0	`+`
		1c4fe0	`def _is_javascript_scheme(s):`
		1c4fe0	`- if _is_image_dataurl(s):`
		1c4fe0	`- return None`
		1c4fe0	`- return _is_possibly_malicious_scheme(s)`
		1c4fe0	`+ is_image_url = False`
		1c4fe0	`+ for image_type in _find_image_dataurls(s):`
		1c4fe0	`+ is_image_url = True`
		1c4fe0	`+ if _is_unsafe_image_type(image_type):`
		1c4fe0	`+ return True`
		1c4fe0	`+ if is_image_url:`
		1c4fe0	`+ return False`
		1c4fe0	`+ return bool(_is_possibly_malicious_scheme(s))`
		1c4fe0
		1c4fe0	`_substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub`
		1c4fe0	`-# FIXME: should data: be blocked?`
		1c4fe0
		1c4fe0	`# FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx`
		1c4fe0	`_conditional_comment_re = re.compile(`
		1c4fe0	`@@ -512,6 +519,8 @@ class Cleaner(object):`
		1c4fe0	`return True`
		1c4fe0	`if 'expression(' in style:`
		1c4fe0	`return True`
		1c4fe0	`+ if '@import' in style:`
		1c4fe0	`+ return True`
		1c4fe0	`if '</noscript' in style:`
		1c4fe0	`# e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'`
		1c4fe0	`return True`
		1c4fe0	`diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py`
		1c4fe0	`index d8df527..7021e48 100644`
		1c4fe0	`--- a/src/lxml/html/tests/test_clean.py`
		1c4fe0	`+++ b/src/lxml/html/tests/test_clean.py`
		1c4fe0	`@@ -1,3 +1,5 @@`
		1c4fe0	`+import base64`
		1c4fe0	`+import gzip`
		1c4fe0	`import unittest`
		1c4fe0	`from lxml.tests.common_imports import make_doctest`
		1c4fe0
		1c4fe0	`@@ -89,6 +91,69 @@ class CleanerTest(unittest.TestCase):`
		1c4fe0	`b'<math><style>/* deleted */</style></math>',`
		1c4fe0	`lxml.html.tostring(clean_html(s)))`
		1c4fe0
		1c4fe0	`+ def test_sneaky_import_in_style(self):`
		1c4fe0	`+ # Prevent "@@importimport" -> "@import" replacement.`
		1c4fe0	`+ style_codes = [`
		1c4fe0	`+ "@@importimport(extstyle.css)",`
		1c4fe0	`+ "@ @ import import(extstyle.css)",`
		1c4fe0	`+ "@ @ importimport(extstyle.css)",`
		1c4fe0	`+ "@@ import import(extstyle.css)",`
		1c4fe0	`+ "@ @import import(extstyle.css)",`
		1c4fe0	`+ "@@importimport()",`
		1c4fe0	`+ ]`
		1c4fe0	`+ for style_code in style_codes:`
		1c4fe0	`+ html = '<style>%s</style>' % style_code`
		1c4fe0	`+ s = lxml.html.fragment_fromstring(html)`
		1c4fe0	`+`
		1c4fe0	`+ cleaned = lxml.html.tostring(clean_html(s))`
		1c4fe0	`+ self.assertEqual(`
		1c4fe0	`+ b'<style>/* deleted */</style>',`
		1c4fe0	`+ cleaned,`
		1c4fe0	`+ "%s -> %s" % (style_code, cleaned))`
		1c4fe0	`+`
		1c4fe0	`+ def test_svg_data_links(self):`
		1c4fe0	`+ # Remove SVG images with potentially insecure content.`
		1c4fe0	`+ svg = b'<svg onload="alert(123)" />'`
		1c4fe0	`+ svgz = gzip.compress(svg)`
		1c4fe0	`+ svg_b64 = base64.b64encode(svg).decode('ASCII')`
		1c4fe0	`+ svgz_b64 = base64.b64encode(svgz).decode('ASCII')`
		1c4fe0	`+ urls = [`
		1c4fe0	`+ "data:image/svg+xml;base64," + svg_b64,`
		1c4fe0	`+ "data:image/svg+xml-compressed;base64," + svgz_b64,`
		1c4fe0	`+ ]`
		1c4fe0	`+ for url in urls:`
		1c4fe0	`+ html = '<img src="%s">' % url`
		1c4fe0	`+ s = lxml.html.fragment_fromstring(html)`
		1c4fe0	`+`
		1c4fe0	`+ cleaned = lxml.html.tostring(clean_html(s))`
		1c4fe0	`+ self.assertEqual(`
		1c4fe0	`+ b'<img src="">',`
		1c4fe0	`+ cleaned,`
		1c4fe0	`+ "%s -> %s" % (url, cleaned))`
		1c4fe0	`+`
		1c4fe0	`+ def test_image_data_links(self):`
		1c4fe0	`+ data = b'123'`
		1c4fe0	`+ data_b64 = base64.b64encode(data).decode('ASCII')`
		1c4fe0	`+ urls = [`
		1c4fe0	`+ "data:image/jpeg;base64," + data_b64,`
		1c4fe0	`+ "data:image/apng;base64," + data_b64,`
		1c4fe0	`+ "data:image/png;base64," + data_b64,`
		1c4fe0	`+ "data:image/gif;base64," + data_b64,`
		1c4fe0	`+ "data:image/webp;base64," + data_b64,`
		1c4fe0	`+ "data:image/bmp;base64," + data_b64,`
		1c4fe0	`+ "data:image/tiff;base64," + data_b64,`
		1c4fe0	`+ "data:image/x-icon;base64," + data_b64,`
		1c4fe0	`+ ]`
		1c4fe0	`+ for url in urls:`
		1c4fe0	`+ html = '<img src="%s">' % url`
		1c4fe0	`+ s = lxml.html.fragment_fromstring(html)`
		1c4fe0	`+`
		1c4fe0	`+ cleaned = lxml.html.tostring(clean_html(s))`
		1c4fe0	`+ self.assertEqual(`
		1c4fe0	`+ html.encode("UTF-8"),`
		1c4fe0	`+ cleaned,`
		1c4fe0	`+ "%s -> %s" % (url, cleaned))`
		1c4fe0	`+`
		1c4fe0	`def test_formaction_attribute_in_button_input(self):`
		1c4fe0	`# The formaction attribute overrides the form's action and should be`
		1c4fe0	`# treated as a malicious link attribute`

rpms / python-lxml

Source Code

Blame SOURCES/CVE-2021-43818.patch