From db649273fd6a2e3a624545b6fd14e8d8029198f8 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Thu, 3 Dec 2020 11:53:15 +0100 Subject: [PATCH] CVE-2020-27783 Combines fixes for the CVE from two versions: - Version 4.6.1: https://github.com/lxml/lxml/commit/89e7aad6e7ff9ecd88678ff25f885988b184b26e - Version 4.6.2: https://github.com/lxml/lxml/commit/a105ab8dc262ec6735977c25c13f0bdfcdec72a7 --- src/lxml/html/clean.py | 25 +++++++++++++++++-------- src/lxml/html/tests/test_clean.py | 21 +++++++++++++++++++++ src/lxml/html/tests/test_clean.txt | 12 ++++++++++-- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py index aa9fc57..15298b5 100644 --- a/src/lxml/html/clean.py +++ b/src/lxml/html/clean.py @@ -61,12 +61,15 @@ __all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html', # This is an IE-specific construct you can have in a stylesheet to # run some Javascript: -_css_javascript_re = re.compile( - r'expression\s*\(.*?\)', re.S|re.I) +_replace_css_javascript = re.compile( + r'expression\s*\(.*?\)', re.S|re.I).sub # Do I have to worry about @\nimport? -_css_import_re = re.compile( - r'@\s*import', re.I) +_replace_css_import = re.compile( + r'@\s*import', re.I).sub + +_looks_like_tag_content = re.compile( + r'' + return True return False def clean_html(self, html): diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py index a193d99..ea7487c 100644 --- a/src/lxml/html/tests/test_clean.py +++ b/src/lxml/html/tests/test_clean.py @@ -69,6 +69,27 @@ class CleanerTest(unittest.TestCase): self.assertEqual('child', clean_html(s).text_content()) + def test_sneaky_noscript_in_style(self): + # This gets parsed as through into the output. + html = '', + lxml.html.tostring(clean_html(s))) + + def test_sneaky_js_in_math_style(self): + # This gets parsed as -> + # thus passing any tag/script/whatever content through into the output. + html = '' + s = lxml.html.fragment_fromstring(html) + + self.assertEqual( + b'', + lxml.html.tostring(clean_html(s))) + + def test_suite(): suite = unittest.TestSuite() suite.addTests([make_doctest('test_clean.txt')]) diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index 2824f64..7df1f1d 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -104,7 +104,11 @@ >>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)) - + a link @@ -168,7 +172,11 @@ - + a link -- 2.28.0