From 374bf2ea399cd1f2abbc0890fe796d570416adea Mon Sep 17 00:00:00 2001
From: Ryan Petrello
Date: Tue, 30 Apr 2019 12:36:48 -0400
Subject: [PATCH 1/2] prevent CVE-2019-9740 in 1.24.x

adapted from https://github.com/python/cpython/pull/12755
---
 test/test_util.py   | 5 +++++
 urllib3/util/url.py | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/test/test_util.py b/test/test_util.py
index c850d91..5fa49bb 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -142,6 +142,11 @@ class TestUtil(unittest.TestCase):
     def test_parse_url_invalid_IPv6(self):
         self.assertRaises(ValueError, parse_url, '[::1')
 
+    def test_parse_url_contains_control_characters(self):
+        # see CVE-2019-9740
+        with self.assertRaises(LocationParseError):
+            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+
     def test_Url_str(self):
         U = Url('http', host='google.com')
         self.assertEqual(str(U), U.url)
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index b2ec834..7878892 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -1,10 +1,13 @@
 from collections import namedtuple
+import re
 
 from ..exceptions import LocationParseError
 
 
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
 
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+
 
 class Url(namedtuple('Url', url_attrs)):
     """
@@ -142,6 +145,11 @@ def parse_url(url):
         # Empty
         return Url()
 
+    # Prevent CVE-2019-9740.
+    # adapted from https://github.com/python/cpython/pull/12755
+    if _contains_disallowed_url_pchar_re.search(url):
+        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+
     scheme = None
     auth = None
     host = None

From 5661da387242337e09a939120a1e154e7335d18b Mon Sep 17 00:00:00 2001
From: Ryan Petrello
Date: Wed, 1 May 2019 16:46:44 -0400
Subject: [PATCH 2/2] avoid CVE-2019-9740 by percent-encoding invalid path characters

this is to avoid breaking changes in downstream libraries like requests
---
 test/test_util.py   | 4 ++--
 urllib3/util/url.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_util.py b/test/test_util.py
index 5fa49bb..122e00b 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -144,8 +144,8 @@ class TestUtil(unittest.TestCase):
 
     def test_parse_url_contains_control_characters(self):
         # see CVE-2019-9740
-        with self.assertRaises(LocationParseError):
-            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+        url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+        self.assertEqual(url.path, '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:')
 
     def test_Url_str(self):
         U = Url('http', host='google.com')
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index 7878892..cc15b6b 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -2,6 +2,7 @@ from collections import namedtuple
 import re
 
 from ..exceptions import LocationParseError
+from six.moves.urllib.parse import quote
 
 
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
@@ -147,8 +148,7 @@ def parse_url(url):
 
     # Prevent CVE-2019-9740.
     # adapted from https://github.com/python/cpython/pull/12755
-    if _contains_disallowed_url_pchar_re.search(url):
-        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+    url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url)
 
     scheme = None
     auth = None