From 374bf2ea399cd1f2abbc0890fe796d570416adea Mon Sep 17 00:00:00 2001
From: Ryan Petrello <rpetrell@redhat.com>
Date: Tue, 30 Apr 2019 12:36:48 -0400
Subject: [PATCH 1/2] prevent CVE-2019-9740 in 1.24.x

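Make parse_url() raise LocationParseError when the URL contains ASCII
control characters or spaces (\x00-\x20, \x7f), so that such characters
cannot be used to inject extra request lines or headers.

Adapted from https://github.com/python/cpython/pull/12755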
---
 test/test_util.py   | 5 +++++
 urllib3/util/url.py | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/test/test_util.py b/test/test_util.py
index c850d91..5fa49bb 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -142,6 +142,11 @@ class TestUtil(unittest.TestCase):
     def test_parse_url_invalid_IPv6(self):
         self.assertRaises(ValueError, parse_url, '[::1')
 
+    def test_parse_url_contains_control_characters(self):
+        # see CVE-2019-9740
+        with self.assertRaises(LocationParseError):
+            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+
     def test_Url_str(self):
         U = Url('http', host='google.com')
         self.assertEqual(str(U), U.url)
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index b2ec834..7878892 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -1,10 +1,13 @@
 from collections import namedtuple
+import re
 
 from ..exceptions import LocationParseError
 
 
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
 
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+
 
 class Url(namedtuple('Url', url_attrs)):
     """
@@ -142,6 +145,11 @@ def parse_url(url):
         # Empty
         return Url()
 
+    # Prevent CVE-2019-9740.
+    # adapted from https://github.com/python/cpython/pull/12755
+    if _contains_disallowed_url_pchar_re.search(url):
+        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+
     scheme = None
     auth = None
     host = None

From 5661da387242337e09a939120a1e154e7335d18b Mon Sep 17 00:00:00 2001
From: Ryan Petrello <lists@ryanpetrello.com>
Date: Wed, 1 May 2019 16:46:44 -0400
Subject: [PATCH 2/2] avoid CVE-2019-9740 by percent-encoding invalid path
 characters

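Instead of raising LocationParseError, percent-encode the disallowed
characters before parsing the URL. This avoids a breaking change for
downstream libraries such as requests.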
---
 test/test_util.py   | 4 ++--
 urllib3/util/url.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_util.py b/test/test_util.py
index 5fa49bb..122e00b 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -144,8 +144,8 @@ class TestUtil(unittest.TestCase):
 
     def test_parse_url_contains_control_characters(self):
         # see CVE-2019-9740
-        with self.assertRaises(LocationParseError):
-            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+        url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+        self.assertEqual(url.path, '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:')
 
     def test_Url_str(self):
         U = Url('http', host='google.com')
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index 7878892..cc15b6b 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -2,6 +2,7 @@ from collections import namedtuple
 import re
 
 from ..exceptions import LocationParseError
+from six.moves.urllib.parse import quote
 
 
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
@@ -147,8 +148,7 @@ def parse_url(url):
 
     # Prevent CVE-2019-9740.
     # adapted from https://github.com/python/cpython/pull/12755
-    if _contains_disallowed_url_pchar_re.search(url):
-        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+    url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url)
 
     scheme = None
     auth = None