10f306
From 374bf2ea399cd1f2abbc0890fe796d570416adea Mon Sep 17 00:00:00 2001
10f306
From: Ryan Petrello <rpetrell@redhat.com>
10f306
Date: Tue, 30 Apr 2019 12:36:48 -0400
10f306
Subject: [PATCH 1/2] prevent CVE-2019-9740 in 1.24.x
10f306
10f306
adapted from https://github.com/python/cpython/pull/12755
10f306
---
10f306
 test/test_util.py   | 5 +++++
10f306
 urllib3/util/url.py | 8 ++++++++
10f306
 2 files changed, 13 insertions(+)
10f306
10f306
diff --git a/test/test_util.py b/test/test_util.py
10f306
index c850d91..5fa49bb 100644
10f306
--- a/test/test_util.py
10f306
+++ b/test/test_util.py
10f306
@@ -142,6 +142,11 @@ class TestUtil(unittest.TestCase):
10f306
     def test_parse_url_invalid_IPv6(self):
10f306
         self.assertRaises(ValueError, parse_url, '[::1')
10f306
 
10f306
+    def test_parse_url_contains_control_characters(self):
10f306
+        # see CVE-2019-9740
10f306
+        with self.assertRaises(LocationParseError):
10f306
+            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
10f306
+
10f306
     def test_Url_str(self):
10f306
         U = Url('http', host='google.com')
10f306
         self.assertEqual(str(U), U.url)
10f306
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
10f306
index b2ec834..7878892 100644
10f306
--- a/urllib3/util/url.py
10f306
+++ b/urllib3/util/url.py
10f306
@@ -1,10 +1,13 @@
10f306
 from collections import namedtuple
10f306
+import re
10f306
 
10f306
 from ..exceptions import LocationParseError
10f306
 
10f306
 
10f306
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
10f306
 
10f306
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
10f306
+
10f306
 
10f306
 class Url(namedtuple('Url', url_attrs)):
10f306
     """
10f306
@@ -142,6 +145,11 @@ def parse_url(url):
10f306
         # Empty
10f306
         return Url()
10f306
 
10f306
+    # Prevent CVE-2019-9740.
10f306
+    # adapted from https://github.com/python/cpython/pull/12755
10f306
+    if _contains_disallowed_url_pchar_re.search(url):
10f306
+        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
10f306
+
10f306
     scheme = None
10f306
     auth = None
10f306
     host = None
10f306
10f306
From 5661da387242337e09a939120a1e154e7335d18b Mon Sep 17 00:00:00 2001
10f306
From: Ryan Petrello <lists@ryanpetrello.com>
10f306
Date: Wed, 1 May 2019 16:46:44 -0400
10f306
Subject: [PATCH 2/2] avoid CVE-2019-9740 by percent-encoding invalid path
10f306
 characters
10f306
10f306
Percent-encode the disallowed characters instead of raising LocationParseError, to avoid a breaking change in downstream libraries like requests.
10f306
---
10f306
 test/test_util.py   | 4 ++--
10f306
 urllib3/util/url.py | 4 ++--
10f306
 2 files changed, 4 insertions(+), 4 deletions(-)
10f306
10f306
diff --git a/test/test_util.py b/test/test_util.py
10f306
index 5fa49bb..122e00b 100644
10f306
--- a/test/test_util.py
10f306
+++ b/test/test_util.py
10f306
@@ -144,8 +144,8 @@ class TestUtil(unittest.TestCase):
10f306
 
10f306
     def test_parse_url_contains_control_characters(self):
10f306
         # see CVE-2019-9740
10f306
-        with self.assertRaises(LocationParseError):
10f306
-            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
10f306
+        url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
10f306
+        self.assertEqual(url.path, '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:')
10f306
 
10f306
     def test_Url_str(self):
10f306
         U = Url('http', host='google.com')
10f306
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
10f306
index 7878892..cc15b6b 100644
10f306
--- a/urllib3/util/url.py
10f306
+++ b/urllib3/util/url.py
10f306
@@ -2,6 +2,7 @@ from collections import namedtuple
10f306
 import re
10f306
 
10f306
 from ..exceptions import LocationParseError
10f306
+from six.moves.urllib.parse import quote
10f306
 
10f306
 
10f306
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
10f306
@@ -147,8 +148,7 @@ def parse_url(url):
10f306
 
10f306
     # Prevent CVE-2019-9740.
10f306
     # adapted from https://github.com/python/cpython/pull/12755
10f306
-    if _contains_disallowed_url_pchar_re.search(url):
10f306
-        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
10f306
+    url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url)
10f306
 
10f306
     scheme = None
10f306
     auth = None