38727c
From 374bf2ea399cd1f2abbc0890fe796d570416adea Mon Sep 17 00:00:00 2001
38727c
From: Ryan Petrello <rpetrell@redhat.com>
38727c
Date: Tue, 30 Apr 2019 12:36:48 -0400
38727c
Subject: [PATCH 1/2] prevent CVE-2019-9740 in 1.24.x
38727c
38727c
Reject URLs containing ASCII control characters or spaces by raising LocationParseError; adapted from https://github.com/python/cpython/pull/12755
38727c
---
38727c
 test/test_util.py   | 5 +++++
38727c
 urllib3/util/url.py | 8 ++++++++
38727c
 2 files changed, 13 insertions(+)
38727c
38727c
diff --git a/test/test_util.py b/test/test_util.py
38727c
index c850d91..5fa49bb 100644
38727c
--- a/test/test_util.py
38727c
+++ b/test/test_util.py
38727c
@@ -142,6 +142,11 @@ class TestUtil(unittest.TestCase):
38727c
     def test_parse_url_invalid_IPv6(self):
38727c
         self.assertRaises(ValueError, parse_url, '[::1')
38727c
 
38727c
+    def test_parse_url_contains_control_characters(self):
38727c
+        # see CVE-2019-9740
38727c
+        with self.assertRaises(LocationParseError):
38727c
+            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
38727c
+
38727c
     def test_Url_str(self):
38727c
         U = Url('http', host='google.com')
38727c
         self.assertEqual(str(U), U.url)
38727c
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
38727c
index b2ec834..7878892 100644
38727c
--- a/urllib3/util/url.py
38727c
+++ b/urllib3/util/url.py
38727c
@@ -1,10 +1,13 @@
38727c
 from collections import namedtuple
38727c
+import re
38727c
 
38727c
 from ..exceptions import LocationParseError
38727c
 
38727c
 
38727c
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
38727c
 
38727c
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
38727c
+
38727c
 
38727c
 class Url(namedtuple('Url', url_attrs)):
38727c
     """
38727c
@@ -142,6 +145,11 @@ def parse_url(url):
38727c
         # Empty
38727c
         return Url()
38727c
 
38727c
+    # Prevent CVE-2019-9740.
38727c
+    # adapted from https://github.com/python/cpython/pull/12755
38727c
+    if _contains_disallowed_url_pchar_re.search(url):
38727c
+        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
38727c
+
38727c
     scheme = None
38727c
     auth = None
38727c
     host = None
38727c
38727c
From 5661da387242337e09a939120a1e154e7335d18b Mon Sep 17 00:00:00 2001
38727c
From: Ryan Petrello <lists@ryanpetrello.com>
38727c
Date: Wed, 1 May 2019 16:46:44 -0400
38727c
Subject: [PATCH 2/2] avoid CVE-2019-9740 by percent-encoding invalid path
38727c
 characters
38727c
38727c
Percent-encode the disallowed characters instead of raising LocationParseError; this is to avoid breaking changes in downstream libraries like requests.
38727c
---
38727c
 test/test_util.py   | 4 ++--
38727c
 urllib3/util/url.py | 4 ++--
38727c
 2 files changed, 4 insertions(+), 4 deletions(-)
38727c
38727c
diff --git a/test/test_util.py b/test/test_util.py
38727c
index 5fa49bb..122e00b 100644
38727c
--- a/test/test_util.py
38727c
+++ b/test/test_util.py
38727c
@@ -144,8 +144,8 @@ class TestUtil(unittest.TestCase):
38727c
 
38727c
     def test_parse_url_contains_control_characters(self):
38727c
         # see CVE-2019-9740
38727c
-        with self.assertRaises(LocationParseError):
38727c
-            parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
38727c
+        url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
38727c
+        self.assertEqual(url.path, '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:')
38727c
 
38727c
     def test_Url_str(self):
38727c
         U = Url('http', host='google.com')
38727c
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
38727c
index 7878892..cc15b6b 100644
38727c
--- a/urllib3/util/url.py
38727c
+++ b/urllib3/util/url.py
38727c
@@ -2,6 +2,7 @@ from collections import namedtuple
38727c
 import re
38727c
 
38727c
 from ..exceptions import LocationParseError
38727c
+from six.moves.urllib.parse import quote
38727c
 
38727c
 
38727c
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
38727c
@@ -147,8 +148,7 @@ def parse_url(url):
38727c
 
38727c
     # Prevent CVE-2019-9740.
38727c
     # adapted from https://github.com/python/cpython/pull/12755
38727c
-    if _contains_disallowed_url_pchar_re.search(url):
38727c
-        raise LocationParseError("URL can't contain control characters. {!r}".format(url))
38727c
+    url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url)
38727c
 
38727c
     scheme = None
38727c
     auth = None