From 374bf2ea399cd1f2abbc0890fe796d570416adea Mon Sep 17 00:00:00 2001
From: Ryan Petrello <rpetrell@redhat.com>
Date: Tue, 30 Apr 2019 12:36:48 -0400
Subject: [PATCH 1/2] prevent CVE-2019-9740 in 1.24.x
Adapted from https://github.com/python/cpython/pull/12755: make parse_url()
raise LocationParseError for URLs that contain control characters or spaces.
---
test/test_util.py | 5 +++++
urllib3/util/url.py | 8 ++++++++
2 files changed, 13 insertions(+)
diff --git a/test/test_util.py b/test/test_util.py
index c850d91..5fa49bb 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -142,6 +142,11 @@ class TestUtil(unittest.TestCase):
def test_parse_url_invalid_IPv6(self):
self.assertRaises(ValueError, parse_url, '[::1')
+ def test_parse_url_contains_control_characters(self):
+ # see CVE-2019-9740
+ with self.assertRaises(LocationParseError):
+ parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+
def test_Url_str(self):
U = Url('http', host='google.com')
self.assertEqual(str(U), U.url)
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index b2ec834..7878892 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -1,10 +1,13 @@
from collections import namedtuple
+import re
from ..exceptions import LocationParseError
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+
class Url(namedtuple('Url', url_attrs)):
"""
@@ -142,6 +145,11 @@ def parse_url(url):
# Empty
return Url()
+ # Prevent CVE-2019-9740.
+ # adapted from https://github.com/python/cpython/pull/12755
+ if _contains_disallowed_url_pchar_re.search(url):
+ raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+
scheme = None
auth = None
host = None
From 5661da387242337e09a939120a1e154e7335d18b Mon Sep 17 00:00:00 2001
From: Ryan Petrello <lists@ryanpetrello.com>
Date: Wed, 1 May 2019 16:46:44 -0400
Subject: [PATCH 2/2] avoid CVE-2019-9740 by percent-encoding invalid path
characters
Percent-encode the disallowed characters instead of raising
LocationParseError, to avoid breaking downstream libraries like requests.
---
test/test_util.py | 4 ++--
urllib3/util/url.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/test/test_util.py b/test/test_util.py
index 5fa49bb..122e00b 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -144,8 +144,8 @@ class TestUtil(unittest.TestCase):
def test_parse_url_contains_control_characters(self):
# see CVE-2019-9740
- with self.assertRaises(LocationParseError):
- parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+ url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:')
+ self.assertEqual(url.path, '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:')
def test_Url_str(self):
U = Url('http', host='google.com')
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index 7878892..cc15b6b 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -2,6 +2,7 @@ from collections import namedtuple
import re
from ..exceptions import LocationParseError
+from six.moves.urllib.parse import quote
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
@@ -147,8 +148,7 @@ def parse_url(url):
# Prevent CVE-2019-9740.
# adapted from https://github.com/python/cpython/pull/12755
- if _contains_disallowed_url_pchar_re.search(url):
- raise LocationParseError("URL can't contain control characters. {!r}".format(url))
+ url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url)
scheme = None
auth = None