|
|
b54164 |
diff --git a/Lib/httplib.py b/Lib/httplib.py
|
|
|
b54164 |
index 60a8fb4..1b41c34 100644
|
|
|
b54164 |
--- a/Lib/httplib.py
|
|
|
b54164 |
+++ b/Lib/httplib.py
|
|
|
b54164 |
@@ -247,6 +247,16 @@ _MAXHEADERS = 100
|
|
|
b54164 |
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
|
|
|
b54164 |
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
|
|
|
b54164 |
|
|
|
b54164 |
+# These characters are not allowed within HTTP URL paths.
|
|
|
b54164 |
+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
|
|
|
b54164 |
+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
|
|
|
b54164 |
+# Prevents CVE-2019-9740. Includes control characters such as \r\n.
|
|
|
b54164 |
+# Restrict non-ASCII characters above \x7f (0x80-0xff).
|
|
|
b54164 |
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\xff]')
|
|
|
b54164 |
+# Arguably only these _should_ be allowed:
|
|
|
b54164 |
+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
|
|
|
b54164 |
+# We are more lenient for assumed real world compatibility purposes.
|
|
|
b54164 |
+
|
|
|
b54164 |
# We always set the Content-Length header for these methods because some
|
|
|
b54164 |
# servers will otherwise respond with a 411
|
|
|
b54164 |
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
|
|
|
b54164 |
@@ -927,6 +937,12 @@ class HTTPConnection:
|
|
|
b54164 |
self._method = method
|
|
|
b54164 |
if not url:
|
|
|
b54164 |
url = '/'
|
|
|
b54164 |
+ # Prevent CVE-2019-9740.
|
|
|
b54164 |
+ match = _contains_disallowed_url_pchar_re.search(url)
|
|
|
b54164 |
+ if match:
|
|
|
b54164 |
+ raise InvalidURL("URL can't contain control characters. %r "
|
|
|
b54164 |
+ "(found at least %r)"
|
|
|
b54164 |
+ % (url, match.group()))
|
|
|
b54164 |
hdr = '%s %s %s' % (method, url, self._http_vsn_str)
|
|
|
b54164 |
|
|
|
b54164 |
self._output(hdr)
|
|
|
b54164 |
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
|
|
|
706c86 |
index 1ce9201..d7778d4 100644
|
|
|
b54164 |
--- a/Lib/test/test_urllib.py
|
|
|
b54164 |
+++ b/Lib/test/test_urllib.py
|
|
|
706c86 |
@@ -257,6 +257,31 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
|
|
|
b54164 |
finally:
|
|
|
b54164 |
self.unfakehttp()
|
|
|
b54164 |
|
|
|
b54164 |
+ def test_url_with_control_char_rejected(self):
|
|
|
b54164 |
+ for char_no in range(0, 0x21) + range(0x7f, 0x100):
|
|
|
b54164 |
+ char = chr(char_no)
|
|
|
b54164 |
+ schemeless_url = "//localhost:7777/test%s/" % char
|
|
|
b54164 |
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
|
|
|
b54164 |
+ try:
|
|
|
b54164 |
+ # urllib quotes the URL so there is no injection.
|
|
|
b54164 |
+ resp = urllib.urlopen("http:" + schemeless_url)
|
|
|
b54164 |
+ self.assertNotIn(char, resp.geturl())
|
|
|
b54164 |
+ finally:
|
|
|
b54164 |
+ self.unfakehttp()
|
|
|
b54164 |
+
|
|
|
b54164 |
+ def test_url_with_newline_header_injection_rejected(self):
|
|
|
b54164 |
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
|
|
|
b54164 |
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
|
|
|
b54164 |
+ schemeless_url = "//" + host + ":8080/test/?test=a"
|
|
|
b54164 |
+ try:
|
|
|
b54164 |
+ # urllib quotes the URL so there is no injection.
|
|
|
b54164 |
+ resp = urllib.urlopen("http:" + schemeless_url)
|
|
|
b54164 |
+ self.assertNotIn(' ', resp.geturl())
|
|
|
b54164 |
+ self.assertNotIn('\r', resp.geturl())
|
|
|
b54164 |
+ self.assertNotIn('\n', resp.geturl())
|
|
|
b54164 |
+ finally:
|
|
|
b54164 |
+ self.unfakehttp()
|
|
|
b54164 |
+
|
|
|
b54164 |
def test_read_bogus(self):
|
|
|
b54164 |
# urlopen() should raise IOError for many error codes.
|
|
|
b54164 |
self.fakehttp('''HTTP/1.1 401 Authentication Required
|
|
|
b54164 |
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
|
|
|
706c86 |
index 6d24d5d..9531818 100644
|
|
|
b54164 |
--- a/Lib/test/test_urllib2.py
|
|
|
b54164 |
+++ b/Lib/test/test_urllib2.py
|
|
|
b54164 |
@@ -15,6 +15,9 @@ try:
|
|
|
b54164 |
except ImportError:
|
|
|
b54164 |
ssl = None
|
|
|
b54164 |
|
|
|
b54164 |
+from test.test_urllib import FakeHTTPMixin
|
|
|
b54164 |
+
|
|
|
b54164 |
+
|
|
|
b54164 |
# XXX
|
|
|
b54164 |
# Request
|
|
|
b54164 |
# CacheFTPHandler (hard to write)
|
|
|
b54164 |
@@ -1262,7 +1265,7 @@ class HandlerTests(unittest.TestCase):
|
|
|
b54164 |
self.assertEqual(len(http_handler.requests), 1)
|
|
|
b54164 |
self.assertFalse(http_handler.requests[0].has_header(auth_header))
|
|
|
b54164 |
|
|
|
b54164 |
-class MiscTests(unittest.TestCase):
|
|
|
b54164 |
+class MiscTests(unittest.TestCase, FakeHTTPMixin):
|
|
|
b54164 |
|
|
|
b54164 |
def test_build_opener(self):
|
|
|
b54164 |
class MyHTTPHandler(urllib2.HTTPHandler): pass
|
|
|
b54164 |
@@ -1317,6 +1320,52 @@ class MiscTests(unittest.TestCase):
|
|
|
b54164 |
"Unsupported digest authentication algorithm 'invalid'"
|
|
|
b54164 |
)
|
|
|
b54164 |
|
|
|
b54164 |
+ @unittest.skipUnless(ssl, "ssl module required")
|
|
|
b54164 |
+ def test_url_with_control_char_rejected(self):
|
|
|
b54164 |
+ for char_no in range(0, 0x21) + range(0x7f, 0x100):
|
|
|
b54164 |
+ char = chr(char_no)
|
|
|
b54164 |
+ schemeless_url = "//localhost:7777/test%s/" % char
|
|
|
b54164 |
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
|
|
|
b54164 |
+ try:
|
|
|
b54164 |
+ # We explicitly test urllib2.urlopen() instead of the top
|
|
|
b54164 |
+ # level 'def urlopen()' function defined in this... (quite ugly)
|
|
|
b54164 |
+ # test suite. They use different url opening codepaths. Plain
|
|
|
b54164 |
+ # urlopen uses FancyURLopener which goes via a codepath that
|
|
|
b54164 |
+ # calls urllib.quote() on the URL which makes all of the
|
|
|
b54164 |
+ # above attempts at injection within the url _path_ safe.
|
|
|
b54164 |
+ escaped_char_repr = repr(char).replace('\\', r'\\')
|
|
|
b54164 |
+ InvalidURL = httplib.InvalidURL
|
|
|
b54164 |
+ with self.assertRaisesRegexp(
|
|
|
b54164 |
+ InvalidURL, "contain control.*" + escaped_char_repr):
|
|
|
b54164 |
+ urllib2.urlopen("http:" + schemeless_url)
|
|
|
b54164 |
+ with self.assertRaisesRegexp(
|
|
|
b54164 |
+ InvalidURL, "contain control.*" + escaped_char_repr):
|
|
|
b54164 |
+ urllib2.urlopen("https:" + schemeless_url)
|
|
|
b54164 |
+ finally:
|
|
|
b54164 |
+ self.unfakehttp()
|
|
|
b54164 |
+
|
|
|
b54164 |
+ @unittest.skipUnless(ssl, "ssl module required")
|
|
|
b54164 |
+ def test_url_with_newline_header_injection_rejected(self):
|
|
|
b54164 |
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
|
|
|
b54164 |
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
|
|
|
b54164 |
+ schemeless_url = "//" + host + ":8080/test/?test=a"
|
|
|
b54164 |
+ try:
|
|
|
706c86 |
+ # We explicitly test urllib2.urlopen() instead of the top
|
|
|
b54164 |
+ # level 'def urlopen()' function defined in this... (quite ugly)
|
|
|
b54164 |
+ # test suite. They use different url opening codepaths. Plain
|
|
|
b54164 |
+ # urlopen uses FancyURLopener which goes via a codepath that
|
|
|
b54164 |
+ # calls urllib.quote() on the URL which makes all of the
|
|
|
b54164 |
+ # above attempts at injection within the url _path_ safe.
|
|
|
b54164 |
+ InvalidURL = httplib.InvalidURL
|
|
|
b54164 |
+ with self.assertRaisesRegexp(
|
|
|
b54164 |
+ InvalidURL, r"contain control.*\\r.*(found at least . .)"):
|
|
|
b54164 |
+ urllib2.urlopen("http:" + schemeless_url)
|
|
|
b54164 |
+ with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"):
|
|
|
b54164 |
+ urllib2.urlopen("https:" + schemeless_url)
|
|
|
b54164 |
+ finally:
|
|
|
b54164 |
+ self.unfakehttp()
|
|
|
b54164 |
+
|
|
|
b54164 |
+
|
|
|
b54164 |
|
|
|
b54164 |
class RequestTests(unittest.TestCase):
|
|
|
b54164 |
|
|
|
b54164 |
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
|
|
|
b54164 |
index 36b3be6..90ccb30 100644
|
|
|
b54164 |
--- a/Lib/test/test_xmlrpc.py
|
|
|
b54164 |
+++ b/Lib/test/test_xmlrpc.py
|
|
|
b54164 |
@@ -659,7 +659,13 @@ class SimpleServerTestCase(BaseServerTestCase):
|
|
|
b54164 |
def test_partial_post(self):
|
|
|
b54164 |
# Check that a partial POST doesn't make the server loop: issue #14001.
|
|
|
b54164 |
conn = httplib.HTTPConnection(ADDR, PORT)
|
|
|
b54164 |
- conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
|
|
|
b54164 |
+ conn.send('POST /RPC2 HTTP/1.0\r\n'
|
|
|
b54164 |
+ 'Content-Length: 100\r\n\r\n'
|
|
|
b54164 |
+ 'bye HTTP/1.1\r\n'
|
|
|
b54164 |
+ 'Host: %s:%s\r\n'
|
|
|
b54164 |
+ 'Accept-Encoding: identity\r\n'
|
|
|
b54164 |
+ 'Content-Length: 0\r\n\r\n'
|
|
|
b54164 |
+ % (ADDR, PORT))
|
|
|
b54164 |
conn.close()
|
|
|
b54164 |
|
|
|
b54164 |
class SimpleServerEncodingTestCase(BaseServerTestCase):
|