Tree - rpms/python - CentOS Git server

rpms / python

Blame SOURCES/00324-disallow-control-chars-in-http-urls.patch

Blob History Raw

		cb219e	`diff --git a/Lib/httplib.py b/Lib/httplib.py`
		cb219e	`index da2f346..fc8e895 100644`
		cb219e	`--- a/Lib/httplib.py`
		cb219e	`+++ b/Lib/httplib.py`
		cb219e	`@@ -247,6 +247,15 @@ _MAXHEADERS = 100`
		cb219e	`_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match`
		cb219e	`_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search`
		cb219e
		cb219e	`+# These characters are not allowed within HTTP URL paths.`
		cb219e	`+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the`
		cb219e	`+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.`
		cb219e	`+# Prevents CVE-2019-9740. Includes control characters such as \r\n.`
		cb219e	`+# Restrict non-ASCII characters above \x7f (0x80-0xff).`
		cb219e	`+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\xff]')`
		cb219e	`+# Arguably only these _should_ be allowed:`
		cb219e	`+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")`
		cb219e	`+# We are more lenient for assumed real world compatibility purposes.`
		cb219e
		cb219e	`class HTTPMessage(mimetools.Message):`
		cb219e
		cb219e	`@@ -926,6 +935,12 @@ class HTTPConnection:`
		cb219e	`self._method = method`
		cb219e	`if not url:`
		cb219e	`url = '/'`
		cb219e	`+ # Prevent CVE-2019-9740.`
		cb219e	`+ match = _contains_disallowed_url_pchar_re.search(url)`
		cb219e	`+ if match:`
		cb219e	`+ raise InvalidURL("URL can't contain control characters. %r "`
		cb219e	`+ "(found at least %r)"`
		cb219e	`+ % (url, match.group()))`
		cb219e	`hdr = '%s %s %s' % (method, url, self._http_vsn_str)`
		cb219e
		cb219e	`self._output(hdr)`
		cb219e	`diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py`
		cb219e	`index 3845012..d2da0f8 100644`
		cb219e	`--- a/Lib/test/test_urllib.py`
		cb219e	`+++ b/Lib/test/test_urllib.py`
		cb219e	`@@ -198,6 +198,31 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):`
		cb219e	`finally:`
		cb219e	`self.unfakehttp()`
		cb219e
		cb219e	`+ def test_url_with_control_char_rejected(self):`
		cb219e	`+ for char_no in range(0, 0x21) + range(0x7f, 0x100):`
		cb219e	`+ char = chr(char_no)`
		cb219e	`+ schemeless_url = "//localhost:7777/test%s/" % char`
		cb219e	`+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")`
		cb219e	`+ try:`
		cb219e	`+ # urllib quotes the URL so there is no injection.`
		cb219e	`+ resp = urllib.urlopen("http:" + schemeless_url)`
		cb219e	`+ self.assertNotIn(char, resp.geturl())`
		cb219e	`+ finally:`
		cb219e	`+ self.unfakehttp()`
		cb219e	`+`
		cb219e	`+ def test_url_with_newline_header_injection_rejected(self):`
		cb219e	`+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")`
		cb219e	`+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"`
		cb219e	`+ schemeless_url = "//" + host + ":8080/test/?test=a"`
		cb219e	`+ try:`
		cb219e	`+ # urllib quotes the URL so there is no injection.`
		cb219e	`+ resp = urllib.urlopen("http:" + schemeless_url)`
		cb219e	`+ self.assertNotIn(' ', resp.geturl())`
		cb219e	`+ self.assertNotIn('\r', resp.geturl())`
		cb219e	`+ self.assertNotIn('\n', resp.geturl())`
		cb219e	`+ finally:`
		cb219e	`+ self.unfakehttp()`
		cb219e	`+`
		cb219e	`def test_read_bogus(self):`
		cb219e	`# urlopen() should raise IOError for many error codes.`
		cb219e	`self.fakehttp('''HTTP/1.1 401 Authentication Required`
		cb219e	`@@ -786,6 +811,35 @@ class Pathname_Tests(unittest.TestCase):`
		cb219e	`class Utility_Tests(unittest.TestCase):`
		cb219e	`"""Testcase to test the various utility functions in the urllib."""`
		cb219e
		cb219e	`+ def test_splithost(self):`
		cb219e	`+ splithost = urllib.splithost`
		cb219e	`+ self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),`
		cb219e	`+ ('www.example.org:80', '/foo/bar/baz.html'))`
		cb219e	`+ self.assertEqual(splithost('//www.example.org:80'),`
		cb219e	`+ ('www.example.org:80', ''))`
		cb219e	`+ self.assertEqual(splithost('/foo/bar/baz.html'),`
		cb219e	`+ (None, '/foo/bar/baz.html'))`
		cb219e	`+`
		cb219e	`+ # bpo-30500: # starts a fragment.`
		cb219e	`+ self.assertEqual(splithost('//127.0.0.1#@host.com'),`
		cb219e	`+ ('127.0.0.1', '/#@host.com'))`
		cb219e	`+ self.assertEqual(splithost('//127.0.0.1#@host.com:80'),`
		cb219e	`+ ('127.0.0.1', '/#@host.com:80'))`
		cb219e	`+ self.assertEqual(splithost('//127.0.0.1:80#@host.com'),`
		cb219e	`+ ('127.0.0.1:80', '/#@host.com'))`
		cb219e	`+`
		cb219e	`+ # Empty host is returned as empty string.`
		cb219e	`+ self.assertEqual(splithost("///file"),`
		cb219e	`+ ('', '/file'))`
		cb219e	`+`
		cb219e	`+ # Trailing semicolon, question mark and hash symbol are kept.`
		cb219e	`+ self.assertEqual(splithost("//example.net/file;"),`
		cb219e	`+ ('example.net', '/file;'))`
		cb219e	`+ self.assertEqual(splithost("//example.net/file?"),`
		cb219e	`+ ('example.net', '/file?'))`
		cb219e	`+ self.assertEqual(splithost("//example.net/file#"),`
		cb219e	`+ ('example.net', '/file#'))`
		cb219e	`+`
		cb219e	`def test_splitpasswd(self):`
		cb219e	`"""Some of the password examples are not sensible, but it is added to`
		cb219e	`confirming to RFC2617 and addressing issue4675.`
		cb219e	`diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py`
		cb219e	`index c317b8d..63fefd6 100644`
		cb219e	`--- a/Lib/test/test_urllib2.py`
		cb219e	`+++ b/Lib/test/test_urllib2.py`
		cb219e	`@@ -7,12 +7,16 @@ import StringIO`
		cb219e
		cb219e	`import urllib2`
		cb219e	`from urllib2 import Request, OpenerDirector`
		cb219e	`+import httplib`
		cb219e
		cb219e	`try:`
		cb219e	`import ssl`
		cb219e	`except ImportError:`
		cb219e	`ssl = None`
		cb219e
		cb219e	`+from test.test_urllib import FakeHTTPMixin`
		cb219e	`+`
		cb219e	`+`
		cb219e	`# XXX`
		cb219e	`# Request`
		cb219e	`# CacheFTPHandler (hard to write)`
		cb219e	`@@ -1243,7 +1247,7 @@ class HandlerTests(unittest.TestCase):`
		cb219e	`self.assertEqual(len(http_handler.requests), 1)`
		cb219e	`self.assertFalse(http_handler.requests[0].has_header(auth_header))`
		cb219e
		cb219e	`-class MiscTests(unittest.TestCase):`
		cb219e	`+class MiscTests(unittest.TestCase, FakeHTTPMixin):`
		cb219e
		cb219e	`def test_build_opener(self):`
		cb219e	`class MyHTTPHandler(urllib2.HTTPHandler): pass`
		cb219e	`@@ -1289,6 +1293,53 @@ class MiscTests(unittest.TestCase):`
		cb219e	`else:`
		cb219e	`self.assertTrue(False)`
		cb219e
		cb219e	`+ @unittest.skipUnless(ssl, "ssl module required")`
		cb219e	`+ def test_url_with_control_char_rejected(self):`
		cb219e	`+ for char_no in range(0, 0x21) + range(0x7f, 0x100):`
		cb219e	`+ char = chr(char_no)`
		cb219e	`+ schemeless_url = "//localhost:7777/test%s/" % char`
		cb219e	`+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")`
		cb219e	`+ try:`
		cb219e	`+ # We explicitly test urllib2.urlopen() instead of the top`
		cb219e	`+ # level 'def urlopen()' function defined in this... (quite ugly)`
		cb219e	`+ # test suite. They use different url opening codepaths. Plain`
		cb219e	`+ # urlopen uses FancyURLOpener which goes via a codepath that`
		cb219e	`+ # calls urllib.parse.quote() on the URL which makes all of the`
		cb219e	`+ # above attempts at injection within the url _path_ safe.`
		cb219e	`+ escaped_char_repr = repr(char).replace('\\', r'\\')`
		cb219e	`+ InvalidURL = httplib.InvalidURL`
		cb219e	`+ with self.assertRaisesRegexp(`
		cb219e	`+ InvalidURL, "contain control.*" + escaped_char_repr):`
		cb219e	`+ urllib2.urlopen("http:" + schemeless_url)`
		cb219e	`+ with self.assertRaisesRegexp(`
		cb219e	`+ InvalidURL, "contain control.*" + escaped_char_repr):`
		cb219e	`+ urllib2.urlopen("https:" + schemeless_url)`
		cb219e	`+ finally:`
		cb219e	`+ self.unfakehttp()`
		cb219e	`+`
		cb219e	`+ @unittest.skipUnless(ssl, "ssl module required")`
		cb219e	`+ def test_url_with_newline_header_injection_rejected(self):`
		cb219e	`+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")`
		cb219e	`+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"`
		cb219e	`+ schemeless_url = "//" + host + ":8080/test/?test=a"`
		cb219e	`+ try:`
		cb219e	`+ # We explicitly test urllib2.urlopen() instead of the top`
		cb219e	`+ # level 'def urlopen()' function defined in this... (quite ugly)`
		cb219e	`+ # test suite. They use different url opening codepaths. Plain`
		cb219e	`+ # urlopen uses FancyURLOpener which goes via a codepath that`
		cb219e	`+ # calls urllib.parse.quote() on the URL which makes all of the`
		cb219e	`+ # above attempts at injection within the url _path_ safe.`
		cb219e	`+ InvalidURL = httplib.InvalidURL`
		cb219e	`+ with self.assertRaisesRegexp(`
		cb219e	`+ InvalidURL, r"contain control.*\\r.*(found at least . .)"):`
		cb219e	`+ urllib2.urlopen("http:" + schemeless_url)`
		cb219e	`+ with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"):`
		cb219e	`+ urllib2.urlopen("https:" + schemeless_url)`
		cb219e	`+ finally:`
		cb219e	`+ self.unfakehttp()`
		cb219e	`+`
		cb219e	`+`
		cb219e	`+`
		cb219e	`class RequestTests(unittest.TestCase):`
		cb219e
		cb219e	`def setUp(self):`
		cb219e	`diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py`
		cb219e	`index 79e862a..347b494 100644`
		cb219e	`--- a/Lib/test/test_xmlrpc.py`
		cb219e	`+++ b/Lib/test/test_xmlrpc.py`
		cb219e	`@@ -592,7 +592,13 @@ class SimpleServerTestCase(BaseServerTestCase):`
		cb219e	`def test_partial_post(self):`
		cb219e	`# Check that a partial POST doesn't make the server loop: issue #14001.`
		cb219e	`conn = httplib.HTTPConnection(ADDR, PORT)`
		cb219e	`- conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')`
		cb219e	`+ conn.send('POST /RPC2 HTTP/1.0\r\n'`
		cb219e	`+ 'Content-Length: 100\r\n\r\n'`
		cb219e	`+ 'bye HTTP/1.1\r\n'`
		cb219e	`+ 'Host: %s:%s\r\n'`
		cb219e	`+ 'Accept-Encoding: identity\r\n'`
		cb219e	`+ 'Content-Length: 0\r\n\r\n'`
		cb219e	`+ % (ADDR, PORT))`
		cb219e	`conn.close()`
		cb219e
		cb219e	`class MultiPathServerTestCase(BaseServerTestCase):`
		cb219e	`diff --git a/Lib/urllib.py b/Lib/urllib.py`
		cb219e	`index 9b31df1..2201e3e 100644`
		cb219e	`--- a/Lib/urllib.py`
		cb219e	`+++ b/Lib/urllib.py`
		cb219e	`@@ -1079,8 +1079,7 @@ def splithost(url):`
		cb219e	`"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""`
		cb219e	`global _hostprog`
		cb219e	`if _hostprog is None:`
		cb219e	`- import re`
		cb219e	`- _hostprog = re.compile('^//([^/?]*)(.*)$')`
		cb219e	`+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)`
		cb219e
		cb219e	`match = _hostprog.match(url)`
		cb219e	`if match:`

rpms / python

Source Code

Blame SOURCES/00324-disallow-control-chars-in-http-urls.patch