Blame SOURCES/00368-CVE-2021-3737.patch

9b3a7c
From f7fb35b563a9182c22fbdd03c72ec3724dafe918 Mon Sep 17 00:00:00 2001
9b3a7c
From: Gen Xu <xgbarry@gmail.com>
9b3a7c
Date: Wed, 5 May 2021 15:42:41 -0700
9b3a7c
Subject: [PATCH] bpo-44022: Fix http client infinite line reading (DoS) after
9b3a7c
 an HTTP 100 Continue (GH-25916)
9b3a7c
9b3a7c
Fixes http.client potential denial of service where it could get stuck reading lines from a malicious server after a 100 Continue response.
9b3a7c
9b3a7c
Co-authored-by: Gregory P. Smith <greg@krypto.org>
9b3a7c
(cherry picked from commit 47895e31b6f626bc6ce47d175fe9d43c1098909d)
9b3a7c
9b3a7c
Co-authored-by: Gen Xu <xgbarry@gmail.com>
9b3a7c
---
9b3a7c
 Lib/http/client.py                            | 38 ++++++++++---------
9b3a7c
 Lib/test/test_httplib.py                      | 10 ++++-
9b3a7c
 .../2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst  |  2 +
9b3a7c
 3 files changed, 32 insertions(+), 18 deletions(-)
9b3a7c
 create mode 100644 Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
9b3a7c
9b3a7c
diff --git a/Lib/http/client.py b/Lib/http/client.py
9b3a7c
index 53581eca20587..07e675fac5981 100644
9b3a7c
--- a/Lib/http/client.py
9b3a7c
+++ b/Lib/http/client.py
9b3a7c
@@ -205,15 +205,11 @@ def getallmatchingheaders(self, name):
9b3a7c
                 lst.append(line)
9b3a7c
         return lst
9b3a7c
 
9b3a7c
-def parse_headers(fp, _class=HTTPMessage):
9b3a7c
-    """Parses only RFC2822 headers from a file pointer.
9b3a7c
-
9b3a7c
-    email Parser wants to see strings rather than bytes.
9b3a7c
-    But a TextIOWrapper around self.rfile would buffer too many bytes
9b3a7c
-    from the stream, bytes which we later need to read as bytes.
9b3a7c
-    So we read the correct bytes here, as bytes, for email Parser
9b3a7c
-    to parse.
9b3a7c
+def _read_headers(fp):
9b3a7c
+    """Reads potential header lines into a list from a file pointer.
9b3a7c
 
9b3a7c
+    Length of line is limited by _MAXLINE, and number of
9b3a7c
+    headers is limited by _MAXHEADERS.
9b3a7c
     """
9b3a7c
     headers = []
9b3a7c
     while True:
9b3a7c
@@ -225,6 +221,19 @@ def parse_headers(fp, _class=HTTPMessage):
9b3a7c
             raise HTTPException("got more than %d headers" % _MAXHEADERS)
9b3a7c
         if line in (b'\r\n', b'\n', b''):
9b3a7c
             break
9b3a7c
+    return headers
9b3a7c
+
9b3a7c
+def parse_headers(fp, _class=HTTPMessage):
9b3a7c
+    """Parses only RFC2822 headers from a file pointer.
9b3a7c
+
9b3a7c
+    email Parser wants to see strings rather than bytes.
9b3a7c
+    But a TextIOWrapper around self.rfile would buffer too many bytes
9b3a7c
+    from the stream, bytes which we later need to read as bytes.
9b3a7c
+    So we read the correct bytes here, as bytes, for email Parser
9b3a7c
+    to parse.
9b3a7c
+
9b3a7c
+    """
9b3a7c
+    headers = _read_headers(fp)
9b3a7c
     hstring = b''.join(headers).decode('iso-8859-1')
9b3a7c
     return email.parser.Parser(_class=_class).parsestr(hstring)
9b3a7c
 
9b3a7c
@@ -312,15 +321,10 @@ def begin(self):
9b3a7c
             if status != CONTINUE:
9b3a7c
                 break
9b3a7c
             # skip the header from the 100 response
9b3a7c
-            while True:
9b3a7c
-                skip = self.fp.readline(_MAXLINE + 1)
9b3a7c
-                if len(skip) > _MAXLINE:
9b3a7c
-                    raise LineTooLong("header line")
9b3a7c
-                skip = skip.strip()
9b3a7c
-                if not skip:
9b3a7c
-                    break
9b3a7c
-                if self.debuglevel > 0:
9b3a7c
-                    print("header:", skip)
9b3a7c
+            skipped_headers = _read_headers(self.fp)
9b3a7c
+            if self.debuglevel > 0:
9b3a7c
+                print("headers:", skipped_headers)
9b3a7c
+            del skipped_headers
9b3a7c
 
9b3a7c
         self.code = self.status = status
9b3a7c
         self.reason = reason.strip()
9b3a7c
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
9b3a7c
index 03e049b13fd21..0db287507c7bf 100644
9b3a7c
--- a/Lib/test/test_httplib.py
9b3a7c
+++ b/Lib/test/test_httplib.py
9b3a7c
@@ -971,6 +971,14 @@ def test_overflowing_header_line(self):
9b3a7c
         resp = client.HTTPResponse(FakeSocket(body))
9b3a7c
         self.assertRaises(client.LineTooLong, resp.begin)
9b3a7c
 
9b3a7c
+    def test_overflowing_header_limit_after_100(self):
9b3a7c
+        body = (
9b3a7c
+            'HTTP/1.1 100 OK\r\n'
9b3a7c
+            'r\n' * 32768
9b3a7c
+        )
9b3a7c
+        resp = client.HTTPResponse(FakeSocket(body))
9b3a7c
+        self.assertRaises(client.HTTPException, resp.begin)
9b3a7c
+
9b3a7c
     def test_overflowing_chunked_line(self):
9b3a7c
         body = (
9b3a7c
             'HTTP/1.1 200 OK\r\n'
9b3a7c
@@ -1377,7 +1385,7 @@ def readline(self, limit):
9b3a7c
 class OfflineTest(TestCase):
9b3a7c
     def test_all(self):
9b3a7c
         # Documented objects defined in the module should be in __all__
9b3a7c
-        expected = {"responses"}  # White-list documented dict() object
9b3a7c
+        expected = {"responses"}  # Allowlist documented dict() object
9b3a7c
         # HTTPMessage, parse_headers(), and the HTTP status code constants are
9b3a7c
         # intentionally omitted for simplicity
9b3a7c
         blacklist = {"HTTPMessage", "parse_headers"}
9b3a7c
diff --git a/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
9b3a7c
new file mode 100644
9b3a7c
index 0000000000000..cf6b63e396155
9b3a7c
--- /dev/null
9b3a7c
+++ b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
9b3a7c
@@ -0,0 +1,2 @@
9b3a7c
+:mod:`http.client` now avoids infinitely reading potential HTTP headers after a
9b3a7c
+``100 Continue`` status response from the server.