Blame SOURCES/00359-CVE-2021-23336.patch

0f0bda
commit 9e77ec82c40ab59846f9447b7c483e7b8e368b16
0f0bda
Author: Petr Viktorin <pviktori@redhat.com>
0f0bda
Date:   Thu Mar 4 13:59:56 2021 +0100
0f0bda
0f0bda
    CVE-2021-23336: Add `separator` argument to parse_qs; warn with default
0f0bda
    
0f0bda
    Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
0f0bda
    However, this solution is different than the upstream solution in Python 3.6.13.
0f0bda
    
0f0bda
    An optional argument `separator` is added to specify the separator.
0f0bda
    It is recommended to set it to '&' or ';' to match the application or proxy in use.
0f0bda
    The default can be set with an env variable or a config file.
0f0bda
    If neither the argument, the env var nor the config file specifies a separator, "&" is used
0f0bda
    but a warning is raised if parse_qs is used on input that contains ';'.
0f0bda
    
0f0bda
    Co-authors of the upstream change (who do not necessarily agree with this):
0f0bda
    Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
0f0bda
    Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
0f0bda
    Co-authored-by: Éric Araujo <merwok@netwok.org>
0f0bda
0f0bda
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
0f0bda
index 41219eeaaba..ddecc0af23a 100644
0f0bda
--- a/Doc/library/cgi.rst
0f0bda
+++ b/Doc/library/cgi.rst
0f0bda
@@ -277,13 +277,12 @@ These are useful if you want more control, or if you want to employ some of the
0f0bda
 algorithms implemented in this module in other circumstances.
0f0bda
 
0f0bda
 
0f0bda
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
0f0bda
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
0f0bda
 
0f0bda
    Parse a query in the environment or from a file (the file defaults to
0f0bda
-   ``sys.stdin``).  The *keep_blank_values* and *strict_parsing* parameters are
0f0bda
+   ``sys.stdin``).  The *keep_blank_values*, *strict_parsing* and *separator* parameters are
0f0bda
    passed to :func:`urllib.parse.parse_qs` unchanged.
0f0bda
 
0f0bda
-
0f0bda
 .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False)
0f0bda
 
0f0bda
    This function is deprecated in this module. Use :func:`urllib.parse.parse_qs`
0f0bda
@@ -308,7 +307,6 @@ algorithms implemented in this module in other circumstances.
0f0bda
    Note that this does not parse nested multipart parts --- use
0f0bda
    :class:`FieldStorage` for that.
0f0bda
 
0f0bda
-
0f0bda
 .. function:: parse_header(string)
0f0bda
 
0f0bda
    Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a
0f0bda
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
0f0bda
index 647af613a31..bcab7c142bc 100644
0f0bda
--- a/Doc/library/urllib.parse.rst
0f0bda
+++ b/Doc/library/urllib.parse.rst
0f0bda
@@ -143,7 +143,7 @@ or on combining URL components into a URL string.
0f0bda
       now raise :exc:`ValueError`.
0f0bda
 
0f0bda
 
0f0bda
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
0f0bda
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
0f0bda
 
0f0bda
    Parse a query string given as a string argument (data of type
0f0bda
    :mimetype:`application/x-www-form-urlencoded`).  Data are returned as a
0f0bda
@@ -168,6 +168,15 @@ or on combining URL components into a URL string.
0f0bda
    read. If set, then throws a :exc:`ValueError` if there are more than
0f0bda
    *max_num_fields* fields read.
0f0bda
 
0f0bda
+   The optional argument *separator* is the symbol to use for separating the
0f0bda
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
0f0bda
+   It defaults to ``'&'``; a warning is raised if this default is used on input that contains ``';'``.
0f0bda
+   This default may be changed with the following environment variable settings:
0f0bda
+
0f0bda
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
0f0bda
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
0f0bda
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
0f0bda
+
0f0bda
    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
0f0bda
    parameter set to ``True``) to convert such dictionaries into query
0f0bda
    strings.
0f0bda
@@ -204,6 +213,9 @@ or on combining URL components into a URL string.
0f0bda
    read. If set, then throws a :exc:`ValueError` if there are more than
0f0bda
    *max_num_fields* fields read.
0f0bda
 
0f0bda
+   The optional argument *separator* is the symbol to use for separating the
0f0bda
+   query arguments. It works as in :py:func:`parse_qs`.
0f0bda
+
0f0bda
    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
0f0bda
    query strings.
0f0bda
 
0f0bda
@@ -213,7 +225,6 @@ or on combining URL components into a URL string.
0f0bda
    .. versionchanged:: 3.6.8
0f0bda
       Added *max_num_fields* parameter.
0f0bda
 
0f0bda
-
0f0bda
 .. function:: urlunparse(parts)
0f0bda
 
0f0bda
    Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
0f0bda
diff --git a/Lib/cgi.py b/Lib/cgi.py
0f0bda
index 56f243e09f0..5ab2a5d6af6 100755
0f0bda
--- a/Lib/cgi.py
0f0bda
+++ b/Lib/cgi.py
0f0bda
@@ -117,7 +117,8 @@ log = initlog           # The current logging function
0f0bda
 # 0 ==> unlimited input
0f0bda
 maxlen = 0
0f0bda
 
0f0bda
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
0f0bda
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
0f0bda
+          strict_parsing=0, separator=None):
0f0bda
     """Parse a query in the environment or from a file (default stdin)
0f0bda
 
0f0bda
         Arguments, all optional:
0f0bda
@@ -136,6 +137,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
0f0bda
         strict_parsing: flag indicating what to do with parsing errors.
0f0bda
             If false (the default), errors are silently ignored.
0f0bda
             If true, errors raise a ValueError exception.
0f0bda
+
0f0bda
+        separator: str. The symbol to use for separating the query arguments.
0f0bda
     """
0f0bda
     if fp is None:
0f0bda
         fp = sys.stdin
0f0bda
@@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
0f0bda
     if environ['REQUEST_METHOD'] == 'POST':
0f0bda
         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
0f0bda
         if ctype == 'multipart/form-data':
0f0bda
-            return parse_multipart(fp, pdict)
0f0bda
+            return parse_multipart(fp, pdict, separator=separator)
0f0bda
         elif ctype == 'application/x-www-form-urlencoded':
0f0bda
             clength = int(environ['CONTENT_LENGTH'])
0f0bda
             if maxlen and clength > maxlen:
0f0bda
@@ -182,21 +185,21 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
0f0bda
     return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
0f0bda
                                  encoding=encoding)
0f0bda
 
0f0bda
-
0f0bda
 # parse query string function called from urlparse,
0f0bda
 # this is done in order to maintain backward compatibility.
0f0bda
-
0f0bda
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
0f0bda
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, separator=None):
0f0bda
     """Parse a query given as a string argument."""
0f0bda
     warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
0f0bda
          DeprecationWarning, 2)
0f0bda
-    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
0f0bda
+    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
0f0bda
+                                 separator=separator)
0f0bda
 
0f0bda
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
0f0bda
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, separator=None):
0f0bda
     """Parse a query given as a string argument."""
0f0bda
     warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
0f0bda
          DeprecationWarning, 2)
0f0bda
-    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
0f0bda
+    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing,
0f0bda
+                                  separator=separator)
0f0bda
 
0f0bda
 def parse_multipart(fp, pdict):
0f0bda
     """Parse multipart input.
0f0bda
@@ -297,7 +300,6 @@ def parse_multipart(fp, pdict):
0f0bda
 
0f0bda
     return partdict
0f0bda
 
0f0bda
-
0f0bda
 def _parseparam(s):
0f0bda
     while s[:1] == ';':
0f0bda
         s = s[1:]
0f0bda
@@ -405,7 +407,7 @@ class FieldStorage:
0f0bda
     def __init__(self, fp=None, headers=None, outerboundary=b'',
0f0bda
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
0f0bda
                  limit=None, encoding='utf-8', errors='replace',
0f0bda
-                 max_num_fields=None):
0f0bda
+                 max_num_fields=None, separator=None):
0f0bda
         """Constructor.  Read multipart/* until last part.
0f0bda
 
0f0bda
         Arguments, all optional:
0f0bda
@@ -453,6 +455,7 @@ class FieldStorage:
0f0bda
         self.keep_blank_values = keep_blank_values
0f0bda
         self.strict_parsing = strict_parsing
0f0bda
         self.max_num_fields = max_num_fields
0f0bda
+        self.separator = separator
0f0bda
         if 'REQUEST_METHOD' in environ:
0f0bda
             method = environ['REQUEST_METHOD'].upper()
0f0bda
         self.qs_on_post = None
0f0bda
@@ -678,7 +681,7 @@ class FieldStorage:
0f0bda
         query = urllib.parse.parse_qsl(
0f0bda
             qs, self.keep_blank_values, self.strict_parsing,
0f0bda
             encoding=self.encoding, errors=self.errors,
0f0bda
-            max_num_fields=self.max_num_fields)
0f0bda
+            max_num_fields=self.max_num_fields, separator=self.separator)
0f0bda
         self.list = [MiniFieldStorage(key, value) for key, value in query]
0f0bda
         self.skip_lines()
0f0bda
 
0f0bda
@@ -694,7 +697,7 @@ class FieldStorage:
0f0bda
             query = urllib.parse.parse_qsl(
0f0bda
                 self.qs_on_post, self.keep_blank_values, self.strict_parsing,
0f0bda
                 encoding=self.encoding, errors=self.errors,
0f0bda
-                max_num_fields=self.max_num_fields)
0f0bda
+                max_num_fields=self.max_num_fields, separator=self.separator)
0f0bda
             self.list.extend(MiniFieldStorage(key, value) for key, value in query)
0f0bda
 
0f0bda
         klass = self.FieldStorageClass or self.__class__
0f0bda
@@ -736,7 +739,8 @@ class FieldStorage:
0f0bda
 
0f0bda
             part = klass(self.fp, headers, ib, environ, keep_blank_values,
0f0bda
                          strict_parsing,self.limit-self.bytes_read,
0f0bda
-                         self.encoding, self.errors, max_num_fields)
0f0bda
+                         self.encoding, self.errors, max_num_fields,
0f0bda
+                         separator=self.separator)
0f0bda
 
0f0bda
             if max_num_fields is not None:
0f0bda
                 max_num_fields -= 1
0f0bda
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
0f0bda
index b3e2d4cce8e..5ae3e085e1e 100644
0f0bda
--- a/Lib/test/test_cgi.py
0f0bda
+++ b/Lib/test/test_cgi.py
0f0bda
@@ -55,12 +55,9 @@ parse_strict_test_cases = [
0f0bda
     ("", ValueError("bad query field: ''")),
0f0bda
     ("&", ValueError("bad query field: ''")),
0f0bda
     ("&&", ValueError("bad query field: ''")),
0f0bda
-    (";", ValueError("bad query field: ''")),
0f0bda
-    (";&;", ValueError("bad query field: ''")),
0f0bda
     # Should the next few really be valid?
0f0bda
     ("=", {}),
0f0bda
     ("=&=", {}),
0f0bda
-    ("=;=", {}),
0f0bda
     # This rest seem to make sense
0f0bda
     ("=a", {'': ['a']}),
0f0bda
     ("&=a", ValueError("bad query field: ''")),
0f0bda
@@ -75,8 +72,6 @@ parse_strict_test_cases = [
0f0bda
     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
0f0bda
     ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
0f0bda
     ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
0f0bda
-    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
0f0bda
-    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
0f0bda
     ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
0f0bda
      {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
0f0bda
       'cuyer': ['r'],
0f0bda
@@ -164,6 +159,35 @@ class CgiTests(unittest.TestCase):
0f0bda
 
0f0bda
             env = {'QUERY_STRING': orig}
0f0bda
             fs = cgi.FieldStorage(environ=env)
0f0bda
+            if isinstance(expect, dict):
0f0bda
+                # test dict interface
0f0bda
+                self.assertEqual(len(expect), len(fs))
0f0bda
+                self.assertCountEqual(expect.keys(), fs.keys())
0f0bda
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
0f0bda
+                # test individual fields
0f0bda
+                for key in expect.keys():
0f0bda
+                    expect_val = expect[key]
0f0bda
+                    self.assertIn(key, fs)
0f0bda
+                    if len(expect_val) > 1:
0f0bda
+                        self.assertEqual(fs.getvalue(key), expect_val)
0f0bda
+                    else:
0f0bda
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
0f0bda
+
0f0bda
+    def test_separator(self):
0f0bda
+        parse_semicolon = [
0f0bda
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
0f0bda
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
0f0bda
+            (";", ValueError("bad query field: ''")),
0f0bda
+            (";;", ValueError("bad query field: ''")),
0f0bda
+            ("=;a", ValueError("bad query field: 'a'")),
0f0bda
+            (";b=a", ValueError("bad query field: ''")),
0f0bda
+            ("b;=a", ValueError("bad query field: 'b'")),
0f0bda
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
0f0bda
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
0f0bda
+        ]
0f0bda
+        for orig, expect in parse_semicolon:
0f0bda
+            env = {'QUERY_STRING': orig}
0f0bda
+            fs = cgi.FieldStorage(separator=';', environ=env)
0f0bda
             if isinstance(expect, dict):
0f0bda
                 # test dict interface
0f0bda
                 self.assertEqual(len(expect), len(fs))
0f0bda
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
0f0bda
index 68f633ca3a7..1ec86ba0fc2 100644
0f0bda
--- a/Lib/test/test_urlparse.py
0f0bda
+++ b/Lib/test/test_urlparse.py
0f0bda
@@ -2,6 +2,11 @@ import sys
0f0bda
 import unicodedata
0f0bda
 import unittest
0f0bda
 import urllib.parse
0f0bda
+from test.support import EnvironmentVarGuard
0f0bda
+from warnings import catch_warnings
0f0bda
+import tempfile
0f0bda
+import contextlib
0f0bda
+import os.path
0f0bda
 
0f0bda
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
0f0bda
 RFC2396_BASE = "http://a/b/c/d;p?q"
0f0bda
@@ -32,6 +37,9 @@ parse_qsl_test_cases = [
0f0bda
     (b"&a=b", [(b'a', b'b')]),
0f0bda
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
0f0bda
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
0f0bda
+]
0f0bda
+
0f0bda
+parse_qsl_test_cases_semicolon = [
0f0bda
     (";", []),
0f0bda
     (";;", []),
0f0bda
     (";a=b", [('a', 'b')]),
0f0bda
@@ -44,6 +52,21 @@ parse_qsl_test_cases = [
0f0bda
     (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
0f0bda
 ]
0f0bda
 
0f0bda
+parse_qsl_test_cases_legacy = [
0f0bda
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
0f0bda
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
0f0bda
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
0f0bda
+]
0f0bda
+
0f0bda
+parse_qsl_test_cases_warn = [
0f0bda
+    (";a=b", [(';a', 'b')]),
0f0bda
+    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
0f0bda
+    (b";a=b", [(b';a', b'b')]),
0f0bda
+    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
0f0bda
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
0f0bda
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
0f0bda
+]
0f0bda
+
0f0bda
 # Each parse_qs testcase is a two-tuple that contains
0f0bda
 # a string with the query and a dictionary with the expected result.
0f0bda
 
0f0bda
@@ -68,6 +91,9 @@ parse_qs_test_cases = [
0f0bda
     (b"&a=b", {b'a': [b'b']}),
0f0bda
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
0f0bda
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
0f0bda
+]
0f0bda
+
0f0bda
+parse_qs_test_cases_semicolon = [
0f0bda
     (";", {}),
0f0bda
     (";;", {}),
0f0bda
     (";a=b", {'a': ['b']}),
0f0bda
@@ -80,6 +106,24 @@ parse_qs_test_cases = [
0f0bda
     (b"a=1;a=2", {b'a': [b'1', b'2']}),
0f0bda
 ]
0f0bda
 
0f0bda
+parse_qs_test_cases_legacy = [
0f0bda
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
0f0bda
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
0f0bda
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
0f0bda
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
0f0bda
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
0f0bda
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
0f0bda
+]
0f0bda
+
0f0bda
+parse_qs_test_cases_warn = [
0f0bda
+    (";a=b", {';a': ['b']}),
0f0bda
+    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
0f0bda
+    (b";a=b", {b';a': [b'b']}),
0f0bda
+    (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
0f0bda
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
0f0bda
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
0f0bda
+]
0f0bda
+
0f0bda
 class UrlParseTestCase(unittest.TestCase):
0f0bda
 
0f0bda
     def checkRoundtrips(self, url, parsed, split):
0f0bda
@@ -152,6 +196,40 @@ class UrlParseTestCase(unittest.TestCase):
0f0bda
             self.assertEqual(result, expect_without_blanks,
0f0bda
                             "Error parsing %r" % orig)
0f0bda
 
0f0bda
+    def test_qs_default_warn(self):
0f0bda
+        for orig, expect in parse_qs_test_cases_warn:
0f0bda
+            with self.subTest(orig=orig, expect=expect):
0f0bda
+                with catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
0f0bda
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 1)
0f0bda
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
0f0bda
+
0f0bda
+    def test_qsl_default_warn(self):
0f0bda
+        for orig, expect in parse_qsl_test_cases_warn:
0f0bda
+            with self.subTest(orig=orig, expect=expect):
0f0bda
+                with catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
0f0bda
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 1)
0f0bda
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
0f0bda
+
0f0bda
+    def test_default_qs_no_warnings(self):
0f0bda
+        for orig, expect in parse_qs_test_cases:
0f0bda
+            with self.subTest(orig=orig, expect=expect):
0f0bda
+                with catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
0f0bda
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
+    def test_default_qsl_no_warnings(self):
0f0bda
+        for orig, expect in parse_qsl_test_cases:
0f0bda
+            with self.subTest(orig=orig, expect=expect):
0f0bda
+                with catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
0f0bda
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
     def test_roundtrips(self):
0f0bda
         str_cases = [
0f0bda
             ('file:///tmp/junk.txt',
0f0bda
@@ -885,8 +963,151 @@ class UrlParseTestCase(unittest.TestCase):
0f0bda
         with self.assertRaises(ValueError):
0f0bda
             urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
0f0bda
         with self.assertRaises(ValueError):
0f0bda
-            urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
0f0bda
+            urllib.parse.parse_qs(';'.join(['a=a']*11), separator=';', max_num_fields=10)
0f0bda
+        with self.assertRaises(ValueError):
0f0bda
+            urllib.parse.parse_qs('SEP'.join(['a=a']*11), separator='SEP', max_num_fields=10)
0f0bda
         urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
0f0bda
+        urllib.parse.parse_qs(';'.join(['a=a']*10), separator=';', max_num_fields=10)
0f0bda
+        urllib.parse.parse_qs('SEP'.join(['a=a']*10), separator='SEP', max_num_fields=10)
0f0bda
+
0f0bda
+    def test_parse_qs_separator_bytes(self):
0f0bda
+        expected = {b'a': [b'1'], b'b': [b'2']}
0f0bda
+
0f0bda
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=b';')
0f0bda
+        self.assertEqual(result, expected)
0f0bda
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=';')
0f0bda
+        self.assertEqual(result, expected)
0f0bda
+        result = urllib.parse.parse_qs('a=1;b=2', separator=';')
0f0bda
+        self.assertEqual(result, {'a': ['1'], 'b': ['2']})
0f0bda
+
0f0bda
+    @contextlib.contextmanager
0f0bda
+    def _qsl_sep_config(self, sep):
0f0bda
+        """Context for the given parse_qsl default separator configured in config file"""
0f0bda
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
0f0bda
+        urllib.parse._default_qs_separator = None
0f0bda
+        try:
0f0bda
+            with tempfile.TemporaryDirectory() as tmpdirname:
0f0bda
+                filename = os.path.join(tmpdirname, 'conf.cfg')
0f0bda
+                with open(filename, 'w') as file:
0f0bda
+                    file.write(f'[parse_qs]\n')
0f0bda
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
0f0bda
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
0f0bda
+                yield
0f0bda
+        finally:
0f0bda
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
0f0bda
+            urllib.parse._default_qs_separator = None
0f0bda
+
0f0bda
+    def test_parse_qs_separator_semicolon(self):
0f0bda
+        for orig, expect in parse_qs_test_cases_semicolon:
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='arg'):
0f0bda
+                result = urllib.parse.parse_qs(orig, separator=';')
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='env'):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
0f0bda
+                    result = urllib.parse.parse_qs(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='conf'):
0f0bda
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qs(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
+    def test_parse_qsl_separator_semicolon(self):
0f0bda
+        for orig, expect in parse_qsl_test_cases_semicolon:
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='arg'):
0f0bda
+                result = urllib.parse.parse_qsl(orig, separator=';')
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='env'):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
0f0bda
+                    result = urllib.parse.parse_qsl(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='conf'):
0f0bda
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qsl(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
+    def test_parse_qs_separator_legacy(self):
0f0bda
+        for orig, expect in parse_qs_test_cases_legacy:
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='env'):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
0f0bda
+                    result = urllib.parse.parse_qs(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='conf'):
0f0bda
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qs(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
+    def test_parse_qsl_separator_legacy(self):
0f0bda
+        for orig, expect in parse_qsl_test_cases_legacy:
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='env'):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
0f0bda
+                    result = urllib.parse.parse_qsl(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+            with self.subTest(orig=orig, expect=expect, method='conf'):
0f0bda
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
0f0bda
+                    result = urllib.parse.parse_qsl(orig)
0f0bda
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
0f0bda
+                self.assertEqual(len(w), 0)
0f0bda
+
0f0bda
+    def test_parse_qs_separator_bad_value_env_or_config(self):
0f0bda
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
0f0bda
+            with self.subTest(bad_sep, method='env'):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
0f0bda
+                    with self.assertRaises(ValueError):
0f0bda
+                        urllib.parse.parse_qsl('a=1;b=2')
0f0bda
+            with self.subTest(bad_sep, method='conf'):
0f0bda
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
0f0bda
+                    with self.assertRaises(ValueError):
0f0bda
+                        urllib.parse.parse_qsl('a=1;b=2')
0f0bda
+
0f0bda
+    def test_parse_qs_separator_bad_value_arg(self):
0f0bda
+        for bad_sep in True, {}, '':
0f0bda
+            with self.subTest(bad_sep):
0f0bda
+                with self.assertRaises(ValueError):
0f0bda
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
0f0bda
+
0f0bda
+    def test_parse_qs_separator_num_fields(self):
0f0bda
+        for qs, sep in (
0f0bda
+            ('a&b&c', '&'),
0f0bda
+            ('a;b;c', ';'),
0f0bda
+            ('a&b;c', 'legacy'),
0f0bda
+        ):
0f0bda
+            with self.subTest(qs=qs, sep=sep):
0f0bda
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
0f0bda
+                    if sep != 'legacy':
0f0bda
+                        with self.assertRaises(ValueError):
0f0bda
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
0f0bda
+                    if sep:
0f0bda
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
0f0bda
+                    with self.assertRaises(ValueError):
0f0bda
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
0f0bda
+
0f0bda
+    def test_parse_qs_separator_priority(self):
0f0bda
+        # env variable trumps config file
0f0bda
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
0f0bda
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
0f0bda
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
0f0bda
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
0f0bda
+        # argument trumps config file
0f0bda
+        with self._qsl_sep_config('~'):
0f0bda
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
0f0bda
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
0f0bda
+        # argument trumps env variable
0f0bda
+        with EnvironmentVarGuard() as environ:
0f0bda
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
0f0bda
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
0f0bda
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
0f0bda
 
0f0bda
     def test_urlencode_sequences(self):
0f0bda
         # Other tests incidentally urlencode things; test non-covered cases:
0f0bda
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
0f0bda
index fa8827a9fa7..57b8fcf8bbd 100644
0f0bda
--- a/Lib/urllib/parse.py
0f0bda
+++ b/Lib/urllib/parse.py
0f0bda
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
0f0bda
 """
0f0bda
 
0f0bda
 import re
0f0bda
+import os
0f0bda
 import sys
0f0bda
 import collections
0f0bda
 
0f0bda
@@ -644,7 +645,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
0f0bda
 
0f0bda
 
0f0bda
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
0f0bda
-             encoding='utf-8', errors='replace', max_num_fields=None):
0f0bda
+             encoding='utf-8', errors='replace', max_num_fields=None,
0f0bda
+             separator=None):
0f0bda
     """Parse a query given as a string argument.
0f0bda
 
0f0bda
         Arguments:
0f0bda
@@ -673,7 +675,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
0f0bda
     parsed_result = {}
0f0bda
     pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
0f0bda
                       encoding=encoding, errors=errors,
0f0bda
-                      max_num_fields=max_num_fields)
0f0bda
+                      max_num_fields=max_num_fields,
0f0bda
+                      separator=separator)
0f0bda
     for name, value in pairs:
0f0bda
         if name in parsed_result:
0f0bda
             parsed_result[name].append(value)
0f0bda
@@ -681,9 +684,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
0f0bda
             parsed_result[name] = [value]
0f0bda
     return parsed_result
0f0bda
 
0f0bda
+class _QueryStringSeparatorWarning(RuntimeWarning):
0f0bda
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
0f0bda
+
0f0bda
+# The default "separator" for parse_qsl can be specified in a config file.
0f0bda
+# It's cached after first read.
0f0bda
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
0f0bda
+_default_qs_separator = None
0f0bda
 
0f0bda
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
0f0bda
-              encoding='utf-8', errors='replace', max_num_fields=None):
0f0bda
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
0f0bda
     """Parse a query given as a string argument.
0f0bda
 
0f0bda
         Arguments:
0f0bda
@@ -710,15 +720,77 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
0f0bda
     """
0f0bda
     qs, _coerce_result = _coerce_args(qs)
0f0bda
 
0f0bda
+    if isinstance(separator, bytes):
0f0bda
+        separator = separator.decode('ascii')
0f0bda
+
0f0bda
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
0f0bda
+        raise ValueError("Separator must be of type string or bytes.")
0f0bda
+
0f0bda
+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
0f0bda
+    _legacy = object()
0f0bda
+
0f0bda
+    if separator is None:
0f0bda
+        global _default_qs_separator
0f0bda
+        separator = _default_qs_separator
0f0bda
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
0f0bda
+        if separator is None:
0f0bda
+            # Set default separator from environment variable
0f0bda
+            separator = os.environ.get(envvar_name)
0f0bda
+            config_source = 'environment variable'
0f0bda
+        if separator is None:
0f0bda
+            # Set default separator from the configuration file
0f0bda
+            try:
0f0bda
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
0f0bda
+            except FileNotFoundError:
0f0bda
+                pass
0f0bda
+            else:
0f0bda
+                with file:
0f0bda
+                    import configparser
0f0bda
+                    config = configparser.ConfigParser(
0f0bda
+                        interpolation=None,
0f0bda
+                        comment_prefixes=('#', ),
0f0bda
+                    )
0f0bda
+                    config.read_file(file)
0f0bda
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
0f0bda
+                    _default_qs_separator = separator
0f0bda
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
0f0bda
+        if separator is None:
0f0bda
+            # The default is '&', but warn if not specified explicitly
0f0bda
+            if ';' in qs:
0f0bda
+                from warnings import warn
0f0bda
+                warn("The default separator of urllib.parse.parse_qsl and "
0f0bda
+                    + "parse_qs was changed to '&' to avoid a web cache "
0f0bda
+                    + "poisoning issue (CVE-2021-23336). "
0f0bda
+                    + "By default, semicolons no longer act as query field "
0f0bda
+                    + "separators. "
0f0bda
+                    + "See https://access.redhat.com/articles/5860431 for "
0f0bda
+                    + "more details.",
0f0bda
+                    _QueryStringSeparatorWarning, stacklevel=2)
0f0bda
+            separator = '&'
0f0bda
+        elif separator == 'legacy':
0f0bda
+            separator = _legacy
0f0bda
+        elif len(separator) != 1:
0f0bda
+            raise ValueError(
0f0bda
+                f'{envvar_name} (from {config_source}) must contain '
0f0bda
+                + '1 character, or "legacy". See '
0f0bda
+                + 'https://access.redhat.com/articles/5860431 for more details.'
0f0bda
+            )
0f0bda
+
0f0bda
     # If max_num_fields is defined then check that the number of fields
0f0bda
     # is less than max_num_fields. This prevents a memory exhaustion DOS
0f0bda
     # attack via post bodies with many fields.
0f0bda
     if max_num_fields is not None:
0f0bda
-        num_fields = 1 + qs.count('&') + qs.count(';')
0f0bda
+        if separator is _legacy:
0f0bda
+            num_fields = 1 + qs.count('&') + qs.count(';')
0f0bda
+        else:
0f0bda
+            num_fields = 1 + qs.count(separator)
0f0bda
         if max_num_fields < num_fields:
0f0bda
             raise ValueError('Max number of fields exceeded')
0f0bda
 
0f0bda
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
0f0bda
+    if separator is _legacy:
0f0bda
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
0f0bda
+    else:
0f0bda
+        pairs = [s1 for s1 in qs.split(separator)]
0f0bda
     r = []
0f0bda
     for name_value in pairs:
0f0bda
         if not name_value and not strict_parsing:
0f0bda
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
0f0bda
new file mode 100644
0f0bda
index 00000000000..bc82c963067
0f0bda
--- /dev/null
0f0bda
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
0f0bda
@@ -0,0 +1 @@
0f0bda
+Make it possible to fix the web cache poisoning vulnerability by allowing the user to choose a custom separator for query args.