Blame SOURCES/00359-CVE-2021-23336.patch

4d7d43
commit 9e77ec82c40ab59846f9447b7c483e7b8e368b16
4d7d43
Author: Petr Viktorin <pviktori@redhat.com>
4d7d43
Date:   Thu Mar 4 13:59:56 2021 +0100
4d7d43
4d7d43
    CVE-2021-23336: Add `separator` argument to parse_qs; warn with default
4d7d43
    
4d7d43
    Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
4d7d43
    However, this solution is different than the upstream solution in Python 3.6.13.
4d7d43
    
4d7d43
    An optional argument `separator` is added to specify the separator.
4d7d43
    It is recommended to set it to '&' or ';' to match the application or proxy in use.
4d7d43
    The default can be set with an env variable or a config file.
4d7d43
    If neither the argument, the env var nor the config file specifies a separator, "&" is used
4d7d43
    but a warning is raised if parse_qs is used on input that contains ';'.
4d7d43
    
4d7d43
    Co-authors of the upstream change (who do not necessarily agree with this):
4d7d43
    Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
4d7d43
    Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
4d7d43
    Co-authored-by: Éric Araujo <merwok@netwok.org>
4d7d43
4d7d43
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
4d7d43
index 41219eeaaba..ddecc0af23a 100644
4d7d43
--- a/Doc/library/cgi.rst
4d7d43
+++ b/Doc/library/cgi.rst
4d7d43
@@ -277,13 +277,12 @@ These are useful if you want more control, or if you want to employ some of the
4d7d43
 algorithms implemented in this module in other circumstances.
4d7d43
 
4d7d43
 
4d7d43
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
4d7d43
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
4d7d43
 
4d7d43
    Parse a query in the environment or from a file (the file defaults to
4d7d43
-   ``sys.stdin``).  The *keep_blank_values* and *strict_parsing* parameters are
4d7d43
+   ``sys.stdin``).  The *keep_blank_values*, *strict_parsing* and *separator* parameters are
4d7d43
    passed to :func:`urllib.parse.parse_qs` unchanged.
4d7d43
 
4d7d43
-
4d7d43
 .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False)
4d7d43
 
4d7d43
    This function is deprecated in this module. Use :func:`urllib.parse.parse_qs`
4d7d43
@@ -308,7 +307,6 @@ algorithms implemented in this module in other circumstances.
4d7d43
    Note that this does not parse nested multipart parts --- use
4d7d43
    :class:`FieldStorage` for that.
4d7d43
 
4d7d43
-
4d7d43
 .. function:: parse_header(string)
4d7d43
 
4d7d43
    Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a
4d7d43
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
4d7d43
index 647af613a31..bcab7c142bc 100644
4d7d43
--- a/Doc/library/urllib.parse.rst
4d7d43
+++ b/Doc/library/urllib.parse.rst
4d7d43
@@ -143,7 +143,7 @@ or on combining URL components into a URL string.
4d7d43
       now raise :exc:`ValueError`.
4d7d43
 
4d7d43
 
4d7d43
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
4d7d43
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
4d7d43
 
4d7d43
    Parse a query string given as a string argument (data of type
4d7d43
    :mimetype:`application/x-www-form-urlencoded`).  Data are returned as a
4d7d43
@@ -168,6 +168,15 @@ or on combining URL components into a URL string.
4d7d43
    read. If set, then throws a :exc:`ValueError` if there are more than
4d7d43
    *max_num_fields* fields read.
4d7d43
 
4d7d43
+   The optional argument *separator* is the symbol to use for separating the
4d7d43
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
4d7d43
+   It defaults to ``'&'``; a warning is raised if this default is used on input that contains ``';'``.
4d7d43
+   This default may be changed with the following environment variable settings:
4d7d43
+
4d7d43
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
4d7d43
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
4d7d43
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
4d7d43
+
4d7d43
    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
4d7d43
    parameter set to ``True``) to convert such dictionaries into query
4d7d43
    strings.
4d7d43
@@ -204,6 +213,9 @@ or on combining URL components into a URL string.
4d7d43
    read. If set, then throws a :exc:`ValueError` if there are more than
4d7d43
    *max_num_fields* fields read.
4d7d43
 
4d7d43
+   The optional argument *separator* is the symbol to use for separating the
4d7d43
+   query arguments. It works as in :py:func:`parse_qs`.
4d7d43
+
4d7d43
    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
4d7d43
    query strings.
4d7d43
 
4d7d43
@@ -213,7 +225,6 @@ or on combining URL components into a URL string.
4d7d43
    .. versionchanged:: 3.6.8
4d7d43
       Added *max_num_fields* parameter.
4d7d43
 
4d7d43
-
4d7d43
 .. function:: urlunparse(parts)
4d7d43
 
4d7d43
    Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
4d7d43
diff --git a/Lib/cgi.py b/Lib/cgi.py
4d7d43
index 56f243e09f0..5ab2a5d6af6 100755
4d7d43
--- a/Lib/cgi.py
4d7d43
+++ b/Lib/cgi.py
4d7d43
@@ -117,7 +117,8 @@ log = initlog           # The current logging function
4d7d43
 # 0 ==> unlimited input
4d7d43
 maxlen = 0
4d7d43
 
4d7d43
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
4d7d43
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
4d7d43
+          strict_parsing=0, separator=None):
4d7d43
     """Parse a query in the environment or from a file (default stdin)
4d7d43
 
4d7d43
         Arguments, all optional:
4d7d43
@@ -136,6 +137,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
4d7d43
         strict_parsing: flag indicating what to do with parsing errors.
4d7d43
             If false (the default), errors are silently ignored.
4d7d43
             If true, errors raise a ValueError exception.
4d7d43
+
4d7d43
+        separator: str. The symbol to use for separating the query arguments.
4d7d43
     """
4d7d43
     if fp is None:
4d7d43
         fp = sys.stdin
4d7d43
@@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
4d7d43
     if environ['REQUEST_METHOD'] == 'POST':
4d7d43
         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
4d7d43
         if ctype == 'multipart/form-data':
4d7d43
-            return parse_multipart(fp, pdict)
4d7d43
+            return parse_multipart(fp, pdict, separator=separator)
4d7d43
         elif ctype == 'application/x-www-form-urlencoded':
4d7d43
             clength = int(environ['CONTENT_LENGTH'])
4d7d43
             if maxlen and clength > maxlen:
4d7d43
@@ -182,21 +185,21 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
4d7d43
     return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
4d7d43
                                  encoding=encoding)
4d7d43
 
4d7d43
-
4d7d43
 # parse query string function called from urlparse,
4d7d43
 # this is done in order to maintain backward compatibility.
4d7d43
-
4d7d43
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
4d7d43
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, separator=None):
4d7d43
     """Parse a query given as a string argument."""
4d7d43
     warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
4d7d43
          DeprecationWarning, 2)
4d7d43
-    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
4d7d43
+    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
4d7d43
+                                 separator=separator)
4d7d43
 
4d7d43
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
4d7d43
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, separator=None):
4d7d43
     """Parse a query given as a string argument."""
4d7d43
     warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
4d7d43
          DeprecationWarning, 2)
4d7d43
-    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
4d7d43
+    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing,
4d7d43
+                                  separator=separator)
4d7d43
 
4d7d43
 def parse_multipart(fp, pdict):
4d7d43
     """Parse multipart input.
4d7d43
@@ -297,7 +300,6 @@ def parse_multipart(fp, pdict):
4d7d43
 
4d7d43
     return partdict
4d7d43
 
4d7d43
-
4d7d43
 def _parseparam(s):
4d7d43
     while s[:1] == ';':
4d7d43
         s = s[1:]
4d7d43
@@ -405,7 +407,7 @@ class FieldStorage:
4d7d43
     def __init__(self, fp=None, headers=None, outerboundary=b'',
4d7d43
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
4d7d43
                  limit=None, encoding='utf-8', errors='replace',
4d7d43
-                 max_num_fields=None):
4d7d43
+                 max_num_fields=None, separator=None):
4d7d43
         """Constructor.  Read multipart/* until last part.
4d7d43
 
4d7d43
         Arguments, all optional:
4d7d43
@@ -453,6 +455,7 @@ class FieldStorage:
4d7d43
         self.keep_blank_values = keep_blank_values
4d7d43
         self.strict_parsing = strict_parsing
4d7d43
         self.max_num_fields = max_num_fields
4d7d43
+        self.separator = separator
4d7d43
         if 'REQUEST_METHOD' in environ:
4d7d43
             method = environ['REQUEST_METHOD'].upper()
4d7d43
         self.qs_on_post = None
4d7d43
@@ -678,7 +681,7 @@ class FieldStorage:
4d7d43
         query = urllib.parse.parse_qsl(
4d7d43
             qs, self.keep_blank_values, self.strict_parsing,
4d7d43
             encoding=self.encoding, errors=self.errors,
4d7d43
-            max_num_fields=self.max_num_fields)
4d7d43
+            max_num_fields=self.max_num_fields, separator=self.separator)
4d7d43
         self.list = [MiniFieldStorage(key, value) for key, value in query]
4d7d43
         self.skip_lines()
4d7d43
 
4d7d43
@@ -694,7 +697,7 @@ class FieldStorage:
4d7d43
             query = urllib.parse.parse_qsl(
4d7d43
                 self.qs_on_post, self.keep_blank_values, self.strict_parsing,
4d7d43
                 encoding=self.encoding, errors=self.errors,
4d7d43
-                max_num_fields=self.max_num_fields)
4d7d43
+                max_num_fields=self.max_num_fields, separator=self.separator)
4d7d43
             self.list.extend(MiniFieldStorage(key, value) for key, value in query)
4d7d43
 
4d7d43
         klass = self.FieldStorageClass or self.__class__
4d7d43
@@ -736,7 +739,8 @@ class FieldStorage:
4d7d43
 
4d7d43
             part = klass(self.fp, headers, ib, environ, keep_blank_values,
4d7d43
                          strict_parsing,self.limit-self.bytes_read,
4d7d43
-                         self.encoding, self.errors, max_num_fields)
4d7d43
+                         self.encoding, self.errors, max_num_fields,
4d7d43
+                         separator=self.separator)
4d7d43
 
4d7d43
             if max_num_fields is not None:
4d7d43
                 max_num_fields -= 1
4d7d43
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
4d7d43
index b3e2d4cce8e..5ae3e085e1e 100644
4d7d43
--- a/Lib/test/test_cgi.py
4d7d43
+++ b/Lib/test/test_cgi.py
4d7d43
@@ -55,12 +55,9 @@ parse_strict_test_cases = [
4d7d43
     ("", ValueError("bad query field: ''")),
4d7d43
     ("&", ValueError("bad query field: ''")),
4d7d43
     ("&&", ValueError("bad query field: ''")),
4d7d43
-    (";", ValueError("bad query field: ''")),
4d7d43
-    (";&;", ValueError("bad query field: ''")),
4d7d43
     # Should the next few really be valid?
4d7d43
     ("=", {}),
4d7d43
     ("=&=", {}),
4d7d43
-    ("=;=", {}),
4d7d43
     # This rest seem to make sense
4d7d43
     ("=a", {'': ['a']}),
4d7d43
     ("&=a", ValueError("bad query field: ''")),
4d7d43
@@ -75,8 +72,6 @@ parse_strict_test_cases = [
4d7d43
     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
4d7d43
     ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
4d7d43
     ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
4d7d43
-    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
4d7d43
-    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
4d7d43
     ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
4d7d43
      {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
4d7d43
       'cuyer': ['r'],
4d7d43
@@ -164,6 +159,35 @@ class CgiTests(unittest.TestCase):
4d7d43
 
4d7d43
             env = {'QUERY_STRING': orig}
4d7d43
             fs = cgi.FieldStorage(environ=env)
4d7d43
+            if isinstance(expect, dict):
4d7d43
+                # test dict interface
4d7d43
+                self.assertEqual(len(expect), len(fs))
4d7d43
+                self.assertCountEqual(expect.keys(), fs.keys())
4d7d43
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
4d7d43
+                # test individual fields
4d7d43
+                for key in expect.keys():
4d7d43
+                    expect_val = expect[key]
4d7d43
+                    self.assertIn(key, fs)
4d7d43
+                    if len(expect_val) > 1:
4d7d43
+                        self.assertEqual(fs.getvalue(key), expect_val)
4d7d43
+                    else:
4d7d43
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
4d7d43
+
4d7d43
+    def test_separator(self):
4d7d43
+        parse_semicolon = [
4d7d43
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
4d7d43
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
4d7d43
+            (";", ValueError("bad query field: ''")),
4d7d43
+            (";;", ValueError("bad query field: ''")),
4d7d43
+            ("=;a", ValueError("bad query field: 'a'")),
4d7d43
+            (";b=a", ValueError("bad query field: ''")),
4d7d43
+            ("b;=a", ValueError("bad query field: 'b'")),
4d7d43
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
4d7d43
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
4d7d43
+        ]
4d7d43
+        for orig, expect in parse_semicolon:
4d7d43
+            env = {'QUERY_STRING': orig}
4d7d43
+            fs = cgi.FieldStorage(separator=';', environ=env)
4d7d43
             if isinstance(expect, dict):
4d7d43
                 # test dict interface
4d7d43
                 self.assertEqual(len(expect), len(fs))
4d7d43
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
4d7d43
index 68f633ca3a7..1ec86ba0fc2 100644
4d7d43
--- a/Lib/test/test_urlparse.py
4d7d43
+++ b/Lib/test/test_urlparse.py
4d7d43
@@ -2,6 +2,11 @@ import sys
4d7d43
 import unicodedata
4d7d43
 import unittest
4d7d43
 import urllib.parse
4d7d43
+from test.support import EnvironmentVarGuard
4d7d43
+from warnings import catch_warnings
4d7d43
+import tempfile
4d7d43
+import contextlib
4d7d43
+import os.path
4d7d43
 
4d7d43
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
4d7d43
 RFC2396_BASE = "http://a/b/c/d;p?q"
4d7d43
@@ -32,6 +37,9 @@ parse_qsl_test_cases = [
4d7d43
     (b"&a=b", [(b'a', b'b')]),
4d7d43
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
4d7d43
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
4d7d43
+]
4d7d43
+
4d7d43
+parse_qsl_test_cases_semicolon = [
4d7d43
     (";", []),
4d7d43
     (";;", []),
4d7d43
     (";a=b", [('a', 'b')]),
4d7d43
@@ -44,6 +52,21 @@ parse_qsl_test_cases = [
4d7d43
     (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
4d7d43
 ]
4d7d43
 
4d7d43
+parse_qsl_test_cases_legacy = [
4d7d43
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
4d7d43
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
4d7d43
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
4d7d43
+]
4d7d43
+
4d7d43
+parse_qsl_test_cases_warn = [
4d7d43
+    (";a=b", [(';a', 'b')]),
4d7d43
+    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
4d7d43
+    (b";a=b", [(b';a', b'b')]),
4d7d43
+    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
4d7d43
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
4d7d43
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
4d7d43
+]
4d7d43
+
4d7d43
 # Each parse_qs testcase is a two-tuple that contains
4d7d43
 # a string with the query and a dictionary with the expected result.
4d7d43
 
4d7d43
@@ -68,6 +91,9 @@ parse_qs_test_cases = [
4d7d43
     (b"&a=b", {b'a': [b'b']}),
4d7d43
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
4d7d43
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
4d7d43
+]
4d7d43
+
4d7d43
+parse_qs_test_cases_semicolon = [
4d7d43
     (";", {}),
4d7d43
     (";;", {}),
4d7d43
     (";a=b", {'a': ['b']}),
4d7d43
@@ -80,6 +106,24 @@ parse_qs_test_cases = [
4d7d43
     (b"a=1;a=2", {b'a': [b'1', b'2']}),
4d7d43
 ]
4d7d43
 
4d7d43
+parse_qs_test_cases_legacy = [
4d7d43
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
4d7d43
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
4d7d43
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
4d7d43
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
4d7d43
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
4d7d43
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
4d7d43
+]
4d7d43
+
4d7d43
+parse_qs_test_cases_warn = [
4d7d43
+    (";a=b", {';a': ['b']}),
4d7d43
+    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
4d7d43
+    (b";a=b", {b';a': [b'b']}),
4d7d43
+    (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
4d7d43
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
4d7d43
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
4d7d43
+]
4d7d43
+
4d7d43
 class UrlParseTestCase(unittest.TestCase):
4d7d43
 
4d7d43
     def checkRoundtrips(self, url, parsed, split):
4d7d43
@@ -152,6 +196,40 @@ class UrlParseTestCase(unittest.TestCase):
4d7d43
             self.assertEqual(result, expect_without_blanks,
4d7d43
                             "Error parsing %r" % orig)
4d7d43
 
4d7d43
+    def test_qs_default_warn(self):
4d7d43
+        for orig, expect in parse_qs_test_cases_warn:
4d7d43
+            with self.subTest(orig=orig, expect=expect):
4d7d43
+                with catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
4d7d43
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 1)
4d7d43
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
4d7d43
+
4d7d43
+    def test_qsl_default_warn(self):
4d7d43
+        for orig, expect in parse_qsl_test_cases_warn:
4d7d43
+            with self.subTest(orig=orig, expect=expect):
4d7d43
+                with catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
4d7d43
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 1)
4d7d43
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
4d7d43
+
4d7d43
+    def test_default_qs_no_warnings(self):
4d7d43
+        for orig, expect in parse_qs_test_cases:
4d7d43
+            with self.subTest(orig=orig, expect=expect):
4d7d43
+                with catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
4d7d43
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
+    def test_default_qsl_no_warnings(self):
4d7d43
+        for orig, expect in parse_qsl_test_cases:
4d7d43
+            with self.subTest(orig=orig, expect=expect):
4d7d43
+                with catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
4d7d43
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
     def test_roundtrips(self):
4d7d43
         str_cases = [
4d7d43
             ('file:///tmp/junk.txt',
4d7d43
@@ -885,8 +963,151 @@ class UrlParseTestCase(unittest.TestCase):
4d7d43
         with self.assertRaises(ValueError):
4d7d43
             urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
4d7d43
         with self.assertRaises(ValueError):
4d7d43
-            urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
4d7d43
+            urllib.parse.parse_qs(';'.join(['a=a']*11), separator=';', max_num_fields=10)
4d7d43
+        with self.assertRaises(ValueError):
4d7d43
+            urllib.parse.parse_qs('SEP'.join(['a=a']*11), separator='SEP', max_num_fields=10)
4d7d43
         urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
4d7d43
+        urllib.parse.parse_qs(';'.join(['a=a']*10), separator=';', max_num_fields=10)
4d7d43
+        urllib.parse.parse_qs('SEP'.join(['a=a']*10), separator='SEP', max_num_fields=10)
4d7d43
+
4d7d43
+    def test_parse_qs_separator_bytes(self):
4d7d43
+        expected = {b'a': [b'1'], b'b': [b'2']}
4d7d43
+
4d7d43
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=b';')
4d7d43
+        self.assertEqual(result, expected)
4d7d43
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=';')
4d7d43
+        self.assertEqual(result, expected)
4d7d43
+        result = urllib.parse.parse_qs('a=1;b=2', separator=';')
4d7d43
+        self.assertEqual(result, {'a': ['1'], 'b': ['2']})
4d7d43
+
4d7d43
+    @contextlib.contextmanager
4d7d43
+    def _qsl_sep_config(self, sep):
4d7d43
+        """Context for the given parse_qsl default separator configured in config file"""
4d7d43
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
4d7d43
+        urllib.parse._default_qs_separator = None
4d7d43
+        try:
4d7d43
+            with tempfile.TemporaryDirectory() as tmpdirname:
4d7d43
+                filename = os.path.join(tmpdirname, 'conf.cfg')
4d7d43
+                with open(filename, 'w') as file:
4d7d43
+                    file.write(f'[parse_qs]\n')
4d7d43
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
4d7d43
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
4d7d43
+                yield
4d7d43
+        finally:
4d7d43
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
4d7d43
+            urllib.parse._default_qs_separator = None
4d7d43
+
4d7d43
+    def test_parse_qs_separator_semicolon(self):
4d7d43
+        for orig, expect in parse_qs_test_cases_semicolon:
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='arg'):
4d7d43
+                result = urllib.parse.parse_qs(orig, separator=';')
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='env'):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
4d7d43
+                    result = urllib.parse.parse_qs(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='conf'):
4d7d43
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qs(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
+    def test_parse_qsl_separator_semicolon(self):
4d7d43
+        for orig, expect in parse_qsl_test_cases_semicolon:
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='arg'):
4d7d43
+                result = urllib.parse.parse_qsl(orig, separator=';')
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='env'):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
4d7d43
+                    result = urllib.parse.parse_qsl(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='conf'):
4d7d43
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qsl(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
+    def test_parse_qs_separator_legacy(self):
4d7d43
+        for orig, expect in parse_qs_test_cases_legacy:
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='env'):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
4d7d43
+                    result = urllib.parse.parse_qs(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='conf'):
4d7d43
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qs(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
+    def test_parse_qsl_separator_legacy(self):
4d7d43
+        for orig, expect in parse_qsl_test_cases_legacy:
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='env'):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
4d7d43
+                    result = urllib.parse.parse_qsl(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+            with self.subTest(orig=orig, expect=expect, method='conf'):
4d7d43
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
4d7d43
+                    result = urllib.parse.parse_qsl(orig)
4d7d43
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
4d7d43
+                self.assertEqual(len(w), 0)
4d7d43
+
4d7d43
+    def test_parse_qs_separator_bad_value_env_or_config(self):
4d7d43
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
4d7d43
+            with self.subTest(bad_sep, method='env'):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
4d7d43
+                    with self.assertRaises(ValueError):
4d7d43
+                        urllib.parse.parse_qsl('a=1;b=2')
4d7d43
+            with self.subTest(bad_sep, method='conf'):
4d7d43
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
4d7d43
+                    with self.assertRaises(ValueError):
4d7d43
+                        urllib.parse.parse_qsl('a=1;b=2')
4d7d43
+
4d7d43
+    def test_parse_qs_separator_bad_value_arg(self):
4d7d43
+        for bad_sep in True, {}, '':
4d7d43
+            with self.subTest(bad_sep):
4d7d43
+                with self.assertRaises(ValueError):
4d7d43
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
4d7d43
+
4d7d43
+    def test_parse_qs_separator_num_fields(self):
4d7d43
+        for qs, sep in (
4d7d43
+            ('a&b&c', '&'),
4d7d43
+            ('a;b;c', ';'),
4d7d43
+            ('a&b;c', 'legacy'),
4d7d43
+        ):
4d7d43
+            with self.subTest(qs=qs, sep=sep):
4d7d43
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
4d7d43
+                    if sep != 'legacy':
4d7d43
+                        with self.assertRaises(ValueError):
4d7d43
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
4d7d43
+                    if sep:
4d7d43
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
4d7d43
+                    with self.assertRaises(ValueError):
4d7d43
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
4d7d43
+
4d7d43
+    def test_parse_qs_separator_priority(self):
4d7d43
+        # env variable trumps config file
4d7d43
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
4d7d43
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
4d7d43
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
4d7d43
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
4d7d43
+        # argument trumps config file
4d7d43
+        with self._qsl_sep_config('~'):
4d7d43
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
4d7d43
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
4d7d43
+        # argument trumps env variable
4d7d43
+        with EnvironmentVarGuard() as environ:
4d7d43
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
4d7d43
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
4d7d43
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
4d7d43
 
4d7d43
     def test_urlencode_sequences(self):
4d7d43
         # Other tests incidentally urlencode things; test non-covered cases:
4d7d43
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
4d7d43
index fa8827a9fa7..57b8fcf8bbd 100644
4d7d43
--- a/Lib/urllib/parse.py
4d7d43
+++ b/Lib/urllib/parse.py
4d7d43
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
4d7d43
 """
4d7d43
 
4d7d43
 import re
4d7d43
+import os
4d7d43
 import sys
4d7d43
 import collections
4d7d43
 
4d7d43
@@ -644,7 +645,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
4d7d43
 
4d7d43
 
4d7d43
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
4d7d43
-             encoding='utf-8', errors='replace', max_num_fields=None):
4d7d43
+             encoding='utf-8', errors='replace', max_num_fields=None,
4d7d43
+             separator=None):
4d7d43
     """Parse a query given as a string argument.
4d7d43
 
4d7d43
         Arguments:
4d7d43
@@ -673,7 +675,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
4d7d43
     parsed_result = {}
4d7d43
     pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
4d7d43
                       encoding=encoding, errors=errors,
4d7d43
-                      max_num_fields=max_num_fields)
4d7d43
+                      max_num_fields=max_num_fields,
4d7d43
+                      separator=separator)
4d7d43
     for name, value in pairs:
4d7d43
         if name in parsed_result:
4d7d43
             parsed_result[name].append(value)
4d7d43
@@ -681,9 +684,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
4d7d43
             parsed_result[name] = [value]
4d7d43
     return parsed_result
4d7d43
 
4d7d43
+class _QueryStringSeparatorWarning(RuntimeWarning):
4d7d43
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
4d7d43
+
4d7d43
+# The default "separator" for parse_qsl can be specified in a config file.
4d7d43
+# It's cached after first read.
4d7d43
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
4d7d43
+_default_qs_separator = None
4d7d43
 
4d7d43
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
4d7d43
-              encoding='utf-8', errors='replace', max_num_fields=None):
4d7d43
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
4d7d43
     """Parse a query given as a string argument.
4d7d43
 
4d7d43
         Arguments:
4d7d43
@@ -710,15 +720,77 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
4d7d43
     """
4d7d43
     qs, _coerce_result = _coerce_args(qs)
4d7d43
 
4d7d43
+    if isinstance(separator, bytes):
4d7d43
+        separator = separator.decode('ascii')
4d7d43
+
4d7d43
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
4d7d43
+        raise ValueError("Separator must be of type string or bytes.")
4d7d43
+
4d7d43
+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
4d7d43
+    _legacy = object()
4d7d43
+
4d7d43
+    if separator is None:
4d7d43
+        global _default_qs_separator
4d7d43
+        separator = _default_qs_separator
4d7d43
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
4d7d43
+        if separator is None:
4d7d43
+            # Set default separator from environment variable
4d7d43
+            separator = os.environ.get(envvar_name)
4d7d43
+            config_source = 'environment variable'
4d7d43
+        if separator is None:
4d7d43
+            # Set default separator from the configuration file
4d7d43
+            try:
4d7d43
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
4d7d43
+            except FileNotFoundError:
4d7d43
+                pass
4d7d43
+            else:
4d7d43
+                with file:
4d7d43
+                    import configparser
4d7d43
+                    config = configparser.ConfigParser(
4d7d43
+                        interpolation=None,
4d7d43
+                        comment_prefixes=('#', ),
4d7d43
+                    )
4d7d43
+                    config.read_file(file)
4d7d43
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
4d7d43
+                    _default_qs_separator = separator
4d7d43
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
4d7d43
+        if separator is None:
4d7d43
+            # The default is '&', but warn if not specified explicitly
4d7d43
+            if ';' in qs:
4d7d43
+                from warnings import warn
4d7d43
+                warn("The default separator of urllib.parse.parse_qsl and "
4d7d43
+                    + "parse_qs was changed to '&' to avoid a web cache "
4d7d43
+                    + "poisoning issue (CVE-2021-23336). "
4d7d43
+                    + "By default, semicolons no longer act as query field "
4d7d43
+                    + "separators. "
4d7d43
+                    + "See https://access.redhat.com/articles/5860431 for "
4d7d43
+                    + "more details.",
4d7d43
+                    _QueryStringSeparatorWarning, stacklevel=2)
4d7d43
+            separator = '&'
4d7d43
+        elif separator == 'legacy':
4d7d43
+            separator = _legacy
4d7d43
+        elif len(separator) != 1:
4d7d43
+            raise ValueError(
4d7d43
+                f'{envvar_name} (from {config_source}) must contain '
4d7d43
+                + '1 character, or "legacy". See '
4d7d43
+                + 'https://access.redhat.com/articles/5860431 for more details.'
4d7d43
+            )
4d7d43
+
4d7d43
     # If max_num_fields is defined then check that the number of fields
4d7d43
     # is less than max_num_fields. This prevents a memory exhaustion DOS
4d7d43
     # attack via post bodies with many fields.
4d7d43
     if max_num_fields is not None:
4d7d43
-        num_fields = 1 + qs.count('&') + qs.count(';')
4d7d43
+        if separator is _legacy:
4d7d43
+            num_fields = 1 + qs.count('&') + qs.count(';')
4d7d43
+        else:
4d7d43
+            num_fields = 1 + qs.count(separator)
4d7d43
         if max_num_fields < num_fields:
4d7d43
             raise ValueError('Max number of fields exceeded')
4d7d43
 
4d7d43
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
4d7d43
+    if separator is _legacy:
4d7d43
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
4d7d43
+    else:
4d7d43
+        pairs = [s1 for s1 in qs.split(separator)]
4d7d43
     r = []
4d7d43
     for name_value in pairs:
4d7d43
         if not name_value and not strict_parsing:
4d7d43
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
4d7d43
new file mode 100644
4d7d43
index 00000000000..bc82c963067
4d7d43
--- /dev/null
4d7d43
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
4d7d43
@@ -0,0 +1 @@
4d7d43
+Make it possible to fix a web cache poisoning vulnerability by allowing the user to choose a custom separator for query args.