Blame SOURCES/00359-CVE-2021-23336.patch

71f264
commit 9e77ec82c40ab59846f9447b7c483e7b8e368b16
71f264
Author: Petr Viktorin <pviktori@redhat.com>
71f264
Date:   Thu Mar 4 13:59:56 2021 +0100
71f264
71f264
    CVE-2021-23336: Add `separator` argument to parse_qs; warn with default
71f264
    
71f264
    Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
71f264
    However, this solution is different than the upstream solution in Python 3.6.13.
71f264
    
71f264
    An optional argument `separator` is added to specify the separator.
71f264
    It is recommended to set it to '&' or ';' to match the application or proxy in use.
71f264
    The default can be set with an env variable or a config file.
71f264
    If neither the argument, env var nor config file specifies a separator, "&" is used
71f264
    but a warning is raised if parse_qs is used on input that contains ';'.
71f264
    
71f264
    Co-authors of the upstream change (who do not necessarily agree with this):
71f264
    Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
71f264
    Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
71f264
    Co-authored-by: Éric Araujo <merwok@netwok.org>
71f264
71f264
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
71f264
index 41219eeaaba..ddecc0af23a 100644
71f264
--- a/Doc/library/cgi.rst
71f264
+++ b/Doc/library/cgi.rst
71f264
@@ -277,13 +277,12 @@ These are useful if you want more control, or if you want to employ some of the
71f264
 algorithms implemented in this module in other circumstances.
71f264
 
71f264
 
71f264
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
71f264
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
71f264
 
71f264
    Parse a query in the environment or from a file (the file defaults to
71f264
-   ``sys.stdin``).  The *keep_blank_values* and *strict_parsing* parameters are
71f264
+   ``sys.stdin``).  The *keep_blank_values*, *strict_parsing* and *separator* parameters are
71f264
    passed to :func:`urllib.parse.parse_qs` unchanged.
71f264
 
71f264
-
71f264
 .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False)
71f264
 
71f264
    This function is deprecated in this module. Use :func:`urllib.parse.parse_qs`
71f264
@@ -308,7 +307,6 @@ algorithms implemented in this module in other circumstances.
71f264
    Note that this does not parse nested multipart parts --- use
71f264
    :class:`FieldStorage` for that.
71f264
 
71f264
-
71f264
 .. function:: parse_header(string)
71f264
 
71f264
    Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a
71f264
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
71f264
index 647af613a31..bcab7c142bc 100644
71f264
--- a/Doc/library/urllib.parse.rst
71f264
+++ b/Doc/library/urllib.parse.rst
71f264
@@ -143,7 +143,7 @@ or on combining URL components into a URL string.
71f264
       now raise :exc:`ValueError`.
71f264
 
71f264
 
71f264
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
71f264
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
71f264
 
71f264
    Parse a query string given as a string argument (data of type
71f264
    :mimetype:`application/x-www-form-urlencoded`).  Data are returned as a
71f264
@@ -168,6 +168,15 @@ or on combining URL components into a URL string.
71f264
    read. If set, then throws a :exc:`ValueError` if there are more than
71f264
    *max_num_fields* fields read.
71f264
 
71f264
+   The optional argument *separator* is the symbol to use for separating the
71f264
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
71f264
+   It defaults to ``'&'``; a warning is raised if this default is used on input that contains ``';'``.
71f264
+   This default may be changed with the following environment variable settings:
71f264
+
71f264
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
71f264
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
71f264
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
71f264
+
71f264
    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
71f264
    parameter set to ``True``) to convert such dictionaries into query
71f264
    strings.
71f264
@@ -204,6 +213,9 @@ or on combining URL components into a URL string.
71f264
    read. If set, then throws a :exc:`ValueError` if there are more than
71f264
    *max_num_fields* fields read.
71f264
 
71f264
+   The optional argument *separator* is the symbol to use for separating the
71f264
+   query arguments. It works as in :py:func:`parse_qs`.
71f264
+
71f264
    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
71f264
    query strings.
71f264
 
71f264
@@ -213,7 +225,6 @@ or on combining URL components into a URL string.
71f264
    .. versionchanged:: 3.6.8
71f264
       Added *max_num_fields* parameter.
71f264
 
71f264
-
71f264
 .. function:: urlunparse(parts)
71f264
 
71f264
    Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
71f264
diff --git a/Lib/cgi.py b/Lib/cgi.py
71f264
index 56f243e09f0..5ab2a5d6af6 100755
71f264
--- a/Lib/cgi.py
71f264
+++ b/Lib/cgi.py
71f264
@@ -117,7 +117,8 @@ log = initlog           # The current logging function
71f264
 # 0 ==> unlimited input
71f264
 maxlen = 0
71f264
 
71f264
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
71f264
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
71f264
+          strict_parsing=0, separator=None):
71f264
     """Parse a query in the environment or from a file (default stdin)
71f264
 
71f264
         Arguments, all optional:
71f264
@@ -136,6 +137,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
71f264
         strict_parsing: flag indicating what to do with parsing errors.
71f264
             If false (the default), errors are silently ignored.
71f264
             If true, errors raise a ValueError exception.
71f264
+
71f264
+        separator: str. The symbol to use for separating the query arguments.
71f264
     """
71f264
     if fp is None:
71f264
         fp = sys.stdin
71f264
@@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
71f264
     if environ['REQUEST_METHOD'] == 'POST':
71f264
         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
71f264
         if ctype == 'multipart/form-data':
71f264
-            return parse_multipart(fp, pdict)
71f264
+            return parse_multipart(fp, pdict, separator=separator)
71f264
         elif ctype == 'application/x-www-form-urlencoded':
71f264
             clength = int(environ['CONTENT_LENGTH'])
71f264
             if maxlen and clength > maxlen:
71f264
@@ -182,21 +185,21 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
71f264
     return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
71f264
                                  encoding=encoding)
71f264
 
71f264
-
71f264
 # parse query string function called from urlparse,
71f264
 # this is done in order to maintain backward compatibility.
71f264
-
71f264
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
71f264
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, separator=None):
71f264
     """Parse a query given as a string argument."""
71f264
     warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
71f264
          DeprecationWarning, 2)
71f264
-    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
71f264
+    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
71f264
+                                 separator=separator)
71f264
 
71f264
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
71f264
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, separator=None):
71f264
     """Parse a query given as a string argument."""
71f264
     warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
71f264
          DeprecationWarning, 2)
71f264
-    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
71f264
+    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing,
71f264
+                                  separator=separator)
71f264
 
71f264
 def parse_multipart(fp, pdict):
71f264
     """Parse multipart input.
71f264
@@ -297,7 +300,6 @@ def parse_multipart(fp, pdict):
71f264
 
71f264
     return partdict
71f264
 
71f264
-
71f264
 def _parseparam(s):
71f264
     while s[:1] == ';':
71f264
         s = s[1:]
71f264
@@ -405,7 +407,7 @@ class FieldStorage:
71f264
     def __init__(self, fp=None, headers=None, outerboundary=b'',
71f264
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
71f264
                  limit=None, encoding='utf-8', errors='replace',
71f264
-                 max_num_fields=None):
71f264
+                 max_num_fields=None, separator=None):
71f264
         """Constructor.  Read multipart/* until last part.
71f264
 
71f264
         Arguments, all optional:
71f264
@@ -453,6 +455,7 @@ class FieldStorage:
71f264
         self.keep_blank_values = keep_blank_values
71f264
         self.strict_parsing = strict_parsing
71f264
         self.max_num_fields = max_num_fields
71f264
+        self.separator = separator
71f264
         if 'REQUEST_METHOD' in environ:
71f264
             method = environ['REQUEST_METHOD'].upper()
71f264
         self.qs_on_post = None
71f264
@@ -678,7 +681,7 @@ class FieldStorage:
71f264
         query = urllib.parse.parse_qsl(
71f264
             qs, self.keep_blank_values, self.strict_parsing,
71f264
             encoding=self.encoding, errors=self.errors,
71f264
-            max_num_fields=self.max_num_fields)
71f264
+            max_num_fields=self.max_num_fields, separator=self.separator)
71f264
         self.list = [MiniFieldStorage(key, value) for key, value in query]
71f264
         self.skip_lines()
71f264
 
71f264
@@ -694,7 +697,7 @@ class FieldStorage:
71f264
             query = urllib.parse.parse_qsl(
71f264
                 self.qs_on_post, self.keep_blank_values, self.strict_parsing,
71f264
                 encoding=self.encoding, errors=self.errors,
71f264
-                max_num_fields=self.max_num_fields)
71f264
+                max_num_fields=self.max_num_fields, separator=self.separator)
71f264
             self.list.extend(MiniFieldStorage(key, value) for key, value in query)
71f264
 
71f264
         klass = self.FieldStorageClass or self.__class__
71f264
@@ -736,7 +739,8 @@ class FieldStorage:
71f264
 
71f264
             part = klass(self.fp, headers, ib, environ, keep_blank_values,
71f264
                          strict_parsing,self.limit-self.bytes_read,
71f264
-                         self.encoding, self.errors, max_num_fields)
71f264
+                         self.encoding, self.errors, max_num_fields,
71f264
+                         separator=self.separator)
71f264
 
71f264
             if max_num_fields is not None:
71f264
                 max_num_fields -= 1
71f264
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
71f264
index b3e2d4cce8e..5ae3e085e1e 100644
71f264
--- a/Lib/test/test_cgi.py
71f264
+++ b/Lib/test/test_cgi.py
71f264
@@ -55,12 +55,9 @@ parse_strict_test_cases = [
71f264
     ("", ValueError("bad query field: ''")),
71f264
     ("&", ValueError("bad query field: ''")),
71f264
     ("&&", ValueError("bad query field: ''")),
71f264
-    (";", ValueError("bad query field: ''")),
71f264
-    (";&;", ValueError("bad query field: ''")),
71f264
     # Should the next few really be valid?
71f264
     ("=", {}),
71f264
     ("=&=", {}),
71f264
-    ("=;=", {}),
71f264
     # This rest seem to make sense
71f264
     ("=a", {'': ['a']}),
71f264
     ("&=a", ValueError("bad query field: ''")),
71f264
@@ -75,8 +72,6 @@ parse_strict_test_cases = [
71f264
     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
71f264
     ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
71f264
     ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
71f264
-    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
71f264
-    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
71f264
     ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
71f264
      {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
71f264
       'cuyer': ['r'],
71f264
@@ -164,6 +159,35 @@ class CgiTests(unittest.TestCase):
71f264
 
71f264
             env = {'QUERY_STRING': orig}
71f264
             fs = cgi.FieldStorage(environ=env)
71f264
+            if isinstance(expect, dict):
71f264
+                # test dict interface
71f264
+                self.assertEqual(len(expect), len(fs))
71f264
+                self.assertCountEqual(expect.keys(), fs.keys())
71f264
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
71f264
+                # test individual fields
71f264
+                for key in expect.keys():
71f264
+                    expect_val = expect[key]
71f264
+                    self.assertIn(key, fs)
71f264
+                    if len(expect_val) > 1:
71f264
+                        self.assertEqual(fs.getvalue(key), expect_val)
71f264
+                    else:
71f264
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
71f264
+
71f264
+    def test_separator(self):
71f264
+        parse_semicolon = [
71f264
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
71f264
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
71f264
+            (";", ValueError("bad query field: ''")),
71f264
+            (";;", ValueError("bad query field: ''")),
71f264
+            ("=;a", ValueError("bad query field: 'a'")),
71f264
+            (";b=a", ValueError("bad query field: ''")),
71f264
+            ("b;=a", ValueError("bad query field: 'b'")),
71f264
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
71f264
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
71f264
+        ]
71f264
+        for orig, expect in parse_semicolon:
71f264
+            env = {'QUERY_STRING': orig}
71f264
+            fs = cgi.FieldStorage(separator=';', environ=env)
71f264
             if isinstance(expect, dict):
71f264
                 # test dict interface
71f264
                 self.assertEqual(len(expect), len(fs))
71f264
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
71f264
index 68f633ca3a7..1ec86ba0fc2 100644
71f264
--- a/Lib/test/test_urlparse.py
71f264
+++ b/Lib/test/test_urlparse.py
71f264
@@ -2,6 +2,11 @@ import sys
71f264
 import unicodedata
71f264
 import unittest
71f264
 import urllib.parse
71f264
+from test.support import EnvironmentVarGuard
71f264
+from warnings import catch_warnings
71f264
+import tempfile
71f264
+import contextlib
71f264
+import os.path
71f264
 
71f264
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
71f264
 RFC2396_BASE = "http://a/b/c/d;p?q"
71f264
@@ -32,6 +37,9 @@ parse_qsl_test_cases = [
71f264
     (b"&a=b", [(b'a', b'b')]),
71f264
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
71f264
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
71f264
+]
71f264
+
71f264
+parse_qsl_test_cases_semicolon = [
71f264
     (";", []),
71f264
     (";;", []),
71f264
     (";a=b", [('a', 'b')]),
71f264
@@ -44,6 +52,21 @@ parse_qsl_test_cases = [
71f264
     (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
71f264
 ]
71f264
 
71f264
+parse_qsl_test_cases_legacy = [
71f264
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
71f264
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
71f264
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
71f264
+]
71f264
+
71f264
+parse_qsl_test_cases_warn = [
71f264
+    (";a=b", [(';a', 'b')]),
71f264
+    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
71f264
+    (b";a=b", [(b';a', b'b')]),
71f264
+    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
71f264
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
71f264
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
71f264
+]
71f264
+
71f264
 # Each parse_qs testcase is a two-tuple that contains
71f264
 # a string with the query and a dictionary with the expected result.
71f264
 
71f264
@@ -68,6 +91,9 @@ parse_qs_test_cases = [
71f264
     (b"&a=b", {b'a': [b'b']}),
71f264
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
71f264
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
71f264
+]
71f264
+
71f264
+parse_qs_test_cases_semicolon = [
71f264
     (";", {}),
71f264
     (";;", {}),
71f264
     (";a=b", {'a': ['b']}),
71f264
@@ -80,6 +106,24 @@ parse_qs_test_cases = [
71f264
     (b"a=1;a=2", {b'a': [b'1', b'2']}),
71f264
 ]
71f264
 
71f264
+parse_qs_test_cases_legacy = [
71f264
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
71f264
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
71f264
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
71f264
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
71f264
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
71f264
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
71f264
+]
71f264
+
71f264
+parse_qs_test_cases_warn = [
71f264
+    (";a=b", {';a': ['b']}),
71f264
+    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
71f264
+    (b";a=b", {b';a': [b'b']}),
71f264
+    (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
71f264
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
71f264
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
71f264
+]
71f264
+
71f264
 class UrlParseTestCase(unittest.TestCase):
71f264
 
71f264
     def checkRoundtrips(self, url, parsed, split):
71f264
@@ -152,6 +196,40 @@ class UrlParseTestCase(unittest.TestCase):
71f264
             self.assertEqual(result, expect_without_blanks,
71f264
                             "Error parsing %r" % orig)
71f264
 
71f264
+    def test_qs_default_warn(self):
71f264
+        for orig, expect in parse_qs_test_cases_warn:
71f264
+            with self.subTest(orig=orig, expect=expect):
71f264
+                with catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
71f264
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 1)
71f264
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
71f264
+
71f264
+    def test_qsl_default_warn(self):
71f264
+        for orig, expect in parse_qsl_test_cases_warn:
71f264
+            with self.subTest(orig=orig, expect=expect):
71f264
+                with catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
71f264
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 1)
71f264
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
71f264
+
71f264
+    def test_default_qs_no_warnings(self):
71f264
+        for orig, expect in parse_qs_test_cases:
71f264
+            with self.subTest(orig=orig, expect=expect):
71f264
+                with catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
71f264
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
+    def test_default_qsl_no_warnings(self):
71f264
+        for orig, expect in parse_qsl_test_cases:
71f264
+            with self.subTest(orig=orig, expect=expect):
71f264
+                with catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
71f264
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
     def test_roundtrips(self):
71f264
         str_cases = [
71f264
             ('file:///tmp/junk.txt',
71f264
@@ -885,8 +963,151 @@ class UrlParseTestCase(unittest.TestCase):
71f264
         with self.assertRaises(ValueError):
71f264
             urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
71f264
         with self.assertRaises(ValueError):
71f264
-            urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
71f264
+            urllib.parse.parse_qs(';'.join(['a=a']*11), separator=';', max_num_fields=10)
71f264
+        with self.assertRaises(ValueError):
71f264
+            urllib.parse.parse_qs('SEP'.join(['a=a']*11), separator='SEP', max_num_fields=10)
71f264
         urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
71f264
+        urllib.parse.parse_qs(';'.join(['a=a']*10), separator=';', max_num_fields=10)
71f264
+        urllib.parse.parse_qs('SEP'.join(['a=a']*10), separator='SEP', max_num_fields=10)
71f264
+
71f264
+    def test_parse_qs_separator_bytes(self):
71f264
+        expected = {b'a': [b'1'], b'b': [b'2']}
71f264
+
71f264
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=b';')
71f264
+        self.assertEqual(result, expected)
71f264
+        result = urllib.parse.parse_qs(b'a=1;b=2', separator=';')
71f264
+        self.assertEqual(result, expected)
71f264
+        result = urllib.parse.parse_qs('a=1;b=2', separator=';')
71f264
+        self.assertEqual(result, {'a': ['1'], 'b': ['2']})
71f264
+
71f264
+    @contextlib.contextmanager
71f264
+    def _qsl_sep_config(self, sep):
71f264
+        """Context for the given parse_qsl default separator configured in config file"""
71f264
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
71f264
+        urllib.parse._default_qs_separator = None
71f264
+        try:
71f264
+            with tempfile.TemporaryDirectory() as tmpdirname:
71f264
+                filename = os.path.join(tmpdirname, 'conf.cfg')
71f264
+                with open(filename, 'w') as file:
71f264
+                    file.write(f'[parse_qs]\n')
71f264
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
71f264
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
71f264
+                yield
71f264
+        finally:
71f264
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
71f264
+            urllib.parse._default_qs_separator = None
71f264
+
71f264
+    def test_parse_qs_separator_semicolon(self):
71f264
+        for orig, expect in parse_qs_test_cases_semicolon:
71f264
+            with self.subTest(orig=orig, expect=expect, method='arg'):
71f264
+                result = urllib.parse.parse_qs(orig, separator=';')
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+            with self.subTest(orig=orig, expect=expect, method='env'):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
71f264
+                    result = urllib.parse.parse_qs(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+            with self.subTest(orig=orig, expect=expect, method='conf'):
71f264
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qs(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
+    def test_parse_qsl_separator_semicolon(self):
71f264
+        for orig, expect in parse_qsl_test_cases_semicolon:
71f264
+            with self.subTest(orig=orig, expect=expect, method='arg'):
71f264
+                result = urllib.parse.parse_qsl(orig, separator=';')
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+            with self.subTest(orig=orig, expect=expect, method='env'):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
71f264
+                    result = urllib.parse.parse_qsl(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+            with self.subTest(orig=orig, expect=expect, method='conf'):
71f264
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qsl(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
+    def test_parse_qs_separator_legacy(self):
71f264
+        for orig, expect in parse_qs_test_cases_legacy:
71f264
+            with self.subTest(orig=orig, expect=expect, method='env'):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
71f264
+                    result = urllib.parse.parse_qs(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+            with self.subTest(orig=orig, expect=expect, method='conf'):
71f264
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qs(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
+    def test_parse_qsl_separator_legacy(self):
71f264
+        for orig, expect in parse_qsl_test_cases_legacy:
71f264
+            with self.subTest(orig=orig, expect=expect, method='env'):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
71f264
+                    result = urllib.parse.parse_qsl(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+            with self.subTest(orig=orig, expect=expect, method='conf'):
71f264
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
71f264
+                    result = urllib.parse.parse_qsl(orig)
71f264
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
71f264
+                self.assertEqual(len(w), 0)
71f264
+
71f264
+    def test_parse_qs_separator_bad_value_env_or_config(self):
71f264
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
71f264
+            with self.subTest(bad_sep, method='env'):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
71f264
+                    with self.assertRaises(ValueError):
71f264
+                        urllib.parse.parse_qsl('a=1;b=2')
71f264
+            with self.subTest(bad_sep, method='conf'):
71f264
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
71f264
+                    with self.assertRaises(ValueError):
71f264
+                        urllib.parse.parse_qsl('a=1;b=2')
71f264
+
71f264
+    def test_parse_qs_separator_bad_value_arg(self):
71f264
+        for bad_sep in True, {}, '':
71f264
+            with self.subTest(bad_sep):
71f264
+                with self.assertRaises(ValueError):
71f264
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
71f264
+
71f264
+    def test_parse_qs_separator_num_fields(self):
71f264
+        for qs, sep in (
71f264
+            ('a&b&c', '&'),
71f264
+            ('a;b;c', ';'),
71f264
+            ('a&b;c', 'legacy'),
71f264
+        ):
71f264
+            with self.subTest(qs=qs, sep=sep):
71f264
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
71f264
+                    if sep != 'legacy':
71f264
+                        with self.assertRaises(ValueError):
71f264
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
71f264
+                    if sep:
71f264
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
71f264
+                    with self.assertRaises(ValueError):
71f264
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
71f264
+
71f264
+    def test_parse_qs_separator_priority(self):
71f264
+        # env variable trumps config file
71f264
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
71f264
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
71f264
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
71f264
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
71f264
+        # argument trumps config file
71f264
+        with self._qsl_sep_config('~'):
71f264
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
71f264
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
71f264
+        # argument trumps env variable
71f264
+        with EnvironmentVarGuard() as environ:
71f264
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
71f264
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
71f264
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
71f264
 
71f264
     def test_urlencode_sequences(self):
71f264
         # Other tests incidentally urlencode things; test non-covered cases:
71f264
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
71f264
index fa8827a9fa7..57b8fcf8bbd 100644
71f264
--- a/Lib/urllib/parse.py
71f264
+++ b/Lib/urllib/parse.py
71f264
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
71f264
 """
71f264
 
71f264
 import re
71f264
+import os
71f264
 import sys
71f264
 import collections
71f264
 
71f264
@@ -644,7 +645,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
71f264
 
71f264
 
71f264
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
71f264
-             encoding='utf-8', errors='replace', max_num_fields=None):
71f264
+             encoding='utf-8', errors='replace', max_num_fields=None,
71f264
+             separator=None):
71f264
     """Parse a query given as a string argument.
71f264
 
71f264
         Arguments:
71f264
@@ -673,7 +675,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
71f264
     parsed_result = {}
71f264
     pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
71f264
                       encoding=encoding, errors=errors,
71f264
-                      max_num_fields=max_num_fields)
71f264
+                      max_num_fields=max_num_fields,
71f264
+                      separator=separator)
71f264
     for name, value in pairs:
71f264
         if name in parsed_result:
71f264
             parsed_result[name].append(value)
71f264
@@ -681,9 +684,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
71f264
             parsed_result[name] = [value]
71f264
     return parsed_result
71f264
 
71f264
+class _QueryStringSeparatorWarning(RuntimeWarning):
71f264
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
71f264
+
71f264
+# The default "separator" for parse_qsl can be specified in a config file.
71f264
+# It's cached after first read.
71f264
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
71f264
+_default_qs_separator = None
71f264
 
71f264
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
71f264
-              encoding='utf-8', errors='replace', max_num_fields=None):
71f264
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
71f264
     """Parse a query given as a string argument.
71f264
 
71f264
         Arguments:
71f264
@@ -710,15 +720,77 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
71f264
     """
71f264
     qs, _coerce_result = _coerce_args(qs)
71f264
 
71f264
+    if isinstance(separator, bytes):
71f264
+        separator = separator.decode('ascii')
71f264
+
71f264
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
71f264
+        raise ValueError("Separator must be of type string or bytes.")
71f264
+
71f264
+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
71f264
+    _legacy = object()
71f264
+
71f264
+    if separator is None:
71f264
+        global _default_qs_separator
71f264
+        separator = _default_qs_separator
71f264
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
71f264
+        if separator is None:
71f264
+            # Set default separator from environment variable
71f264
+            separator = os.environ.get(envvar_name)
71f264
+            config_source = 'environment variable'
71f264
+        if separator is None:
71f264
+            # Set default separator from the configuration file
71f264
+            try:
71f264
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
71f264
+            except FileNotFoundError:
71f264
+                pass
71f264
+            else:
71f264
+                with file:
71f264
+                    import configparser
71f264
+                    config = configparser.ConfigParser(
71f264
+                        interpolation=None,
71f264
+                        comment_prefixes=('#', ),
71f264
+                    )
71f264
+                    config.read_file(file)
71f264
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
71f264
+                    _default_qs_separator = separator
71f264
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
71f264
+        if separator is None:
71f264
+            # The default is '&', but warn if not specified explicitly
71f264
+            if ';' in qs:
71f264
+                from warnings import warn
71f264
+                warn("The default separator of urllib.parse.parse_qsl and "
71f264
+                    + "parse_qs was changed to '&' to avoid a web cache "
71f264
+                    + "poisoning issue (CVE-2021-23336). "
71f264
+                    + "By default, semicolons no longer act as query field "
71f264
+                    + "separators. "
71f264
+                    + "See https://access.redhat.com/articles/5860431 for "
71f264
+                    + "more details.",
71f264
+                    _QueryStringSeparatorWarning, stacklevel=2)
71f264
+            separator = '&'
71f264
+        elif separator == 'legacy':
71f264
+            separator = _legacy
71f264
+        elif len(separator) != 1:
71f264
+            raise ValueError(
71f264
+                f'{envvar_name} (from {config_source}) must contain '
71f264
+                + '1 character, or "legacy". See '
71f264
+                + 'https://access.redhat.com/articles/5860431 for more details.'
71f264
+            )
71f264
+
71f264
     # If max_num_fields is defined then check that the number of fields
71f264
     # is less than max_num_fields. This prevents a memory exhaustion DOS
71f264
     # attack via post bodies with many fields.
71f264
     if max_num_fields is not None:
71f264
-        num_fields = 1 + qs.count('&') + qs.count(';')
71f264
+        if separator is _legacy:
71f264
+            num_fields = 1 + qs.count('&') + qs.count(';')
71f264
+        else:
71f264
+            num_fields = 1 + qs.count(separator)
71f264
         if max_num_fields < num_fields:
71f264
             raise ValueError('Max number of fields exceeded')
71f264
 
71f264
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
71f264
+    if separator is _legacy:
71f264
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
71f264
+    else:
71f264
+        pairs = [s1 for s1 in qs.split(separator)]
71f264
     r = []
71f264
     for name_value in pairs:
71f264
         if not name_value and not strict_parsing:
71f264
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
71f264
new file mode 100644
71f264
index 00000000000..bc82c963067
71f264
--- /dev/null
71f264
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
71f264
@@ -0,0 +1 @@
71f264
+Make it possible to fix a web cache poisoning vulnerability by allowing the user to choose a custom separator for query arguments.