Blame SOURCES/00359-CVE-2021-23336.patch

893842
From a11d61081c3887c2b4c36e8726597e05f789c2e2 Mon Sep 17 00:00:00 2001
893842
From: Lumir Balhar <lbalhar@redhat.com>
893842
Date: Thu, 1 Apr 2021 08:18:07 +0200
893842
Subject: [PATCH] CVE-2021-23336: Add `separator` argument to parse_qs; warn
893842
 with default
893842
MIME-Version: 1.0
893842
Content-Type: text/plain; charset=UTF-8
893842
Content-Transfer-Encoding: 8bit
893842
893842
Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
893842
However, this solution is different than the upstream solution in Python 3.6.13.
893842
893842
An optional argument seperator is added to specify the separator.
893842
It is recommended to set it to '&' or ';' to match the application or proxy in use.
893842
The default can be set with an env variable of a config file.
893842
If neither the argument, env var or config file specifies a separator, "&" is used
893842
but a warning is raised if parse_qs is used on input that contains ';'.
893842
893842
Co-authors of the upstream change (who do not necessarily agree with this):
893842
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
893842
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
893842
Co-authored-by: Éric Araujo <merwok@netwok.org>
893842
---
893842
 Doc/library/cgi.rst                           |   2 +-
893842
 Doc/library/urllib.parse.rst                  |  12 +-
893842
 Lib/cgi.py                                    |   4 +-
893842
 Lib/test/test_cgi.py                          |  29 +++
893842
 Lib/test/test_urlparse.py                     | 232 +++++++++++++++++-
893842
 Lib/urllib/parse.py                           |  77 +++++-
893842
 .../2021-02-14-15-59-16.bpo-42967.YApqDS.rst  |   1 +
893842
 7 files changed, 340 insertions(+), 17 deletions(-)
893842
 create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
893842
893842
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
893842
index 880074b..d8a6dc1 100644
893842
--- a/Doc/library/cgi.rst
893842
+++ b/Doc/library/cgi.rst
893842
@@ -277,7 +277,7 @@ These are useful if you want more control, or if you want to employ some of the
893842
 algorithms implemented in this module in other circumstances.
893842
 
893842
 
893842
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
893842
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
893842
 
893842
    Parse a query in the environment or from a file (the file defaults to
893842
    ``sys.stdin``).  The *keep_blank_values*, *strict_parsing* and *separator* parameters are
893842
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
893842
index fcad707..9bcef69 100644
893842
--- a/Doc/library/urllib.parse.rst
893842
+++ b/Doc/library/urllib.parse.rst
893842
@@ -165,7 +165,7 @@ or on combining URL components into a URL string.
893842
       now raise :exc:`ValueError`.
893842
 
893842
 
893842
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
893842
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
893842
 
893842
    Parse a query string given as a string argument (data of type
893842
    :mimetype:`application/x-www-form-urlencoded`).  Data are returned as a
893842
@@ -191,7 +191,13 @@ or on combining URL components into a URL string.
893842
    *max_num_fields* fields read.
893842
 
893842
    The optional argument *separator* is the symbol to use for separating the
893842
-   query arguments. It defaults to ``&``.
893842
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
893842
+   It defaults to ``'&'``; a warning is raised if this default is used.
893842
+   This default may be changed with the following environment variable settings:
893842
+
893842
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
893842
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
893842
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
893842
 
893842
    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
893842
    parameter set to ``True``) to convert such dictionaries into query
893842
@@ -236,7 +242,7 @@ or on combining URL components into a URL string.
893842
    *max_num_fields* fields read.
893842
 
893842
    The optional argument *separator* is the symbol to use for separating the
893842
-   query arguments. It defaults to ``&``.
893842
+   query arguments. It works as in :py:func:`parse_qs`.
893842
 
893842
    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
893842
    query strings.
893842
diff --git a/Lib/cgi.py b/Lib/cgi.py
893842
index 1e880e5..d7b994b 100755
893842
--- a/Lib/cgi.py
893842
+++ b/Lib/cgi.py
893842
@@ -116,7 +116,7 @@ log = initlog           # The current logging function
893842
 maxlen = 0
893842
 
893842
 def parse(fp=None, environ=os.environ, keep_blank_values=0,
893842
-          strict_parsing=0, separator='&'):
893842
+          strict_parsing=0, separator=None):
893842
     """Parse a query in the environment or from a file (default stdin)
893842
 
893842
         Arguments, all optional:
893842
@@ -319,7 +319,7 @@ class FieldStorage:
893842
     def __init__(self, fp=None, headers=None, outerboundary=b'',
893842
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
893842
                  limit=None, encoding='utf-8', errors='replace',
893842
-                 max_num_fields=None, separator='&'):
893842
+                 max_num_fields=None, separator=None):
893842
         """Constructor.  Read multipart/* until last part.
893842
 
893842
         Arguments, all optional:
893842
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
893842
index 4e1506a..49b6926 100644
893842
--- a/Lib/test/test_cgi.py
893842
+++ b/Lib/test/test_cgi.py
893842
@@ -180,6 +180,35 @@ Content-Length: 3
893842
 
893842
             env = {'QUERY_STRING': orig}
893842
             fs = cgi.FieldStorage(environ=env)
893842
+            if isinstance(expect, dict):
893842
+                # test dict interface
893842
+                self.assertEqual(len(expect), len(fs))
893842
+                self.assertCountEqual(expect.keys(), fs.keys())
893842
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
893842
+                # test individual fields
893842
+                for key in expect.keys():
893842
+                    expect_val = expect[key]
893842
+                    self.assertIn(key, fs)
893842
+                    if len(expect_val) > 1:
893842
+                        self.assertEqual(fs.getvalue(key), expect_val)
893842
+                    else:
893842
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
893842
+
893842
+    def test_separator(self):
893842
+        parse_semicolon = [
893842
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
893842
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
893842
+            (";", ValueError("bad query field: ''")),
893842
+            (";;", ValueError("bad query field: ''")),
893842
+            ("=;a", ValueError("bad query field: 'a'")),
893842
+            (";b=a", ValueError("bad query field: ''")),
893842
+            ("b;=a", ValueError("bad query field: 'b'")),
893842
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
893842
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
893842
+        ]
893842
+        for orig, expect in parse_semicolon:
893842
+            env = {'QUERY_STRING': orig}
893842
+            fs = cgi.FieldStorage(separator=';', environ=env)
893842
             if isinstance(expect, dict):
893842
                 # test dict interface
893842
                 self.assertEqual(len(expect), len(fs))
893842
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
893842
index 90c8d69..90349ee 100644
893842
--- a/Lib/test/test_urlparse.py
893842
+++ b/Lib/test/test_urlparse.py
893842
@@ -2,6 +2,11 @@ import sys
893842
 import unicodedata
893842
 import unittest
893842
 import urllib.parse
893842
+from test.support import EnvironmentVarGuard
893842
+from warnings import catch_warnings
893842
+import tempfile
893842
+import contextlib
893842
+import os.path
893842
 
893842
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
893842
 RFC2396_BASE = "http://a/b/c/d;p?q"
893842
@@ -32,10 +37,34 @@ parse_qsl_test_cases = [
893842
     (b"&a=b", [(b'a', b'b')]),
893842
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
893842
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
893842
+]
893842
+
893842
+parse_qsl_test_cases_semicolon = [
893842
+    (";", []),
893842
+    (";;", []),
893842
+    (";a=b", [('a', 'b')]),
893842
+    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
893842
+    ("a=1;a=2", [('a', '1'), ('a', '2')]),
893842
+    (b";", []),
893842
+    (b";;", []),
893842
+    (b";a=b", [(b'a', b'b')]),
893842
+    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
893842
+    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
893842
+]
893842
+
893842
+parse_qsl_test_cases_legacy = [
893842
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
893842
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
893842
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
893842
+]
893842
+
893842
+parse_qsl_test_cases_warn = [
893842
     (";a=b", [(';a', 'b')]),
893842
     ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
893842
     (b";a=b", [(b';a', b'b')]),
893842
     (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
893842
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
893842
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
893842
 ]
893842
 
893842
 # Each parse_qs testcase is a two-tuple that contains
893842
@@ -62,10 +91,37 @@ parse_qs_test_cases = [
893842
     (b"&a=b", {b'a': [b'b']}),
893842
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
893842
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
893842
+]
893842
+
893842
+parse_qs_test_cases_semicolon = [
893842
+    (";", {}),
893842
+    (";;", {}),
893842
+    (";a=b", {'a': ['b']}),
893842
+    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
893842
+    ("a=1;a=2", {'a': ['1', '2']}),
893842
+    (b";", {}),
893842
+    (b";;", {}),
893842
+    (b";a=b", {b'a': [b'b']}),
893842
+    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
893842
+    (b"a=1;a=2", {b'a': [b'1', b'2']}),
893842
+]
893842
+
893842
+parse_qs_test_cases_legacy = [
893842
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
893842
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
893842
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
893842
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
893842
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
893842
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
893842
+]
893842
+
893842
+parse_qs_test_cases_warn = [
893842
     (";a=b", {';a': ['b']}),
893842
     ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
893842
     (b";a=b", {b';a': [b'b']}),
893842
     (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
893842
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
893842
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
893842
 ]
893842
 
893842
 class UrlParseTestCase(unittest.TestCase):
893842
@@ -123,23 +179,57 @@ class UrlParseTestCase(unittest.TestCase):
893842
 
893842
     def test_qsl(self):
893842
         for orig, expect in parse_qsl_test_cases:
893842
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
893842
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=True, separator="&")
893842
             self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
             expect_without_blanks = [v for v in expect if len(v[1])]
893842
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
893842
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=False, separator="&")
893842
             self.assertEqual(result, expect_without_blanks,
893842
                             "Error parsing %r" % orig)
893842
 
893842
     def test_qs(self):
893842
         for orig, expect in parse_qs_test_cases:
893842
-            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
893842
+            result = urllib.parse.parse_qs(orig, keep_blank_values=True, separator="&")
893842
             self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
             expect_without_blanks = {v: expect[v]
893842
                                      for v in expect if len(expect[v][0])}
893842
-            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
893842
+            result = urllib.parse.parse_qs(orig, keep_blank_values=False, separator="&")
893842
             self.assertEqual(result, expect_without_blanks,
893842
                             "Error parsing %r" % orig)
893842
 
893842
+    def test_qs_default_warn(self):
893842
+        for orig, expect in parse_qs_test_cases_warn:
893842
+            with self.subTest(orig=orig, expect=expect):
893842
+                with catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
893842
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 1)
893842
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
893842
+
893842
+    def test_qsl_default_warn(self):
893842
+        for orig, expect in parse_qsl_test_cases_warn:
893842
+            with self.subTest(orig=orig, expect=expect):
893842
+                with catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
893842
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 1)
893842
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
893842
+
893842
+    def test_default_qs_no_warnings(self):
893842
+        for orig, expect in parse_qs_test_cases:
893842
+            with self.subTest(orig=orig, expect=expect):
893842
+                with catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
893842
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
+    def test_default_qsl_no_warnings(self):
893842
+        for orig, expect in parse_qsl_test_cases:
893842
+            with self.subTest(orig=orig, expect=expect):
893842
+                with catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
893842
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
     def test_roundtrips(self):
893842
         str_cases = [
893842
             ('file:///tmp/junk.txt',
893842
@@ -871,8 +961,8 @@ class UrlParseTestCase(unittest.TestCase):
893842
 
893842
     def test_parse_qsl_max_num_fields(self):
893842
         with self.assertRaises(ValueError):
893842
-            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
893842
-        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
893842
+            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10, separator='&')
893842
+        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10, separator='&')
893842
 
893842
     def test_parse_qs_separator(self):
893842
         parse_qs_semicolon_cases = [
893842
@@ -912,6 +1002,136 @@ class UrlParseTestCase(unittest.TestCase):
893842
                 self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
 
893842
 
893842
+    @contextlib.contextmanager
893842
+    def _qsl_sep_config(self, sep):
893842
+        """Context for the given parse_qsl default separator configured in config file"""
893842
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
893842
+        urllib.parse._default_qs_separator = None
893842
+        try:
893842
+            with tempfile.TemporaryDirectory() as tmpdirname:
893842
+                filename = os.path.join(tmpdirname, 'conf.cfg')
893842
+                with open(filename, 'w') as file:
893842
+                    file.write(f'[parse_qs]\n')
893842
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
893842
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
893842
+                yield
893842
+        finally:
893842
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
893842
+            urllib.parse._default_qs_separator = None
893842
+
893842
+    def test_parse_qs_separator_semicolon(self):
893842
+        for orig, expect in parse_qs_test_cases_semicolon:
893842
+            with self.subTest(orig=orig, expect=expect, method='arg'):
893842
+                result = urllib.parse.parse_qs(orig, separator=';')
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+            with self.subTest(orig=orig, expect=expect, method='env'):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
893842
+                    result = urllib.parse.parse_qs(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+            with self.subTest(orig=orig, expect=expect, method='conf'):
893842
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qs(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
+    def test_parse_qsl_separator_semicolon(self):
893842
+        for orig, expect in parse_qsl_test_cases_semicolon:
893842
+            with self.subTest(orig=orig, expect=expect, method='arg'):
893842
+                result = urllib.parse.parse_qsl(orig, separator=';')
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+            with self.subTest(orig=orig, expect=expect, method='env'):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
893842
+                    result = urllib.parse.parse_qsl(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+            with self.subTest(orig=orig, expect=expect, method='conf'):
893842
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qsl(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
+    def test_parse_qs_separator_legacy(self):
893842
+        for orig, expect in parse_qs_test_cases_legacy:
893842
+            with self.subTest(orig=orig, expect=expect, method='env'):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
893842
+                    result = urllib.parse.parse_qs(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+            with self.subTest(orig=orig, expect=expect, method='conf'):
893842
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qs(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
+    def test_parse_qsl_separator_legacy(self):
893842
+        for orig, expect in parse_qsl_test_cases_legacy:
893842
+            with self.subTest(orig=orig, expect=expect, method='env'):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
893842
+                    result = urllib.parse.parse_qsl(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+            with self.subTest(orig=orig, expect=expect, method='conf'):
893842
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
893842
+                    result = urllib.parse.parse_qsl(orig)
893842
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
893842
+                self.assertEqual(len(w), 0)
893842
+
893842
+    def test_parse_qs_separator_bad_value_env_or_config(self):
893842
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
893842
+            with self.subTest(bad_sep, method='env'):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
893842
+                    with self.assertRaises(ValueError):
893842
+                        urllib.parse.parse_qsl('a=1;b=2')
893842
+            with self.subTest(bad_sep, method='conf'):
893842
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
893842
+                    with self.assertRaises(ValueError):
893842
+                        urllib.parse.parse_qsl('a=1;b=2')
893842
+
893842
+    def test_parse_qs_separator_bad_value_arg(self):
893842
+        for bad_sep in True, {}, '':
893842
+            with self.subTest(bad_sep):
893842
+                with self.assertRaises(ValueError):
893842
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
893842
+
893842
+    def test_parse_qs_separator_num_fields(self):
893842
+        for qs, sep in (
893842
+            ('a&b&c', '&'),
893842
+            ('a;b;c', ';'),
893842
+            ('a&b;c', 'legacy'),
893842
+        ):
893842
+            with self.subTest(qs=qs, sep=sep):
893842
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
893842
+                    if sep != 'legacy':
893842
+                        with self.assertRaises(ValueError):
893842
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
893842
+                    if sep:
893842
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
893842
+                    with self.assertRaises(ValueError):
893842
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
893842
+
893842
+    def test_parse_qs_separator_priority(self):
893842
+        # env variable trumps config file
893842
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
893842
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
893842
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
893842
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
893842
+        # argument trumps config file
893842
+        with self._qsl_sep_config('~'):
893842
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
893842
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
893842
+        # argument trumps env variable
893842
+        with EnvironmentVarGuard() as environ:
893842
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
893842
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
893842
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
893842
+
893842
+
893842
     def test_urlencode_sequences(self):
893842
         # Other tests incidentally urlencode things; test non-covered cases:
893842
         # Sequence and object values.
893842
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
893842
index 0c1c94f..83638bb 100644
893842
--- a/Lib/urllib/parse.py
893842
+++ b/Lib/urllib/parse.py
893842
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
893842
 """
893842
 
893842
 import re
893842
+import os
893842
 import sys
893842
 import collections
893842
 import warnings
893842
@@ -650,7 +651,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
893842
 
893842
 
893842
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
893842
-             encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
893842
+             encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
893842
     """Parse a query given as a string argument.
893842
 
893842
         Arguments:
893842
@@ -690,9 +691,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
893842
             parsed_result[name] = [value]
893842
     return parsed_result
893842
 
893842
+class _QueryStringSeparatorWarning(RuntimeWarning):
893842
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
893842
+
893842
+# The default "separator" for parse_qsl can be specified in a config file.
893842
+# It's cached after first read.
893842
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
893842
+_default_qs_separator = None
893842
 
893842
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
893842
-              encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
893842
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
893842
     """Parse a query given as a string argument.
893842
 
893842
         Arguments:
893842
@@ -722,18 +730,77 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
893842
     """
893842
     qs, _coerce_result = _coerce_args(qs)
893842
 
893842
-    if not separator or (not isinstance(separator, (str, bytes))):
893842
+    if isinstance(separator, bytes):
893842
+        separator = separator.decode('ascii')
893842
+
893842
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
893842
         raise ValueError("Separator must be of type string or bytes.")
893842
 
893842
+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
893842
+    _legacy = object()
893842
+
893842
+    if separator is None:
893842
+        global _default_qs_separator
893842
+        separator = _default_qs_separator
893842
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
893842
+        if separator is None:
893842
+            # Set default separator from environment variable
893842
+            separator = os.environ.get(envvar_name)
893842
+            config_source = 'environment variable'
893842
+        if separator is None:
893842
+            # Set default separator from the configuration file
893842
+            try:
893842
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
893842
+            except FileNotFoundError:
893842
+                pass
893842
+            else:
893842
+                with file:
893842
+                    import configparser
893842
+                    config = configparser.ConfigParser(
893842
+                        interpolation=None,
893842
+                        comment_prefixes=('#', ),
893842
+                    )
893842
+                    config.read_file(file)
893842
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
893842
+                    _default_qs_separator = separator
893842
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
893842
+        if separator is None:
893842
+            # The default is '&', but warn if not specified explicitly
893842
+            if ';' in qs:
893842
+                from warnings import warn
893842
+                warn("The default separator of urllib.parse.parse_qsl and "
893842
+                    + "parse_qs was changed to '&' to avoid a web cache "
893842
+                    + "poisoning issue (CVE-2021-23336). "
893842
+                    + "By default, semicolons no longer act as query field "
893842
+                    + "separators. "
893842
+                    + "See https://access.redhat.com/articles/5860431 for "
893842
+                    + "more details.",
893842
+                    _QueryStringSeparatorWarning, stacklevel=2)
893842
+            separator = '&'
893842
+        elif separator == 'legacy':
893842
+            separator = _legacy
893842
+        elif len(separator) != 1:
893842
+            raise ValueError(
893842
+                f'{envvar_name} (from {config_source}) must contain '
893842
+                + '1 character, or "legacy". See '
893842
+                + 'https://access.redhat.com/articles/5860431 for more details.'
893842
+            )
893842
+
893842
     # If max_num_fields is defined then check that the number of fields
893842
     # is less than max_num_fields. This prevents a memory exhaustion DOS
893842
     # attack via post bodies with many fields.
893842
     if max_num_fields is not None:
893842
-        num_fields = 1 + qs.count(separator)
893842
+        if separator is _legacy:
893842
+            num_fields = 1 + qs.count('&') + qs.count(';')
893842
+        else:
893842
+            num_fields = 1 + qs.count(separator)
893842
         if max_num_fields < num_fields:
893842
             raise ValueError('Max number of fields exceeded')
893842
 
893842
-    pairs = [s1 for s1 in qs.split(separator)]
893842
+    if separator is _legacy:
893842
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
893842
+    else:
893842
+        pairs = [s1 for s1 in qs.split(separator)]
893842
     r = []
893842
     for name_value in pairs:
893842
         if not name_value and not strict_parsing:
893842
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
893842
new file mode 100644
893842
index 0000000..bc82c96
893842
--- /dev/null
893842
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
893842
@@ -0,0 +1 @@
893842
+Make it possible to fix web cache poisoning vulnerability by allowing the user to choose a custom separator query args.
893842
-- 
893842
2.30.2
893842