Blame SOURCES/00359-CVE-2021-23336.patch

76c371
From 78da9e020385fe78e36c20f99a0910bbc4a0c100 Mon Sep 17 00:00:00 2001
f068d2
From: Lumir Balhar <lbalhar@redhat.com>
f068d2
Date: Thu, 1 Apr 2021 08:18:07 +0200
f068d2
Subject: [PATCH] CVE-2021-23336: Add `separator` argument to parse_qs; warn
f068d2
 with default
f068d2
MIME-Version: 1.0
f068d2
Content-Type: text/plain; charset=UTF-8
f068d2
Content-Transfer-Encoding: 8bit
f068d2
f068d2
Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
f068d2
However, this solution is different than the upstream solution in Python 3.6.13.
f068d2
f068d2
An optional argument `separator` is added to specify the separator.
f068d2
It is recommended to set it to '&' or ';' to match the application or proxy in use.
f068d2
The default can be set with an env variable or a config file.
f068d2
If none of the argument, env var, or config file specifies a separator, "&" is used
f068d2
but a warning is raised if parse_qs is used on input that contains ';'.
f068d2
f068d2
Co-authors of the upstream change (who do not necessarily agree with this):
f068d2
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
f068d2
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
f068d2
Co-authored-by: Éric Araujo <merwok@netwok.org>
f068d2
---
76c371
 Doc/library/cgi.rst          |   2 +-
76c371
 Doc/library/urllib.parse.rst |  12 +-
76c371
 Lib/cgi.py                   |   4 +-
76c371
 Lib/test/test_cgi.py         |  29 +++++
76c371
 Lib/test/test_urlparse.py    | 232 ++++++++++++++++++++++++++++++++++-
76c371
 Lib/urllib/parse.py          |  78 +++++++++++-
76c371
 6 files changed, 339 insertions(+), 18 deletions(-)
f068d2
f068d2
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
f068d2
index 880074b..d8a6dc1 100644
f068d2
--- a/Doc/library/cgi.rst
f068d2
+++ b/Doc/library/cgi.rst
f068d2
@@ -277,7 +277,7 @@ These are useful if you want more control, or if you want to employ some of the
f068d2
 algorithms implemented in this module in other circumstances.
f068d2
 
f068d2
 
f068d2
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
f068d2
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
f068d2
 
f068d2
    Parse a query in the environment or from a file (the file defaults to
f068d2
    ``sys.stdin``).  The *keep_blank_values*, *strict_parsing* and *separator* parameters are
f068d2
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
76c371
index a6cfc5d..85b2448 100644
f068d2
--- a/Doc/library/urllib.parse.rst
f068d2
+++ b/Doc/library/urllib.parse.rst
f068d2
@@ -165,7 +165,7 @@ or on combining URL components into a URL string.
f068d2
       now raise :exc:`ValueError`.
f068d2
 
f068d2
 
f068d2
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
f068d2
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
f068d2
 
f068d2
    Parse a query string given as a string argument (data of type
f068d2
    :mimetype:`application/x-www-form-urlencoded`).  Data are returned as a
f068d2
@@ -191,7 +191,13 @@ or on combining URL components into a URL string.
f068d2
    *max_num_fields* fields read.
f068d2
 
f068d2
    The optional argument *separator* is the symbol to use for separating the
f068d2
-   query arguments. It defaults to ``&``.
f068d2
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
f068d2
+   It defaults to ``'&'``; a warning is raised if this default is used on input that contains ``';'``.
f068d2
+   This default may be changed with the following environment variable settings:
f068d2
+
f068d2
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
f068d2
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
f068d2
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
f068d2
 
f068d2
    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
f068d2
    parameter set to ``True``) to convert such dictionaries into query
f068d2
@@ -236,7 +242,7 @@ or on combining URL components into a URL string.
f068d2
    *max_num_fields* fields read.
f068d2
 
f068d2
    The optional argument *separator* is the symbol to use for separating the
f068d2
-   query arguments. It defaults to ``&``.
f068d2
+   query arguments. It works as in :py:func:`parse_qs`.
f068d2
 
f068d2
    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
f068d2
    query strings.
f068d2
diff --git a/Lib/cgi.py b/Lib/cgi.py
f068d2
index 1e880e5..d7b994b 100755
f068d2
--- a/Lib/cgi.py
f068d2
+++ b/Lib/cgi.py
f068d2
@@ -116,7 +116,7 @@ log = initlog           # The current logging function
f068d2
 maxlen = 0
f068d2
 
f068d2
 def parse(fp=None, environ=os.environ, keep_blank_values=0,
f068d2
-          strict_parsing=0, separator='&'):
f068d2
+          strict_parsing=0, separator=None):
f068d2
     """Parse a query in the environment or from a file (default stdin)
f068d2
 
f068d2
         Arguments, all optional:
f068d2
@@ -319,7 +319,7 @@ class FieldStorage:
f068d2
     def __init__(self, fp=None, headers=None, outerboundary=b'',
f068d2
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
f068d2
                  limit=None, encoding='utf-8', errors='replace',
f068d2
-                 max_num_fields=None, separator='&'):
f068d2
+                 max_num_fields=None, separator=None):
f068d2
         """Constructor.  Read multipart/* until last part.
f068d2
 
f068d2
         Arguments, all optional:
f068d2
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
f068d2
index 4e1506a..49b6926 100644
f068d2
--- a/Lib/test/test_cgi.py
f068d2
+++ b/Lib/test/test_cgi.py
f068d2
@@ -180,6 +180,35 @@ Content-Length: 3
f068d2
 
f068d2
             env = {'QUERY_STRING': orig}
f068d2
             fs = cgi.FieldStorage(environ=env)
f068d2
+            if isinstance(expect, dict):
f068d2
+                # test dict interface
f068d2
+                self.assertEqual(len(expect), len(fs))
f068d2
+                self.assertCountEqual(expect.keys(), fs.keys())
f068d2
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
f068d2
+                # test individual fields
f068d2
+                for key in expect.keys():
f068d2
+                    expect_val = expect[key]
f068d2
+                    self.assertIn(key, fs)
f068d2
+                    if len(expect_val) > 1:
f068d2
+                        self.assertEqual(fs.getvalue(key), expect_val)
f068d2
+                    else:
f068d2
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
f068d2
+
f068d2
+    def test_separator(self):
f068d2
+        parse_semicolon = [
f068d2
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
f068d2
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
f068d2
+            (";", ValueError("bad query field: ''")),
f068d2
+            (";;", ValueError("bad query field: ''")),
f068d2
+            ("=;a", ValueError("bad query field: 'a'")),
f068d2
+            (";b=a", ValueError("bad query field: ''")),
f068d2
+            ("b;=a", ValueError("bad query field: 'b'")),
f068d2
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
f068d2
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
f068d2
+        ]
f068d2
+        for orig, expect in parse_semicolon:
f068d2
+            env = {'QUERY_STRING': orig}
f068d2
+            fs = cgi.FieldStorage(separator=';', environ=env)
f068d2
             if isinstance(expect, dict):
f068d2
                 # test dict interface
f068d2
                 self.assertEqual(len(expect), len(fs))
f068d2
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
76c371
index 0f99130..4e0d7e5 100644
f068d2
--- a/Lib/test/test_urlparse.py
f068d2
+++ b/Lib/test/test_urlparse.py
f068d2
@@ -2,6 +2,11 @@ import sys
f068d2
 import unicodedata
f068d2
 import unittest
f068d2
 import urllib.parse
f068d2
+from test.support import EnvironmentVarGuard
f068d2
+from warnings import catch_warnings
f068d2
+import tempfile
f068d2
+import contextlib
f068d2
+import os.path
f068d2
 
f068d2
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
f068d2
 RFC2396_BASE = "http://a/b/c/d;p?q"
f068d2
@@ -32,10 +37,34 @@ parse_qsl_test_cases = [
f068d2
     (b"&a=b", [(b'a', b'b')]),
f068d2
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
f068d2
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
f068d2
+]
f068d2
+
f068d2
+parse_qsl_test_cases_semicolon = [
f068d2
+    (";", []),
f068d2
+    (";;", []),
f068d2
+    (";a=b", [('a', 'b')]),
f068d2
+    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
f068d2
+    ("a=1;a=2", [('a', '1'), ('a', '2')]),
f068d2
+    (b";", []),
f068d2
+    (b";;", []),
f068d2
+    (b";a=b", [(b'a', b'b')]),
f068d2
+    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
f068d2
+    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
f068d2
+]
f068d2
+
f068d2
+parse_qsl_test_cases_legacy = [
f068d2
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
f068d2
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
f068d2
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
f068d2
+]
f068d2
+
f068d2
+parse_qsl_test_cases_warn = [
f068d2
     (";a=b", [(';a', 'b')]),
f068d2
     ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
f068d2
     (b";a=b", [(b';a', b'b')]),
f068d2
     (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
f068d2
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
f068d2
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
f068d2
 ]
f068d2
 
f068d2
 # Each parse_qs testcase is a two-tuple that contains
f068d2
@@ -62,10 +91,37 @@ parse_qs_test_cases = [
f068d2
     (b"&a=b", {b'a': [b'b']}),
f068d2
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
f068d2
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
f068d2
+]
f068d2
+
f068d2
+parse_qs_test_cases_semicolon = [
f068d2
+    (";", {}),
f068d2
+    (";;", {}),
f068d2
+    (";a=b", {'a': ['b']}),
f068d2
+    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
f068d2
+    ("a=1;a=2", {'a': ['1', '2']}),
f068d2
+    (b";", {}),
f068d2
+    (b";;", {}),
f068d2
+    (b";a=b", {b'a': [b'b']}),
f068d2
+    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
f068d2
+    (b"a=1;a=2", {b'a': [b'1', b'2']}),
f068d2
+]
f068d2
+
f068d2
+parse_qs_test_cases_legacy = [
f068d2
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
f068d2
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
f068d2
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
f068d2
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
f068d2
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
f068d2
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
f068d2
+]
f068d2
+
f068d2
+parse_qs_test_cases_warn = [
f068d2
     (";a=b", {';a': ['b']}),
f068d2
     ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
f068d2
     (b";a=b", {b';a': [b'b']}),
f068d2
     (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
f068d2
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
f068d2
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
f068d2
 ]
f068d2
 
f068d2
 class UrlParseTestCase(unittest.TestCase):
f068d2
@@ -123,23 +179,57 @@ class UrlParseTestCase(unittest.TestCase):
f068d2
 
f068d2
     def test_qsl(self):
f068d2
         for orig, expect in parse_qsl_test_cases:
f068d2
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
f068d2
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=True, separator="&")
f068d2
             self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
             expect_without_blanks = [v for v in expect if len(v[1])]
f068d2
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
f068d2
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=False, separator="&")
f068d2
             self.assertEqual(result, expect_without_blanks,
f068d2
                             "Error parsing %r" % orig)
f068d2
 
f068d2
     def test_qs(self):
f068d2
         for orig, expect in parse_qs_test_cases:
f068d2
-            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
f068d2
+            result = urllib.parse.parse_qs(orig, keep_blank_values=True, separator="&")
f068d2
             self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
             expect_without_blanks = {v: expect[v]
f068d2
                                      for v in expect if len(expect[v][0])}
f068d2
-            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
f068d2
+            result = urllib.parse.parse_qs(orig, keep_blank_values=False, separator="&")
f068d2
             self.assertEqual(result, expect_without_blanks,
f068d2
                             "Error parsing %r" % orig)
f068d2
 
f068d2
+    def test_qs_default_warn(self):
f068d2
+        for orig, expect in parse_qs_test_cases_warn:
f068d2
+            with self.subTest(orig=orig, expect=expect):
f068d2
+                with catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
f068d2
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 1)
f068d2
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
f068d2
+
f068d2
+    def test_qsl_default_warn(self):
f068d2
+        for orig, expect in parse_qsl_test_cases_warn:
f068d2
+            with self.subTest(orig=orig, expect=expect):
f068d2
+                with catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
f068d2
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 1)
f068d2
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
f068d2
+
f068d2
+    def test_default_qs_no_warnings(self):
f068d2
+        for orig, expect in parse_qs_test_cases:
f068d2
+            with self.subTest(orig=orig, expect=expect):
f068d2
+                with catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
f068d2
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
+    def test_default_qsl_no_warnings(self):
f068d2
+        for orig, expect in parse_qsl_test_cases:
f068d2
+            with self.subTest(orig=orig, expect=expect):
f068d2
+                with catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
f068d2
+                    self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
     def test_roundtrips(self):
f068d2
         str_cases = [
f068d2
             ('file:///tmp/junk.txt',
76c371
@@ -919,8 +1009,8 @@ class UrlParseTestCase(unittest.TestCase):
f068d2
 
f068d2
     def test_parse_qsl_max_num_fields(self):
f068d2
         with self.assertRaises(ValueError):
f068d2
-            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
f068d2
-        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
f068d2
+            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10, separator='&')
f068d2
+        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10, separator='&')
f068d2
 
f068d2
     def test_parse_qs_separator(self):
f068d2
         parse_qs_semicolon_cases = [
76c371
@@ -964,6 +1054,136 @@ class UrlParseTestCase(unittest.TestCase):
76c371
                 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
f068d2
 
f068d2
 
f068d2
+    @contextlib.contextmanager
f068d2
+    def _qsl_sep_config(self, sep):
f068d2
+        """Context for the given parse_qsl default separator configured in config file"""
f068d2
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
f068d2
+        urllib.parse._default_qs_separator = None
f068d2
+        try:
f068d2
+            with tempfile.TemporaryDirectory() as tmpdirname:
f068d2
+                filename = os.path.join(tmpdirname, 'conf.cfg')
f068d2
+                with open(filename, 'w') as file:
f068d2
+                    file.write(f'[parse_qs]\n')
f068d2
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
f068d2
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
f068d2
+                yield
f068d2
+        finally:
f068d2
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
f068d2
+            urllib.parse._default_qs_separator = None
f068d2
+
f068d2
+    def test_parse_qs_separator_semicolon(self):
f068d2
+        for orig, expect in parse_qs_test_cases_semicolon:
f068d2
+            with self.subTest(orig=orig, expect=expect, method='arg'):
f068d2
+                result = urllib.parse.parse_qs(orig, separator=';')
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='env'):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
f068d2
+                    result = urllib.parse.parse_qs(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='conf'):
f068d2
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qs(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
+    def test_parse_qsl_separator_semicolon(self):
f068d2
+        for orig, expect in parse_qsl_test_cases_semicolon:
f068d2
+            with self.subTest(orig=orig, expect=expect, method='arg'):
f068d2
+                result = urllib.parse.parse_qsl(orig, separator=';')
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='env'):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
f068d2
+                    result = urllib.parse.parse_qsl(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='conf'):
f068d2
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qsl(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
+    def test_parse_qs_separator_legacy(self):
f068d2
+        for orig, expect in parse_qs_test_cases_legacy:
f068d2
+            with self.subTest(orig=orig, expect=expect, method='env'):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
f068d2
+                    result = urllib.parse.parse_qs(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='conf'):
f068d2
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qs(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
+    def test_parse_qsl_separator_legacy(self):
f068d2
+        for orig, expect in parse_qsl_test_cases_legacy:
f068d2
+            with self.subTest(orig=orig, expect=expect, method='env'):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
f068d2
+                    result = urllib.parse.parse_qsl(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+            with self.subTest(orig=orig, expect=expect, method='conf'):
f068d2
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
f068d2
+                    result = urllib.parse.parse_qsl(orig)
f068d2
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
f068d2
+                self.assertEqual(len(w), 0)
f068d2
+
f068d2
+    def test_parse_qs_separator_bad_value_env_or_config(self):
f068d2
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
f068d2
+            with self.subTest(bad_sep, method='env'):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
f068d2
+                    with self.assertRaises(ValueError):
f068d2
+                        urllib.parse.parse_qsl('a=1;b=2')
f068d2
+            with self.subTest(bad_sep, method='conf'):
f068d2
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
f068d2
+                    with self.assertRaises(ValueError):
f068d2
+                        urllib.parse.parse_qsl('a=1;b=2')
f068d2
+
f068d2
+    def test_parse_qs_separator_bad_value_arg(self):
f068d2
+        for bad_sep in True, {}, '':
f068d2
+            with self.subTest(bad_sep):
f068d2
+                with self.assertRaises(ValueError):
f068d2
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
f068d2
+
f068d2
+    def test_parse_qs_separator_num_fields(self):
f068d2
+        for qs, sep in (
f068d2
+            ('a&b&c', '&'),
f068d2
+            ('a;b;c', ';'),
f068d2
+            ('a&b;c', 'legacy'),
f068d2
+        ):
f068d2
+            with self.subTest(qs=qs, sep=sep):
f068d2
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
f068d2
+                    if sep != 'legacy':
f068d2
+                        with self.assertRaises(ValueError):
f068d2
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
f068d2
+                    if sep:
f068d2
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
f068d2
+                    with self.assertRaises(ValueError):
f068d2
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
f068d2
+
f068d2
+    def test_parse_qs_separator_priority(self):
f068d2
+        # env variable trumps config file
f068d2
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
f068d2
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
f068d2
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
f068d2
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
f068d2
+        # argument trumps config file
f068d2
+        with self._qsl_sep_config('~'):
f068d2
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
f068d2
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
f068d2
+        # argument trumps env variable
f068d2
+        with EnvironmentVarGuard() as environ:
f068d2
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
f068d2
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
f068d2
+            self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
f068d2
+
f068d2
+
f068d2
     def test_urlencode_sequences(self):
f068d2
         # Other tests incidentally urlencode things; test non-covered cases:
f068d2
         # Sequence and object values.
f068d2
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
76c371
index f0d9d4d..70fc268 100644
f068d2
--- a/Lib/urllib/parse.py
f068d2
+++ b/Lib/urllib/parse.py
f068d2
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
f068d2
 """
f068d2
 
f068d2
 import re
f068d2
+import os
f068d2
 import sys
f068d2
 import collections
f068d2
 import warnings
76c371
@@ -660,7 +661,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
f068d2
 
f068d2
 
f068d2
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
f068d2
-             encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
f068d2
+             encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
f068d2
     """Parse a query given as a string argument.
f068d2
 
f068d2
         Arguments:
76c371
@@ -700,9 +701,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
f068d2
             parsed_result[name] = [value]
f068d2
     return parsed_result
f068d2
 
f068d2
+class _QueryStringSeparatorWarning(RuntimeWarning):
f068d2
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
f068d2
+
f068d2
+# The default "separator" for parse_qsl can be specified in a config file.
f068d2
+# It's cached after first read.
f068d2
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
f068d2
+_default_qs_separator = None
f068d2
 
f068d2
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
f068d2
-              encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
f068d2
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
f068d2
     """Parse a query given as a string argument.
f068d2
 
f068d2
         Arguments:
76c371
@@ -731,20 +739,78 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
76c371
         Returns a list, as G-d intended.
f068d2
     """
f068d2
     qs, _coerce_result = _coerce_args(qs)
76c371
-    separator, _ = _coerce_args(separator)
f068d2
 
f068d2
-    if not separator or (not isinstance(separator, (str, bytes))):
f068d2
+    if isinstance(separator, bytes):
f068d2
+        separator = separator.decode('ascii')
f068d2
+
f068d2
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
f068d2
         raise ValueError("Separator must be of type string or bytes.")
f068d2
 
f068d2
+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
f068d2
+    _legacy = object()
f068d2
+
f068d2
+    if separator is None:
f068d2
+        global _default_qs_separator
f068d2
+        separator = _default_qs_separator
f068d2
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
f068d2
+        if separator is None:
f068d2
+            # Set default separator from environment variable
f068d2
+            separator = os.environ.get(envvar_name)
f068d2
+            config_source = 'environment variable'
f068d2
+        if separator is None:
f068d2
+            # Set default separator from the configuration file
f068d2
+            try:
f068d2
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
f068d2
+            except FileNotFoundError:
f068d2
+                pass
f068d2
+            else:
f068d2
+                with file:
f068d2
+                    import configparser
f068d2
+                    config = configparser.ConfigParser(
f068d2
+                        interpolation=None,
f068d2
+                        comment_prefixes=('#', ),
f068d2
+                    )
f068d2
+                    config.read_file(file)
f068d2
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
f068d2
+                    _default_qs_separator = separator
f068d2
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
f068d2
+        if separator is None:
f068d2
+            # The default is '&', but warn if not specified explicitly
f068d2
+            if ';' in qs:
f068d2
+                from warnings import warn
f068d2
+                warn("The default separator of urllib.parse.parse_qsl and "
f068d2
+                    + "parse_qs was changed to '&' to avoid a web cache "
f068d2
+                    + "poisoning issue (CVE-2021-23336). "
f068d2
+                    + "By default, semicolons no longer act as query field "
f068d2
+                    + "separators. "
f068d2
+                    + "See https://access.redhat.com/articles/5860431 for "
f068d2
+                    + "more details.",
f068d2
+                    _QueryStringSeparatorWarning, stacklevel=2)
f068d2
+            separator = '&'
f068d2
+        elif separator == 'legacy':
f068d2
+            separator = _legacy
f068d2
+        elif len(separator) != 1:
f068d2
+            raise ValueError(
f068d2
+                f'{envvar_name} (from {config_source}) must contain '
f068d2
+                + '1 character, or "legacy". See '
f068d2
+                + 'https://access.redhat.com/articles/5860431 for more details.'
f068d2
+            )
f068d2
+
f068d2
     # If max_num_fields is defined then check that the number of fields
f068d2
     # is less than max_num_fields. This prevents a memory exhaustion DOS
f068d2
     # attack via post bodies with many fields.
f068d2
     if max_num_fields is not None:
f068d2
-        num_fields = 1 + qs.count(separator)
f068d2
+        if separator is _legacy:
f068d2
+            num_fields = 1 + qs.count('&') + qs.count(';')
f068d2
+        else:
f068d2
+            num_fields = 1 + qs.count(separator)
f068d2
         if max_num_fields < num_fields:
f068d2
             raise ValueError('Max number of fields exceeded')
f068d2
 
f068d2
-    pairs = [s1 for s1 in qs.split(separator)]
f068d2
+    if separator is _legacy:
f068d2
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
f068d2
+    else:
f068d2
+        pairs = [s1 for s1 in qs.split(separator)]
f068d2
     r = []
f068d2
     for name_value in pairs:
f068d2
         if not name_value and not strict_parsing:
f068d2
-- 
76c371
2.31.1
f068d2