Blame SOURCES/00394-cve-2022-45061-cpu-denial-of-service-via-inefficient-idna-decoder.patch

42ae64
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
42ae64
From: "Miss Islington (bot)"
42ae64
 <31488909+miss-islington@users.noreply.github.com>
42ae64
Date: Mon, 7 Nov 2022 19:22:14 -0800
42ae64
Subject: [PATCH] 
42ae64
 00394-cve-2022-45061-cpu-denial-of-service-via-inefficient-idna-decoder.patch
42ae64
42ae64
00394 #
42ae64
gh-98433: Fix quadratic time idna decoding.
42ae64
42ae64
There was an unnecessary quadratic loop in idna decoding. This restores
42ae64
the behavior to linear.
42ae64
42ae64
Backported from python3.
42ae64
42ae64
(cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15)
42ae64
42ae64
Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
42ae64
Co-authored-by: Gregory P. Smith <greg@krypto.org>
42ae64
---
42ae64
 Lib/encodings/idna.py                         | 32 +++++++++----------
42ae64
 Lib/test/test_codecs.py                       |  6 ++++
42ae64
 ...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst |  6 ++++
42ae64
 3 files changed, 27 insertions(+), 17 deletions(-)
42ae64
 create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
42ae64
42ae64
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
42ae64
index ea90d67142f..2ce798cf47e 100644
42ae64
--- a/Lib/encodings/idna.py
42ae64
+++ b/Lib/encodings/idna.py
42ae64
@@ -39,23 +39,21 @@ def nameprep(label):
42ae64
 
42ae64
     # Check bidi
42ae64
     RandAL = map(stringprep.in_table_d1, label)
42ae64
-    for c in RandAL:
42ae64
-        if c:
42ae64
-            # There is a RandAL char in the string. Must perform further
42ae64
-            # tests:
42ae64
-            # 1) The characters in section 5.8 MUST be prohibited.
42ae64
-            # This is table C.8, which was already checked
42ae64
-            # 2) If a string contains any RandALCat character, the string
42ae64
-            # MUST NOT contain any LCat character.
42ae64
-            if filter(stringprep.in_table_d2, label):
42ae64
-                raise UnicodeError("Violation of BIDI requirement 2")
42ae64
-
42ae64
-            # 3) If a string contains any RandALCat character, a
42ae64
-            # RandALCat character MUST be the first character of the
42ae64
-            # string, and a RandALCat character MUST be the last
42ae64
-            # character of the string.
42ae64
-            if not RandAL[0] or not RandAL[-1]:
42ae64
-                raise UnicodeError("Violation of BIDI requirement 3")
42ae64
+    if any(RandAL):
42ae64
+        # There is a RandAL char in the string. Must perform further
42ae64
+        # tests:
42ae64
+        # 1) The characters in section 5.8 MUST be prohibited.
42ae64
+        # This is table C.8, which was already checked
42ae64
+        # 2) If a string contains any RandALCat character, the string
42ae64
+        # MUST NOT contain any LCat character.
42ae64
+        if any(stringprep.in_table_d2(x) for x in label):
42ae64
+            raise UnicodeError("Violation of BIDI requirement 2")
42ae64
+        # 3) If a string contains any RandALCat character, a
42ae64
+        # RandALCat character MUST be the first character of the
42ae64
+        # string, and a RandALCat character MUST be the last
42ae64
+        # character of the string.
42ae64
+        if not RandAL[0] or not RandAL[-1]:
42ae64
+            raise UnicodeError("Violation of BIDI requirement 3")
42ae64
 
42ae64
     return label
42ae64
 
42ae64
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
42ae64
index 0ec8bf5a4b4..76428e1794a 100644
42ae64
--- a/Lib/test/test_codecs.py
42ae64
+++ b/Lib/test/test_codecs.py
42ae64
@@ -1318,6 +1318,12 @@ class IDNACodecTest(unittest.TestCase):
42ae64
         self.assertEqual(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
42ae64
         self.assertEqual(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
42ae64
 
42ae64
+    def test_builtin_decode_length_limit(self):
42ae64
+        with self.assertRaisesRegexp(UnicodeError, "too long"):
42ae64
+            (b"xn--016c"+b"a"*1100).decode("idna")
42ae64
+        with self.assertRaisesRegexp(UnicodeError, "too long"):
42ae64
+            (b"xn--016c"+b"a"*70).decode("idna")
42ae64
+
42ae64
     def test_stream(self):
42ae64
         import StringIO
42ae64
         r = codecs.getreader("idna")(StringIO.StringIO("abc"))
42ae64
diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
42ae64
new file mode 100644
42ae64
index 00000000000..5185fac2e29
42ae64
--- /dev/null
42ae64
+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
42ae64
@@ -0,0 +1,6 @@
42ae64
+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
42ae64
+related name resolution functions no longer involves a quadratic algorithm.
42ae64
+This prevents a potential CPU denial of service if an out-of-spec excessive
42ae64
+length hostname involving bidirectional characters were decoded. Some protocols
42ae64
+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker
42ae64
+to supply such a name.