e4a5b5
From 2d4a3fee6de2fa45eb82169361918f759269b4ec Mon Sep 17 00:00:00 2001
e4a5b5
From: Seth Michael Larson <sethmichaellarson@gmail.com>
e4a5b5
Date: Wed, 26 May 2021 10:43:12 -0500
e4a5b5
Subject: [PATCH] Improve performance of sub-authority splitting in URL
e4a5b5
e4a5b5
---
e4a5b5
 src/urllib3/util/url.py |  8 +++++---
e4a5b5
 test/test_util.py       | 10 ++++++++++
e4a5b5
 2 files changed, 15 insertions(+), 3 deletions(-)
e4a5b5
e4a5b5
diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
e4a5b5
index 6ff238fe3c..81a03da9e3 100644
e4a5b5
--- a/src/urllib3/util/url.py
e4a5b5
+++ b/src/urllib3/util/url.py
e4a5b5
@@ -63,12 +63,12 @@
e4a5b5
 BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
e4a5b5
 ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
e4a5b5
 
e4a5b5
-SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
e4a5b5
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
e4a5b5
     REG_NAME_PAT,
e4a5b5
     IPV4_PAT,
e4a5b5
     IPV6_ADDRZ_PAT,
e4a5b5
 )
e4a5b5
-SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
e4a5b5
+_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
e4a5b5
 
e4a5b5
 UNRESERVED_CHARS = set(
e4a5b5
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
e4a5b5
@@ -365,7 +365,9 @@ def parse_url(url):
e4a5b5
             scheme = scheme.lower()
e4a5b5
 
e4a5b5
         if authority:
e4a5b5
-            auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
e4a5b5
+            auth, _, host_port = authority.rpartition("@")
e4a5b5
+            auth = auth or None
e4a5b5
+            host, port = _HOST_PORT_RE.match(host_port).groups()
e4a5b5
             if auth and normalize_uri:
e4a5b5
                 auth = _encode_invalid_chars(auth, USERINFO_CHARS)
e4a5b5
             if port == "":
e4a5b5
diff --git a/test/test_util.py b/test/test_util.py
e4a5b5
index a5b68a084b..88409e2d6c 100644
e4a5b5
--- a/test/test_util.py
e4a5b5
+++ b/test/test_util.py
e4a5b5
@@ -438,6 +438,16 @@ def test_netloc(self, url, expected_netloc):
e4a5b5
                 fragment="hash",
e4a5b5
             ),
e4a5b5
         ),
e4a5b5
+        # Tons of '@' causing backtracking
e4a5b5
+        ("https://" + ("@" * 10000) + "[", False),
e4a5b5
+        (
e4a5b5
+            "https://user:" + ("@" * 10000) + "example.com",
e4a5b5
+            Url(
e4a5b5
+                scheme="https",
e4a5b5
+                auth="user:" + ("%40" * 9999),
e4a5b5
+                host="example.com",
e4a5b5
+            ),
e4a5b5
+        ),
e4a5b5
     ]
e4a5b5
 
e4a5b5
     @pytest.mark.parametrize("url, expected_url", url_vulnerabilities)