Blame SOURCES/CVE-2020-28493.patch

678512
diff --git a/jinja2/utils.py b/jinja2/utils.py
678512
index 49e9e9a..0e67d88 100644
678512
--- a/jinja2/utils.py
678512
+++ b/jinja2/utils.py
678512
@@ -11,6 +11,8 @@
678512
 import re
678512
 import sys
678512
 import errno
678512
+from string import ascii_letters as _letters
678512
+from string import digits as _digits
678512
 try:
678512
     from thread import allocate_lock
678512
 except ImportError:
678512
@@ -19,19 +21,6 @@ from collections import deque
678512
 from itertools import imap
678512
 
678512
 
678512
-_word_split_re = re.compile(r'(\s+)')
678512
-_punctuation_re = re.compile(
678512
-    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
678512
-        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
678512
-        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
678512
-    )
678512
-)
678512
-_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
678512
-_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
678512
-_entity_re = re.compile(r'&([^;]+);')
678512
-_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
678512
-_digits = '0123456789'
678512
-
678512
 # special singleton representing missing values for the runtime
678512
 missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
678512
 
678512
@@ -271,32 +260,61 @@ def urlize(text, trim_url_limit=None, nofollow=False):
678512
     trim_url = lambda x, limit=trim_url_limit: limit is not None \
678512
                          and (x[:limit] + (len(x) >=limit and '...'
678512
                          or '')) or x
678512
-    words = _word_split_re.split(unicode(escape(text)))
678512
+    words = re.split(r"(\s+)", unicode(escape(text)))
678512
     nofollow_attr = nofollow and ' rel="nofollow"' or ''
678512
     for i, word in enumerate(words):
678512
-        match = _punctuation_re.match(word)
678512
+        head, middle, tail = "", word, ""
678512
+        match = re.match(r"^([(<]|&lt;)+", middle)
678512
+
678512
         if match:
678512
-            lead, middle, trail = match.groups()
678512
-            if middle.startswith('www.') or (
678512
-                '@' not in middle and
678512
-                not middle.startswith('http://') and
678512
-                len(middle) > 0 and
678512
-                middle[0] in _letters + _digits and (
678512
-                    middle.endswith('.org') or
678512
-                    middle.endswith('.net') or
678512
-                    middle.endswith('.com')
678512
-                )):
678512
-                middle = '<a href="http://%s"%s>%s</a>' % (middle,
678512
-                    nofollow_attr, trim_url(middle))
678512
-            if middle.startswith('http://') or \
678512
-               middle.startswith('https://'):
678512
-                middle = '<a href="%s"%s>%s</a>' % (middle,
678512
-                    nofollow_attr, trim_url(middle))
678512
-            if '@' in middle and not middle.startswith('www.') and \
678512
-               not ':' in middle and _simple_email_re.match(middle):
678512
-                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
678512
-            if lead + middle + trail != word:
678512
-                words[i] = lead + middle + trail
678512
+            head = match.group()
678512
+            middle = middle[match.end() :]
678512
+
678512
+        # Unlike lead, which is anchored to the start of the string,
678512
+        # need to check that the string ends with any of the characters
678512
+        # before trying to match all of them, to avoid backtracking.
678512
+        if middle.endswith((")", ">", ".", ",", "\n", "&gt;")):
678512
+            match = re.search(r"([)>.,\n]|&gt;)+$", middle)
678512
+
678512
+            if match:
678512
+                tail = match.group()
678512
+                middle = middle[: match.start()]
678512
+
678512
+        if middle.startswith("www.") or (
678512
+            "@" not in middle
678512
+            and not middle.startswith("http://")
678512
+            and not middle.startswith("https://")
678512
+            and len(middle) > 0
678512
+            and middle[0] in _letters + _digits
678512
+            and (
678512
+                middle.endswith(".org")
678512
+                or middle.endswith(".net")
678512
+                or middle.endswith(".com")
678512
+            )
678512
+        ):
678512
+            middle = '<a href="http://%s"%s>%s</a>' % (
678512
+                middle,
678512
+                nofollow_attr,
678512
+                trim_url(middle),
678512
+            )
678512
+
678512
+        if middle.startswith("http://") or middle.startswith("https://"):
678512
+            middle = '<a href="%s"%s>%s</a>' % (
678512
+                middle,
678512
+                nofollow_attr,
678512
+                trim_url(middle),
678512
+            )
678512
+
678512
+        if (
678512
+            "@" in middle
678512
+            and not middle.startswith("www.")
678512
+            and ":" not in middle
678512
+            and re.match(r"^\S+@\w[\w.-]*\.\w+$", middle)
678512
+        ):
678512
+            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
678512
+
678512
+        words[i] = head + middle + tail
678512
+
678512
     return u''.join(words)
678512
 
678512