7f6e73
From 2b76a5a3aa898fd1621c72c6da935cddfb484424 Mon Sep 17 00:00:00 2001
7f6e73
From: Lumir Balhar <lbalhar@redhat.com>
7f6e73
Date: Fri, 12 Mar 2021 14:34:06 +0100
7f6e73
Subject: [PATCH] CVE-2020-28493
7f6e73
7f6e73
---
7f6e73
 Jinja2-2.10.1/jinja2/utils.py | 94 +++++++++++++++++++++--------------
7f6e73
 1 file changed, 56 insertions(+), 38 deletions(-)
7f6e73
7f6e73
diff --git a/Jinja2-2.10.1/jinja2/utils.py b/Jinja2-2.10.1/jinja2/utils.py
7f6e73
index 502a311..25dd78f 100644
7f6e73
--- a/Jinja2-2.10.1/jinja2/utils.py
7f6e73
+++ b/Jinja2-2.10.1/jinja2/utils.py
7f6e73
@@ -12,24 +12,12 @@ import re
7f6e73
 import json
7f6e73
 import errno
7f6e73
 from collections import deque
7f6e73
+from string import ascii_letters as _letters
7f6e73
+from string import digits as _digits
7f6e73
 from threading import Lock
7f6e73
 from jinja2._compat import text_type, string_types, implements_iterator, \
7f6e73
      url_quote
7f6e73
 
7f6e73
-
7f6e73
-_word_split_re = re.compile(r'(\s+)')
7f6e73
-_punctuation_re = re.compile(
7f6e73
-    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
7f6e73
-        '|'.join(map(re.escape, ('(', '<', '&lt;'))),
7f6e73
-        '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
7f6e73
-    )
7f6e73
-)
7f6e73
-_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
7f6e73
-_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
7f6e73
-_entity_re = re.compile(r'&([^;]+);')
7f6e73
-_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
7f6e73
-_digits = '0123456789'
7f6e73
-
7f6e73
 # special singleton representing missing values for the runtime
7f6e73
 missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
7f6e73
 
7f6e73
@@ -203,35 +191,65 @@ def urlize(text, trim_url_limit=None, rel=None, target=None):
7f6e73
     trim_url = lambda x, limit=trim_url_limit: limit is not None \
7f6e73
                          and (x[:limit] + (len(x) >=limit and '...'
7f6e73
                          or '')) or x
7f6e73
-    words = _word_split_re.split(text_type(escape(text)))
7f6e73
+    words = re.split(r"(\s+)", text_type(escape(text)))
7f6e73
     rel_attr = rel and ' rel="%s"' % text_type(escape(rel)) or ''
7f6e73
     target_attr = target and ' target="%s"' % escape(target) or ''
7f6e73
 
7f6e73
     for i, word in enumerate(words):
7f6e73
-        match = _punctuation_re.match(word)
7f6e73
+        head, middle, tail = "", word, ""
7f6e73
+        match = re.match(r"^([(<]|&lt;)+", middle)
7f6e73
+
7f6e73
         if match:
7f6e73
-            lead, middle, trail = match.groups()
7f6e73
-            if middle.startswith('www.') or (
7f6e73
-                '@' not in middle and
7f6e73
-                not middle.startswith('http://') and
7f6e73
-                not middle.startswith('https://') and
7f6e73
-                len(middle) > 0 and
7f6e73
-                middle[0] in _letters + _digits and (
7f6e73
-                    middle.endswith('.org') or
7f6e73
-                    middle.endswith('.net') or
7f6e73
-                    middle.endswith('.com')
7f6e73
-                )):
7f6e73
-                middle = '<a href="http://%s"%s%s>%s</a>' % (middle,
7f6e73
-                    rel_attr, target_attr, trim_url(middle))
7f6e73
-            if middle.startswith('http://') or \
7f6e73
-               middle.startswith('https://'):
7f6e73
-                middle = '<a href="%s"%s%s>%s</a>' % (middle,
7f6e73
-                    rel_attr, target_attr, trim_url(middle))
7f6e73
-            if '@' in middle and not middle.startswith('www.') and \
7f6e73
-               not ':' in middle and _simple_email_re.match(middle):
7f6e73
-                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
7f6e73
-            if lead + middle + trail != word:
7f6e73
-                words[i] = lead + middle + trail
7f6e73
+            head = match.group()
7f6e73
+            middle = middle[match.end() :]
7f6e73
+
7f6e73
+        # Unlike lead, which is anchored to the start of the string,
7f6e73
+        # need to check that the string ends with any of the characters
7f6e73
+        # before trying to match all of them, to avoid backtracking.
7f6e73
+        if middle.endswith((")", ">", ".", ",", "\n", "&gt;")):
7f6e73
+            match = re.search(r"([)>.,\n]|&gt;)+$", middle)
7f6e73
+
7f6e73
+            if match:
7f6e73
+                tail = match.group()
7f6e73
+                middle = middle[: match.start()]
7f6e73
+
7f6e73
+        if middle.startswith("www.") or (
7f6e73
+            "@" not in middle
7f6e73
+            and not middle.startswith("http://")
7f6e73
+            and not middle.startswith("https://")
7f6e73
+            and len(middle) > 0
7f6e73
+            and middle[0] in _letters + _digits
7f6e73
+            and (
7f6e73
+                middle.endswith(".org")
7f6e73
+                or middle.endswith(".net")
7f6e73
+                or middle.endswith(".com")
7f6e73
+            )
7f6e73
+        ):
7f6e73
+            middle = '<a href="http://%s"%s%s>%s</a>' % (
7f6e73
+                middle,
7f6e73
+                rel_attr,
7f6e73
+                target_attr,
7f6e73
+                trim_url(middle),
7f6e73
+            )
7f6e73
+
7f6e73
+        if middle.startswith("http://") or middle.startswith("https://"):
7f6e73
+            middle = '<a href="%s"%s%s>%s</a>' % (
7f6e73
+                middle,
7f6e73
+                rel_attr,
7f6e73
+                target_attr,
7f6e73
+                trim_url(middle),
7f6e73
+            )
7f6e73
+
7f6e73
+        if (
7f6e73
+            "@" in middle
7f6e73
+            and not middle.startswith("www.")
7f6e73
+            and ":" not in middle
7f6e73
+            and re.match(r"^\S+@\w[\w.-]*\.\w+$", middle)
7f6e73
+        ):
7f6e73
+            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
7f6e73
+
7f6e73
+        words[i] = head + middle + tail
7f6e73
+
7f6e73
     return u''.join(words)
7f6e73
 
7f6e73
 
7f6e73
-- 
7f6e73
2.29.2
7f6e73