From 42d67347988a9d09b940d550f1ffa32a8d7e43b2 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Fri, 12 Mar 2021 16:04:15 +0100 Subject: [PATCH] CVE-2020-28493 --- jinja2/utils.py | 94 +++++++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/jinja2/utils.py b/jinja2/utils.py index db9c5d0..6ab77f7 100644 --- a/jinja2/utils.py +++ b/jinja2/utils.py @@ -12,24 +12,12 @@ import re import json import errno from collections import deque +from string import ascii_letters as _letters +from string import digits as _digits from threading import Lock from jinja2._compat import text_type, string_types, implements_iterator, \ url_quote, abc - -_word_split_re = re.compile(r'(\s+)') -_punctuation_re = re.compile( - '^(?P(?:%s)*)(?P.*?)(?P(?:%s)*)$' % ( - '|'.join(map(re.escape, ('(', '<', '<'))), - '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '>'))) - ) -) -_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') -_striptags_re = re.compile(r'(|<[^>]*>)') -_entity_re = re.compile(r'&([^;]+);') -_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' -_digits = '0123456789' - # special singleton representing missing values for the runtime missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})() @@ -203,35 +191,65 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): trim_url = lambda x, limit=trim_url_limit: limit is not None \ and (x[:limit] + (len(x) >=limit and '...' or '')) or x - words = _word_split_re.split(text_type(escape(text))) + words = re.split(r"(\s+)", text_type(escape(text))) rel_attr = rel and ' rel="%s"' % text_type(escape(rel)) or '' target_attr = target and ' target="%s"' % escape(target) or '' for i, word in enumerate(words): - match = _punctuation_re.match(word) + head, middle, tail = "", word, "" + match = re.match(r"^([(<]|<)+", middle) + if match: - lead, middle, trail = match.groups() - if middle.startswith('www.') or ( - '@' not in middle and - not middle.startswith('http://') and - not middle.startswith('https://') and - len(middle) > 0 and - middle[0] in _letters + _digits and ( - middle.endswith('.org') or - middle.endswith('.net') or - middle.endswith('.com') - )): - middle = '%s' % (middle, - rel_attr, target_attr, trim_url(middle)) - if middle.startswith('http://') or \ - middle.startswith('https://'): - middle = '%s' % (middle, - rel_attr, target_attr, trim_url(middle)) - if '@' in middle and not middle.startswith('www.') and \ - not ':' in middle and _simple_email_re.match(middle): - middle = '%s' % (middle, middle) - if lead + middle + trail != word: - words[i] = lead + middle + trail + head = match.group() + middle = middle[match.end() :] + + # Unlike lead, which is anchored to the start of the string, + # need to check that the string ends with any of the characters + # before trying to match all of them, to avoid backtracking. + if middle.endswith((")", ">", ".", ",", "\n", ">")): + match = re.search(r"([)>.,\n]|>)+$", middle) + + if match: + tail = match.group() + middle = middle[: match.start()] + + if middle.startswith("www.") or ( + "@" not in middle + and not middle.startswith("http://") + and not middle.startswith("https://") + and len(middle) > 0 + and middle[0] in _letters + _digits + and ( + middle.endswith(".org") + or middle.endswith(".net") + or middle.endswith(".com") + ) + ): + middle = '%s' % ( + middle, + rel_attr, + target_attr, + trim_url(middle), + ) + + if middle.startswith("http://") or middle.startswith("https://"): + middle = '%s' % ( + middle, + rel_attr, + target_attr, + trim_url(middle), + ) + + if ( + "@" in middle + and not middle.startswith("www.") + and ":" not in middle + and re.match(r"^\S@\w[\w.-]*\.\w$", middle) + ): + middle = '%s' % (middle, middle) + + words[i] = head + middle + tail + return u''.join(words) -- 2.29.2