Blob Blame History Raw
From fae3baa78626f420a963abcd7426092423a2b71b Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Mon, 29 Aug 2022 12:28:52 -0400
Subject: [PATCH] fix tag regexp to match quoted groups correctly

Fixed issue in lexer where the regexp used to match tags would not
correctly interpret quoted sections individually. While this parsing issue
still produced the same expected tag structure later on, the mishandling
of quoted sections could also trigger a regexp crash if a tag had a large
number of quotes within its quoted sections.

Fixes: #366
Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0
---
 doc/build/unreleased/366.rst |  9 +++++++++
 mako/lexer.py                | 13 +++++++++----
 test/test_lexer.py           |  4 ++++
 3 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 doc/build/unreleased/366.rst

diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst
new file mode 100644
index 0000000..27b0278
--- /dev/null
+++ b/doc/build/unreleased/366.rst
@@ -0,0 +1,9 @@
+.. change::
+    :tags: bug, lexer
+    :tickets: 366
+
+    Fixed issue in lexer where the regexp used to match tags would not
+    correctly interpret quoted sections individually. While this parsing issue
+    still produced the same expected tag structure later on, the mishandling
+    of quoted sections could also trigger a regexp crash if a tag had a large
+    number of quotes within its quoted sections.
\ No newline at end of file
diff --git a/mako/lexer.py b/mako/lexer.py
index cf4187f..2224e77 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -271,21 +271,26 @@ class Lexer(object):
         return self.template
 
     def match_tag_start(self):
-        match = self.match(r'''
+        reg = r"""
             \<%     # opening tag
 
             ([\w\.\:]+)   # keyword
 
-            ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*)  # attrname, = \
+            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                                #        sign, string expression
+                                               # comma is for backwards compat
+                                               # identified in #366
 
             \s*     # more whitespace
 
             (/)?>   # closing
 
-            ''',
+        """
 
-                           re.I | re.S | re.X)
+        match = self.match(
+            reg,
+            re.I | re.S | re.X
+        )
 
         if match:
             keyword, attr, isend = match.groups()
diff --git a/test/test_lexer.py b/test/test_lexer.py
index 06ebb05..bcf787e 100644
--- a/test/test_lexer.py
+++ b/test/test_lexer.py
@@ -105,6 +105,10 @@ class LexerTest(TemplateTest):
         self.assertRaises(exceptions.CompileException,
                           Lexer(template).parse)
 
+    def test_tag_many_quotes(self):
+        template = "<%0" + '"' * 3000
+        self.assertRaises(exceptions.SyntaxException, Lexer(template).parse)
+
     def test_unmatched_tag(self):
         template = \
             """
-- 
2.39.0