Blame SOURCES/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch

cb67f2
From 93c413c5fac105d90f77ab5d03e31e0f64fc6142 Mon Sep 17 00:00:00 2001
cb67f2
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
cb67f2
Date: Tue, 27 May 2014 13:18:31 +0000
cb67f2
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
cb67f2
MIME-Version: 1.0
cb67f2
Content-Type: text/plain; charset=UTF-8
cb67f2
Content-Transfer-Encoding: 8bit
cb67f2
cb67f2
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
cb67f2
cb67f2
Petr Pisar: Ported to 8.33.
cb67f2
cb67f2
Signed-off-by: Petr Písař <ppisar@redhat.com>
cb67f2
---
cb67f2
 pcre_exec.c          | 41 ++++++++++++++++++++++++++---------------
cb67f2
 testdata/testinput1  |  9 +++++++++
cb67f2
 testdata/testinput8  |  6 ++++++
cb67f2
 testdata/testoutput1 | 12 ++++++++++++
cb67f2
 testdata/testoutput8 |  8 ++++++++
cb67f2
 5 files changed, 61 insertions(+), 15 deletions(-)
cb67f2
cb67f2
diff --git a/pcre_exec.c b/pcre_exec.c
cb67f2
index ab76d02..481e899 100644
cb67f2
--- a/pcre_exec.c
cb67f2
+++ b/pcre_exec.c
cb67f2
@@ -1169,10 +1169,15 @@ for (;;)
cb67f2
         if (rrc == MATCH_KETRPOS)
cb67f2
           {
cb67f2
           offset_top = md->end_offset_top;
cb67f2
-          eptr = md->end_match_ptr;
cb67f2
           ecode = md->start_code + code_offset;
cb67f2
           save_capture_last = md->capture_last;
cb67f2
           matched_once = TRUE;
cb67f2
+          if (eptr == md->end_match_ptr)   /* Matched an empty string */
cb67f2
+            {
cb67f2
+            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
cb67f2
+            break;
cb67f2
+            }  
cb67f2
+          eptr = md->end_match_ptr;
cb67f2
           continue;
cb67f2
           }
cb67f2
 
cb67f2
@@ -1242,9 +1247,14 @@ for (;;)
cb67f2
       if (rrc == MATCH_KETRPOS)
cb67f2
         {
cb67f2
         offset_top = md->end_offset_top;
cb67f2
-        eptr = md->end_match_ptr;
cb67f2
         ecode = md->start_code + code_offset;
cb67f2
         matched_once = TRUE;
cb67f2
+        if (eptr == md->end_match_ptr)  /* Matched an empty string */
cb67f2
+          {
cb67f2
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
cb67f2
+          break;
cb67f2
+          }  
cb67f2
+        eptr = md->end_match_ptr;
cb67f2
         continue;
cb67f2
         }
cb67f2
 
cb67f2
@@ -1976,7 +1986,7 @@ for (;;)
cb67f2
     case OP_KETRMAX:
cb67f2
     case OP_KETRPOS:
cb67f2
     prev = ecode - GET(ecode, 1);
cb67f2
-
cb67f2
+    
cb67f2
     /* If this was a group that remembered the subject start, in order to break
cb67f2
     infinite repeats of empty string matches, retrieve the subject start from
cb67f2
     the chain. Otherwise, set it NULL. */
cb67f2
@@ -2001,7 +2011,7 @@ for (;;)
cb67f2
       md->start_match_ptr = mstart;
cb67f2
       RRETURN(MATCH_MATCH);         /* Sets md->mark */
cb67f2
       }
cb67f2
-
cb67f2
+      
cb67f2
     /* For capturing groups we have to check the group number back at the start
cb67f2
     and if necessary complete handling an extraction by setting the offsets and
cb67f2
     bumping the high water mark. Whole-pattern recursion is coded as a recurse
cb67f2
@@ -2061,6 +2071,18 @@ for (;;)
cb67f2
         }
cb67f2
       }
cb67f2
 
cb67f2
+    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
cb67f2
+    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
cb67f2
+    at a time from the outer level, thus saving stack. This must precede the 
cb67f2
+    empty string test - in this case that test is done at the outer level. */
cb67f2
+
cb67f2
+    if (*ecode == OP_KETRPOS)
cb67f2
+      {
cb67f2
+      md->end_match_ptr = eptr;
cb67f2
+      md->end_offset_top = offset_top;
cb67f2
+      RRETURN(MATCH_KETRPOS);
cb67f2
+      }
cb67f2
+
cb67f2
     /* For an ordinary non-repeating ket, just continue at this level. This
cb67f2
     also happens for a repeating ket if no characters were matched in the
cb67f2
     group. This is the forcible breaking of infinite loops as implemented in
cb67f2
@@ -2083,17 +2105,6 @@ for (;;)
cb67f2
       break;
cb67f2
       }
cb67f2
 
cb67f2
-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
cb67f2
-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
cb67f2
-    at a time from the outer level, thus saving stack. */
cb67f2
-
cb67f2
-    if (*ecode == OP_KETRPOS)
cb67f2
-      {
cb67f2
-      md->end_match_ptr = eptr;
cb67f2
-      md->end_offset_top = offset_top;
cb67f2
-      RRETURN(MATCH_KETRPOS);
cb67f2
-      }
cb67f2
-
cb67f2
     /* The normal repeating kets try the rest of the pattern or restart from
cb67f2
     the preceding bracket, in the appropriate order. In the second case, we can
cb67f2
     use tail recursion to avoid using another stack frame, unless we have an
cb67f2
diff --git a/testdata/testinput1 b/testdata/testinput1
cb67f2
index d77d8ac..6bde9ec 100644
cb67f2
--- a/testdata/testinput1
cb67f2
+++ b/testdata/testinput1
cb67f2
@@ -5614,4 +5614,13 @@ AbcdCBefgBhiBqz
cb67f2
 /[\Q]a\E]+/
cb67f2
     aa]]
cb67f2
 
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+
cb67f2
 /-- End of testinput1 --/
cb67f2
diff --git a/testdata/testinput8 b/testdata/testinput8
cb67f2
index d91013b..98a0b38 100644
cb67f2
--- a/testdata/testinput8
cb67f2
+++ b/testdata/testinput8
cb67f2
@@ -4801,4 +4801,10 @@
cb67f2
 /abcd/
cb67f2
     abcd\O0
cb67f2
 
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+
cb67f2
 /-- End of testinput8 --/
cb67f2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
cb67f2
index 1b0b8dc..cb9592d 100644
cb67f2
--- a/testdata/testoutput1
cb67f2
+++ b/testdata/testoutput1
cb67f2
@@ -9208,4 +9208,16 @@ No match
cb67f2
     aa]]
cb67f2
  0: aa]]
cb67f2
 
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+ 0: NON QUOTED "QUOT""ED" AFTER 
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+ 0: NON QUOTED "QUOT""ED" AFTER 
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+ 0: NON QUOTED "QUOT""ED" AFTER 
cb67f2
+
cb67f2
 /-- End of testinput1 --/
cb67f2
diff --git a/testdata/testoutput8 b/testdata/testoutput8
cb67f2
index 75affbe..666b67e 100644
cb67f2
--- a/testdata/testoutput8
cb67f2
+++ b/testdata/testoutput8
cb67f2
@@ -8020,4 +8020,12 @@ Error -30 (invalid data in workspace for DFA restart)
cb67f2
     abcd\O0
cb67f2
 Matched, but offsets vector is too small to show all matches
cb67f2
 
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+ 0: NON QUOTED "QUOT""ED" AFTER 
cb67f2
+
cb67f2
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
cb67f2
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
cb67f2
+ 0: NON QUOTED "QUOT""ED" AFTER 
cb67f2
+
cb67f2
 /-- End of testinput8 --/
cb67f2
-- 
cb67f2
1.9.3
cb67f2