Blame SOURCES/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch

191adc
From 93c413c5fac105d90f77ab5d03e31e0f64fc6142 Mon Sep 17 00:00:00 2001
191adc
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
191adc
Date: Tue, 27 May 2014 13:18:31 +0000
191adc
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
191adc
MIME-Version: 1.0
191adc
Content-Type: text/plain; charset=UTF-8
191adc
Content-Transfer-Encoding: 8bit
191adc
191adc
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
191adc
191adc
Petr Pisar: Ported to 8.33.
191adc
191adc
Signed-off-by: Petr Písař <ppisar@redhat.com>
191adc
---
191adc
 pcre_exec.c          | 41 ++++++++++++++++++++++++++---------------
191adc
 testdata/testinput1  |  9 +++++++++
191adc
 testdata/testinput8  |  6 ++++++
191adc
 testdata/testoutput1 | 12 ++++++++++++
191adc
 testdata/testoutput8 |  8 ++++++++
191adc
 5 files changed, 61 insertions(+), 15 deletions(-)
191adc
191adc
diff --git a/pcre_exec.c b/pcre_exec.c
191adc
index ab76d02..481e899 100644
191adc
--- a/pcre_exec.c
191adc
+++ b/pcre_exec.c
191adc
@@ -1169,10 +1169,15 @@ for (;;)
191adc
         if (rrc == MATCH_KETRPOS)
191adc
           {
191adc
           offset_top = md->end_offset_top;
191adc
-          eptr = md->end_match_ptr;
191adc
           ecode = md->start_code + code_offset;
191adc
           save_capture_last = md->capture_last;
191adc
           matched_once = TRUE;
191adc
+          if (eptr == md->end_match_ptr)   /* Matched an empty string */
191adc
+            {
191adc
+            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
191adc
+            break;
191adc
+            }  
191adc
+          eptr = md->end_match_ptr;
191adc
           continue;
191adc
           }
191adc
 
191adc
@@ -1242,9 +1247,14 @@ for (;;)
191adc
       if (rrc == MATCH_KETRPOS)
191adc
         {
191adc
         offset_top = md->end_offset_top;
191adc
-        eptr = md->end_match_ptr;
191adc
         ecode = md->start_code + code_offset;
191adc
         matched_once = TRUE;
191adc
+        if (eptr == md->end_match_ptr)  /* Matched an empty string */
191adc
+          {
191adc
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
191adc
+          break;
191adc
+          }  
191adc
+        eptr = md->end_match_ptr;
191adc
         continue;
191adc
         }
191adc
 
191adc
@@ -1976,7 +1986,7 @@ for (;;)
191adc
     case OP_KETRMAX:
191adc
     case OP_KETRPOS:
191adc
     prev = ecode - GET(ecode, 1);
191adc
-
191adc
+    
191adc
     /* If this was a group that remembered the subject start, in order to break
191adc
     infinite repeats of empty string matches, retrieve the subject start from
191adc
     the chain. Otherwise, set it NULL. */
191adc
@@ -2001,7 +2011,7 @@ for (;;)
191adc
       md->start_match_ptr = mstart;
191adc
       RRETURN(MATCH_MATCH);         /* Sets md->mark */
191adc
       }
191adc
-
191adc
+      
191adc
     /* For capturing groups we have to check the group number back at the start
191adc
     and if necessary complete handling an extraction by setting the offsets and
191adc
     bumping the high water mark. Whole-pattern recursion is coded as a recurse
191adc
@@ -2061,6 +2071,18 @@ for (;;)
191adc
         }
191adc
       }
191adc
 
191adc
+    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
191adc
+    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
191adc
+    at a time from the outer level, thus saving stack. This must precede the 
191adc
+    empty string test - in this case that test is done at the outer level. */
191adc
+
191adc
+    if (*ecode == OP_KETRPOS)
191adc
+      {
191adc
+      md->end_match_ptr = eptr;
191adc
+      md->end_offset_top = offset_top;
191adc
+      RRETURN(MATCH_KETRPOS);
191adc
+      }
191adc
+
191adc
     /* For an ordinary non-repeating ket, just continue at this level. This
191adc
     also happens for a repeating ket if no characters were matched in the
191adc
     group. This is the forcible breaking of infinite loops as implemented in
191adc
@@ -2083,17 +2105,6 @@ for (;;)
191adc
       break;
191adc
       }
191adc
 
191adc
-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
191adc
-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
191adc
-    at a time from the outer level, thus saving stack. */
191adc
-
191adc
-    if (*ecode == OP_KETRPOS)
191adc
-      {
191adc
-      md->end_match_ptr = eptr;
191adc
-      md->end_offset_top = offset_top;
191adc
-      RRETURN(MATCH_KETRPOS);
191adc
-      }
191adc
-
191adc
     /* The normal repeating kets try the rest of the pattern or restart from
191adc
     the preceding bracket, in the appropriate order. In the second case, we can
191adc
     use tail recursion to avoid using another stack frame, unless we have an
191adc
diff --git a/testdata/testinput1 b/testdata/testinput1
191adc
index d77d8ac..6bde9ec 100644
191adc
--- a/testdata/testinput1
191adc
+++ b/testdata/testinput1
191adc
@@ -5614,4 +5614,13 @@ AbcdCBefgBhiBqz
191adc
 /[\Q]a\E]+/
191adc
     aa]]
191adc
 
191adc
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+
191adc
 /-- End of testinput1 --/
191adc
diff --git a/testdata/testinput8 b/testdata/testinput8
191adc
index d91013b..98a0b38 100644
191adc
--- a/testdata/testinput8
191adc
+++ b/testdata/testinput8
191adc
@@ -4801,4 +4801,10 @@
191adc
 /abcd/
191adc
     abcd\O0
191adc
 
191adc
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+
191adc
 /-- End of testinput8 --/
191adc
diff --git a/testdata/testoutput1 b/testdata/testoutput1
191adc
index 1b0b8dc..cb9592d 100644
191adc
--- a/testdata/testoutput1
191adc
+++ b/testdata/testoutput1
191adc
@@ -9208,4 +9208,16 @@ No match
191adc
     aa]]
191adc
  0: aa]]
191adc
 
191adc
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+ 0: NON QUOTED "QUOT""ED" AFTER 
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+ 0: NON QUOTED "QUOT""ED" AFTER 
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+ 0: NON QUOTED "QUOT""ED" AFTER 
191adc
+
191adc
 /-- End of testinput1 --/
191adc
diff --git a/testdata/testoutput8 b/testdata/testoutput8
191adc
index 75affbe..666b67e 100644
191adc
--- a/testdata/testoutput8
191adc
+++ b/testdata/testoutput8
191adc
@@ -8020,4 +8020,12 @@ Error -30 (invalid data in workspace for DFA restart)
191adc
     abcd\O0
191adc
 Matched, but offsets vector is too small to show all matches
191adc
 
191adc
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+ 0: NON QUOTED "QUOT""ED" AFTER 
191adc
+
191adc
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
191adc
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
191adc
+ 0: NON QUOTED "QUOT""ED" AFTER 
191adc
+
191adc
 /-- End of testinput8 --/
191adc
-- 
191adc
1.9.3
191adc