|
|
191adc |
From 93c413c5fac105d90f77ab5d03e31e0f64fc6142 Mon Sep 17 00:00:00 2001
|
|
|
191adc |
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
191adc |
Date: Tue, 27 May 2014 13:18:31 +0000
|
|
|
191adc |
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
|
|
|
191adc |
MIME-Version: 1.0
|
|
|
191adc |
Content-Type: text/plain; charset=UTF-8
|
|
|
191adc |
Content-Transfer-Encoding: 8bit
|
|
|
191adc |
|
|
|
191adc |
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
|
|
191adc |
|
|
|
191adc |
Petr Pisar: Ported to 8.33.
|
|
|
191adc |
|
|
|
191adc |
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
|
191adc |
---
|
|
|
191adc |
pcre_exec.c | 41 ++++++++++++++++++++++++++---------------
|
|
|
191adc |
testdata/testinput1 | 9 +++++++++
|
|
|
191adc |
testdata/testinput8 | 6 ++++++
|
|
|
191adc |
testdata/testoutput1 | 12 ++++++++++++
|
|
|
191adc |
testdata/testoutput8 | 8 ++++++++
|
|
|
191adc |
5 files changed, 61 insertions(+), 15 deletions(-)
|
|
|
191adc |
|
|
|
191adc |
diff --git a/pcre_exec.c b/pcre_exec.c
|
|
|
191adc |
index ab76d02..481e899 100644
|
|
|
191adc |
--- a/pcre_exec.c
|
|
|
191adc |
+++ b/pcre_exec.c
|
|
|
191adc |
@@ -1169,10 +1169,15 @@ for (;;)
|
|
|
191adc |
if (rrc == MATCH_KETRPOS)
|
|
|
191adc |
{
|
|
|
191adc |
offset_top = md->end_offset_top;
|
|
|
191adc |
- eptr = md->end_match_ptr;
|
|
|
191adc |
ecode = md->start_code + code_offset;
|
|
|
191adc |
save_capture_last = md->capture_last;
|
|
|
191adc |
matched_once = TRUE;
|
|
|
191adc |
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
|
|
|
191adc |
+ {
|
|
|
191adc |
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
|
|
191adc |
+ break;
|
|
|
191adc |
+ }
|
|
|
191adc |
+ eptr = md->end_match_ptr;
|
|
|
191adc |
continue;
|
|
|
191adc |
}
|
|
|
191adc |
|
|
|
191adc |
@@ -1242,9 +1247,14 @@ for (;;)
|
|
|
191adc |
if (rrc == MATCH_KETRPOS)
|
|
|
191adc |
{
|
|
|
191adc |
offset_top = md->end_offset_top;
|
|
|
191adc |
- eptr = md->end_match_ptr;
|
|
|
191adc |
ecode = md->start_code + code_offset;
|
|
|
191adc |
matched_once = TRUE;
|
|
|
191adc |
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
|
|
|
191adc |
+ {
|
|
|
191adc |
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
|
|
191adc |
+ break;
|
|
|
191adc |
+ }
|
|
|
191adc |
+ eptr = md->end_match_ptr;
|
|
|
191adc |
continue;
|
|
|
191adc |
}
|
|
|
191adc |
|
|
|
191adc |
@@ -1976,7 +1986,7 @@ for (;;)
|
|
|
191adc |
case OP_KETRMAX:
|
|
|
191adc |
case OP_KETRPOS:
|
|
|
191adc |
prev = ecode - GET(ecode, 1);
|
|
|
191adc |
-
|
|
|
191adc |
+
|
|
|
191adc |
/* If this was a group that remembered the subject start, in order to break
|
|
|
191adc |
infinite repeats of empty string matches, retrieve the subject start from
|
|
|
191adc |
the chain. Otherwise, set it NULL. */
|
|
|
191adc |
@@ -2001,7 +2011,7 @@ for (;;)
|
|
|
191adc |
md->start_match_ptr = mstart;
|
|
|
191adc |
RRETURN(MATCH_MATCH); /* Sets md->mark */
|
|
|
191adc |
}
|
|
|
191adc |
-
|
|
|
191adc |
+
|
|
|
191adc |
/* For capturing groups we have to check the group number back at the start
|
|
|
191adc |
and if necessary complete handling an extraction by setting the offsets and
|
|
|
191adc |
bumping the high water mark. Whole-pattern recursion is coded as a recurse
|
|
|
191adc |
@@ -2061,6 +2071,18 @@ for (;;)
|
|
|
191adc |
}
|
|
|
191adc |
}
|
|
|
191adc |
|
|
|
191adc |
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
|
|
191adc |
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
|
|
191adc |
+ at a time from the outer level, thus saving stack. This must precede the
|
|
|
191adc |
+ empty string test - in this case that test is done at the outer level. */
|
|
|
191adc |
+
|
|
|
191adc |
+ if (*ecode == OP_KETRPOS)
|
|
|
191adc |
+ {
|
|
|
191adc |
+ md->end_match_ptr = eptr;
|
|
|
191adc |
+ md->end_offset_top = offset_top;
|
|
|
191adc |
+ RRETURN(MATCH_KETRPOS);
|
|
|
191adc |
+ }
|
|
|
191adc |
+
|
|
|
191adc |
/* For an ordinary non-repeating ket, just continue at this level. This
|
|
|
191adc |
also happens for a repeating ket if no characters were matched in the
|
|
|
191adc |
group. This is the forcible breaking of infinite loops as implemented in
|
|
|
191adc |
@@ -2083,17 +2105,6 @@ for (;;)
|
|
|
191adc |
break;
|
|
|
191adc |
}
|
|
|
191adc |
|
|
|
191adc |
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
|
|
191adc |
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
|
|
191adc |
- at a time from the outer level, thus saving stack. */
|
|
|
191adc |
-
|
|
|
191adc |
- if (*ecode == OP_KETRPOS)
|
|
|
191adc |
- {
|
|
|
191adc |
- md->end_match_ptr = eptr;
|
|
|
191adc |
- md->end_offset_top = offset_top;
|
|
|
191adc |
- RRETURN(MATCH_KETRPOS);
|
|
|
191adc |
- }
|
|
|
191adc |
-
|
|
|
191adc |
/* The normal repeating kets try the rest of the pattern or restart from
|
|
|
191adc |
the preceding bracket, in the appropriate order. In the second case, we can
|
|
|
191adc |
use tail recursion to avoid using another stack frame, unless we have an
|
|
|
191adc |
diff --git a/testdata/testinput1 b/testdata/testinput1
|
|
|
191adc |
index d77d8ac..6bde9ec 100644
|
|
|
191adc |
--- a/testdata/testinput1
|
|
|
191adc |
+++ b/testdata/testinput1
|
|
|
191adc |
@@ -5614,4 +5614,13 @@ AbcdCBefgBhiBqz
|
|
|
191adc |
/[\Q]a\E]+/
|
|
|
191adc |
aa]]
|
|
|
191adc |
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+
|
|
|
191adc |
/-- End of testinput1 --/
|
|
|
191adc |
diff --git a/testdata/testinput8 b/testdata/testinput8
|
|
|
191adc |
index d91013b..98a0b38 100644
|
|
|
191adc |
--- a/testdata/testinput8
|
|
|
191adc |
+++ b/testdata/testinput8
|
|
|
191adc |
@@ -4801,4 +4801,10 @@
|
|
|
191adc |
/abcd/
|
|
|
191adc |
abcd\O0
|
|
|
191adc |
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+
|
|
|
191adc |
/-- End of testinput8 --/
|
|
|
191adc |
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
|
|
191adc |
index 1b0b8dc..cb9592d 100644
|
|
|
191adc |
--- a/testdata/testoutput1
|
|
|
191adc |
+++ b/testdata/testoutput1
|
|
|
191adc |
@@ -9208,4 +9208,16 @@ No match
|
|
|
191adc |
aa]]
|
|
|
191adc |
0: aa]]
|
|
|
191adc |
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
|
191adc |
+
|
|
|
191adc |
/-- End of testinput1 --/
|
|
|
191adc |
diff --git a/testdata/testoutput8 b/testdata/testoutput8
|
|
|
191adc |
index 75affbe..666b67e 100644
|
|
|
191adc |
--- a/testdata/testoutput8
|
|
|
191adc |
+++ b/testdata/testoutput8
|
|
|
191adc |
@@ -8020,4 +8020,12 @@ Error -30 (invalid data in workspace for DFA restart)
|
|
|
191adc |
abcd\O0
|
|
|
191adc |
Matched, but offsets vector is too small to show all matches
|
|
|
191adc |
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
|
191adc |
+
|
|
|
191adc |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
|
191adc |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
|
191adc |
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
|
191adc |
+
|
|
|
191adc |
/-- End of testinput8 --/
|
|
|
191adc |
--
|
|
|
191adc |
1.9.3
|
|
|
191adc |
|