|
|
08805a |
From e3406ec06426fb9a7342541127d4c591d2446b6b Mon Sep 17 00:00:00 2001
|
|
|
08805a |
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
08805a |
Date: Fri, 5 Jul 2013 10:38:37 +0000
|
|
|
08805a |
Subject: [PATCH 1/2] Fix checking whether a group could match an empty string
|
|
|
08805a |
MIME-Version: 1.0
|
|
|
08805a |
Content-Type: text/plain; charset=UTF-8
|
|
|
08805a |
Content-Transfer-Encoding: 8bit
|
|
|
08805a |
|
|
|
08805a |
In UTF mode, the code for checking whether a group could match an empty
|
|
|
08805a |
string (which is used for indefinitely repeated groups to allow for
|
|
|
08805a |
breaking an infinite loop) was broken when the group contained a repeated
|
|
|
08805a |
negated single-character class with a character that occupied more than one
|
|
|
08805a |
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
|
|
|
08805a |
UTF-8 mode). The effect was undefined: the group might or might not be
|
|
|
08805a |
deemed as matching an empty string, or the program might have crashed.
|
|
|
08805a |
|
|
|
08805a |
Based on:
|
|
|
08805a |
|
|
|
08805a |
commit 74d96caf6251eff2f6c6a3e879268ce2d2a6c9be
|
|
|
08805a |
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
08805a |
Date: Fri Jul 5 10:38:37 2013 +0000
|
|
|
08805a |
|
|
|
08805a |
Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
|
|
|
08805a |
empty string matching.
|
|
|
08805a |
|
|
|
08805a |
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1348 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
|
|
08805a |
|
|
|
08805a |
Ported to 8.32. Needed for CVE-2015-2328 (bug #1285399).
|
|
|
08805a |
|
|
|
08805a |
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
|
08805a |
---
|
|
|
08805a |
pcre_compile.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++-----------
|
|
|
08805a |
1 file changed, 81 insertions(+), 18 deletions(-)
|
|
|
08805a |
|
|
|
08805a |
diff --git a/pcre_compile.c b/pcre_compile.c
|
|
|
08805a |
index 0de3747..ce72527 100644
|
|
|
08805a |
--- a/pcre_compile.c
|
|
|
08805a |
+++ b/pcre_compile.c
|
|
|
08805a |
@@ -2353,15 +2353,23 @@ Arguments:
|
|
|
08805a |
endcode points to where to stop
|
|
|
08805a |
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
|
|
|
08805a |
cd contains pointers to tables etc.
|
|
|
08805a |
+ recurses chain of recurse_check to catch mutual recursion
|
|
|
08805a |
|
|
|
08805a |
Returns: TRUE if what is matched could be empty
|
|
|
08805a |
*/
|
|
|
08805a |
|
|
|
08805a |
+typedef struct recurse_check {
|
|
|
08805a |
+ struct recurse_check *prev;
|
|
|
08805a |
+ const pcre_uchar *group;
|
|
|
08805a |
+} recurse_check;
|
|
|
08805a |
+
|
|
|
08805a |
static BOOL
|
|
|
08805a |
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
|
|
|
08805a |
- BOOL utf, compile_data *cd)
|
|
|
08805a |
+ BOOL utf, compile_data *cd, recurse_check *recurses)
|
|
|
08805a |
{
|
|
|
08805a |
register pcre_uchar c;
|
|
|
08805a |
+recurse_check this_recurse;
|
|
|
08805a |
+
|
|
|
08805a |
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|
|
08805a |
code < endcode;
|
|
|
08805a |
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
|
|
|
08805a |
@@ -2369,7 +2377,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|
|
08805a |
const pcre_uchar *ccode;
|
|
|
08805a |
|
|
|
08805a |
c = *code;
|
|
|
08805a |
-
|
|
|
08805a |
+
|
|
|
08805a |
/* Skip over forward assertions; the other assertions are skipped by
|
|
|
08805a |
first_significant_code() with a TRUE final argument. */
|
|
|
08805a |
|
|
|
08805a |
@@ -2389,25 +2397,50 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|
|
08805a |
|
|
|
08805a |
if (c == OP_RECURSE)
|
|
|
08805a |
{
|
|
|
08805a |
- const pcre_uchar *scode;
|
|
|
08805a |
+ const pcre_uchar *scode = cd->start_code + GET(code, 1);
|
|
|
08805a |
BOOL empty_branch;
|
|
|
08805a |
|
|
|
08805a |
- /* Test for forward reference */
|
|
|
08805a |
+ /* Test for forward reference or uncompleted reference. This is disabled
|
|
|
08805a |
+ when called to scan a completed pattern by setting cd->start_workspace to
|
|
|
08805a |
+ NULL. */
|
|
|
08805a |
|
|
|
08805a |
- for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
|
|
|
08805a |
- if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
|
|
|
08805a |
+ if (cd->start_workspace != NULL)
|
|
|
08805a |
+ {
|
|
|
08805a |
+ const pcre_uchar *tcode;
|
|
|
08805a |
+ for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
|
|
|
08805a |
+ if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
|
|
|
08805a |
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
|
|
|
08805a |
+ }
|
|
|
08805a |
+
|
|
|
08805a |
+ /* If we are scanning a completed pattern, there are no forward references
|
|
|
08805a |
+ and all groups are complete. We need to detect whether this is a recursive
|
|
|
08805a |
+ call, as otherwise there will be an infinite loop. If it is a recursion,
|
|
|
08805a |
+ just skip over it. Simple recursions are easily detected. For mutual
|
|
|
08805a |
+ recursions we keep a chain on the stack. */
|
|
|
08805a |
+
|
|
|
08805a |
+ else
|
|
|
08805a |
+ {
|
|
|
08805a |
+ recurse_check *r = recurses;
|
|
|
08805a |
+ const pcre_uchar *endgroup = scode;
|
|
|
08805a |
+
|
|
|
08805a |
+ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
|
|
|
08805a |
+ if (code >= scode && code <= endgroup) continue; /* Simple recursion */
|
|
|
08805a |
+
|
|
|
08805a |
+ for (r = recurses; r != NULL; r = r->prev)
|
|
|
08805a |
+ if (r->group == scode) break;
|
|
|
08805a |
+ if (r != NULL) continue; /* Mutual recursion */
|
|
|
08805a |
+ }
|
|
|
08805a |
|
|
|
08805a |
- /* Not a forward reference, test for completed backward reference */
|
|
|
08805a |
+ /* Completed reference; scan the referenced group, remembering it on the
|
|
|
08805a |
+ stack chain to detect mutual recursions. */
|
|
|
08805a |
|
|
|
08805a |
empty_branch = FALSE;
|
|
|
08805a |
- scode = cd->start_code + GET(code, 1);
|
|
|
08805a |
- if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
|
|
|
08805a |
-
|
|
|
08805a |
- /* Completed backwards reference */
|
|
|
08805a |
-
|
|
|
08805a |
+ this_recurse.prev = recurses;
|
|
|
08805a |
+ this_recurse.group = scode;
|
|
|
08805a |
+
|
|
|
08805a |
do
|
|
|
08805a |
{
|
|
|
08805a |
- if (could_be_empty_branch(scode, endcode, utf, cd))
|
|
|
08805a |
+ if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
|
|
|
08805a |
{
|
|
|
08805a |
empty_branch = TRUE;
|
|
|
08805a |
break;
|
|
|
08805a |
@@ -2463,7 +2496,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|
|
08805a |
empty_branch = FALSE;
|
|
|
08805a |
do
|
|
|
08805a |
{
|
|
|
08805a |
- if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
|
|
|
08805a |
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
|
|
|
08805a |
empty_branch = TRUE;
|
|
|
08805a |
code += GET(code, 1);
|
|
|
08805a |
}
|
|
|
08805a |
@@ -2582,30 +2615,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|
|
08805a |
return TRUE;
|
|
|
08805a |
|
|
|
08805a |
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
|
|
|
08805a |
- MINUPTO, and POSUPTO may be followed by a multibyte character */
|
|
|
08805a |
+ MINUPTO, POSUPTO, and their caseless and negated versions may be
|
|
|
08805a |
+ followed by a multibyte character. */
|
|
|
08805a |
|
|
|
08805a |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
|
|
|
08805a |
case OP_STAR:
|
|
|
08805a |
case OP_STARI:
|
|
|
08805a |
+ case OP_NOTSTAR:
|
|
|
08805a |
+ case OP_NOTSTARI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_MINSTAR:
|
|
|
08805a |
case OP_MINSTARI:
|
|
|
08805a |
+ case OP_NOTMINSTAR:
|
|
|
08805a |
+ case OP_NOTMINSTARI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_POSSTAR:
|
|
|
08805a |
case OP_POSSTARI:
|
|
|
08805a |
+ case OP_NOTPOSSTAR:
|
|
|
08805a |
+ case OP_NOTPOSSTARI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_QUERY:
|
|
|
08805a |
case OP_QUERYI:
|
|
|
08805a |
+ case OP_NOTQUERY:
|
|
|
08805a |
+ case OP_NOTQUERYI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_MINQUERY:
|
|
|
08805a |
case OP_MINQUERYI:
|
|
|
08805a |
+ case OP_NOTMINQUERY:
|
|
|
08805a |
+ case OP_NOTMINQUERYI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_POSQUERY:
|
|
|
08805a |
case OP_POSQUERYI:
|
|
|
08805a |
+ case OP_NOTPOSQUERY:
|
|
|
08805a |
+ case OP_NOTPOSQUERYI:
|
|
|
08805a |
+
|
|
|
08805a |
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
|
|
|
08805a |
break;
|
|
|
08805a |
|
|
|
08805a |
case OP_UPTO:
|
|
|
08805a |
case OP_UPTOI:
|
|
|
08805a |
+ case OP_NOTUPTO:
|
|
|
08805a |
+ case OP_NOTUPTOI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_MINUPTO:
|
|
|
08805a |
case OP_MINUPTOI:
|
|
|
08805a |
+ case OP_NOTMINUPTO:
|
|
|
08805a |
+ case OP_NOTMINUPTOI:
|
|
|
08805a |
+
|
|
|
08805a |
case OP_POSUPTO:
|
|
|
08805a |
case OP_POSUPTOI:
|
|
|
08805a |
+ case OP_NOTPOSUPTO:
|
|
|
08805a |
+ case OP_NOTPOSUPTOI:
|
|
|
08805a |
+
|
|
|
08805a |
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
|
|
|
08805a |
break;
|
|
|
08805a |
#endif
|
|
|
08805a |
@@ -2662,7 +2723,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
|
|
|
08805a |
{
|
|
|
08805a |
while (bcptr != NULL && bcptr->current_branch >= code)
|
|
|
08805a |
{
|
|
|
08805a |
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
|
|
|
08805a |
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
|
|
|
08805a |
return FALSE;
|
|
|
08805a |
bcptr = bcptr->outer;
|
|
|
08805a |
}
|
|
|
08805a |
@@ -5416,7 +5477,7 @@ for (;; ptr++)
|
|
|
08805a |
pcre_uchar *scode = bracode;
|
|
|
08805a |
do
|
|
|
08805a |
{
|
|
|
08805a |
- if (could_be_empty_branch(scode, ketcode, utf, cd))
|
|
|
08805a |
+ if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
|
|
|
08805a |
{
|
|
|
08805a |
*bracode += OP_SBRA - OP_BRA;
|
|
|
08805a |
break;
|
|
|
08805a |
@@ -8172,10 +8233,12 @@ if (cd->hwm > cd->start_workspace)
|
|
|
08805a |
}
|
|
|
08805a |
}
|
|
|
08805a |
|
|
|
08805a |
-/* If the workspace had to be expanded, free the new memory. */
|
|
|
08805a |
+/* If the workspace had to be expanded, free the new memory. Set the pointer to
|
|
|
08805a |
+NULL to indicate that forward references have been filled in. */
|
|
|
08805a |
|
|
|
08805a |
if (cd->workspace_size > COMPILE_WORK_SIZE)
|
|
|
08805a |
(PUBL(free))((void *)cd->start_workspace);
|
|
|
08805a |
+cd->start_workspace = NULL;
|
|
|
08805a |
|
|
|
08805a |
/* Give an error if there's back reference to a non-existent capturing
|
|
|
08805a |
subpattern. */
|
|
|
08805a |
--
|
|
|
08805a |
2.5.5
|
|
|
08805a |
|