Blame SOURCES/pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch

cb67f2
From e3406ec06426fb9a7342541127d4c591d2446b6b Mon Sep 17 00:00:00 2001
cb67f2
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
cb67f2
Date: Fri, 5 Jul 2013 10:38:37 +0000
cb67f2
Subject: [PATCH 1/2] Fix checking whether a group could match an empty string
cb67f2
MIME-Version: 1.0
cb67f2
Content-Type: text/plain; charset=UTF-8
cb67f2
Content-Transfer-Encoding: 8bit
cb67f2
cb67f2
In UTF mode, the code for checking whether a group could match an empty
cb67f2
string (which is used for indefinitely repeated groups to allow for
cb67f2
breaking an infinite loop) was broken when the group contained a repeated
cb67f2
negated single-character class with a character that occupied more than one
cb67f2
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
cb67f2
UTF-8 mode). The effect was undefined: the group might or might not be
cb67f2
deemed as matching an empty string, or the program might have crashed.
cb67f2
cb67f2
Based on:
cb67f2
cb67f2
commit 74d96caf6251eff2f6c6a3e879268ce2d2a6c9be
cb67f2
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
cb67f2
Date:   Fri Jul 5 10:38:37 2013 +0000
cb67f2
cb67f2
    Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
cb67f2
    empty string matching.
cb67f2
cb67f2
    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1348 2f5784b3-3f2a-0410-8824-cb99058d5e15
cb67f2
cb67f2
Ported to 8.32. Needed for CVE-2015-2328 (bug #1285399).
cb67f2
cb67f2
Signed-off-by: Petr Písař <ppisar@redhat.com>
cb67f2
---
cb67f2
 pcre_compile.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++-----------
cb67f2
 1 file changed, 81 insertions(+), 18 deletions(-)
cb67f2
cb67f2
diff --git a/pcre_compile.c b/pcre_compile.c
cb67f2
index 0de3747..ce72527 100644
cb67f2
--- a/pcre_compile.c
cb67f2
+++ b/pcre_compile.c
cb67f2
@@ -2353,15 +2353,23 @@ Arguments:
cb67f2
   endcode     points to where to stop
cb67f2
   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cb67f2
   cd          contains pointers to tables etc.
cb67f2
+  recurses    chain of recurse_check to catch mutual recursion
cb67f2
 
cb67f2
 Returns:      TRUE if what is matched could be empty
cb67f2
 */
cb67f2
 
cb67f2
+typedef struct recurse_check {
cb67f2
+  struct recurse_check *prev;
cb67f2
+  const pcre_uchar *group;
cb67f2
+} recurse_check;    
cb67f2
+
cb67f2
 static BOOL
cb67f2
 could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
cb67f2
-  BOOL utf, compile_data *cd)
cb67f2
+  BOOL utf, compile_data *cd, recurse_check *recurses)
cb67f2
 {
cb67f2
 register pcre_uchar c;
cb67f2
+recurse_check this_recurse;
cb67f2
+
cb67f2
 for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
cb67f2
      code < endcode;
cb67f2
      code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
cb67f2
@@ -2369,7 +2377,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
cb67f2
   const pcre_uchar *ccode;
cb67f2
 
cb67f2
   c = *code;
cb67f2
-
cb67f2
+  
cb67f2
   /* Skip over forward assertions; the other assertions are skipped by
cb67f2
   first_significant_code() with a TRUE final argument. */
cb67f2
 
cb67f2
@@ -2389,25 +2397,50 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
cb67f2
 
cb67f2
   if (c == OP_RECURSE)
cb67f2
     {
cb67f2
-    const pcre_uchar *scode;
cb67f2
+    const pcre_uchar *scode = cd->start_code + GET(code, 1);
cb67f2
     BOOL empty_branch;
cb67f2
 
cb67f2
-    /* Test for forward reference */
cb67f2
+    /* Test for forward reference or uncompleted reference. This is disabled
cb67f2
+    when called to scan a completed pattern by setting cd->start_workspace to
cb67f2
+    NULL. */
cb67f2
 
cb67f2
-    for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
cb67f2
-      if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
cb67f2
+    if (cd->start_workspace != NULL)
cb67f2
+      { 
cb67f2
+      const pcre_uchar *tcode; 
cb67f2
+      for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
cb67f2
+        if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
cb67f2
+      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
cb67f2
+      }
cb67f2
+    
cb67f2
+    /* If we are scanning a completed pattern, there are no forward references 
cb67f2
+    and all groups are complete. We need to detect whether this is a recursive 
cb67f2
+    call, as otherwise there will be an infinite loop. If it is a recursion,
cb67f2
+    just skip over it. Simple recursions are easily detected. For mutual 
cb67f2
+    recursions we keep a chain on the stack. */ 
cb67f2
+     
cb67f2
+    else
cb67f2
+      {  
cb67f2
+      recurse_check *r = recurses;
cb67f2
+      const pcre_uchar *endgroup = scode;
cb67f2
+       
cb67f2
+      do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
cb67f2
+      if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
cb67f2
+      
cb67f2
+      for (r = recurses; r != NULL; r = r->prev)
cb67f2
+        if (r->group == scode) break;
cb67f2
+      if (r != NULL) continue;   /* Mutual recursion */
cb67f2
+      }
cb67f2
 
cb67f2
-    /* Not a forward reference, test for completed backward reference */
cb67f2
+    /* Completed reference; scan the referenced group, remembering it on the
cb67f2
+    stack chain to detect mutual recursions. */
cb67f2
 
cb67f2
     empty_branch = FALSE;
cb67f2
-    scode = cd->start_code + GET(code, 1);
cb67f2
-    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
cb67f2
-
cb67f2
-    /* Completed backwards reference */
cb67f2
-
cb67f2
+    this_recurse.prev = recurses;
cb67f2
+    this_recurse.group = scode; 
cb67f2
+     
cb67f2
     do
cb67f2
       {
cb67f2
-      if (could_be_empty_branch(scode, endcode, utf, cd))
cb67f2
+      if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
cb67f2
         {
cb67f2
         empty_branch = TRUE;
cb67f2
         break;
cb67f2
@@ -2463,7 +2496,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
cb67f2
       empty_branch = FALSE;
cb67f2
       do
cb67f2
         {
cb67f2
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
cb67f2
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
cb67f2
           empty_branch = TRUE;
cb67f2
         code += GET(code, 1);
cb67f2
         }
cb67f2
@@ -2582,30 +2615,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
cb67f2
     return TRUE;
cb67f2
 
cb67f2
     /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
cb67f2
-    MINUPTO, and POSUPTO may be followed by a multibyte character */
cb67f2
+    MINUPTO, and POSUPTO and their caseless and negative versions may be
cb67f2
+    followed by a multibyte character. */
cb67f2
 
cb67f2
 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
cb67f2
     case OP_STAR:
cb67f2
     case OP_STARI:
cb67f2
+    case OP_NOTSTAR:    
cb67f2
+    case OP_NOTSTARI:   
cb67f2
+     
cb67f2
     case OP_MINSTAR:
cb67f2
     case OP_MINSTARI:
cb67f2
+    case OP_NOTMINSTAR: 
cb67f2
+    case OP_NOTMINSTARI:
cb67f2
+     
cb67f2
     case OP_POSSTAR:
cb67f2
     case OP_POSSTARI:
cb67f2
+    case OP_NOTPOSSTAR: 
cb67f2
+    case OP_NOTPOSSTARI:
cb67f2
+     
cb67f2
     case OP_QUERY:
cb67f2
     case OP_QUERYI:
cb67f2
+    case OP_NOTQUERY:   
cb67f2
+    case OP_NOTQUERYI:  
cb67f2
+     
cb67f2
     case OP_MINQUERY:
cb67f2
     case OP_MINQUERYI:
cb67f2
+    case OP_NOTMINQUERY:
cb67f2
+    case OP_NOTMINQUERYI:
cb67f2
+     
cb67f2
     case OP_POSQUERY:
cb67f2
     case OP_POSQUERYI:
cb67f2
+    case OP_NOTPOSQUERY:
cb67f2
+    case OP_NOTPOSQUERYI:
cb67f2
+     
cb67f2
     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
cb67f2
     break;
cb67f2
 
cb67f2
     case OP_UPTO:
cb67f2
     case OP_UPTOI:
cb67f2
+    case OP_NOTUPTO:    
cb67f2
+    case OP_NOTUPTOI:   
cb67f2
+     
cb67f2
     case OP_MINUPTO:
cb67f2
     case OP_MINUPTOI:
cb67f2
+    case OP_NOTMINUPTO: 
cb67f2
+    case OP_NOTMINUPTOI:
cb67f2
+     
cb67f2
     case OP_POSUPTO:
cb67f2
     case OP_POSUPTOI:
cb67f2
+    case OP_NOTPOSUPTO: 
cb67f2
+    case OP_NOTPOSUPTOI:
cb67f2
+     
cb67f2
     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
cb67f2
     break;
cb67f2
 #endif
cb67f2
@@ -2662,7 +2723,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
cb67f2
 {
cb67f2
 while (bcptr != NULL && bcptr->current_branch >= code)
cb67f2
   {
cb67f2
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
cb67f2
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
cb67f2
     return FALSE;
cb67f2
   bcptr = bcptr->outer;
cb67f2
   }
cb67f2
@@ -5416,7 +5477,7 @@ for (;; ptr++)
cb67f2
             pcre_uchar *scode = bracode;
cb67f2
             do
cb67f2
               {
cb67f2
-              if (could_be_empty_branch(scode, ketcode, utf, cd))
cb67f2
+              if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
cb67f2
                 {
cb67f2
                 *bracode += OP_SBRA - OP_BRA;
cb67f2
                 break;
cb67f2
@@ -8172,10 +8233,12 @@ if (cd->hwm > cd->start_workspace)
cb67f2
     }
cb67f2
   }
cb67f2
 
cb67f2
-/* If the workspace had to be expanded, free the new memory. */
cb67f2
+/* If the workspace had to be expanded, free the new memory. Set the pointer to 
cb67f2
+NULL to indicate that forward references have been filled in. */
cb67f2
 
cb67f2
 if (cd->workspace_size > COMPILE_WORK_SIZE)
cb67f2
   (PUBL(free))((void *)cd->start_workspace);
cb67f2
+cd->start_workspace = NULL;   
cb67f2
 
cb67f2
 /* Give an error if there's back reference to a non-existent capturing
cb67f2
 subpattern. */
cb67f2
-- 
cb67f2
2.5.5
cb67f2