Blob Blame History Raw
From b760f59c8af5ad8a9a4a02962e1b7d56a0617d3f Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed, 5 Aug 2015 15:38:32 +0000
Subject: [PATCH 3/3] Fix buffer overflow for named references in (?|
 situations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ported for 8.32:

commit 7af8e8717def179fd7b69e173abd347c1a3547cb
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date:   Wed Aug 5 15:38:32 2015 +0000

    Fix buffer overflow for named references in (?| situations.

    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1585 2f5784b3-3f2a-0410-8824-cb99058d5e15

This port increases the allocation size because the "Refactor the code
for creating the name/number table" patch is missing from this release.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 pcre_compile.c           | 47 +++++++++++++++++++++++++++++------------------
 pcre_internal.h          |  1 +
 testdata/testinput2      |  2 ++
 testdata/testoutput11-16 |  2 +-
 testdata/testoutput11-32 |  2 +-
 testdata/testoutput11-8  |  2 +-
 testdata/testoutput2     |  2 ++
 7 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/pcre_compile.c b/pcre_compile.c
index 6777542..0215861 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5796,6 +5796,7 @@ for (;; ptr++)
         /* ------------------------------------------------------------ */
         case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
         reset_bracount = TRUE;
+        cd->dupgroups = TRUE;     /* Record (?| encountered */ 
         /* Fall through */
 
         /* ------------------------------------------------------------ */
@@ -6262,6 +6263,7 @@ for (;; ptr++)
         if (lengthptr != NULL)
           {
           const pcre_uchar *temp;
+          recno = 0;
 
           if (namelen == 0)
             {
@@ -6279,22 +6281,6 @@ for (;; ptr++)
             goto FAILED;
             }
 
-          /* The name table does not exist in the first pass, so we cannot
-          do a simple search as in the code below. Instead, we have to scan the
-          pattern to find the number. It is important that we scan it only as
-          far as we have got because the syntax of named subpatterns has not
-          been checked for the rest of the pattern, and find_parens() assumes
-          correct syntax. In any case, it's a waste of resources to scan
-          further. We stop the scan at the current point by temporarily
-          adjusting the value of cd->endpattern. */
-
-          temp = cd->end_pattern;
-          cd->end_pattern = ptr;
-          recno = find_parens(cd, name, namelen,
-            (options & PCRE_EXTENDED) != 0, utf);
-          cd->end_pattern = temp;
-          if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
-
           /* We have to allow for a named reference to a duplicated name (this
           cannot be determined until the second pass). This needs an extra
           16-bit data item. */
@@ -6307,7 +6293,31 @@ for (;; ptr++)
           real compile this will be picked up and the reference wrapped with
           OP_ONCE to make it atomic, so we must space in case this occurs. */
 
-          if (recno == 0) *lengthptr += 2 + 2*LINK_SIZE;
+          *lengthptr += 2 + 2*LINK_SIZE;
+
+          /* It is even worse than that. The current reference may be to an
+          existing named group with a different number (so apparently not
+          recursive) but which later on is also attached to a group with the
+          current number. This can only happen if (?| has been previously
+          encountered. In that case, we allow yet more memory, just in case.
+          (Again, this is fixed "properly" in PCRE2.) */
+          
+          if (cd->dupgroups) *lengthptr += 2 + 2*LINK_SIZE;
+
+          /* Otherwise, check for recursion here. The name table does not exist
+          in the first pass; instead we must scan the pattern, but only as far
+          as we have got, in order to get the number. If the name is not found,
+          leave the value of recno as 0 for a forward reference. */
+           
+          else
+            { 
+            temp = cd->end_pattern;
+            cd->end_pattern = ptr;
+            recno = find_parens(cd, name, namelen,
+              (options & PCRE_EXTENDED) != 0, utf);
+            cd->end_pattern = temp;
+            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
+            }   
           }
 
         /* In the real compile, seek the name in the table. We check the name
@@ -8087,6 +8097,7 @@ cd->bracount = cd->final_bracount = 0;
 cd->names_found = 0;
 cd->name_entry_size = 0;
 cd->name_table = NULL;
+cd->dupgroups = FALSE;
 cd->start_code = cworkspace;
 cd->hwm = cworkspace;
 cd->start_workspace = cworkspace;
@@ -8116,7 +8127,7 @@ if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
 
 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
   (int)(cd->hwm - cworkspace)));
-
+  
 if (length > MAX_PATTERN_SIZE)
   {
   errorcode = ERR20;
diff --git a/pcre_internal.h b/pcre_internal.h
index f3cb001..536b3d8 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -2410,6 +2410,7 @@ typedef struct compile_data {
   BOOL had_accept;                  /* (*ACCEPT) encountered */
   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
   BOOL check_lookbehind;            /* Lookbehinds need later checking */
+  BOOL dupgroups;                   /* Duplicate groups exist: (?| found */ 
   int  nltype;                      /* Newline type */
   int  nllen;                       /* Newline string length */
   pcre_uchar nl[4];                 /* Newline string when fixed length */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 53c4718..bb11212 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3820,4 +3820,6 @@ settings of the anchored and startline bits. --/
 
 /(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
 
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
 /-- End of testinput2 --/
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 4115877..3cb3049 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -232,7 +232,7 @@ Memory allocation (code space): 73
 ------------------------------------------------------------------
 
 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 61
+Memory allocation (code space): 77
 ------------------------------------------------------------------
   0  24 Bra
   2   5 CBra 1
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 3f66acd..10dee82 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -232,7 +232,7 @@ Memory allocation (code space): 155
 ------------------------------------------------------------------
 
 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 125
+Memory allocation (code space): 157
 ------------------------------------------------------------------
   0  24 Bra
   2   5 CBra 1
diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
index 27e2c65..a1bd60a 100644
--- a/testdata/testoutput11-8
+++ b/testdata/testoutput11-8
@@ -232,7 +232,7 @@ Memory allocation (code space): 45
 ------------------------------------------------------------------
 
 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 38
+Memory allocation (code space): 50
 ------------------------------------------------------------------
   0  30 Bra
   3   7 CBra 1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index b0b46d7..2dd2381 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12517,4 +12517,6 @@ No match
 
 /(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
 
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
 /-- End of testinput2 --/
-- 
2.5.5