From b760f59c8af5ad8a9a4a02962e1b7d56a0617d3f Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed, 5 Aug 2015 15:38:32 +0000
Subject: [PATCH 3/3] Fix buffer overflow for named references in (?|
situations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Ported for 8.32:
commit 7af8e8717def179fd7b69e173abd347c1a3547cb
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed Aug 5 15:38:32 2015 +0000
Fix buffer overflow for named references in (?| situations.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1585 2f5784b3-3f2a-0410-8824-cb99058d5e15
This port increases the allocation size because 8.32 lacks the "Refactor
the code for creating the name/number table" patch.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pcre_compile.c | 47 +++++++++++++++++++++++++++++------------------
pcre_internal.h | 1 +
testdata/testinput2 | 2 ++
testdata/testoutput11-16 | 2 +-
testdata/testoutput11-32 | 2 +-
testdata/testoutput11-8 | 2 +-
testdata/testoutput2 | 2 ++
7 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/pcre_compile.c b/pcre_compile.c
index 6777542..0215861 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5796,6 +5796,7 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */
reset_bracount = TRUE;
+ cd->dupgroups = TRUE; /* Record (?| encountered */
/* Fall through */
/* ------------------------------------------------------------ */
@@ -6262,6 +6263,7 @@ for (;; ptr++)
if (lengthptr != NULL)
{
const pcre_uchar *temp;
+ recno = 0;
if (namelen == 0)
{
@@ -6279,22 +6281,6 @@ for (;; ptr++)
goto FAILED;
}
- /* The name table does not exist in the first pass, so we cannot
- do a simple search as in the code below. Instead, we have to scan the
- pattern to find the number. It is important that we scan it only as
- far as we have got because the syntax of named subpatterns has not
- been checked for the rest of the pattern, and find_parens() assumes
- correct syntax. In any case, it's a waste of resources to scan
- further. We stop the scan at the current point by temporarily
- adjusting the value of cd->endpattern. */
-
- temp = cd->end_pattern;
- cd->end_pattern = ptr;
- recno = find_parens(cd, name, namelen,
- (options & PCRE_EXTENDED) != 0, utf);
- cd->end_pattern = temp;
- if (recno < 0) recno = 0; /* Forward ref; set dummy number */
-
/* We have to allow for a named reference to a duplicated name (this
cannot be determined until the second pass). This needs an extra
16-bit data item. */
@@ -6307,7 +6293,31 @@ for (;; ptr++)
real compile this will be picked up and the reference wrapped with
OP_ONCE to make it atomic, so we must space in case this occurs. */
- if (recno == 0) *lengthptr += 2 + 2*LINK_SIZE;
+ *lengthptr += 2 + 2*LINK_SIZE;
+
+ /* It is even worse than that. The current reference may be to an
+ existing named group with a different number (so apparently not
+ recursive) but which later on is also attached to a group with the
+ current number. This can only happen if (?| has been previously
+ encountered. In that case, we allow yet more memory, just in case.
+ (Again, this is fixed "properly" in PCRE2.) */
+
+ if (cd->dupgroups) *lengthptr += 2 + 2*LINK_SIZE;
+
+ /* Otherwise, check for recursion here. The name table does not exist
+ in the first pass; instead we must scan the pattern, only as far as we
+ have got (by temporarily adjusting cd->end_pattern), to get the number.
+ If the name is not found, leave recno as 0 for a forward reference. */
+
+ else
+ {
+ temp = cd->end_pattern;
+ cd->end_pattern = ptr;
+ recno = find_parens(cd, name, namelen,
+ (options & PCRE_EXTENDED) != 0, utf);
+ cd->end_pattern = temp;
+ if (recno < 0) recno = 0; /* Forward ref; set dummy number */
+ }
}
/* In the real compile, seek the name in the table. We check the name
@@ -8087,6 +8097,7 @@ cd->bracount = cd->final_bracount = 0;
cd->names_found = 0;
cd->name_entry_size = 0;
cd->name_table = NULL;
+cd->dupgroups = FALSE;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
cd->start_workspace = cworkspace;
@@ -8116,7 +8127,7 @@ if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
(int)(cd->hwm - cworkspace)));
-
+
if (length > MAX_PATTERN_SIZE)
{
errorcode = ERR20;
diff --git a/pcre_internal.h b/pcre_internal.h
index f3cb001..536b3d8 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -2410,6 +2410,7 @@ typedef struct compile_data {
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
+ BOOL dupgroups; /* Duplicate groups exist: (?| found */
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 53c4718..bb11212 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3820,4 +3820,6 @@ settings of the anchored and startline bits. --/
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 4115877..3cb3049 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -232,7 +232,7 @@ Memory allocation (code space): 73
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 61
+Memory allocation (code space): 77
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 3f66acd..10dee82 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -232,7 +232,7 @@ Memory allocation (code space): 155
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 125
+Memory allocation (code space): 157
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
index 27e2c65..a1bd60a 100644
--- a/testdata/testoutput11-8
+++ b/testdata/testoutput11-8
@@ -232,7 +232,7 @@ Memory allocation (code space): 45
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 38
+Memory allocation (code space): 50
------------------------------------------------------------------
0 30 Bra
3 7 CBra 1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index b0b46d7..2dd2381 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12517,4 +12517,6 @@ No match
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
/-- End of testinput2 --/
--
2.5.5