From b760f59c8af5ad8a9a4a02962e1b7d56a0617d3f Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 5 Aug 2015 15:38:32 +0000 Subject: [PATCH 3/3] Fix buffer overflow for named references in (?| situations. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported for 8.32: commit 7af8e8717def179fd7b69e173abd347c1a3547cb Author: ph10 Date: Wed Aug 5 15:38:32 2015 +0000 Fix buffer overflow for named references in (?| situations. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1585 2f5784b3-3f2a-0410-8824-cb99058d5e15 This increases allocation size because of missing "Refactor the code for creating the name/number table" patch. Signed-off-by: Petr Písař --- pcre_compile.c | 47 +++++++++++++++++++++++++++++------------------ pcre_internal.h | 1 + testdata/testinput2 | 2 ++ testdata/testoutput11-16 | 2 +- testdata/testoutput11-32 | 2 +- testdata/testoutput11-8 | 2 +- testdata/testoutput2 | 2 ++ 7 files changed, 37 insertions(+), 21 deletions(-) diff --git a/pcre_compile.c b/pcre_compile.c index 6777542..0215861 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -5796,6 +5796,7 @@ for (;; ptr++) /* ------------------------------------------------------------ */ case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ reset_bracount = TRUE; + cd->dupgroups = TRUE; /* Record (?| encountered */ /* Fall through */ /* ------------------------------------------------------------ */ @@ -6262,6 +6263,7 @@ for (;; ptr++) if (lengthptr != NULL) { const pcre_uchar *temp; + recno = 0; if (namelen == 0) { @@ -6279,22 +6281,6 @@ for (;; ptr++) goto FAILED; } - /* The name table does not exist in the first pass, so we cannot - do a simple search as in the code below. Instead, we have to scan the - pattern to find the number. It is important that we scan it only as - far as we have got because the syntax of named subpatterns has not - been checked for the rest of the pattern, and find_parens() assumes - correct syntax. 
In any case, it's a waste of resources to scan - further. We stop the scan at the current point by temporarily - adjusting the value of cd->endpattern. */ - - temp = cd->end_pattern; - cd->end_pattern = ptr; - recno = find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf); - cd->end_pattern = temp; - if (recno < 0) recno = 0; /* Forward ref; set dummy number */ - /* We have to allow for a named reference to a duplicated name (this cannot be determined until the second pass). This needs an extra 16-bit data item. */ @@ -6307,7 +6293,31 @@ for (;; ptr++) real compile this will be picked up and the reference wrapped with OP_ONCE to make it atomic, so we must space in case this occurs. */ - if (recno == 0) *lengthptr += 2 + 2*LINK_SIZE; + *lengthptr += 2 + 2*LINK_SIZE; + + /* It is even worse than that. The current reference may be to an + existing named group with a different number (so apparently not + recursive) but which later on is also attached to a group with the + current number. This can only happen if (?| has been previously + encountered. In that case, we allow yet more memory, just in case. + (Again, this is fixed "properly" in PCRE2.) */ + + if (cd->dupgroups) *lengthptr += 2 + 2*LINK_SIZE; + + /* Otherwise, check for recursion here. The name table does not exist + in the first pass; instead we must scan the list of names encountered + so far in order to get the number. If the name is not found, leave + the value of recno as 0 for a forward reference. */ + + else + { + temp = cd->end_pattern; + cd->end_pattern = ptr; + recno = find_parens(cd, name, namelen, + (options & PCRE_EXTENDED) != 0, utf); + cd->end_pattern = temp; + if (recno < 0) recno = 0; /* Forward ref; set dummy number */ + } } /* In the real compile, seek the name in the table. 
We check the name @@ -8087,6 +8097,7 @@ cd->bracount = cd->final_bracount = 0; cd->names_found = 0; cd->name_entry_size = 0; cd->name_table = NULL; +cd->dupgroups = FALSE; cd->start_code = cworkspace; cd->hwm = cworkspace; cd->start_workspace = cworkspace; @@ -8116,7 +8127,7 @@ if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, (int)(cd->hwm - cworkspace))); - + if (length > MAX_PATTERN_SIZE) { errorcode = ERR20; diff --git a/pcre_internal.h b/pcre_internal.h index f3cb001..536b3d8 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -2410,6 +2410,7 @@ typedef struct compile_data { BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL check_lookbehind; /* Lookbehinds need later checking */ + BOOL dupgroups; /* Duplicate groups exist: (?| found */ int nltype; /* Newline type */ int nllen; /* Newline string length */ pcre_uchar nl[4]; /* Newline string when fixed length */ diff --git a/testdata/testinput2 b/testdata/testinput2 index 53c4718..bb11212 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3820,4 +3820,6 @@ settings of the anchored and startline bits. 
--/ /(?(?J)(?1(111111)11|)1|1|)(?()1)/ +/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/ + /-- End of testinput2 --/ diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 4115877..3cb3049 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -232,7 +232,7 @@ Memory allocation (code space): 73 ------------------------------------------------------------------ /(?P<a>a)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 61 +Memory allocation (code space): 77 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 3f66acd..10dee82 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -232,7 +232,7 @@ Memory allocation (code space): 155 ------------------------------------------------------------------ /(?P<a>a)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 125 +Memory allocation (code space): 157 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 index 27e2c65..a1bd60a 100644 --- a/testdata/testoutput11-8 +++ b/testdata/testoutput11-8 @@ -232,7 +232,7 @@ Memory allocation (code space): 45 ------------------------------------------------------------------ /(?P<a>a)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 38 +Memory allocation (code space): 50 ------------------------------------------------------------------ 0 30 Bra 3 7 CBra 1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index b0b46d7..2dd2381 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -12517,4 +12517,6 @@ No match /(?(?J)(?1(111111)11|)1|1|)(?()1)/ +/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/ + /-- End of testinput2 --/ -- 2.5.5