dcb3b7
From 42e9b60980bb8e29e76629e14c6aa945194c0647 Mon Sep 17 00:00:00 2001
dcb3b7
From: Hugo van der Sanden <hv@crypt.org>
dcb3b7
Date: Wed, 5 Oct 2016 02:20:26 +0100
dcb3b7
Subject: [PATCH] [perl #129061] CURLYX nodes can be studied more than once
dcb3b7
MIME-Version: 1.0
dcb3b7
Content-Type: text/plain; charset=UTF-8
dcb3b7
Content-Transfer-Encoding: 8bit
dcb3b7
dcb3b7
study_chunk() for CURLYX is used to set flags on the linked WHILEM
node to say it is the whilem_c'th of whilem_seen. However, it assumes
each CURLYX can be studied only once, which is not the case - there
are various cases such as GOSUB which call study_chunk() recursively
on already-visited parts of the program.
dcb3b7
dcb3b7
Storing the wrong index can cause the super-linear cache handling in
regmatch() to read/write the byte after the end of poscache.
dcb3b7
dcb3b7
Also reported in [perl #129281].
dcb3b7
dcb3b7
Signed-off-by: Petr Písař <ppisar@redhat.com>
dcb3b7
---
dcb3b7
 regcomp.c  | 12 +++++++++---
dcb3b7
 t/re/pat.t |  1 -
dcb3b7
 2 files changed, 9 insertions(+), 4 deletions(-)
dcb3b7
dcb3b7
diff --git a/regcomp.c b/regcomp.c
dcb3b7
index 850a6c1..48c8d8d 100644
dcb3b7
--- a/regcomp.c
dcb3b7
+++ b/regcomp.c
dcb3b7
@@ -5218,15 +5218,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
dcb3b7
 			    However, this time it's not a subexpression
dcb3b7
 			    we care about, but the expression itself. */
dcb3b7
 			 && (maxcount == REG_INFTY)
dcb3b7
-			 && data && ++data->whilem_c < 16) {
dcb3b7
+			 && data) {
dcb3b7
 		    /* This stays as CURLYX, we can put the count/of pair. */
dcb3b7
 		    /* Find WHILEM (as in regexec.c) */
dcb3b7
 		    regnode *nxt = oscan + NEXT_OFF(oscan);
dcb3b7
 
dcb3b7
 		    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
dcb3b7
 			nxt += ARG(nxt);
dcb3b7
-		    PREVOPER(nxt)->flags = (U8)(data->whilem_c
dcb3b7
-			| (RExC_whilem_seen << 4)); /* On WHILEM */
dcb3b7
+                    nxt = PREVOPER(nxt);
dcb3b7
+                    if (nxt->flags & 0xf) {
dcb3b7
+                        /* we've already set whilem count on this node */
dcb3b7
+                    } else if (++data->whilem_c < 16) {
dcb3b7
+                        assert(data->whilem_c <= RExC_whilem_seen);
dcb3b7
+                        nxt->flags = (U8)(data->whilem_c
dcb3b7
+                            | (RExC_whilem_seen << 4)); /* On WHILEM */
dcb3b7
+                    }
dcb3b7
 		}
dcb3b7
 		if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
dcb3b7
 		    pars++;
dcb3b7
diff --git a/t/re/pat.t b/t/re/pat.t
dcb3b7
index ecd3af1..16bfc8e 100644
dcb3b7
--- a/t/re/pat.t
dcb3b7
+++ b/t/re/pat.t
dcb3b7
@@ -1909,7 +1909,6 @@ EOP
dcb3b7
     }
dcb3b7
     {
dcb3b7
         # [perl #129281] buffer write overflow, detected by ASAN, valgrind
dcb3b7
-        local $::TODO = "whilem_c  bumped too much";
dcb3b7
         fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much");
dcb3b7
     }
dcb3b7
 } # End of sub run_tests
dcb3b7
-- 
dcb3b7
2.7.4
dcb3b7