From c6e7032a63f2162405644582af6600dcb5ba66d1 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Tue, 10 May 2016 09:44:31 +0200 Subject: [PATCH] fix #128109 - do not move RExC_open_parens[0] in reginsert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Petr Pisar: Two commits ported to 5.24.0: commit da7cf1cc7cedc01f35ceb6724e8260c3b0ee0d12 Author: Yves Orton Date: Tue May 10 09:44:31 2016 +0200 fix #128109 - do not move RExC_open_parens[0] in reginsert In d5a00e4af6b155495be31a35728b8fef8e671ebe I merged GOSUB and GOSTART, part of which involved making RExC_open_parens[0] refer to the start of the pattern, and RExC_close_parens[0] refer to the end of the pattern. This tripped up reginsert in a subtle way: the start of the pattern cannot and should not move in reginsert(). Unlike a paren that might be at the start of the pattern, which should move when something is inserted in front of it, the start is a fixed point and should never move. This patch fixes this up, and adds an assert to check that reginsert() is not called once study_chunk() starts, as reginsert() does not adjust RExC_recurse. This was noticed by hv while debugging [perl #128085], thanks hugo! commit ec5bd2262bb4e28f0dc6a0a3edb9b1f1b5befa2f Author: Dan Collins Date: Fri Jun 17 19:40:57 2016 -0400 Add tests for regex recursion d5a00e4af introduced a bug in reginsert that was fixed by da7cf1cc7, originally documented in [perl #128109]. This patch adds two regression tests for the testcase reported by Jan Goyvaerts in [perl #128420]. 
Signed-off-by: Petr Písař --- regcomp.c | 13 +++++++++++-- t/re/re_tests | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/regcomp.c b/regcomp.c index f29892c..7462885 100644 --- a/regcomp.c +++ b/regcomp.c @@ -223,6 +223,7 @@ struct RExC_state_t { #endif bool seen_unfolded_sharp_s; bool strict; + bool study_started; }; #define RExC_flags (pRExC_state->flags) @@ -289,6 +290,7 @@ struct RExC_state_t { #define RExC_frame_last (pRExC_state->frame_last) #define RExC_frame_count (pRExC_state->frame_count) #define RExC_strict (pRExC_state->strict) +#define RExC_study_started (pRExC_state->study_started) #define RExC_warn_text (pRExC_state->warn_text) /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set @@ -4104,6 +4106,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, GET_RE_DEBUG_FLAGS_DECL; PERL_ARGS_ASSERT_STUDY_CHUNK; + RExC_study_started= 1; if ( depth == 0 ) { @@ -6886,6 +6889,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_contains_locale = 0; RExC_contains_i = 0; RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT); + RExC_study_started = 0; pRExC_state->runtime_code_qr = NULL; RExC_frame_head= NULL; RExC_frame_last= NULL; @@ -18240,7 +18244,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) RExC_size += size; return; } - + assert(!RExC_study_started); /* I believe we should never use reginsert once we have started + studying. If this is wrong then we need to adjust RExC_recurse + below like we do with RExC_open_parens/RExC_close_parens. */ src = RExC_emit; RExC_emit += size; dst = RExC_emit; @@ -18251,7 +18257,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) * iow it is 1 more than the number of parens seen in * the pattern so far. */ for ( paren=0 ; paren < RExC_npar ; paren++ ) { - if ( RExC_open_parens[paren] >= opnd ) { + /* note, RExC_open_parens[0] is the start of the + * regex, it can't move. 
RExC_close_parens[0] is the end + * of the regex, it *can* move. */ + if ( paren && RExC_open_parens[paren] >= opnd ) { /*DEBUG_PARSE_FMT("open"," - %d",size);*/ RExC_open_parens[paren] += size; } else { diff --git a/t/re/re_tests b/t/re/re_tests index 34ac94a..7e8522d 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1966,6 +1966,8 @@ ab(?#Comment){2}c abbc y $& abbc .{1}?? - c - Nested quantifiers .{1}?+ - c - Nested quantifiers (?:.||)(?|)000000000@ 000000000@ y $& 000000000@ # [perl #126405] +aa$|a(?R)a|a aaa y $& aaa # [perl 128420] recursive matches +(?:\1|a)([bcd])\1(?:(?R)|e)\1 abbaccaddedcb y $& abbaccaddedcb # [perl 128420] recursive match with backreferences # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab -- 2.5.5