From c031e3ec7c713077659f5f7dc6638d926c69d7b2 Mon Sep 17 00:00:00 2001 From: Hugo van der Sanden Date: Sat, 11 Apr 2020 14:10:24 +0100 Subject: [PATCH v528 3/3] study_chunk: avoid mutating regexp program within GOSUB gh16947 and gh17743: studying GOSUB may restudy in an inner call (via a mix of recursion and enframing) something that an outer call is in the middle of looking at. Let the outer frame deal with it. (CVE-2020-12723) --- embed.fnc | 2 +- embed.h | 2 +- proto.h | 2 +- regcomp.c | 48 ++++++++++++++++++++++++++++++++---------------- t/re/pat.t | 26 +++++++++++++++++++++++++- 5 files changed, 60 insertions(+), 20 deletions(-) diff --git a/embed.fnc b/embed.fnc index cf89277163..4b1ba28277 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2397,7 +2397,7 @@ Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \ |NULLOK struct scan_data_t *data \ |I32 stopparen|U32 recursed_depth \ |NULLOK regnode_ssc *and_withp \ - |U32 flags|U32 depth + |U32 flags|U32 depth|bool was_mutate_ok EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \ |NN const char* const s|const U32 n rs |void |re_croak2 |bool utf8|NN const char* pat1|NN const char* pat2|... diff --git a/embed.h b/embed.h index 886551ce5c..50fcabc140 100644 --- a/embed.h +++ b/embed.h @@ -1075,7 +1075,7 @@ #define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init #define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c) #define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c) -#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) +#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l) # endif # if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d) diff --git a/proto.h b/proto.h index d3f8802c1d..e276f69bd1 100644 --- a/proto.h +++ b/proto.h @@ -5258,7 +5258,7 @@ PERL_STATIC_INLINE void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, c #define PERL_ARGS_ASSERT_SSC_UNION \ assert(ssc); assert(invlist) #endif -STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth); +STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok); #define PERL_ARGS_ASSERT_STUDY_CHUNK \ assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last) #endif diff --git a/regcomp.c b/regcomp.c index 0a9c6a8085..e66032a16a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -110,6 +110,7 @@ typedef struct scan_frame { regnode *next_regnode; /* next node to process when last is reached */ U32 prev_recursed_depth; I32 stopparen; /* what stopparen do we use */ + bool in_gosub; /* this or an outer frame is for GOSUB */ U32 is_top_frame; /* what flags do we use? */ struct scan_frame *this_prev_frame; /* this previous frame */ @@ -4102,7 +4103,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, - U32 flags, U32 depth) + U32 flags, U32 depth, bool was_mutate_ok) /* scanp: Start here (read-write). */ /* deltap: Write maxlen-minlen here. */ /* last: Stop before this one. */ @@ -4179,6 +4180,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, node length to get a real minimum (because the folded version may be shorter) */ bool unfolded_multi_char = FALSE; + /* avoid mutating ops if we are anywhere within the recursed or + * enframed handling for a GOSUB: the outermost level will handle it. + */ + bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); /* Peephole optimizer: */ DEBUG_STUDYDATA("Peep:", data, depth); DEBUG_PEEP("Peep", scan, depth); @@ -4189,7 +4194,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * parsing code, as each (?:..) is handled by a different invocation of * reg() -- Yves */ - JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); + if (mutate_ok) + JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); /* Follow the next-chain of the current node and optimize away all the NOTHINGs from it. */ @@ -4238,7 +4244,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * NOTE we dont use the return here! */ (void)study_chunk(pRExC_state, &scan, &minlen, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, mutate_ok); scan = next; } else @@ -4305,7 +4311,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* we suppose the run is continuous, last=next...*/ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f,depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (min1 > minnext) min1 = minnext; @@ -4372,9 +4379,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } - if (PERL_ENABLE_TRIE_OPTIMISATION && - OP( startbranch ) == BRANCH ) - { + if (PERL_ENABLE_TRIE_OPTIMISATION + && OP(startbranch) == BRANCH + && mutate_ok + ) { /* demq. Assuming this was/is a branch we are dealing with: 'scan' @@ -4825,6 +4833,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, newframe->stopparen = stopparen; newframe->prev_recursed_depth = recursed_depth; newframe->this_prev_frame= frame; + newframe->in_gosub = ( + (frame && frame->in_gosub) || OP(scan) == GOSUB + ); DEBUG_STUDYDATA("frame-new:",data,depth); DEBUG_PEEP("fnew", scan, depth); @@ -5043,7 +5054,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, (mincount == 0 ? (f & ~SCF_DO_SUBSTR) : f) - ,depth+1); + , depth+1, mutate_ok); if (flags & SCF_DO_STCLASS) data->start_class = oclass; @@ -5105,7 +5116,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ( OP(oscan) == CURLYX && data && data->flags & SF_IN_PAR && !(data->flags & SF_HAS_EVAL) - && !deltanext && minnext == 1 ) { + && !deltanext && minnext == 1 + && mutate_ok + ) { /* Try to optimize to CURLYN. */ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; regnode * const nxt1 = nxt; @@ -5151,10 +5164,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, && !(data->flags & SF_HAS_EVAL) && !deltanext /* atom is fixed width */ && minnext != 0 /* CURLYM can't handle zero width */ - /* Nor characters whose fold at run-time may be * multi-character */ && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN) + && mutate_ok ) { /* XXXX How to optimize if data == 0? */ /* Optimize to a simpler form. */ @@ -5201,7 +5214,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, #endif /* Optimize again: */ study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, - NULL, stopparen, recursed_depth, NULL, 0,depth+1); + NULL, stopparen, recursed_depth, NULL, 0, + depth+1, mutate_ok); } else oscan->flags = 0; @@ -5592,7 +5606,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", nscan = NEXTOPER(NEXTOPER(scan)); minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, last, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (scan->flags) { if (deltanext) { FAIL("Variable length lookbehind not implemented"); @@ -5681,7 +5696,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, last, &data_fake, stopparen, recursed_depth, NULL, - f,depth+1); + f, depth+1, mutate_ok); if (scan->flags) { if (deltanext) { FAIL("Variable length lookbehind not implemented"); @@ -5841,7 +5856,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", branches even though they arent otherwise used. */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, (regnode *)nextbranch, &data_fake, - stopparen, recursed_depth, NULL, f,depth+1); + stopparen, recursed_depth, NULL, f, depth+1, + mutate_ok); } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode*)nextbranch); @@ -7524,7 +7540,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, &data, -1, 0, NULL, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag | (restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); @@ -7670,7 +7686,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(NOOP); diff --git a/t/re/pat.t b/t/re/pat.t index 1d98fe77d7..1488259b02 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; skip_all_without_unicode_tables(); -plan tests => 840; # Update this when adding/deleting tests. +plan tests => 844; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1929,6 +1929,30 @@ EOP fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly"); } + # gh16947: test regexp corruption (GOSUB) + { + fresh_perl_is(q{ + 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok' + }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)'); + } + # gh16947: test fix doesn't break SUSPEND + { + fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' }, + 'ok', {}, "gh16947: test fix doesn't break SUSPEND"); + } + + # gh17743: more regexp corruption via GOSUB + { + fresh_perl_is(q{ + "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (1)'); + + fresh_perl_is(q{ + "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; + print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (2)'); + } + } # End of sub run_tests 1; -- 2.20.1