From d6f7a97d2c1dfcedd728d8b2ac1a07e953922950 Mon Sep 17 00:00:00 2001 From: Hugo van der Sanden Date: Fri, 18 Dec 2020 14:55:16 +0100 Subject: [PATCH] study_chunk: avoid mutating regexp program within GOSUB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gh16947 and gh17743: studying GOSUB may restudy in an inner call (via a mix of recursion and enframing) something that an outer call is in the middle of looking at. Let the outer frame deal with it. (CVE-2020-12723) Jitka Plesnikova, Petr Pisar: Ported from 3f4ba871d2d397dcd4386ed75e05353c36135c29 to 5.16.3. Signed-off-by: Petr Písař --- embed.fnc | 2 +- embed.h | 2 +- proto.h | 2 +- regcomp.c | 42 ++++++++++++++++++++++++++++++------------ t/re/pat.t | 26 +++++++++++++++++++++++++- 5 files changed, 58 insertions(+), 16 deletions(-) diff --git a/embed.fnc b/embed.fnc index 9546555..019afa9 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1951,7 +1951,7 @@ Es |I32 |study_chunk |NN struct RExC_state_t *pRExC_state \ |NULLOK struct scan_data_t *data \ |I32 stopparen|NULLOK U8* recursed \ |NULLOK struct regnode_charclass_class *and_withp \ - |U32 flags|U32 depth + |U32 flags|U32 depth|bool was_mutate_ok EsRn |U32 |add_data |NN struct RExC_state_t *pRExC_state|U32 n \ |NN const char *s rs |void |re_croak2 |NN const char* pat1|NN const char* pat2|... diff --git a/embed.h b/embed.h index f7db1e0..75f326e 100644 --- a/embed.h +++ b/embed.h @@ -952,7 +952,7 @@ #define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d) #define set_regclass_bit(a,b,c,d,e) S_set_regclass_bit(aTHX_ a,b,c,d,e) #define set_regclass_bit_fold(a,b,c,d,e) S_set_regclass_bit_fold(aTHX_ a,b,c,d,e) -#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) +#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l) # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) #define _core_swash_init(a,b,c,d,e,f,g,h) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g,h) diff --git a/proto.h b/proto.h index 143eee0..1b2ece6 100644 --- a/proto.h +++ b/proto.h @@ -6597,7 +6597,7 @@ STATIC U8 S_set_regclass_bit_fold(pTHX_ struct RExC_state_t *pRExC_state, regnod #define PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD \ assert(pRExC_state); assert(node); assert(invlist_ptr); assert(alternate_ptr) -STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth) +STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth, bool was_mutate_ok) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) diff --git a/regcomp.c b/regcomp.c index 2254159..57505f3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2934,6 +2934,7 @@ typedef struct scan_frame { regnode *next; /* next node to process when last is reached */ struct scan_frame *prev; /*previous frame*/ I32 stop; /* what stopparen do we use */ + bool in_gosub; /* this or an outer frame is for GOSUB */ } scan_frame; @@ -2975,7 +2976,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, - U32 flags, U32 depth) + U32 flags, U32 depth, bool was_mutate_ok) /* scanp: Start here (read-write). */ /* deltap: Write maxlen-minlen here. */ /* last: Stop before this one. */ @@ -3016,6 +3017,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, length to get a real minimum (because the folded version may be shorter) */ bool has_exactf_sharp_s = FALSE; + /* avoid mutating ops if we are anywhere within the recursed or + * enframed handling for a GOSUB: the outermost level will handle it. + */ + bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); + /* Peephole optimizer: */ DEBUG_STUDYDATA("Peep:", data,depth); DEBUG_PEEP("Peep",scan,depth); @@ -3023,7 +3029,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* Its not clear to khw or hv why this is done here, and not in the * clauses that deal with EXACT nodes. khw's guess is that it's * because of a previous design */ - JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0); + if (mutate_ok) + JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0); + /* Follow the next-chain of the current node and optimize away all the NOTHINGs from it. */ @@ -3100,7 +3108,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* we suppose the run is continuous, last=next...*/ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, next, &data_fake, - stopparen, recursed, NULL, f,depth+1); + stopparen, recursed, NULL, f,depth+1, + mutate_ok); if (min1 > minnext) min1 = minnext; if (max1 < minnext + deltanext) @@ -3161,7 +3170,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } - if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH ) { + if (PERL_ENABLE_TRIE_OPTIMISATION + && OP( startbranch ) == BRANCH + && mutate_ok) { + /* demq. Assuming this was/is a branch we are dealing with: 'scan' now @@ -3481,6 +3493,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, newframe->last = last; newframe->stop = stopparen; newframe->prev = frame; + newframe->in_gosub = ( + (frame && frame->in_gosub) || (OP(scan) == GOSUB || OP(scan) == GOSTART) + ); frame = newframe; scan = start; @@ -3792,7 +3807,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, last, data, stopparen, recursed, NULL, (mincount == 0 - ? (f & ~SCF_DO_SUBSTR) : f),depth+1); + ? (f & ~SCF_DO_SUBSTR) : f),depth+1,mutate_ok); if (flags & SCF_DO_STCLASS) data->start_class = oclass; @@ -3844,7 +3859,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ( OP(oscan) == CURLYX && data && data->flags & SF_IN_PAR && !(data->flags & SF_HAS_EVAL) - && !deltanext && minnext == 1 ) { + && !deltanext && minnext == 1 + && mutate_ok + ) { /* Try to optimize to CURLYN. */ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; regnode * const nxt1 = nxt; @@ -3890,6 +3907,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, && !(data->flags & SF_HAS_EVAL) && !deltanext /* atom is fixed width */ && minnext != 0 /* CURLYM can't handle zero width */ + && mutate_ok ) { /* XXXX How to optimize if data == 0? */ /* Optimize to a simpler form. */ @@ -3936,7 +3954,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, #endif /* Optimize again: */ study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, - NULL, stopparen, recursed, NULL, 0,depth+1); + NULL, stopparen, recursed, NULL, 0,depth+1,mutate_ok); } else oscan->flags = 0; @@ -4376,7 +4394,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, next = regnext(scan); nscan = NEXTOPER(NEXTOPER(scan)); minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, - last, &data_fake, stopparen, recursed, NULL, f, depth+1); + last, &data_fake, stopparen, recursed, NULL, f, depth+1,mutate_ok); if (scan->flags) { if (deltanext) { FAIL("Variable length lookbehind not implemented"); @@ -4462,7 +4480,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, nscan = NEXTOPER(NEXTOPER(scan)); *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, - last, &data_fake, stopparen, recursed, NULL, f,depth+1); + last, &data_fake, stopparen, recursed, NULL, f,depth+1,mutate_ok); if (scan->flags) { if (deltanext) { FAIL("Variable length lookbehind not implemented"); @@ -4625,7 +4643,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, (regnode *)nextbranch, &data_fake, - stopparen, recursed, NULL, f,depth+1); + stopparen, recursed, NULL, f,depth+1,mutate_ok); } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode*)nextbranch); @@ -5442,7 +5460,7 @@ reStudy: minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */ &data, -1, NULL, NULL, - SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0); + SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0,TRUE); CHECK_RESTUDY_GOTO; @@ -5627,7 +5645,7 @@ reStudy: minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size, - &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0); + &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0,TRUE); CHECK_RESTUDY_GOTO; diff --git a/t/re/pat.t b/t/re/pat.t index faddbc5..e328fbd 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -19,7 +19,7 @@ BEGIN { require './test.pl'; } -plan tests => 472; # Update this when adding/deleting tests. +plan tests => 476; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1260,6 +1260,30 @@ EOP use re '/aa'; unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; } + # gh16947: test regexp corruption (GOSUB) + { + fresh_perl_is(q{ + 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok' + }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)'); + } + # gh16947: test fix doesn't break SUSPEND + { + fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' }, + 'ok', {}, "gh16947: test fix doesn't break SUSPEND"); + } + + # gh17743: more regexp corruption via GOSUB + { + fresh_perl_is(q{ + "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (1)'); + + fresh_perl_is(q{ + "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; + print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (2)'); + } + } # End of sub run_tests 1; -- 2.26.2