708da5
From c031e3ec7c713077659f5f7dc6638d926c69d7b2 Mon Sep 17 00:00:00 2001
708da5
From: Hugo van der Sanden <hv@crypt.org>
708da5
Date: Sat, 11 Apr 2020 14:10:24 +0100
708da5
Subject: [PATCH v528 3/3] study_chunk: avoid mutating regexp program within
708da5
 GOSUB
708da5
708da5
gh16947 and gh17743: studying GOSUB may restudy in an inner call
708da5
(via a mix of recursion and enframing) something that an outer call
708da5
is in the middle of looking at.  Skip the program-mutating optimisations
708da5
in such inner calls and let the outermost frame deal with them.
708da5
708da5
(CVE-2020-12723)
708da5
---
708da5
 embed.fnc  |  2 +-
708da5
 embed.h    |  2 +-
708da5
 proto.h    |  2 +-
708da5
 regcomp.c  | 48 ++++++++++++++++++++++++++++++++----------------
708da5
 t/re/pat.t | 26 +++++++++++++++++++++++++-
708da5
 5 files changed, 60 insertions(+), 20 deletions(-)
708da5
708da5
diff --git a/embed.fnc b/embed.fnc
708da5
index cf89277163..4b1ba28277 100644
708da5
--- a/embed.fnc
708da5
+++ b/embed.fnc
708da5
@@ -2397,7 +2397,7 @@ Es	|SSize_t|study_chunk	|NN RExC_state_t *pRExC_state \
708da5
 				|NULLOK struct scan_data_t *data \
708da5
                                 |I32 stopparen|U32 recursed_depth \
708da5
 				|NULLOK regnode_ssc *and_withp \
708da5
-				|U32 flags|U32 depth
708da5
+				|U32 flags|U32 depth|bool was_mutate_ok
708da5
 EsRn	|U32	|add_data	|NN RExC_state_t* const pRExC_state \
708da5
 				|NN const char* const s|const U32 n
708da5
 rs	|void	|re_croak2	|bool utf8|NN const char* pat1|NN const char* pat2|...
708da5
diff --git a/embed.h b/embed.h
708da5
index 886551ce5c..50fcabc140 100644
708da5
--- a/embed.h
708da5
+++ b/embed.h
708da5
@@ -1075,7 +1075,7 @@
708da5
 #define ssc_is_cp_posixl_init	S_ssc_is_cp_posixl_init
708da5
 #define ssc_or(a,b,c)		S_ssc_or(aTHX_ a,b,c)
708da5
 #define ssc_union(a,b,c)	S_ssc_union(aTHX_ a,b,c)
708da5
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k)	S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
708da5
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l)	S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
708da5
 #  endif
708da5
 #  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
708da5
 #define _invlist_dump(a,b,c,d)	Perl__invlist_dump(aTHX_ a,b,c,d)
708da5
diff --git a/proto.h b/proto.h
708da5
index d3f8802c1d..e276f69bd1 100644
708da5
--- a/proto.h
708da5
+++ b/proto.h
708da5
@@ -5258,7 +5258,7 @@ PERL_STATIC_INLINE void	S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, c
708da5
 #define PERL_ARGS_ASSERT_SSC_UNION	\
708da5
 	assert(ssc); assert(invlist)
708da5
 #endif
708da5
-STATIC SSize_t	S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
708da5
+STATIC SSize_t	S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
708da5
 #define PERL_ARGS_ASSERT_STUDY_CHUNK	\
708da5
 	assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
708da5
 #endif
708da5
diff --git a/regcomp.c b/regcomp.c
708da5
index 0a9c6a8085..e66032a16a 100644
708da5
--- a/regcomp.c
708da5
+++ b/regcomp.c
708da5
@@ -110,6 +110,7 @@ typedef struct scan_frame {
708da5
     regnode *next_regnode;      /* next node to process when last is reached */
708da5
     U32 prev_recursed_depth;
708da5
     I32 stopparen;              /* what stopparen do we use */
708da5
+    bool in_gosub;              /* this or an outer frame is for GOSUB */
708da5
     U32 is_top_frame;           /* what flags do we use? */
708da5
 
708da5
     struct scan_frame *this_prev_frame; /* this previous frame */
708da5
@@ -4102,7 +4103,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 			I32 stopparen,
708da5
                         U32 recursed_depth,
708da5
 			regnode_ssc *and_withp,
708da5
-			U32 flags, U32 depth)
708da5
+			U32 flags, U32 depth, bool was_mutate_ok)
708da5
 			/* scanp: Start here (read-write). */
708da5
 			/* deltap: Write maxlen-minlen here. */
708da5
 			/* last: Stop before this one. */
708da5
@@ -4179,6 +4180,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
                                    node length to get a real minimum (because
708da5
                                    the folded version may be shorter) */
708da5
 	bool unfolded_multi_char = FALSE;
708da5
+        /* avoid mutating ops if we are anywhere within the recursed or
708da5
+         * enframed handling for a GOSUB: the outermost level will handle it.
708da5
+         */
708da5
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
708da5
 	/* Peephole optimizer: */
708da5
         DEBUG_STUDYDATA("Peep:", data, depth);
708da5
         DEBUG_PEEP("Peep", scan, depth);
708da5
@@ -4189,7 +4194,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
          * parsing code, as each (?:..) is handled by a different invocation of
708da5
          * reg() -- Yves
708da5
          */
708da5
-        JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
708da5
+        if (mutate_ok)
708da5
+            JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
708da5
 
708da5
 	/* Follow the next-chain of the current node and optimize
708da5
 	   away all the NOTHINGs from it.  */
708da5
@@ -4238,7 +4244,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
              * NOTE we dont use the return here! */
708da5
             (void)study_chunk(pRExC_state, &scan, &minlen,
708da5
                               &deltanext, next, &data_fake, stopparen,
708da5
-                              recursed_depth, NULL, f, depth+1);
708da5
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
708da5
 
708da5
             scan = next;
708da5
         } else
708da5
@@ -4305,7 +4311,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 		    /* we suppose the run is continuous, last=next...*/
708da5
 		    minnext = study_chunk(pRExC_state, &scan, minlenp,
708da5
                                       &deltanext, next, &data_fake, stopparen,
708da5
-                                      recursed_depth, NULL, f,depth+1);
708da5
+                                      recursed_depth, NULL, f, depth+1,
708da5
+                                      mutate_ok);
708da5
 
708da5
 		    if (min1 > minnext)
708da5
 			min1 = minnext;
708da5
@@ -4372,9 +4379,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 		    }
708da5
 		}
708da5
 
708da5
-                if (PERL_ENABLE_TRIE_OPTIMISATION &&
708da5
-                        OP( startbranch ) == BRANCH )
708da5
-                {
708da5
+                if (PERL_ENABLE_TRIE_OPTIMISATION
708da5
+                    && OP(startbranch) == BRANCH
708da5
+                    && mutate_ok
708da5
+                ) {
708da5
 		/* demq.
708da5
 
708da5
                    Assuming this was/is a branch we are dealing with: 'scan'
708da5
@@ -4825,6 +4833,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
                 newframe->stopparen = stopparen;
708da5
                 newframe->prev_recursed_depth = recursed_depth;
708da5
                 newframe->this_prev_frame= frame;
708da5
+                newframe->in_gosub = (
708da5
+                    (frame && frame->in_gosub) || OP(scan) == GOSUB
708da5
+                );
708da5
 
708da5
                 DEBUG_STUDYDATA("frame-new:",data,depth);
708da5
                 DEBUG_PEEP("fnew", scan, depth);
708da5
@@ -5043,7 +5054,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
                                   (mincount == 0
708da5
                                    ? (f & ~SCF_DO_SUBSTR)
708da5
                                    : f)
708da5
-                                  ,depth+1);
708da5
+                                  , depth+1, mutate_ok);
708da5
 
708da5
 		if (flags & SCF_DO_STCLASS)
708da5
 		    data->start_class = oclass;
708da5
@@ -5105,7 +5116,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 		if (  OP(oscan) == CURLYX && data
708da5
 		      && data->flags & SF_IN_PAR
708da5
 		      && !(data->flags & SF_HAS_EVAL)
708da5
-		      && !deltanext && minnext == 1 ) {
708da5
+		      && !deltanext && minnext == 1
708da5
+                      && mutate_ok
708da5
+                ) {
708da5
 		    /* Try to optimize to CURLYN.  */
708da5
 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
708da5
 		    regnode * const nxt1 = nxt;
708da5
@@ -5151,10 +5164,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 		      && !(data->flags & SF_HAS_EVAL)
708da5
 		      && !deltanext	/* atom is fixed width */
708da5
 		      && minnext != 0	/* CURLYM can't handle zero width */
708da5
-
708da5
                          /* Nor characters whose fold at run-time may be
708da5
                           * multi-character */
708da5
                       && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
708da5
+                      && mutate_ok
708da5
 		) {
708da5
 		    /* XXXX How to optimize if data == 0? */
708da5
 		    /* Optimize to a simpler form.  */
708da5
@@ -5201,7 +5214,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
708da5
 #endif
708da5
 			/* Optimize again: */
708da5
 			study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
708da5
-                                    NULL, stopparen, recursed_depth, NULL, 0,depth+1);
708da5
+                                    NULL, stopparen, recursed_depth, NULL, 0,
708da5
+                                    depth+1, mutate_ok);
708da5
 		    }
708da5
 		    else
708da5
 			oscan->flags = 0;
708da5
@@ -5592,7 +5606,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
708da5
                 nscan = NEXTOPER(NEXTOPER(scan));
708da5
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
708da5
                                       last, &data_fake, stopparen,
708da5
-                                      recursed_depth, NULL, f, depth+1);
708da5
+                                      recursed_depth, NULL, f, depth+1,
708da5
+                                      mutate_ok);
708da5
                 if (scan->flags) {
708da5
                     if (deltanext) {
708da5
 			FAIL("Variable length lookbehind not implemented");
708da5
@@ -5681,7 +5696,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
708da5
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
708da5
                                         &deltanext, last, &data_fake,
708da5
                                         stopparen, recursed_depth, NULL,
708da5
-                                        f,depth+1);
708da5
+                                        f, depth+1, mutate_ok);
708da5
                 if (scan->flags) {
708da5
                     if (deltanext) {
708da5
 			FAIL("Variable length lookbehind not implemented");
708da5
@@ -5841,7 +5856,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
708da5
                            branches even though they arent otherwise used. */
708da5
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
708da5
                             &deltanext, (regnode *)nextbranch, &data_fake,
708da5
-                            stopparen, recursed_depth, NULL, f,depth+1);
708da5
+                            stopparen, recursed_depth, NULL, f, depth+1,
708da5
+                            mutate_ok);
708da5
                     }
708da5
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
708da5
                         nextbranch= regnext((regnode*)nextbranch);
708da5
@@ -7524,7 +7540,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
708da5
             &data, -1, 0, NULL,
708da5
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
708da5
                           | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
708da5
-            0);
708da5
+            0, TRUE);
708da5
 
708da5
 
708da5
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
708da5
@@ -7670,7 +7686,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
708da5
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
708da5
                                                       ? SCF_TRIE_DOING_RESTUDY
708da5
                                                       : 0),
708da5
-            0);
708da5
+            0, TRUE);
708da5
 
708da5
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
708da5
 
708da5
diff --git a/t/re/pat.t b/t/re/pat.t
708da5
index 1d98fe77d7..1488259b02 100644
708da5
--- a/t/re/pat.t
708da5
+++ b/t/re/pat.t
708da5
@@ -23,7 +23,7 @@ BEGIN {
708da5
     skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
708da5
     skip_all_without_unicode_tables();
708da5
 
708da5
-plan tests => 840;  # Update this when adding/deleting tests.
708da5
+plan tests => 844;  # Update this when adding/deleting tests.
708da5
 
708da5
 run_tests() unless caller;
708da5
 
708da5
@@ -1929,6 +1929,30 @@ EOP
708da5
         fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly");
708da5
     }
708da5
 
708da5
+    # gh16947: test regexp corruption (GOSUB)
708da5
+    {
708da5
+        fresh_perl_is(q{
708da5
+            'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
708da5
+        }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
708da5
+    }
708da5
+    # gh16947: test fix doesn't break SUSPEND
708da5
+    {
708da5
+        fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
708da5
+                'ok', {}, "gh16947: test fix doesn't break SUSPEND");
708da5
+    }
708da5
+
708da5
+    # gh17743: more regexp corruption via GOSUB
708da5
+    {
708da5
+        fresh_perl_is(q{
708da5
+            "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
708da5
+        }, 'ok', {}, 'gh17743: test regexp corruption (1)');
708da5
+
708da5
+        fresh_perl_is(q{
708da5
+            "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
708da5
+            print "ok"
708da5
+        }, 'ok', {}, 'gh17743: test regexp corruption (2)');
708da5
+    }
708da5
+
708da5
 } # End of sub run_tests
708da5
 
708da5
 1;
708da5
-- 
708da5
2.20.1
708da5