From d6f7a97d2c1dfcedd728d8b2ac1a07e953922950 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Fri, 18 Dec 2020 14:55:16 +0100
Subject: [PATCH] study_chunk: avoid mutating regexp program within GOSUB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
gh16947 and gh17743: studying GOSUB may restudy in an inner call
(via a mix of recursion and enframing) something that an outer call
is in the middle of looking at. Let the outer frame deal with it.
(CVE-2020-12723)
Jitka Plesnikova, Petr Pisar: Ported from
3f4ba871d2d397dcd4386ed75e05353c36135c29 to 5.16.3.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
embed.fnc | 2 +-
embed.h | 2 +-
proto.h | 2 +-
regcomp.c | 42 ++++++++++++++++++++++++++++++------------
t/re/pat.t | 26 +++++++++++++++++++++++++-
5 files changed, 58 insertions(+), 16 deletions(-)
diff --git a/embed.fnc b/embed.fnc
index 9546555..019afa9 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1951,7 +1951,7 @@ Es |I32 |study_chunk |NN struct RExC_state_t *pRExC_state \
|NULLOK struct scan_data_t *data \
|I32 stopparen|NULLOK U8* recursed \
|NULLOK struct regnode_charclass_class *and_withp \
- |U32 flags|U32 depth
+ |U32 flags|U32 depth|bool was_mutate_ok
EsRn |U32 |add_data |NN struct RExC_state_t *pRExC_state|U32 n \
|NN const char *s
rs |void |re_croak2 |NN const char* pat1|NN const char* pat2|...
diff --git a/embed.h b/embed.h
index f7db1e0..75f326e 100644
--- a/embed.h
+++ b/embed.h
@@ -952,7 +952,7 @@
#define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d)
#define set_regclass_bit(a,b,c,d,e) S_set_regclass_bit(aTHX_ a,b,c,d,e)
#define set_regclass_bit_fold(a,b,c,d,e) S_set_regclass_bit_fold(aTHX_ a,b,c,d,e)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
#define _core_swash_init(a,b,c,d,e,f,g,h) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g,h)
diff --git a/proto.h b/proto.h
index 143eee0..1b2ece6 100644
--- a/proto.h
+++ b/proto.h
@@ -6597,7 +6597,7 @@ STATIC U8 S_set_regclass_bit_fold(pTHX_ struct RExC_state_t *pRExC_state, regnod
#define PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD \
assert(pRExC_state); assert(node); assert(invlist_ptr); assert(alternate_ptr)
-STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth)
+STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth, bool was_mutate_ok)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3)
diff --git a/regcomp.c b/regcomp.c
index 2254159..57505f3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2934,6 +2934,7 @@ typedef struct scan_frame {
regnode *next; /* next node to process when last is reached */
struct scan_frame *prev; /*previous frame*/
I32 stop; /* what stopparen do we use */
+ bool in_gosub; /* this or an outer frame is for GOSUB */
} scan_frame;
@@ -2975,7 +2976,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 stopparen,
U8* recursed,
struct regnode_charclass_class *and_withp,
- U32 flags, U32 depth)
+ U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
@@ -3016,6 +3017,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
length to get a real minimum (because the
folded version may be shorter) */
bool has_exactf_sharp_s = FALSE;
+ /* avoid mutating ops if we are anywhere within the recursed or
+ * enframed handling for a GOSUB: the outermost level will handle it.
+ */
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
+
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep:", data,depth);
DEBUG_PEEP("Peep",scan,depth);
@@ -3023,7 +3029,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* Its not clear to khw or hv why this is done here, and not in the
* clauses that deal with EXACT nodes. khw's guess is that it's
* because of a previous design */
- JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0);
+ if (mutate_ok)
+ JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0);
+
/* Follow the next-chain of the current node and optimize
away all the NOTHINGs from it. */
@@ -3100,7 +3108,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* we suppose the run is continuous, last=next...*/
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
next, &data_fake,
- stopparen, recursed, NULL, f,depth+1);
+ stopparen, recursed, NULL, f,depth+1,
+ mutate_ok);
if (min1 > minnext)
min1 = minnext;
if (max1 < minnext + deltanext)
@@ -3161,7 +3170,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
- if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH ) {
+ if (PERL_ENABLE_TRIE_OPTIMISATION
+ && OP( startbranch ) == BRANCH
+ && mutate_ok) {
+
/* demq.
Assuming this was/is a branch we are dealing with: 'scan' now
@@ -3481,6 +3493,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
newframe->last = last;
newframe->stop = stopparen;
newframe->prev = frame;
+ newframe->in_gosub = (
+ (frame && frame->in_gosub) || (OP(scan) == GOSUB || OP(scan) == GOSTART)
+ );
frame = newframe;
scan = start;
@@ -3792,7 +3807,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
last, data, stopparen, recursed, NULL,
(mincount == 0
- ? (f & ~SCF_DO_SUBSTR) : f),depth+1);
+ ? (f & ~SCF_DO_SUBSTR) : f),depth+1,mutate_ok);
if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
@@ -3844,7 +3859,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if ( OP(oscan) == CURLYX && data
&& data->flags & SF_IN_PAR
&& !(data->flags & SF_HAS_EVAL)
- && !deltanext && minnext == 1 ) {
+ && !deltanext && minnext == 1
+ && mutate_ok
+ ) {
/* Try to optimize to CURLYN. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
regnode * const nxt1 = nxt;
@@ -3890,6 +3907,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
&& !(data->flags & SF_HAS_EVAL)
&& !deltanext /* atom is fixed width */
&& minnext != 0 /* CURLYM can't handle zero width */
+ && mutate_ok
) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
@@ -3936,7 +3954,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
#endif
/* Optimize again: */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
- NULL, stopparen, recursed, NULL, 0,depth+1);
+ NULL, stopparen, recursed, NULL, 0,depth+1,mutate_ok);
}
else
oscan->flags = 0;
@@ -4376,7 +4394,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
next = regnext(scan);
nscan = NEXTOPER(NEXTOPER(scan));
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f, depth+1);
+ last, &data_fake, stopparen, recursed, NULL, f, depth+1,mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -4462,7 +4480,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
nscan = NEXTOPER(NEXTOPER(scan));
*minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f,depth+1);
+ last, &data_fake, stopparen, recursed, NULL, f,depth+1,mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -4625,7 +4643,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
*/
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed, NULL, f,depth+1);
+ stopparen, recursed, NULL, f,depth+1,mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
@@ -5442,7 +5460,7 @@ reStudy:
minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */
&data, -1, NULL, NULL,
- SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
+ SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0,TRUE);
CHECK_RESTUDY_GOTO;
@@ -5627,7 +5645,7 @@ reStudy:
minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
- &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
+ &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0,TRUE);
CHECK_RESTUDY_GOTO;
diff --git a/t/re/pat.t b/t/re/pat.t
index faddbc5..e328fbd 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -19,7 +19,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 472; # Update this when adding/deleting tests.
+plan tests => 476; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1260,6 +1260,30 @@ EOP
use re '/aa';
unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa";
}
+ # gh16947: test regexp corruption (GOSUB)
+ {
+ fresh_perl_is(q{
+ 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
+ }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
+ }
+ # gh16947: test fix doesn't break SUSPEND
+ {
+ fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
+ 'ok', {}, "gh16947: test fix doesn't break SUSPEND");
+ }
+
+ # gh17743: more regexp corruption via GOSUB
+ {
+ fresh_perl_is(q{
+ "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (1)');
+
+ fresh_perl_is(q{
+ "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
+ print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (2)');
+ }
+
} # End of sub run_tests
1;
--
2.26.2