2eef5a
From 0a320d753fe7fca03df259a4dfd8e641e51edaa8 Mon Sep 17 00:00:00 2001
2eef5a
From: Hugo van der Sanden <hv@crypt.org>
2eef5a
Date: Tue, 18 Feb 2020 13:51:16 +0000
2eef5a
Subject: [PATCH 1/2] study_chunk: extract rck_elide_nothing
2eef5a
MIME-Version: 1.0
2eef5a
Content-Type: text/plain; charset=UTF-8
2eef5a
Content-Transfer-Encoding: 8bit
2eef5a
2eef5a
(CVE-2020-10878)
2eef5a
2eef5a
(cherry picked from commit 93dee06613d4e1428fb10905ce1c3c96f53113dc)
2eef5a
Signed-off-by: Petr Písař <ppisar@redhat.com>
2eef5a
---
2eef5a
 embed.fnc |  1 +
2eef5a
 embed.h   |  1 +
2eef5a
 proto.h   |  3 +++
2eef5a
 regcomp.c | 70 ++++++++++++++++++++++++++++++++++---------------------
2eef5a
 4 files changed, 48 insertions(+), 27 deletions(-)
2eef5a
2eef5a
diff --git a/embed.fnc b/embed.fnc
2eef5a
index aedb4baef1..d7cd04d3fc 100644
2eef5a
--- a/embed.fnc
2eef5a
+++ b/embed.fnc
2eef5a
@@ -2481,6 +2481,7 @@ Es	|SSize_t|study_chunk	|NN RExC_state_t *pRExC_state \
2eef5a
                                 |I32 stopparen|U32 recursed_depth \
2eef5a
 				|NULLOK regnode_ssc *and_withp \
2eef5a
 				|U32 flags|U32 depth
2eef5a
+Es	|void	|rck_elide_nothing|NN regnode *node
2eef5a
 EsR	|SV *	|get_ANYOFM_contents|NN const regnode * n
2eef5a
 EsRn	|U32	|add_data	|NN RExC_state_t* const pRExC_state \
2eef5a
 				|NN const char* const s|const U32 n
2eef5a
diff --git a/embed.h b/embed.h
2eef5a
index 75c91f77f4..356a8b98d9 100644
2eef5a
--- a/embed.h
2eef5a
+++ b/embed.h
2eef5a
@@ -1208,6 +1208,7 @@
2eef5a
 #define parse_lparen_question_flags(a)	S_parse_lparen_question_flags(aTHX_ a)
2eef5a
 #define parse_uniprop_string(a,b,c,d,e,f,g,h,i)	Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i)
2eef5a
 #define populate_ANYOF_from_invlist(a,b)	S_populate_ANYOF_from_invlist(aTHX_ a,b)
2eef5a
+#define rck_elide_nothing(a)	S_rck_elide_nothing(aTHX_ a)
2eef5a
 #define reg(a,b,c,d)		S_reg(aTHX_ a,b,c,d)
2eef5a
 #define reg2Lanode(a,b,c,d)	S_reg2Lanode(aTHX_ a,b,c,d)
2eef5a
 #define reg_node(a,b)		S_reg_node(aTHX_ a,b)
2eef5a
diff --git a/proto.h b/proto.h
2eef5a
index 141ddbaee6..f316fe134e 100644
2eef5a
--- a/proto.h
2eef5a
+++ b/proto.h
2eef5a
@@ -5543,6 +5543,9 @@ PERL_CALLCONV SV *	Perl_parse_uniprop_string(pTHX_ const char * const name, cons
2eef5a
 STATIC void	S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
2eef5a
 #define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST	\
2eef5a
 	assert(node); assert(invlist_ptr)
2eef5a
+STATIC void	S_rck_elide_nothing(pTHX_ regnode *node);
2eef5a
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING	\
2eef5a
+	assert(node)
2eef5a
 PERL_STATIC_NO_RET void	S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...)
2eef5a
 			__attribute__noreturn__;
2eef5a
 #define PERL_ARGS_ASSERT_RE_CROAK2	\
2eef5a
diff --git a/regcomp.c b/regcomp.c
2eef5a
index 5f86be8086..4ba2980db6 100644
2eef5a
--- a/regcomp.c
2eef5a
+++ b/regcomp.c
2eef5a
@@ -4450,6 +4450,44 @@ S_unwind_scan_frames(pTHX_ const void *p)
2eef5a
     } while (f);
2eef5a
 }
2eef5a
 
2eef5a
+/* Follow the next-chain of the current node and optimize away
2eef5a
+   all the NOTHINGs from it.
2eef5a
+ */
2eef5a
+STATIC void
2eef5a
+S_rck_elide_nothing(pTHX_ regnode *node)
2eef5a
+{
2eef5a
+    dVAR;
2eef5a
+
2eef5a
+    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
2eef5a
+
2eef5a
+    if (OP(node) != CURLYX) {
2eef5a
+        const int max = (reg_off_by_arg[OP(node)]
2eef5a
+                        ? I32_MAX
2eef5a
+                          /* I32 may be smaller than U16 on CRAYs! */
2eef5a
+                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
2eef5a
+        int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
2eef5a
+        int noff;
2eef5a
+        regnode *n = node;
2eef5a
+
2eef5a
+        /* Skip NOTHING and LONGJMP. */
2eef5a
+        while (
2eef5a
+            (n = regnext(n))
2eef5a
+            && (
2eef5a
+                (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
2eef5a
+                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
2eef5a
+            )
2eef5a
+            && off + noff < max
2eef5a
+        ) {
2eef5a
+            off += noff;
2eef5a
+        }
2eef5a
+        if (reg_off_by_arg[OP(node)])
2eef5a
+            ARG(node) = off;
2eef5a
+        else
2eef5a
+            NEXT_OFF(node) = off;
2eef5a
+    }
2eef5a
+    return;
2eef5a
+}
2eef5a
+
2eef5a
 /* the return from this sub is the minimum length that could possibly match */
2eef5a
 STATIC SSize_t
2eef5a
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
2eef5a
@@ -4550,28 +4588,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
2eef5a
          */
2eef5a
         JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
2eef5a
 
2eef5a
-	/* Follow the next-chain of the current node and optimize
2eef5a
-	   away all the NOTHINGs from it.  */
2eef5a
-	if (OP(scan) != CURLYX) {
2eef5a
-	    const int max = (reg_off_by_arg[OP(scan)]
2eef5a
-		       ? I32_MAX
2eef5a
-		       /* I32 may be smaller than U16 on CRAYs! */
2eef5a
-		       : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
2eef5a
-	    int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
2eef5a
-	    int noff;
2eef5a
-	    regnode *n = scan;
2eef5a
-
2eef5a
-	    /* Skip NOTHING and LONGJMP. */
2eef5a
-	    while ((n = regnext(n))
2eef5a
-		   && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
2eef5a
-		       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
2eef5a
-		   && off + noff < max)
2eef5a
-		off += noff;
2eef5a
-	    if (reg_off_by_arg[OP(scan)])
2eef5a
-		ARG(scan) = off;
2eef5a
-	    else
2eef5a
-		NEXT_OFF(scan) = off;
2eef5a
-	}
2eef5a
+        /* Follow the next-chain of the current node and optimize
2eef5a
+           away all the NOTHINGs from it.
2eef5a
+         */
2eef5a
+        rck_elide_nothing(scan);
2eef5a
 
2eef5a
 	/* The principal pseudo-switch.  Cannot be a switch, since we
2eef5a
 	   look into several different things.  */
2eef5a
@@ -5745,11 +5765,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
2eef5a
 		if (data && (fl & SF_HAS_EVAL))
2eef5a
 		    data->flags |= SF_HAS_EVAL;
2eef5a
 	      optimize_curly_tail:
2eef5a
-		if (OP(oscan) != CURLYX) {
2eef5a
-		    while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
2eef5a
-			   && NEXT_OFF(next))
2eef5a
-			NEXT_OFF(oscan) += NEXT_OFF(next);
2eef5a
-		}
2eef5a
+		rck_elide_nothing(oscan);
2eef5a
 		continue;
2eef5a
 
2eef5a
 	    default:
2eef5a
-- 
2eef5a
2.25.4
2eef5a