dcb3b7
From a08fa6fd157fd0d61da7f20f07b939fbc302c2c6 Mon Sep 17 00:00:00 2001
dcb3b7
From: Hugo van der Sanden <hv@crypt.org>
dcb3b7
Date: Wed, 5 Oct 2016 12:56:05 +0100
dcb3b7
Subject: [PATCH] [perl #129377] don't read past start of string for unmatched
dcb3b7
 backref
dcb3b7
MIME-Version: 1.0
dcb3b7
Content-Type: text/plain; charset=UTF-8
dcb3b7
Content-Transfer-Encoding: 8bit
dcb3b7
dcb3b7
Ported to 5.24.1:
dcb3b7
dcb3b7
commit 2dfc11ec3af312f4fa3eb244077c79dbb5fc2d85
dcb3b7
Author: Hugo van der Sanden <hv@crypt.org>
dcb3b7
Date:   Wed Oct 5 12:56:05 2016 +0100
dcb3b7
dcb3b7
    [perl #129377] don't read past start of string for unmatched backref
dcb3b7
dcb3b7
    We can have (start, end) == (0, -1) for an unmatched backref, we must
dcb3b7
    check for that.
dcb3b7
dcb3b7
Signed-off-by: Petr Písař <ppisar@redhat.com>
dcb3b7
---
dcb3b7
 regexec.c  | 10 ++++++----
dcb3b7
 t/re/pat.t | 16 +++++++++++++++-
dcb3b7
 2 files changed, 21 insertions(+), 5 deletions(-)
dcb3b7
dcb3b7
diff --git a/regexec.c b/regexec.c
dcb3b7
index a5d5db4..a7bc0c3 100644
dcb3b7
--- a/regexec.c
dcb3b7
+++ b/regexec.c
dcb3b7
@@ -5179,6 +5179,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
dcb3b7
     regnode *next;
dcb3b7
     U32 n = 0;	/* general value; init to avoid compiler warning */
dcb3b7
     SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
dcb3b7
+    SSize_t endref = 0; /* offset of end of backref when ln is start */
dcb3b7
     char *locinput = startpos;
dcb3b7
     char *pushinput; /* where to continue after a PUSH */
dcb3b7
     I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
dcb3b7
@@ -6489,10 +6490,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
dcb3b7
 
dcb3b7
 	  do_nref_ref_common:
dcb3b7
 	    ln = rex->offs[n].start;
dcb3b7
+	    endref = rex->offs[n].end;
dcb3b7
 	    reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
dcb3b7
-	    if (rex->lastparen < n || ln == -1)
dcb3b7
+	    if (rex->lastparen < n || ln == -1 || endref == -1)
dcb3b7
 		sayNO;			/* Do not match unless seen CLOSEn. */
dcb3b7
-	    if (ln == rex->offs[n].end)
dcb3b7
+	    if (ln == endref)
dcb3b7
 		break;
dcb3b7
 
dcb3b7
 	    s = reginfo->strbeg + ln;
dcb3b7
@@ -6506,7 +6508,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
dcb3b7
                     * not going off the end given by reginfo->strend, and
dcb3b7
                     * returns in <limit> upon success, how much of the
dcb3b7
                     * current input was matched */
dcb3b7
-		if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
dcb3b7
+		if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
dcb3b7
 				    locinput, &limit, 0, utf8_target, utf8_fold_flags))
dcb3b7
 		{
dcb3b7
 		    sayNO;
dcb3b7
@@ -6521,7 +6523,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
dcb3b7
 		(type == REF ||
dcb3b7
 		 UCHARAT(s) != fold_array[nextchr]))
dcb3b7
 		sayNO;
dcb3b7
-	    ln = rex->offs[n].end - ln;
dcb3b7
+	    ln = endref - ln;
dcb3b7
 	    if (locinput + ln > reginfo->strend)
dcb3b7
 		sayNO;
dcb3b7
 	    if (ln > 1 && (type == REF
dcb3b7
diff --git a/t/re/pat.t b/t/re/pat.t
dcb3b7
index 4aa77cf..749edd0 100644
dcb3b7
--- a/t/re/pat.t
dcb3b7
+++ b/t/re/pat.t
dcb3b7
@@ -23,7 +23,7 @@ BEGIN {
dcb3b7
     skip_all_without_unicode_tables();
dcb3b7
 }
dcb3b7
 
dcb3b7
-plan tests => 791;  # Update this when adding/deleting tests.
dcb3b7
+plan tests => 792;  # Update this when adding/deleting tests.
dcb3b7
 
dcb3b7
 run_tests() unless caller;
dcb3b7
 
dcb3b7
@@ -1765,6 +1765,20 @@ EOP
dcb3b7
             utf8::upgrade($str);
dcb3b7
             ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - utf8 case" );
dcb3b7
         }
dcb3b7
+    {
dcb3b7
+	# [perl #129377] backref to an unmatched capture should not cause
dcb3b7
+	# reading before start of string.
dcb3b7
+	SKIP: {
dcb3b7
+	    skip "no re-debug under miniperl" if is_miniperl;
dcb3b7
+	    my $prog = <<'EOP';
dcb3b7
+use re qw(Debug EXECUTE);
dcb3b7
+"x" =~ m{ () y | () \1 }x;
dcb3b7
+EOP
dcb3b7
+	    fresh_perl_like($prog, qr{
dcb3b7
+		\A (?! .* ^ \s+ - )
dcb3b7
+	    }msx, { stderr => 1 }, "Offsets in debug output are not negative");
dcb3b7
+	}
dcb3b7
+    }
dcb3b7
 } # End of sub run_tests
dcb3b7
 
dcb3b7
 1;
dcb3b7
-- 
dcb3b7
2.7.4
dcb3b7