f6ea51
From dc1f8f6b581a8e4efbb782398ab3e7c3a52b062f Mon Sep 17 00:00:00 2001
f6ea51
From: Karl Williamson <khw@cpan.org>
f6ea51
Date: Tue, 8 May 2018 12:13:18 -0600
f6ea51
Subject: [PATCH] PATCH: [perl #133185] Infinite loop in qr//
f6ea51
MIME-Version: 1.0
f6ea51
Content-Type: text/plain; charset=UTF-8
f6ea51
Content-Transfer-Encoding: 8bit
f6ea51
f6ea51
This loop was inadvertently introduced as part of patches to fix
f6ea51
"[perl #132227 CVE-2018-6797] heap-buffer-overflow".  The commit in 5.27
f6ea51
responsible was f8fb8615ddc5a80e3bbd4386a8914497f921b62d.
f6ea51
f6ea51
To be vulnerable, the pattern must start out as /d (hence no use 5.012
f6ea51
or higher), and then there must be something that implicitly forces /u
f6ea51
(which the \pp does in the test case added by this patch), and then
f6ea51
(?aa), and then the code point \xDF (German Sharp S).  The /i must be
f6ea51
in effect by the time the DF is encountered, but it needn't come in the
f6ea51
(?aa), as it does in the test.
f6ea51
f6ea51
The problem is that the conditional that is testing that we switched
f6ea51
away from /d rules is assuming that this happened during the
f6ea51
construction of the current EXACTFish node.  The comments I wrote
f6ea51
indicate this assumption.  But this example shows that the switch can
f6ea51
come before this node started getting constructed, and so it loops.
f6ea51
f6ea51
The patch explicitly saves the state at the beginning of this node's
f6ea51
construction, and only retries if it changed during that construction.
f6ea51
Therefore the next time through, it will see that it hasn't changed
f6ea51
since the previous time, and won't loop.
f6ea51
f6ea51
Petr Písař: Ported to 5.26.2 from:
f6ea51
f6ea51
commit 0b9cb33b146b3eb55634853f883a880771dd1413
f6ea51
Author: Karl Williamson <khw@cpan.org>
f6ea51
Date:   Tue May 8 12:13:18 2018 -0600
f6ea51
f6ea51
    PATCH: [perl #133185] Infinite loop in qr//
f6ea51
f6ea51
Signed-off-by: Petr Písař <ppisar@redhat.com>
f6ea51
---
f6ea51
 regcomp.c    | 10 +++++++++-
f6ea51
 t/re/speed.t |  5 ++++-
f6ea51
 2 files changed, 13 insertions(+), 2 deletions(-)
f6ea51
f6ea51
diff --git a/regcomp.c b/regcomp.c
f6ea51
index 845e660..18fa465 100644
f6ea51
--- a/regcomp.c
f6ea51
+++ b/regcomp.c
f6ea51
@@ -13100,6 +13100,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
f6ea51
             bool maybe_exactfu = PASS2
f6ea51
                                && (node_type == EXACTF || node_type == EXACTFL);
f6ea51
 
f6ea51
+            /* To see if RExC_uni_semantics changes during parsing of the node.
f6ea51
+             * */
f6ea51
+            bool uni_semantics_at_node_start;
f6ea51
+
f6ea51
             /* If a folding node contains only code points that don't
f6ea51
              * participate in folds, it can be changed into an EXACT node,
f6ea51
              * which allows the optimizer more things to look for */
f6ea51
@@ -13147,6 +13151,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
f6ea51
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
f6ea51
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
f6ea51
 
f6ea51
+            uni_semantics_at_node_start = RExC_uni_semantics;
f6ea51
+
f6ea51
             /* Here, we have a literal character.  Find the maximal string of
f6ea51
              * them in the input that we can fit into a single EXACTish node.
f6ea51
              * We quit at the first non-literal or when the node gets full */
f6ea51
@@ -13550,7 +13556,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
f6ea51
                              * didn't think it needed to reparse.  But this
f6ea51
                              * sharp s now does indicate the need for
f6ea51
                              * reparsing. */
f6ea51
-                            if (RExC_uni_semantics) {
f6ea51
+                            if (   uni_semantics_at_node_start
f6ea51
+                                     != RExC_uni_semantics)
f6ea51
+                            {
f6ea51
                                 p = oldp;
f6ea51
                                 goto loopdone;
f6ea51
                             }
f6ea51
diff --git a/t/re/speed.t b/t/re/speed.t
f6ea51
index 4a4830f..9a57de1 100644
f6ea51
--- a/t/re/speed.t
f6ea51
+++ b/t/re/speed.t
f6ea51
@@ -24,7 +24,7 @@ BEGIN {
f6ea51
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
f6ea51
 skip_all_without_unicode_tables();
f6ea51
 
f6ea51
-plan tests => 58;  #** update watchdog timeouts proportionally when adding tests
f6ea51
+plan tests => 59;  #** update watchdog timeouts proportionally when adding tests
f6ea51
 
f6ea51
 use strict;
f6ea51
 use warnings;
f6ea51
@@ -156,6 +156,9 @@ PROG
f6ea51
         ok( $elapsed <= 1, "should not COW on long string with substr and m//g");
f6ea51
     }
f6ea51
 
f6ea51
+    # [perl #133185] Infinite loop
f6ea51
+    like("!\xdf", eval 'qr/\pp(?aai)\xdf/',
f6ea51
+         'Compiling qr/\pp(?aai)\xdf/ doesn\'t loop');
f6ea51
 
f6ea51
 } # End of sub run_tests
f6ea51
 
f6ea51
-- 
f6ea51
2.14.3
f6ea51