b8c914
From dc1f8f6b581a8e4efbb782398ab3e7c3a52b062f Mon Sep 17 00:00:00 2001
b8c914
From: Karl Williamson <khw@cpan.org>
b8c914
Date: Tue, 8 May 2018 12:13:18 -0600
b8c914
Subject: [PATCH] PATCH: [perl #133185] Infinite loop in qr//
b8c914
MIME-Version: 1.0
b8c914
Content-Type: text/plain; charset=UTF-8
b8c914
Content-Transfer-Encoding: 8bit
b8c914
b8c914
This loop was inadvertently introduced as part of patches to fix
b8c914
"[perl #132227 CVE-2018-6797] heap-buffer-overflow".  The commit in 5.27
b8c914
responsible was f8fb8615ddc5a80e3bbd4386a8914497f921b62d.
b8c914
b8c914
To be vulnerable, the pattern must start out as /d (hence no use 5.012
b8c914
or higher), and then there must be something that implicitly forces /u
b8c914
(which the \pp does in the test case added by this patch), and then
b8c914
(?aa), and then the code point \xDF (German Sharp S).  The /i must be
b8c914
in effect by the time the DF is encountered, but it needn't come in the
b8c914
(?aa), as it does in the test.
b8c914
b8c914
The problem is that the conditional that is testing that we switched
b8c914
away from /d rules is assuming that this happened during the
b8c914
construction of the current EXACTFish node.  The comments I wrote
b8c914
indicate this assumption.  But this example shows that the switch can
b8c914
come before this node started getting constructed, and so it loops.
b8c914
b8c914
The patch explicitly saves the state at the beginning of this node's
b8c914
construction, and only retries if it changed during that construction.
b8c914
Therefore the next time through, it will see that it hasn't changed
b8c914
since the previous time, and won't loop.
b8c914
b8c914
Petr Písař: Ported to 5.26.2 from:
b8c914
b8c914
commit 0b9cb33b146b3eb55634853f883a880771dd1413
b8c914
Author: Karl Williamson <khw@cpan.org>
b8c914
Date:   Tue May 8 12:13:18 2018 -0600
b8c914
b8c914
    PATCH: [perl #133185] Infinite loop in qr//
b8c914
b8c914
Signed-off-by: Petr Písař <ppisar@redhat.com>
b8c914
---
b8c914
 regcomp.c    | 10 +++++++++-
b8c914
 t/re/speed.t |  5 ++++-
b8c914
 2 files changed, 13 insertions(+), 2 deletions(-)
b8c914
b8c914
diff --git a/regcomp.c b/regcomp.c
b8c914
index 845e660..18fa465 100644
b8c914
--- a/regcomp.c
b8c914
+++ b/regcomp.c
b8c914
@@ -13100,6 +13100,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
b8c914
             bool maybe_exactfu = PASS2
b8c914
                                && (node_type == EXACTF || node_type == EXACTFL);
b8c914
 
b8c914
+            /* To see if RExC_uni_semantics changes during parsing of the node.
b8c914
+             * */
b8c914
+            bool uni_semantics_at_node_start;
b8c914
+
b8c914
             /* If a folding node contains only code points that don't
b8c914
              * participate in folds, it can be changed into an EXACT node,
b8c914
              * which allows the optimizer more things to look for */
b8c914
@@ -13147,6 +13151,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
b8c914
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
b8c914
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
b8c914
 
b8c914
+            uni_semantics_at_node_start = RExC_uni_semantics;
b8c914
+
b8c914
             /* Here, we have a literal character.  Find the maximal string of
b8c914
              * them in the input that we can fit into a single EXACTish node.
b8c914
              * We quit at the first non-literal or when the node gets full */
b8c914
@@ -13550,7 +13556,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
b8c914
                              * didn't think it needed to reparse.  But this
b8c914
                              * sharp s now does indicate the need for
b8c914
                              * reparsing. */
b8c914
-                            if (RExC_uni_semantics) {
b8c914
+                            if (   uni_semantics_at_node_start
b8c914
+                                     != RExC_uni_semantics)
b8c914
+                            {
b8c914
                                 p = oldp;
b8c914
                                 goto loopdone;
b8c914
                             }
b8c914
diff --git a/t/re/speed.t b/t/re/speed.t
b8c914
index 4a4830f..9a57de1 100644
b8c914
--- a/t/re/speed.t
b8c914
+++ b/t/re/speed.t
b8c914
@@ -24,7 +24,7 @@ BEGIN {
b8c914
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
b8c914
 skip_all_without_unicode_tables();
b8c914
 
b8c914
-plan tests => 58;  #** update watchdog timeouts proportionally when adding tests
b8c914
+plan tests => 59;  #** update watchdog timeouts proportionally when adding tests
b8c914
 
b8c914
 use strict;
b8c914
 use warnings;
b8c914
@@ -156,6 +156,9 @@ PROG
b8c914
         ok( $elapsed <= 1, "should not COW on long string with substr and m//g");
b8c914
     }
b8c914
 
b8c914
+    # [perl #133185] Infinite loop
b8c914
+    like("!\xdf", eval 'qr/\pp(?aai)\xdf/',
b8c914
+         'Compiling qr/\pp(?aai)\xdf/ doesn\'t loop');
b8c914
 
b8c914
 } # End of sub run_tests
b8c914
 
b8c914
-- 
b8c914
2.14.3
b8c914