From 1a076cf2730d3a1fbb174af6f56c554691f4cdc3 Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Mon, 19 Feb 2018 13:49:46 +1100
Subject: [PATCH] v5.24.3: fix TRIE_READ_CHAR and DECL_TRIE_TYPE to account for
 non-utf8 target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

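For an EXACTFLU8 trie, DECL_TRIE_TYPE previously selected trie_flu8 whether
or not the target string was UTF-8. It now selects trie_flu8 only for a
UTF-8 target and the new trie_flu8_latin otherwise; trie_flu8_latin checks
for a problematic locale and then shares the trie_latin_utf8_fold code path.
Because trie_flu8 is now reached only for UTF-8 targets, its explicit
utf8_target test is dropped.

Petr Pisar: Ported to 5.24.0.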

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 regexec.c     | 14 ++++++++++----
 t/re/re_tests |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/regexec.c b/regexec.c
index 3fd12c4..05e148f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1451,7 +1451,7 @@ Perl_re_intuit_start(pTHX_
 #define DECL_TRIE_TYPE(scan) \
     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold,       \
                  trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,              \
-                 trie_utf8l, trie_flu8 }                                            \
+                 trie_utf8l, trie_flu8, trie_flu8_latin }                           \
                     trie_type = ((scan->flags == EXACT)                             \
                                  ? (utf8_target ? trie_utf8 : trie_plain)           \
                                  : (scan->flags == EXACTL)                          \
@@ -1461,10 +1461,12 @@ Perl_re_intuit_start(pTHX_
                                          ? trie_utf8_exactfa_fold                   \
                                          : trie_latin_utf8_exactfa_fold)            \
                                       : (scan->flags == EXACTFLU8                   \
-                                         ? trie_flu8                                \
+                                         ? (utf8_target                             \
+                                           ? trie_flu8                              \
+                                           : trie_flu8_latin)                       \
                                          : (utf8_target                             \
                                            ? trie_utf8_fold                         \
-                                           :   trie_latin_utf8_fold)))
+                                           : trie_latin_utf8_fold)))
 
 /* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
  * 'foldbuf+sizeof(foldbuf)' */
@@ -1475,7 +1477,7 @@ STMT_START {
     switch (trie_type) {                                                            \
     case trie_flu8:                                                                 \
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
-        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
+        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                            \
             _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc);                \
         }                                                                           \
         goto do_trie_utf8_fold;                                                     \
@@ -1497,10 +1499,14 @@ STMT_START {
             uscan = foldbuf + skiplen;                                              \
         }                                                                           \
         break;                                                                      \
+    case trie_flu8_latin:                                                           \
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
+        goto do_trie_latin_utf8_fold;                                               \
     case trie_latin_utf8_exactfa_fold:                                              \
         flags |= FOLD_FLAGS_NOMIX_ASCII;                                            \
         /* FALLTHROUGH */                                                           \
     case trie_latin_utf8_fold:                                                      \
+      do_trie_latin_utf8_fold:                                                      \
         if ( foldlen>0 ) {                                                          \
             uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
diff --git a/t/re/re_tests b/t/re/re_tests
index 7e8522d..ab7ddbb 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1968,6 +1968,7 @@ ab(?#Comment){2}c	abbc	y	$&	abbc
 (?:.||)(?|)000000000@	000000000@	y	$&	000000000@		#  [perl #126405]
 aa$|a(?R)a|a	aaa	y	$&	aaa		# [perl 128420] recursive matches
 (?:\1|a)([bcd])\1(?:(?R)|e)\1	abbaccaddedcb	y	$&	abbaccaddedcb		# [perl 128420] recursive match with backreferences
+(?il)\x{100}|\x{100}|\x{FF}	\xFF	y	$&	\xFF
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
2.14.3