From d58a6811d830c2f2f850a03a18129c38cb732791 Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Tue, 13 Feb 2018 16:11:55 +1100
Subject: 5.26.1: fix TRIE_READ_CHAR and DECL_TRIE_TYPE to account for non-utf8
 target

---
 regexec.c     | 14 ++++++++++----
 t/re/re_tests |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/regexec.c b/regexec.c
index fa888823bd..cf81b07e30 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1472,7 +1472,7 @@ Perl_re_intuit_start(pTHX_
 #define DECL_TRIE_TYPE(scan) \
     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold,       \
                  trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,              \
-                 trie_utf8l, trie_flu8 }                                            \
+                 trie_utf8l, trie_flu8, trie_flu8_latin }                           \
                     trie_type = ((scan->flags == EXACT)                             \
                                  ? (utf8_target ? trie_utf8 : trie_plain)           \
                                  : (scan->flags == EXACTL)                          \
@@ -1482,10 +1482,12 @@ Perl_re_intuit_start(pTHX_
                                          ? trie_utf8_exactfa_fold                   \
                                          : trie_latin_utf8_exactfa_fold)            \
                                       : (scan->flags == EXACTFLU8                   \
-                                         ? trie_flu8                                \
+                                         ? (utf8_target                             \
+                                           ? trie_flu8                              \
+                                           : trie_flu8_latin)                       \
                                          : (utf8_target                             \
                                            ? trie_utf8_fold                         \
-                                           :   trie_latin_utf8_fold)))
+                                           : trie_latin_utf8_fold)))
 
 /* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
  * 'foldbuf+sizeof(foldbuf)' */
@@ -1496,7 +1498,7 @@ STMT_START {
     switch (trie_type) {                                                            \
     case trie_flu8:                                                                 \
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
-        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
+        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                            \
             _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc);                \
         }                                                                           \
         goto do_trie_utf8_fold;                                                     \
@@ -1519,10 +1521,14 @@ STMT_START {
             uscan = foldbuf + skiplen;                                              \
         }                                                                           \
         break;                                                                      \
+    case trie_flu8_latin:                                                           \
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
+        goto do_trie_latin_utf8_fold;                                               \
     case trie_latin_utf8_exactfa_fold:                                              \
         flags |= FOLD_FLAGS_NOMIX_ASCII;                                            \
         /* FALLTHROUGH */                                                           \
     case trie_latin_utf8_fold:                                                      \
+      do_trie_latin_utf8_fold:                                                      \
         if ( foldlen>0 ) {                                                          \
             uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
diff --git a/t/re/re_tests b/t/re/re_tests
index 410fceadac..78baed6ffc 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1985,6 +1985,7 @@ AB\s+\x{100}	AB \x{100}X	y	-	-
 /(?x)[a b]/xx	\N{SPACE}	yS	$&	 	# Note a space char here
 /(?xx)[a b]/x	\N{SPACE}	n	-	-
 /(?-x:[a b])/xx	\N{SPACE}	yS	$&	 	# Note a space char here
+(?il)\x{100}|\x{100}|\x{FF}	\xFF	y	$&	\xFF
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
2.11.0