Blame SOURCES/perl-5.24.3-perl-132063-Heap-buffer-overflow.patch

2c161b
From 29231d73407542051a287cab5e18546e5a622f4a Mon Sep 17 00:00:00 2001
2c161b
From: Karl Williamson <khw@cpan.org>
2c161b
Date: Tue, 6 Feb 2018 14:50:48 -0700
2c161b
Subject: [perl #132063]: Heap buffer overflow
2c161b
2c161b
The proximal cause is several instances in regexec.c of the code
2c161b
assuming that the input was valid UTF-8, whereas the input was too short
2c161b
for what the start byte claimed it would be.
2c161b
2c161b
I grepped through the core for any other similar uses, and did not find
2c161b
any.
2c161b
---
2c161b
 regexec.c              | 29 ++++++++++++++++-------------
2c161b
 t/lib/warnings/regexec |  7 +++++++
2c161b
 2 files changed, 23 insertions(+), 13 deletions(-)
2c161b
2c161b
diff --git a/regexec.c b/regexec.c
2c161b
index 5735b997fd..ea432c39d3 100644
2c161b
--- a/regexec.c
2c161b
+++ b/regexec.c
2c161b
@@ -1466,7 +1466,9 @@ Perl_re_intuit_start(pTHX_
2c161b
                                            ? trie_utf8_fold                         \
2c161b
                                            :   trie_latin_utf8_fold)))
2c161b
 
2c161b
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
2c161b
+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
2c161b
+ * 'foldbuf+sizeof(foldbuf)' */
2c161b
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
2c161b
 STMT_START {                                                                        \
2c161b
     STRLEN skiplen;                                                                 \
2c161b
     U8 flags = FOLD_FLAGS_FULL;                                                     \
2c161b
@@ -1474,7 +1476,7 @@ STMT_START {
2c161b
     case trie_flu8:                                                                 \
2c161b
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
2c161b
         if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
2c161b
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
2c161b
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc);                \
2c161b
         }                                                                           \
2c161b
         goto do_trie_utf8_fold;                                                     \
2c161b
     case trie_utf8_exactfa_fold:                                                    \
2c161b
@@ -1483,7 +1485,7 @@ STMT_START {
2c161b
     case trie_utf8_fold:                                                            \
2c161b
       do_trie_utf8_fold:                                                            \
2c161b
         if ( foldlen>0 ) {                                                          \
2c161b
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
2c161b
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
2c161b
             foldlen -= len;                                                         \
2c161b
             uscan += len;                                                           \
2c161b
             len=0;                                                                  \
2c161b
@@ -1500,7 +1502,7 @@ STMT_START {
2c161b
         /* FALLTHROUGH */                                                           \
2c161b
     case trie_latin_utf8_fold:                                                      \
2c161b
         if ( foldlen>0 ) {                                                          \
2c161b
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
2c161b
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
2c161b
             foldlen -= len;                                                         \
2c161b
             uscan += len;                                                           \
2c161b
             len=0;                                                                  \
2c161b
@@ -1519,7 +1521,7 @@ STMT_START {
2c161b
         }                                                                           \
2c161b
         /* FALLTHROUGH */                                                           \
2c161b
     case trie_utf8:                                                                 \
2c161b
-        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
2c161b
+        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags );        \
2c161b
         break;                                                                      \
2c161b
     case trie_plain:                                                                \
2c161b
         uvc = (UV)*uc;                                                              \
2c161b
@@ -2599,10 +2601,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
2c161b
                     }
2c161b
                     points[pointpos++ % maxlen]= uc;
2c161b
                     if (foldlen || uc < (U8*)strend) {
2c161b
-                        REXEC_TRIE_READ_CHAR(trie_type, trie,
2c161b
-                                         widecharmap, uc,
2c161b
-                                         uscan, len, uvc, charid, foldlen,
2c161b
-                                         foldbuf, uniflags);
2c161b
+                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
2c161b
+                                             (U8 *) strend, uscan, len, uvc,
2c161b
+                                             charid, foldlen, foldbuf,
2c161b
+                                             uniflags);
2c161b
                         DEBUG_TRIE_EXECUTE_r({
2c161b
                             dump_exec_pos( (char *)uc, c, strend,
2c161b
                                         real_start, s, utf8_target, 0);
2c161b
@@ -5511,8 +5513,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
2c161b
 		    if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
2c161b
 			I32 offset;
2c161b
 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
2c161b
-					     uscan, len, uvc, charid, foldlen,
2c161b
-					     foldbuf, uniflags);
2c161b
+                                             (U8 *) reginfo->strend, uscan,
2c161b
+                                             len, uvc, charid, foldlen,
2c161b
+                                             foldbuf, uniflags);
2c161b
 			charcount++;
2c161b
 			if (foldlen>0)
2c161b
 			    ST.longfold = TRUE;
2c161b
@@ -5642,8 +5645,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
2c161b
 			while (foldlen) {
2c161b
 			    if (!--chars)
2c161b
 				break;
2c161b
-			    uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
2c161b
-					    uniflags);
2c161b
+			    uvc = utf8n_to_uvchr(uscan, foldlen, &len,
2c161b
+                                                 uniflags);
2c161b
 			    uscan += len;
2c161b
 			    foldlen -= len;
2c161b
 			}
2c161b
diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec
2c161b
index 900dd6ee7f..6635142dea 100644
2c161b
--- a/t/lib/warnings/regexec
2c161b
+++ b/t/lib/warnings/regexec
2c161b
@@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale);
2c161b
 "k" =~ /(?[ \N{KELVIN SIGN} ])/i;
2c161b
 ":" =~ /(?[ \: ])/;
2c161b
 EXPECT
2c161b
+########
2c161b
+# NAME perl #132063, read beyond buffer end
2c161b
+# OPTION fatal
2c161b
+"\xff" =~ /(?il)\x{100}|\x{100}/;
2c161b
+EXPECT
2c161b
+Malformed UTF-8 character: \xff (too short; 1 byte available, need 13) in pattern match (m//) at - line 2.
2c161b
+Malformed UTF-8 character (fatal) at - line 2.
2c161b
-- 
2c161b
2.11.0
2c161b