|
|
2c161b |
From 29231d73407542051a287cab5e18546e5a622f4a Mon Sep 17 00:00:00 2001
|
|
|
2c161b |
From: Karl Williamson <khw@cpan.org>
|
|
|
2c161b |
Date: Tue, 6 Feb 2018 14:50:48 -0700
|
|
|
2c161b |
Subject: [perl #132063]: Heap buffer overflow
|
|
|
2c161b |
|
|
|
2c161b |
The proximal cause is several instances in regexec.c of the code
|
|
|
2c161b |
assuming that the input was valid UTF-8, whereas the input was too short
|
|
|
2c161b |
for what the start byte claimed it would be.
|
|
|
2c161b |
|
|
|
2c161b |
I grepped through the core for any other similar uses, and did not find
|
|
|
2c161b |
any.
|
|
|
2c161b |
---
|
|
|
2c161b |
regexec.c | 29 ++++++++++++++++-------------
|
|
|
2c161b |
t/lib/warnings/regexec | 7 +++++++
|
|
|
2c161b |
2 files changed, 23 insertions(+), 13 deletions(-)
|
|
|
2c161b |
|
|
|
2c161b |
diff --git a/regexec.c b/regexec.c
|
|
|
2c161b |
index 5735b997fd..ea432c39d3 100644
|
|
|
2c161b |
--- a/regexec.c
|
|
|
2c161b |
+++ b/regexec.c
|
|
|
2c161b |
@@ -1466,7 +1466,9 @@ Perl_re_intuit_start(pTHX_
|
|
|
2c161b |
? trie_utf8_fold \
|
|
|
2c161b |
: trie_latin_utf8_fold)))
|
|
|
2c161b |
|
|
|
2c161b |
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
|
|
|
2c161b |
+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
|
|
|
2c161b |
+ * 'foldbuf+sizeof(foldbuf)' */
|
|
|
2c161b |
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
|
|
|
2c161b |
STMT_START { \
|
|
|
2c161b |
STRLEN skiplen; \
|
|
|
2c161b |
U8 flags = FOLD_FLAGS_FULL; \
|
|
|
2c161b |
@@ -1474,7 +1476,7 @@ STMT_START {
|
|
|
2c161b |
case trie_flu8: \
|
|
|
2c161b |
_CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
|
|
|
2c161b |
if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \
|
|
|
2c161b |
- _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \
|
|
|
2c161b |
+ _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc); \
|
|
|
2c161b |
} \
|
|
|
2c161b |
goto do_trie_utf8_fold; \
|
|
|
2c161b |
case trie_utf8_exactfa_fold: \
|
|
|
2c161b |
@@ -1483,7 +1485,7 @@ STMT_START {
|
|
|
2c161b |
case trie_utf8_fold: \
|
|
|
2c161b |
do_trie_utf8_fold: \
|
|
|
2c161b |
if ( foldlen>0 ) { \
|
|
|
2c161b |
- uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
|
|
|
2c161b |
+ uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
|
|
|
2c161b |
foldlen -= len; \
|
|
|
2c161b |
uscan += len; \
|
|
|
2c161b |
len=0; \
|
|
|
2c161b |
@@ -1500,7 +1502,7 @@ STMT_START {
|
|
|
2c161b |
/* FALLTHROUGH */ \
|
|
|
2c161b |
case trie_latin_utf8_fold: \
|
|
|
2c161b |
if ( foldlen>0 ) { \
|
|
|
2c161b |
- uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
|
|
|
2c161b |
+ uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
|
|
|
2c161b |
foldlen -= len; \
|
|
|
2c161b |
uscan += len; \
|
|
|
2c161b |
len=0; \
|
|
|
2c161b |
@@ -1519,7 +1521,7 @@ STMT_START {
|
|
|
2c161b |
} \
|
|
|
2c161b |
/* FALLTHROUGH */ \
|
|
|
2c161b |
case trie_utf8: \
|
|
|
2c161b |
- uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
|
|
|
2c161b |
+ uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags ); \
|
|
|
2c161b |
break; \
|
|
|
2c161b |
case trie_plain: \
|
|
|
2c161b |
uvc = (UV)*uc; \
|
|
|
2c161b |
@@ -2599,10 +2601,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
|
|
|
2c161b |
}
|
|
|
2c161b |
points[pointpos++ % maxlen]= uc;
|
|
|
2c161b |
if (foldlen || uc < (U8*)strend) {
|
|
|
2c161b |
- REXEC_TRIE_READ_CHAR(trie_type, trie,
|
|
|
2c161b |
- widecharmap, uc,
|
|
|
2c161b |
- uscan, len, uvc, charid, foldlen,
|
|
|
2c161b |
- foldbuf, uniflags);
|
|
|
2c161b |
+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
|
|
|
2c161b |
+ (U8 *) strend, uscan, len, uvc,
|
|
|
2c161b |
+ charid, foldlen, foldbuf,
|
|
|
2c161b |
+ uniflags);
|
|
|
2c161b |
DEBUG_TRIE_EXECUTE_r({
|
|
|
2c161b |
dump_exec_pos( (char *)uc, c, strend,
|
|
|
2c161b |
real_start, s, utf8_target, 0);
|
|
|
2c161b |
@@ -5511,8 +5513,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
|
|
|
2c161b |
if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
|
|
|
2c161b |
I32 offset;
|
|
|
2c161b |
REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
|
|
|
2c161b |
- uscan, len, uvc, charid, foldlen,
|
|
|
2c161b |
- foldbuf, uniflags);
|
|
|
2c161b |
+ (U8 *) reginfo->strend, uscan,
|
|
|
2c161b |
+ len, uvc, charid, foldlen,
|
|
|
2c161b |
+ foldbuf, uniflags);
|
|
|
2c161b |
charcount++;
|
|
|
2c161b |
if (foldlen>0)
|
|
|
2c161b |
ST.longfold = TRUE;
|
|
|
2c161b |
@@ -5642,8 +5645,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
|
|
|
2c161b |
while (foldlen) {
|
|
|
2c161b |
if (!--chars)
|
|
|
2c161b |
break;
|
|
|
2c161b |
- uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
|
|
|
2c161b |
- uniflags);
|
|
|
2c161b |
+ uvc = utf8n_to_uvchr(uscan, foldlen, &len,
|
|
|
2c161b |
+ uniflags);
|
|
|
2c161b |
uscan += len;
|
|
|
2c161b |
foldlen -= len;
|
|
|
2c161b |
}
|
|
|
2c161b |
diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec
|
|
|
2c161b |
index 900dd6ee7f..6635142dea 100644
|
|
|
2c161b |
--- a/t/lib/warnings/regexec
|
|
|
2c161b |
+++ b/t/lib/warnings/regexec
|
|
|
2c161b |
@@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale);
|
|
|
2c161b |
"k" =~ /(?[ \N{KELVIN SIGN} ])/i;
|
|
|
2c161b |
":" =~ /(?[ \: ])/;
|
|
|
2c161b |
EXPECT
|
|
|
2c161b |
+########
|
|
|
2c161b |
+# NAME perl #132063, read beyond buffer end
|
|
|
2c161b |
+# OPTION fatal
|
|
|
2c161b |
+"\xff" =~ /(?il)\x{100}|\x{100}/;
|
|
|
2c161b |
+EXPECT
|
|
|
2c161b |
+Malformed UTF-8 character: \xff (too short; 1 byte available, need 13) in pattern match (m//) at - line 2.
|
|
|
2c161b |
+Malformed UTF-8 character (fatal) at - line 2.
|
|
|
2c161b |
--
|
|
|
2c161b |
2.11.0
|
|
|
2c161b |
|