From 7dc63e39d2d84920efd005092bc4d03b6ab24e1c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 6 Feb 2018 14:50:48 -0700 Subject: [perl #132063]: Heap buffer overflow The proximal cause is several instances in regexec.c of the code assuming that the input was valid UTF-8, whereas the input was too short for what the start byte claimed it would be. I grepped through the core for any other similar uses, and did not find any. --- regexec.c | 33 ++++++++++++++++++--------------- t/lib/warnings/regexec | 7 +++++++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/regexec.c b/regexec.c index 134b196fc4..fa888823bd 100644 --- a/regexec.c +++ b/regexec.c @@ -1487,7 +1487,9 @@ Perl_re_intuit_start(pTHX_ ? trie_utf8_fold \ : trie_latin_utf8_fold))) -#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \ +/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is + * 'foldbuf+sizeof(foldbuf)' */ +#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \ STMT_START { \ STRLEN skiplen; \ U8 flags = FOLD_FLAGS_FULL; \ @@ -1495,7 +1497,7 @@ STMT_START { case trie_flu8: \ _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \ if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \ - _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \ + _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc); \ } \ goto do_trie_utf8_fold; \ case trie_utf8_exactfa_fold: \ @@ -1504,14 +1506,14 @@ STMT_START { case trie_utf8_fold: \ do_trie_utf8_fold: \ if ( foldlen>0 ) { \ - uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \ foldlen -= len; \ uscan += len; \ len=0; \ } else { \ - len = UTF8SKIP(uc); \ - uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, &foldlen, \ + uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen, \ flags); \ + len = UTF8SKIP(uc); \ skiplen = UVCHR_SKIP( uvc ); \ foldlen -= skiplen; \ uscan = foldbuf + skiplen; \ @@ -1522,7 +1524,7 @@ STMT_START { /* FALLTHROUGH */ \ case trie_latin_utf8_fold: \ if ( foldlen>0 ) { \ - uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \ foldlen -= len; \ uscan += len; \ len=0; \ @@ -1541,7 +1543,7 @@ STMT_START { } \ /* FALLTHROUGH */ \ case trie_utf8: \ - uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags ); \ break; \ case trie_plain: \ uvc = (UV)*uc; \ @@ -2623,10 +2625,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, } points[pointpos++ % maxlen]= uc; if (foldlen || uc < (U8*)strend) { - REXEC_TRIE_READ_CHAR(trie_type, trie, - widecharmap, uc, - uscan, len, uvc, charid, foldlen, - foldbuf, uniflags); + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, + (U8 *) strend, uscan, len, uvc, + charid, foldlen, foldbuf, + uniflags); DEBUG_TRIE_EXECUTE_r({ dump_exec_pos( (char *)uc, c, strend, real_start, s, utf8_target, 0); @@ -5686,8 +5688,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if ( base && (foldlen || uc < (U8*)(reginfo->strend))) { I32 offset; REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, - uscan, len, uvc, charid, foldlen, - foldbuf, uniflags); + (U8 *) reginfo->strend, uscan, + len, uvc, charid, foldlen, + foldbuf, uniflags); charcount++; if (foldlen>0) ST.longfold = TRUE; @@ -5822,8 +5825,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) while (foldlen) { if (!--chars) break; - uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len, - uniflags); + uvc = utf8n_to_uvchr(uscan, foldlen, &len, + uniflags); uscan += len; foldlen -= len; } diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec index 900dd6ee7f..6635142dea 100644 --- a/t/lib/warnings/regexec +++ b/t/lib/warnings/regexec @@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale); "k" =~ /(?[ \N{KELVIN SIGN} ])/i; ":" =~ /(?[ \: ])/; EXPECT +######## +# NAME perl #132063, read beyond buffer end +# OPTION fatal +"\xff" =~ /(?il)\x{100}|\x{100}/; +EXPECT +Malformed UTF-8 character: \xff (too short; 1 byte available, need 13) in pattern match (m//) at - line 2. +Malformed UTF-8 character (fatal) at - line 2. -- 2.11.0