Blob Blame History Raw
From d72ba890c8d8ac800c9d00a1f542deca11551f33 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Tue, 13 Feb 2018 07:03:43 -0700
Subject: utf8.c: Don't dump malformation past first NUL

When a UTF-8 string contains a malformation, the bytes are dumped out as
a debugging aid.  One should exercise caution, however, and not dump out
bytes that are actually past the end of the string.  Commit 99a765e9e37
from 2016 added the capability to signal to the dumping routines that
we're not sure where the string ends, and to dump the minimal possible.

It occurred to me that an additional safety measure can be easily added,
which this commit does: namely, to stop at the first NUL in the dumping
routines.  All strings automatically get a trailing NUL added, even
if they contain embedded NULs.  A NUL can never be part of a
malformation, and so its presence likely signals the end of the string.
---
 utf8.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/utf8.c b/utf8.c
index a3d5f61b64..61346f0cb6 100644
--- a/utf8.c
+++ b/utf8.c
@@ -810,7 +810,7 @@ Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format)
 PERL_STATIC_INLINE char *
 S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
 
-                                         /* How many bytes to print */
+                                         /* Max number of bytes to print */
                                          STRLEN print_len,
 
                                          /* Which one is the non-continuation */
@@ -826,6 +826,8 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
                                ? "immediately"
                                : Perl_form(aTHX_ "%d bytes",
                                                  (int) non_cont_byte_pos);
+    const U8 * x = s + non_cont_byte_pos;
+    const U8 * e = s + print_len;
 
     PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
 
@@ -833,10 +835,20 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
      * calculated, it's likely faster to pass it; verify under DEBUGGING */
     assert(expect_len == UTF8SKIP(s));
 
+    /* As a defensive coding measure, don't output anything past a NUL.  Such
+     * bytes shouldn't be in the middle of a malformation, and could mark the
+     * end of the allocated string, and what comes after is undefined */
+    for (; x < e; x++) {
+        if (*x == '\0') {
+            x++;            /* Output this particular NUL */
+            break;
+        }
+    }
+
     return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
                            " %s after start byte 0x%02x; need %d bytes, got %d)",
                            malformed_text,
-                           _byte_dump_string(s, print_len, 0),
+                           _byte_dump_string(s, x - s, 0),
                            *(s + non_cont_byte_pos),
                            where,
                            *s,
-- 
2.11.0