dcb3b7
From fd25d49cae6409a4ce901fd4d899a197541604b3 Mon Sep 17 00:00:00 2001
dcb3b7
From: David Mitchell <davem@iabyn.com>
dcb3b7
Date: Sat, 4 Feb 2017 15:10:49 +0000
dcb3b7
Subject: [PATCH] buffer overrun with format and 'use bytes'
dcb3b7
MIME-Version: 1.0
dcb3b7
Content-Type: text/plain; charset=UTF-8
dcb3b7
Content-Transfer-Encoding: 8bit
dcb3b7
dcb3b7
Ported to 5.24.1:
dcb3b7
dcb3b7
commit e452bf1c9e9f30813b1f289188a6e8b0894575ba
dcb3b7
Author: David Mitchell <davem@iabyn.com>
dcb3b7
Date:   Sat Feb 4 15:10:49 2017 +0000
dcb3b7
dcb3b7
    buffer overrun with format and 'use bytes'
dcb3b7
dcb3b7
    RT #130703
dcb3b7
dcb3b7
    In the scope of 'use bytes', appending a string to a format where the
dcb3b7
    format is utf8 and the string is non-utf8 but contains lots of chars
dcb3b7
    with ords >= 128, the buffer could be overrun. This is due to all the
dcb3b7
    \x80-type chars going from being stored as 1 byte to 2 bytes, without
dcb3b7
    growing PL_formtarget accordingly.
dcb3b7
dcb3b7
    This commit contains a minimal fix; the next commit will more generally
dcb3b7
    tidy up the grow code in pp_formline.
dcb3b7
dcb3b7
Signed-off-by: Petr Písař <ppisar@redhat.com>
dcb3b7
---
dcb3b7
 pp_ctl.c     |  3 +++
dcb3b7
 t/op/write.t | 18 +++++++++++++++++-
dcb3b7
 2 files changed, 20 insertions(+), 1 deletion(-)
dcb3b7
dcb3b7
diff --git a/pp_ctl.c b/pp_ctl.c
dcb3b7
index a1fc2f4..4d5ef2e 100644
dcb3b7
--- a/pp_ctl.c
dcb3b7
+++ b/pp_ctl.c
dcb3b7
@@ -505,6 +505,8 @@ PP(pp_formline)
dcb3b7
 	SvTAINTED_on(PL_formtarget);
dcb3b7
     if (DO_UTF8(PL_formtarget))
dcb3b7
 	targ_is_utf8 = TRUE;
dcb3b7
+    /* this is an initial estimate of how much output buffer space
dcb3b7
+     * to allocate. It may be exceeded later */
dcb3b7
     linemax = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1);
dcb3b7
     t = SvGROW(PL_formtarget, len + linemax + 1);
dcb3b7
     /* XXX from now onwards, SvCUR(PL_formtarget) is invalid */
dcb3b7
@@ -766,6 +768,7 @@ PP(pp_formline)
dcb3b7
 
dcb3b7
 		if (targ_is_utf8 && !item_is_utf8) {
dcb3b7
 		    source = tmp = bytes_to_utf8(source, &to_copy);
dcb3b7
+                    grow = to_copy;
dcb3b7
 		} else {
dcb3b7
 		    if (item_is_utf8 && !targ_is_utf8) {
dcb3b7
 			U8 *s;
dcb3b7
diff --git a/t/op/write.t b/t/op/write.t
dcb3b7
index ab2733f..ae4ddb5 100644
dcb3b7
--- a/t/op/write.t
dcb3b7
+++ b/t/op/write.t
dcb3b7
@@ -98,7 +98,7 @@ for my $tref ( @NumTests ){
dcb3b7
 my $bas_tests = 21;
dcb3b7
 
dcb3b7
 # number of tests in section 3
dcb3b7
-my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 3;
dcb3b7
+my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 4;
dcb3b7
 
dcb3b7
 # number of tests in section 4
dcb3b7
 my $hmb_tests = 37;
dcb3b7
@@ -1562,6 +1562,22 @@ ok  defined *{$::{CmT}}{FORMAT}, "glob assign";
dcb3b7
     formline $format, $orig, 12345;
dcb3b7
     is $^A, ("x" x 100) . " 12345\n", "\@* doesn't overflow";
dcb3b7
 
dcb3b7
+    # ...nor this (RT #130703).
dcb3b7
+    # Under 'use bytes', the two bytes (c2, 80) making up each \x80 char
dcb3b7
+    # each get expanded to two bytes (so four in total per \x80 char); the
dcb3b7
+    # buffer growth wasn't accounting for this doubling in size
dcb3b7
+
dcb3b7
+    {
dcb3b7
+        local $^A = '';
dcb3b7
+        my $format = "X\n\x{100}" . ("\x80" x 200);
dcb3b7
+        my $expected = $format;
dcb3b7
+        utf8::encode($expected);
dcb3b7
+        use bytes;
dcb3b7
+        formline($format);
dcb3b7
+        is $^A, $expected, "RT #130703";
dcb3b7
+    }
dcb3b7
+
dcb3b7
+
dcb3b7
     # make sure it can cope with formats > 64k
dcb3b7
 
dcb3b7
     $format = 'x' x 65537;
dcb3b7
-- 
dcb3b7
2.7.4
dcb3b7