Blob Blame History Raw
From fed9fe5b48ccdffef9065a03c12c237cc7418de6 Mon Sep 17 00:00:00 2001
From: Zefram <zefram@fysh.org>
Date: Fri, 16 Feb 2018 17:20:34 +0000
Subject: [PATCH] don't clobber file bytes in :encoding layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PerlIO::encoding layer, when used on input, was creating an SvLEN==0
scalar pointing into the byte buffer, to pass to the ->decode method
of the encoding object.  Since the method mutates this scalar, for some
encodings this led to mutating the byte buffer, and depending on where
it came from that might be something visible elsewhere that should not
be mutated.  Remove the code for the SvLEN==0 scalar, instead always
using the alternate code that would copy the bytes into a separate buffer
owned by the scalar.  Fixes [perl #132833].

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 ext/PerlIO-encoding/encoding.pm  |  2 +-
 ext/PerlIO-encoding/encoding.xs  | 43 ++++++++++------------------------------
 ext/PerlIO-encoding/t/encoding.t | 12 ++++++++++-
 3 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/ext/PerlIO-encoding/encoding.pm b/ext/PerlIO-encoding/encoding.pm
index 08d2df4713..3d740b181a 100644
--- a/ext/PerlIO-encoding/encoding.pm
+++ b/ext/PerlIO-encoding/encoding.pm
@@ -1,7 +1,7 @@
 package PerlIO::encoding;
 
 use strict;
-our $VERSION = '0.25';
+our $VERSION = '0.26';
 our $DEBUG = 0;
 $DEBUG and warn __PACKAGE__, " called by ", join(", ", caller), "\n";
 
diff --git a/ext/PerlIO-encoding/encoding.xs b/ext/PerlIO-encoding/encoding.xs
index bb4754f3d9..941d786266 100644
--- a/ext/PerlIO-encoding/encoding.xs
+++ b/ext/PerlIO-encoding/encoding.xs
@@ -307,42 +307,19 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
 		goto end_of_file;
 	    }
 	}
-	if (SvCUR(e->dataSV)) {
-	    /* something left over from last time - create a normal
-	       SV with new data appended
-	     */
-	    if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
-		if (e->flags & NEEDS_LINES) {
-		    /* Have to grow buffer */
-		    e->base.bufsiz = use + SvCUR(e->dataSV);
-		    PerlIOEncode_get_base(aTHX_ f);
-		}
-		else {
-	       use = e->base.bufsiz - SvCUR(e->dataSV);
-	    }
-	    }
-	    sv_catpvn(e->dataSV,(char*)ptr,use);
-	}
-	else {
-	    /* Create a "dummy" SV to represent the available data from layer below */
-	    if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
-		Safefree(SvPVX_mutable(e->dataSV));
-	    }
-	    if (use > (SSize_t)e->base.bufsiz) {
-		if (e->flags & NEEDS_LINES) {
-		    /* Have to grow buffer */
-		    e->base.bufsiz = use;
-		    PerlIOEncode_get_base(aTHX_ f);
-		}
-		else {
-	       use = e->base.bufsiz;
+	if (!SvCUR(e->dataSV))
+	    SvPVCLEAR(e->dataSV);
+	if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
+	    if (e->flags & NEEDS_LINES) {
+		/* Have to grow buffer */
+		e->base.bufsiz = use + SvCUR(e->dataSV);
+		PerlIOEncode_get_base(aTHX_ f);
 	    }
+	    else {
+		use = e->base.bufsiz - SvCUR(e->dataSV);
 	    }
-	    SvPV_set(e->dataSV, (char *) ptr);
-	    SvLEN_set(e->dataSV, 0);  /* Hands off sv.c - it isn't yours */
-	    SvCUR_set(e->dataSV,use);
-	    SvPOK_only(e->dataSV);
 	}
+	sv_catpvn(e->dataSV,(char*)ptr,use);
 	SvUTF8_off(e->dataSV);
 	PUSHMARK(sp);
 	XPUSHs(e->enc);
diff --git a/ext/PerlIO-encoding/t/encoding.t b/ext/PerlIO-encoding/t/encoding.t
index 088f89ee20..41cefcb137 100644
--- a/ext/PerlIO-encoding/t/encoding.t
+++ b/ext/PerlIO-encoding/t/encoding.t
@@ -16,7 +16,7 @@ BEGIN {
     require "../../t/charset_tools.pl";
 }
 
-use Test::More tests => 24;
+use Test::More tests => 27;
 
 my $grk = "grk$$";
 my $utf = "utf$$";
@@ -231,6 +231,16 @@ is $x, "To hymn him who heard her herd herd\n",
 
 } # SKIP
 
+# decoding shouldn't mutate the original bytes [perl #132833]
+{
+    my $b = "a\0b\0\n\0";
+    open my $fh, "<:encoding(UTF16-LE)", \$b or die;
+    is scalar(<$fh>), "ab\n";
+    is $b, "a\0b\0\n\0";
+    close $fh or die;
+    is $b, "a\0b\0\n\0";
+}
+
 END {
     1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
 }
-- 
2.14.3