Tree - rpms/perl - CentOS Git server

rpms / perl

Blame SOURCES/perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch

Blob History Raw

		683572	`From 0c311b7c345769239f38d0139ea7738feec5ca4d Mon Sep 17 00:00:00 2001`
		683572	`From: Karl Williamson <khw@cpan.org>`
		683572	`Date: Sat, 2 Nov 2019 13:59:38 -0600`
		683572	`Subject: [PATCH] toke.c: Fix bug tr/// upgrading to UTF-8 in middle`
		683572	`MIME-Version: 1.0`
		683572	`Content-Type: text/plain; charset=UTF-8`
		683572	`Content-Transfer-Encoding: 8bit`
		683572
		683572	`Consider tr/\x{ff}-\x{100}/AB/.`
		683572
		683572	`While parsing, the code keeps an offset from the beginning of the output`
		683572	`to the beginning of the second number in the range. This is purely for`
		683572	`speed so that it wouldn't have to re-find the beginning of that value,`
		683572	`when it already knew it.`
		683572
		683572	`But the example above shows the folly of this shortcut. The second`
		683572	`number in the range causes the output to be upgraded to UTF-8, which`
		683572	`makes that offset invalid in general. Change to re-find the beginning.`
		683572
		683572	`Signed-off-by: Petr Písař <ppisar@redhat.com>`
		683572	`---`
		683572	`t/op/tr.t | 12 +++++++++++-`
		683572	`toke.c | 4 +++-`
		683572	`2 files changed, 14 insertions(+), 2 deletions(-)`
		683572
		683572	`diff --git a/t/op/tr.t b/t/op/tr.t`
		683572	`index 47d603d4fd..25125c5bc7 100644`
		683572	`--- a/t/op/tr.t`
		683572	`+++ b/t/op/tr.t`
		683572	`@@ -13,7 +13,7 @@ BEGIN {`
		683572
		683572	`use utf8;`
		683572
		683572	`-plan tests => 301;`
		683572	`+plan tests => 304;`
		683572
		683572	`# Test this first before we extend the stack with other operations.`
		683572	`# This caused an asan failure due to a bad write past the end of the stack.`
		683572	`@@ -1145,4 +1145,14 @@ for ("", nullrocow) {`
		683572	`'RT #133880 illegal \N{}');`
		683572	`}`
		683572
		683572	`+{`
		683572	`+ my $c = "\xff";`
		683572	`+ my $d = "\x{104}";`
		683572	`+ eval '$c =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/';`
		683572	`+ is($@, "", 'tr/\x{ff}-\x{104}/\x{100}-\x{105}/ compiled');`
		683572	`+ is($c, "\x{100}", 'ff -> 100');`
		683572	`+ eval '$d =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/';`
		683572	`+ is($d, "\x{105}", '104 -> 105');`
		683572	`+}`
		683572	`+`
		683572	`1;`
		683572	`diff --git a/toke.c b/toke.c`
		683572	`index 2995737af2..28f305c62c 100644`
		683572	`--- a/toke.c`
		683572	`+++ b/toke.c`
		683572	`@@ -3044,7 +3044,7 @@ S_scan_const(pTHX_ char *start)`
		683572	`* 'offset_to_max' is the offset in 'sv' at which the character`
		683572	`* (the range's maximum end point) before 'd' begins.`
		683572	`*/`
		683572	`- char * max_ptr = SvPVX(sv) + offset_to_max;`
		683572	`+ char * max_ptr;`
		683572	`char * min_ptr;`
		683572	`IV range_min;`
		683572	`IV range_max; /* last character in range */`
		683572	`@@ -3056,6 +3056,8 @@ S_scan_const(pTHX_ char *start)`
		683572	`IV real_range_max = 0;`
		683572	`#endif`
		683572	`/* Get the code point values of the range ends. */`
		683572	`+ max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;`
		683572	`+ offset_to_max = max_ptr - SvPVX_const(sv);`
		683572	`if (d_is_utf8) {`
		683572	`/* We know the utf8 is valid, because we just constructed`
		683572	`* it ourselves in previous loop iterations */`
		683572	`--`
		683572	`2.21.0`
		683572

rpms / perl

Source Code

Blame SOURCES/perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch