Tree - rpms/perl - CentOS Git server

rpms / perl

Blame SOURCES/perl-5.27.1-RT-130907-Fix-the-Unicode-Bug-in-split.patch

Blob History Raw

		b8c914	`From 5aca16e032861ea3dfcc96ad417ea87e2b1552e5 Mon Sep 17 00:00:00 2001`
		b8c914	`From: Aaron Crane <arc@cpan.org>`
		b8c914	`Date: Sat, 4 Mar 2017 12:50:58 +0000`
		b8c914	`Subject: [PATCH] RT #130907: Fix the Unicode Bug in split " "`
		b8c914	`MIME-Version: 1.0`
		b8c914	`Content-Type: text/plain; charset=UTF-8`
		b8c914	`Content-Transfer-Encoding: 8bit`
		b8c914
		b8c914	`Ported to 5.26.0:`
		b8c914
		b8c914	`commit 20ae58f7a9bbf84d043d6e90f5988b6e3ca4ee3d`
		b8c914	`Author: Aaron Crane <arc@cpan.org>`
		b8c914	`Date: Sat Mar 4 12:50:58 2017 +0000`
		b8c914
		b8c914	`RT #130907: Fix the Unicode Bug in split " "`
		b8c914
		b8c914	`Signed-off-by: Petr Písař <ppisar@redhat.com>`
		b8c914	`---`
		b8c914	`lib/feature.pm \| 5 +++--`
		b8c914	`pod/perldelta.pod \| 9 +++++++++`
		b8c914	`pod/perlfunc.pod \| 8 ++++++++`
		b8c914	`pod/perlunicode.pod \| 11 +++++++++++`
		b8c914	`pod/perluniintro.pod \| 5 +++--`
		b8c914	`pp.c \| 13 +++++++++++++`
		b8c914	`regen/feature.pl \| 5 +++--`
		b8c914	`t/op/split.t \| 20 +++++++++++++++++++-`
		b8c914	`8 files changed, 69 insertions(+), 7 deletions(-)`
		b8c914
		b8c914	`diff --git a/lib/feature.pm b/lib/feature.pm`
		b8c914	`index ed13273..93e020b 100644`
		b8c914	`--- a/lib/feature.pm`
		b8c914	`+++ b/lib/feature.pm`
		b8c914	`@@ -175,8 +175,9 @@ C<use feature 'unicode_strings'> subpragma is B<strongly> recommended.`
		b8c914
		b8c914	`This feature is available starting with Perl 5.12; was almost fully`
		b8c914	`implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>;`
		b8c914	`-and extended further in Perl 5.26 to cover L<the range`
		b8c914	`-operator\|perlop/Range Operators>.`
		b8c914	`+was extended further in Perl 5.26 to cover L<the range`
		b8c914	`+operator\|perlop/Range Operators>; and was extended again in Perl 5.28 to`
		b8c914	`+cover L<special-cased whitespace splitting\|perlfunc/split>.`
		b8c914
		b8c914	`=head2 The 'unicode_eval' and 'evalbytes' features`
		b8c914
		b8c914	`#diff --git a/pod/perldelta.pod b/pod/perldelta.pod`
		b8c914	`#index 06dcd1d..d31335f 100644`
		b8c914	`#--- a/pod/perldelta.pod`
		b8c914	`#+++ b/pod/perldelta.pod`
		b8c914	`#@@ -3206,6 +3206,15 @@ calls.`
		b8c914	`# Parsing bad POSIX charclasses no longer leaks memory.`
		b8c914	`# L<[perl #128313]\|https://rt.perl.org/Public/Bug/Display.html?id=128313>`
		b8c914	`#`
		b8c914	`#+=item *`
		b8c914	`#+`
		b8c914	`#+C<split ' '> now correctly handles the argument being split when in the`
		b8c914	`#+scope of the L<< C<unicode_strings>\|feature/"The 'unicode_strings' feature"`
		b8c914	`#+>> feature. Previously, when a string using the single-byte internal`
		b8c914	`#+representation contained characters that are whitespace by Unicode rules but`
		b8c914	`#+not by ASCII rules, it treated those characters as part of fields rather`
		b8c914	`#+than as field separators. [perl #130907]`
		b8c914	`#+`
		b8c914	`# =back`
		b8c914	`#`
		b8c914	`# =head1 Known Problems`
		b8c914	`diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod`
		b8c914	`index b8dca6e..9abadf4 100644`
		b8c914	`--- a/pod/perlfunc.pod`
		b8c914	`+++ b/pod/perlfunc.pod`
		b8c914	`@@ -7616,6 +7616,14 @@ special case was restricted to the use of a plain S<C<" ">> as the`
		b8c914	`pattern argument to split; in Perl 5.18.0 and later this special case is`
		b8c914	`triggered by any expression which evaluates to the simple string S<C<" ">>.`
		b8c914
		b8c914	`+As of Perl 5.28, this special-cased whitespace splitting works as expected in`
		b8c914	`+the scope of L<< S<C<"use feature 'unicode_strings'">>\|feature/The`
		b8c914	`+'unicode_strings' feature >>. In previous versions, and outside the scope of`
		b8c914	`+that feature, it exhibits L<perlunicode/The "Unicode Bug">: characters that are`
		b8c914	`+whitespace according to Unicode rules but not according to ASCII rules can be`
		b8c914	`+treated as part of fields rather than as field separators, depending on the`
		b8c914	`+string's internal encoding.`
		b8c914	`+`
		b8c914	`If omitted, PATTERN defaults to a single space, S<C<" ">>, triggering`
		b8c914	`the previously described I<awk> emulation.`
		b8c914
		b8c914	`diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod`
		b8c914	`index 9c13c35..2e84e95 100644`
		b8c914	`--- a/pod/perlunicode.pod`
		b8c914	`+++ b/pod/perlunicode.pod`
		b8c914	`@@ -1835,6 +1835,17 @@ outside its scope, it could produce strings whose length in characters`
		b8c914	`exceeded that of the right-hand side, where the right-hand side took up more`
		b8c914	`bytes than the correct range endpoint.`
		b8c914
		b8c914	`+=item *`
		b8c914	`+`
		b8c914	`+In L<< C<split>'s special-case whitespace splitting\|perlfunc/split >>.`
		b8c914	`+`
		b8c914	`+Starting in Perl 5.28.0, the C<split> function with a pattern specified as`
		b8c914	`+a string containing a single space handles whitespace characters consistently`
		b8c914	`+within the scope of C<unicode_strings>. Prior to that, or outside its scope,`
		b8c914	`+characters that are whitespace according to Unicode rules but not according to`
		b8c914	`+ASCII rules were treated as field contents rather than field separators when`
		b8c914	`+they appear in byte-encoded strings.`
		b8c914	`+`
		b8c914	`=back`
		b8c914
		b8c914	`You can see from the above that the effect of C<unicode_strings>`
		b8c914	`diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod`
		b8c914	`index d35de34..595ec46 100644`
		b8c914	`--- a/pod/perluniintro.pod`
		b8c914	`+++ b/pod/perluniintro.pod`
		b8c914	`@@ -151,11 +151,12 @@ serious Unicode work. The maintenance release 5.6.1 fixed many of the`
		b8c914	`problems of the initial Unicode implementation, but for example`
		b8c914	`regular expressions still do not work with Unicode in 5.6.1.`
		b8c914	`Perl v5.14.0 is the first release where Unicode support is`
		b8c914	`-(almost) seamlessly integrable without some gotchas. (There are two`
		b8c914	`+(almost) seamlessly integrable without some gotchas. (There are a few`
		b8c914	`exceptions. Firstly, some differences in L<quotemeta\|perlfunc/quotemeta>`
		b8c914	`were fixed starting in Perl 5.16.0. Secondly, some differences in`
		b8c914	`L<the range operator\|perlop/Range Operators> were fixed starting in`
		b8c914	`-Perl 5.26.0.)`
		b8c914	`+Perl 5.26.0. Thirdly, some differences in L<split\|perlfunc/split> were fixed`
		b8c914	`+starting in Perl 5.28.0.)`
		b8c914
		b8c914	`To enable this`
		b8c914	`seamless support, you should C<use feature 'unicode_strings'> (which is`
		b8c914	`diff --git a/pp.c b/pp.c`
		b8c914	`index cc4cb59..d9dd005 100644`
		b8c914	`--- a/pp.c`
		b8c914	`+++ b/pp.c`
		b8c914	`@@ -5740,6 +5740,7 @@ PP(pp_split)`
		b8c914	`STRLEN len;`
		b8c914	`const char *s = SvPV_const(sv, len);`
		b8c914	`const bool do_utf8 = DO_UTF8(sv);`
		b8c914	`+ const bool in_uni_8_bit = IN_UNI_8_BIT;`
		b8c914	`const char *strend = s + len;`
		b8c914	`PMOP *pm = cPMOPx(PL_op);`
		b8c914	`REGEXP *rx;`
		b8c914	`@@ -5826,6 +5827,10 @@ PP(pp_split)`
		b8c914	`while (s < strend && isSPACE_LC(*s))`
		b8c914	`s++;`
		b8c914	`}`
		b8c914	`+ else if (in_uni_8_bit) {`
		b8c914	`+ while (s < strend && isSPACE_L1(*s))`
		b8c914	`+ s++;`
		b8c914	`+ }`
		b8c914	`else {`
		b8c914	`while (s < strend && isSPACE(*s))`
		b8c914	`s++;`
		b8c914	`@@ -5857,6 +5862,10 @@ PP(pp_split)`
		b8c914	`{`
		b8c914	`while (m < strend && !isSPACE_LC(*m))`
		b8c914	`++m;`
		b8c914	`+ }`
		b8c914	`+ else if (in_uni_8_bit) {`
		b8c914	`+ while (m < strend && !isSPACE_L1(*m))`
		b8c914	`+ ++m;`
		b8c914	`} else {`
		b8c914	`while (m < strend && !isSPACE(*m))`
		b8c914	`++m;`
		b8c914	`@@ -5891,6 +5900,10 @@ PP(pp_split)`
		b8c914	`{`
		b8c914	`while (s < strend && isSPACE_LC(*s))`
		b8c914	`++s;`
		b8c914	`+ }`
		b8c914	`+ else if (in_uni_8_bit) {`
		b8c914	`+ while (s < strend && isSPACE_L1(*s))`
		b8c914	`+ ++s;`
		b8c914	`} else {`
		b8c914	`while (s < strend && isSPACE(*s))`
		b8c914	`++s;`
		b8c914	`diff --git a/regen/feature.pl b/regen/feature.pl`
		b8c914	`index 579120e..8a4ce63 100755`
		b8c914	`--- a/regen/feature.pl`
		b8c914	`+++ b/regen/feature.pl`
		b8c914	`@@ -485,8 +485,9 @@ C<use feature 'unicode_strings'> subpragma is B<strongly> recommended.`
		b8c914
		b8c914	`This feature is available starting with Perl 5.12; was almost fully`
		b8c914	`implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>;`
		b8c914	`-and extended further in Perl 5.26 to cover L<the range`
		b8c914	`-operator\|perlop/Range Operators>.`
		b8c914	`+was extended further in Perl 5.26 to cover L<the range`
		b8c914	`+operator\|perlop/Range Operators>; and was extended again in Perl 5.28 to`
		b8c914	`+cover L<special-cased whitespace splitting\|perlfunc/split>.`
		b8c914
		b8c914	`=head2 The 'unicode_eval' and 'evalbytes' features`
		b8c914
		b8c914	`diff --git a/t/op/split.t b/t/op/split.t`
		b8c914	`index d60bcaf..038c5d7 100644`
		b8c914	`--- a/t/op/split.t`
		b8c914	`+++ b/t/op/split.t`
		b8c914	`@@ -7,7 +7,7 @@ BEGIN {`
		b8c914	`set_up_inc('../lib');`
		b8c914	`}`
		b8c914
		b8c914	`-plan tests => 163;`
		b8c914	`+plan tests => 172;`
		b8c914
		b8c914	`$FS = ':';`
		b8c914
		b8c914	`@@ -480,6 +480,24 @@ is($cnt, scalar(@ary));`
		b8c914	`qq{split(\$cond ? qr/ / : " ", "$exp") behaves as expected over repeated similar patterns};`
		b8c914	`}`
		b8c914
		b8c914	`+SKIP: {`
		b8c914	`+ # RT #130907: unicode_strings feature doesn't work with split ' '`
		b8c914	`+`
		b8c914	`+ my ($sp) = grep /\s/u, map chr, reverse 128 .. 255 # prefer \xA0 over \x85`
		b8c914	`+ or skip 'no unicode whitespace found in high-8-bit range', 9;`
		b8c914	`+`
		b8c914	`+ for (["$sp$sp. /", "leading unicode whitespace"],`
		b8c914	`+ [".$sp$sp/", "unicode whitespace separator"],`
		b8c914	`+ [". /$sp$sp", "trailing unicode whitespace"]) {`
		b8c914	`+ my ($str, $desc) = @$_;`
		b8c914	`+ use feature "unicode_strings";`
		b8c914	`+ my @got = split " ", $str;`
		b8c914	`+ is @got, 2, "whitespace split: $desc: field count";`
		b8c914	`+ is $got[0], '.', "whitespace split: $desc: field 0";`
		b8c914	`+ is $got[1], '/', "whitespace split: $desc: field 1";`
		b8c914	`+ }`
		b8c914	`+}`
		b8c914	`+`
		b8c914	`{`
		b8c914	`# 'RT #116086: split "\x20" does not work as documented';`
		b8c914	`my @results;`
		b8c914	`--`
		b8c914	`2.9.4`
		b8c914

rpms / perl

Source Code

Blame SOURCES/perl-5.27.1-RT-130907-Fix-the-Unicode-Bug-in-split.patch