3f1b01
From 4cfbe5474a5c5f852a6dbf0138dc796c2800be93 Mon Sep 17 00:00:00 2001
3f1b01
From: Karl Williamson <khw@cpan.org>
3f1b01
Date: Wed, 30 Dec 2020 05:55:08 -0700
3f1b01
Subject: [PATCH] Fix buggy fc() in Turkish locale
3f1b01
MIME-Version: 1.0
3f1b01
Content-Type: text/plain; charset=UTF-8
3f1b01
Content-Transfer-Encoding: 8bit
3f1b01
3f1b01
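When Turkish locale handling was added, fc() wasn't properly updated:
the scan that counts the extra bytes needed looked for 'i' instead of
'I', and the first output character was always written as GREEK SMALL
LETTER MU, even when the input character was 'I' (which must fold to
LATIN SMALL LETTER DOTLESS I in a Turkic locale).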
3f1b01
3f1b01
Signed-off-by: Petr Písař <ppisar@redhat.com>
3f1b01
---
3f1b01
 pp.c      | 12 +++++++++---
3f1b01
 t/op/lc.t | 23 ++++++++++++++++-------
3f1b01
 2 files changed, 25 insertions(+), 10 deletions(-)
3f1b01
3f1b01
diff --git a/pp.c b/pp.c
3f1b01
index 5e1706346d..23cc6c8adb 100644
3f1b01
--- a/pp.c
3f1b01
+++ b/pp.c
3f1b01
@@ -4813,7 +4813,7 @@ PP(pp_fc)
3f1b01
                         do {
3f1b01
                             extra++;
3f1b01
 
3f1b01
-                            s_peek = (U8 *) memchr(s_peek + 1, 'i',
3f1b01
+                            s_peek = (U8 *) memchr(s_peek + 1, 'I',
3f1b01
                                                    send - (s_peek + 1));
3f1b01
                         } while (s_peek != NULL);
3f1b01
                     }
3f1b01
@@ -4828,8 +4828,14 @@ PP(pp_fc)
3f1b01
                                               + 1 /* Trailing NUL */ );
3f1b01
                     d = (U8*)SvPVX(dest) + len;
3f1b01
 
3f1b01
-                    *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
3f1b01
-                    *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
3f1b01
+                    if (*s == 'I') {
3f1b01
+                        *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
3f1b01
+                        *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
3f1b01
+                    }
3f1b01
+                    else {
3f1b01
+                        *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
3f1b01
+                        *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
3f1b01
+                    }
3f1b01
                     s++;
3f1b01
 
3f1b01
                     for (; s < send; s++) {
3f1b01
diff --git a/t/op/lc.t b/t/op/lc.t
3f1b01
index fce77f3d34..812c41d6b6 100644
3f1b01
--- a/t/op/lc.t
3f1b01
+++ b/t/op/lc.t
3f1b01
@@ -17,7 +17,7 @@ BEGIN {
3f1b01
 
3f1b01
 use feature qw( fc );
3f1b01
 
3f1b01
-plan tests => 139 + 2 * (4 * 256) + 15;
3f1b01
+plan tests => 139 + 2 * (5 * 256) + 17;
3f1b01
 
3f1b01
 is(lc(undef),	   "", "lc(undef) is ''");
3f1b01
 is(lcfirst(undef), "", "lcfirst(undef) is ''");
3f1b01
@@ -352,13 +352,14 @@ foreach my $turkic (0 .. 1) {
3f1b01
     my $locale = ($turkic) ? $turkic_locale : $non_turkic_locale;
3f1b01
 
3f1b01
   SKIP: {
3f1b01
-    skip "Can't find a $type UTF-8 locale", 4*256 unless defined $locale;
3f1b01
+    skip "Can't find a $type UTF-8 locale", 5*256 unless defined $locale;
3f1b01
 
3f1b01
     use feature qw( unicode_strings );
3f1b01
 
3f1b01
     no locale;
3f1b01
 
3f1b01
     my @unicode_lc;
3f1b01
+    my @unicode_fc;
3f1b01
     my @unicode_uc;
3f1b01
     my @unicode_lcfirst;
3f1b01
     my @unicode_ucfirst;
3f1b01
@@ -366,6 +367,7 @@ foreach my $turkic (0 .. 1) {
3f1b01
     # Get all the values outside of 'locale'
3f1b01
     for my $i (0 .. 255) {
3f1b01
         push @unicode_lc, lc(chr $i);
3f1b01
+        push @unicode_fc, fc(chr $i);
3f1b01
         push @unicode_uc, uc(chr $i);
3f1b01
         push @unicode_lcfirst, lcfirst(chr $i);
3f1b01
         push @unicode_ucfirst, ucfirst(chr $i);
3f1b01
@@ -373,6 +375,7 @@ foreach my $turkic (0 .. 1) {
3f1b01
 
3f1b01
     if ($turkic) {
3f1b01
         $unicode_lc[ord 'I'] = chr 0x131;
3f1b01
+        $unicode_fc[ord 'I'] = chr 0x131;
3f1b01
         $unicode_lcfirst[ord 'I'] = chr 0x131;
3f1b01
         $unicode_uc[ord 'i'] = chr 0x130;
3f1b01
         $unicode_ucfirst[ord 'i'] = chr 0x130;
3f1b01
@@ -384,6 +387,7 @@ foreach my $turkic (0 .. 1) {
3f1b01
     for my $i (0 .. 255) {
3f1b01
         is(lc(chr $i), $unicode_lc[$i], "In a $type UTF-8 locale, lc(chr $i) is the same as official Unicode");
3f1b01
         is(uc(chr $i), $unicode_uc[$i], "In a $type UTF-8 locale, uc(chr $i) is the same as official Unicode");
3f1b01
+        is(fc(chr $i), $unicode_fc[$i], "In a $type UTF-8 locale, fc(chr $i) is the same as official Unicode");
3f1b01
         is(lcfirst(chr $i), $unicode_lcfirst[$i], "In a $type UTF-8 locale, lcfirst(chr $i) is the same as official Unicode");
3f1b01
         is(ucfirst(chr $i), $unicode_ucfirst[$i], "In a $type UTF-8 locale, ucfirst(chr $i) is the same as official Unicode");
3f1b01
     }
3f1b01
@@ -391,27 +395,32 @@ foreach my $turkic (0 .. 1) {
3f1b01
 }
3f1b01
 
3f1b01
 SKIP: {
3f1b01
-    skip "Can't find a turkic UTF-8 locale", 15 unless defined $turkic_locale;
3f1b01
+    skip "Can't find a turkic UTF-8 locale", 17 unless defined $turkic_locale;
3f1b01
 
3f1b01
     # These are designed to stress the calculation of space needed for the
3f1b01
     # strings.  $filler contains a variety of characters that have special
3f1b01
     # handling in the casing functions, and some regular chars as well.
3f1b01
+    # (0x49 = 'I')
3f1b01
     my $filler_length = 10000;
3f1b01
-    my $filler = uni_to_native("\x{df}\x{b5}\x{e0}\x{c1}\x{b6}\x{ff}") x $filler_length;
3f1b01
+    my $filler = uni_to_native("\x{df}\x{49}\x{69}\x{b5}\x{e0}\x{c1}\x{b6}\x{ff}") x $filler_length;
3f1b01
 
3f1b01
     # These are the correct answers to what should happen when the given
3f1b01
     # casing function is called on $filler;
3f1b01
-    my $filler_lc = uni_to_native("\x{df}\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}") x $filler_length;
3f1b01
-    my $filler_fc = ("ss" . uni_to_native("\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}")) x $filler_length;
3f1b01
-    my $filler_uc = ("SS" . uni_to_native("\x{39c}\x{c0}\x{c1}\x{b6}\x{178}")) x $filler_length;
3f1b01
+    my $filler_lc = uni_to_native("\x{df}\x{131}\x{69}\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}") x $filler_length;
3f1b01
+    my $filler_fc = ("ss" . uni_to_native("\x{131}\x{69}\x{3bc}\x{e0}\x{e1}\x{b6}\x{ff}")) x $filler_length;
3f1b01
+    my $filler_uc = ("SS" . uni_to_native("\x{49}\x{130}\x{39c}\x{c0}\x{c1}\x{b6}\x{178}")) x $filler_length;
3f1b01
 
3f1b01
     use locale;
3f1b01
     setlocale(&POSIX::LC_CTYPE, $turkic_locale);
3f1b01
 
3f1b01
     is (lc "IIIIIII$filler", "\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
3f1b01
         "lc non-UTF-8, in Turkic locale, beginning with a bunch of I's");
3f1b01
+    is (fc "IIIIIII$filler", "\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_fc",
3f1b01
+        "fc non-UTF-8, in Turkic locale, beginning with a bunch of I's");
3f1b01
     is (lc "${filler}IIIIIII$filler", "$filler_lc\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
3f1b01
         "lc non-UTF-8, in Turkic locale, a bunch of I's, but not at the beginning");
3f1b01
+    is (fc "${filler}IIIIIII$filler", "$filler_fc\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_fc",
3f1b01
+        "fc non-UTF-8, in Turkic locale, a bunch of I's, but not at the beginning");
3f1b01
     is (lc "${filler}I\x{307}$filler", "${filler_lc}i$filler_lc",
3f1b01
         "lc in Turkic locale with DOT ABOVE immediately following I");
3f1b01
     is (lc "${filler}I\x{307}IIIIII$filler", "${filler_lc}i\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
3f1b01
-- 
3f1b01
2.26.2
3f1b01