94084c
commit 466f2be6c08070e9113ae2fdc7acd5d8828cba50
94084c
Author: Carlos O'Donell <carlos@redhat.com>
94084c
Date:   Wed Sep 1 15:19:19 2021 -0400
94084c
94084c
    Add generic C.UTF-8 locale (Bug 17318)
94084c
    
94084c
    We add a new C.UTF-8 locale. This locale is not builtin to glibc, but
94084c
    is provided as a distinct locale. The locale provides full support for
94084c
    UTF-8 and this includes full code point sorting via STRCMP-based
94084c
    collation (strcmp or wcscmp).
94084c
    
94084c
    The collation uses a new keyword 'codepoint_collation' which drops all
94084c
    collation rules and generates an empty zero rules collation to enable
94084c
    STRCMP usage in collation. This ensures that we get full code point
94084c
    sorting for C.UTF-8 with a minimal 1406 bytes of overhead (LC_COLLATE
94084c
    structure information and ASCII collating tables).
94084c
    
94084c
    The new locale is added to SUPPORTED. Minimal test data for specific
94084c
    code points (minus those not supported by collate-test) is provided in
94084c
    C.UTF-8.in, and this verifies code point sorting is working reasonably
94084c
    across the range. The locale was tested manually with the full set of
94084c
    code points without failure.
94084c
    
94084c
    The locale is harmonized with locales already shipping in various
94084c
    downstream distributions. A new tst-iconv9 test is added which verifies
94084c
    the C.UTF-8 locale is generally usable.
94084c
    
94084c
    Testing for fnmatch, regexec, and regcomp is provided by extending
94084c
    bug-regex1, bug-regex19, bug-regex4, bug-regex6, transbug, tst-fnmatch,
94084c
    tst-regcomp-truncated, and tst-regex to use C.UTF-8.
94084c
    
94084c
    Tested on x86_64 or i686 without regression.
94084c
    
94084c
    Reviewed-by: Florian Weimer <fweimer@redhat.com>
94084c
94084c
diff --git a/iconv/Makefile b/iconv/Makefile
94084c
index 07d77c9ecaafba1f..9993f2d3f3cd7498 100644
94084c
--- a/iconv/Makefile
94084c
+++ b/iconv/Makefile
94084c
@@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
94084c
 CFLAGS-linereader.c += -DNO_TRANSLITERATION
94084c
 CFLAGS-simple-hash.c += -I../locale
94084c
 
94084c
-tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
94084c
-	  tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt
94084c
+tests = \
94084c
+	tst-iconv1 \
94084c
+	tst-iconv2 \
94084c
+	tst-iconv3 \
94084c
+	tst-iconv4 \
94084c
+	tst-iconv5 \
94084c
+	tst-iconv6 \
94084c
+	tst-iconv7 \
94084c
+	tst-iconv8 \
94084c
+	tst-iconv9 \
94084c
+	tst-iconv-mt \
94084c
+	tst-iconv-opt \
94084c
+	# tests
94084c
 
94084c
 others		= iconv_prog iconvconfig
94084c
 install-others-programs	= $(inst_bindir)/iconv
94084c
@@ -83,10 +94,15 @@ endif
94084c
 include ../Rules
94084c
 
94084c
 ifeq ($(run-built-tests),yes)
94084c
-LOCALES := en_US.UTF-8
94084c
+# We have to generate locales (list sorted alphabetically)
94084c
+LOCALES := \
94084c
+	C.UTF-8 \
94084c
+	en_US.UTF-8 \
94084c
+	# LOCALES
94084c
 include ../gen-locales.mk
94084c
 
94084c
 $(objpfx)tst-iconv-opt.out: $(gen-locales)
94084c
+$(objpfx)tst-iconv9.out: $(gen-locales)
94084c
 endif
94084c
 
94084c
 $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
94084c
diff --git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c
94084c
new file mode 100644
94084c
index 0000000000000000..c46b1833d87b8e55
94084c
--- /dev/null
94084c
+++ b/iconv/tst-iconv9.c
94084c
@@ -0,0 +1,87 @@
94084c
+/* Verify that using C.UTF-8 works.
94084c
+
94084c
+   Copyright (C) 2021 Free Software Foundation, Inc.
94084c
+   This file is part of the GNU C Library.
94084c
+
94084c
+   The GNU C Library is free software; you can redistribute it and/or
94084c
+   modify it under the terms of the GNU Lesser General Public
94084c
+   License as published by the Free Software Foundation; either
94084c
+   version 2.1 of the License, or (at your option) any later version.
94084c
+
94084c
+   The GNU C Library is distributed in the hope that it will be useful,
94084c
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
94084c
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
94084c
+   Lesser General Public License for more details.
94084c
+
94084c
+   You should have received a copy of the GNU Lesser General Public
94084c
+   License along with the GNU C Library; if not, see
94084c
+   <https://www.gnu.org/licenses/>.  */
94084c
+
94084c
+#include <iconv.h>
94084c
+#include <stddef.h>
94084c
+#include <stdio.h>
94084c
+#include <string.h>
94084c
+#include <support/support.h>
94084c
+#include <support/check.h>
94084c
+
94084c
+/* This test does two things:
94084c
+   (1) Verify that we have likely included translit_combining in C.UTF-8.
94084c
+   (2) Verify default_missing is '?' as expected.  */
94084c
+
94084c
+/* ISO-8859-1 encoding of "für".  */
94084c
+char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 };
94084c
+/* ASCII transliteration is "fur" with C.UTF-8 translit_combining.  */
94084c
+char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 };
94084c
+
94084c
+/* First 3-byte UTF-8 code point.  */
94084c
+char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 };
94084c
+/* There is no ASCII transliteration for SAMARITAN LETTER ALAF
94084c
+   so we get default_missing used which is '?'.  */
94084c
+char default_missing_exp[] = { 0x3f, 0x0 };
94084c
+
94084c
+static int
94084c
+do_test (void)
94084c
+{
94084c
+  char ascii_out[5];
94084c
+  iconv_t cd;
94084c
+  char *inbuf;
94084c
+  char *outbuf;
94084c
+  size_t inbytes;
94084c
+  size_t outbytes;
94084c
+  size_t n;
94084c
+
94084c
+  /* The C.UTF-8 locale should include translit_combining, which provides
94084c
+     the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which
94084c
+     is not provided by locale/C-translit.h.in.  */
94084c
+  xsetlocale (LC_ALL, "C.UTF-8");
94084c
+
94084c
+  /* From ISO-8859-1 to ASCII.  */
94084c
+  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1");
94084c
+  TEST_VERIFY (cd != (iconv_t) -1);
94084c
+  inbuf = iso88591_in;
94084c
+  inbytes = 3;
94084c
+  outbuf = ascii_out;
94084c
+  outbytes = 3;
94084c
+  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
94084c
+  TEST_VERIFY (n != -1);
94084c
+  *outbuf = '\0';
94084c
+  TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3);
94084c
+  TEST_VERIFY (iconv_close (cd) == 0);
94084c
+
94084c
+  /* From UTF-8 to ASCII.  */
94084c
+  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8");
94084c
+  TEST_VERIFY (cd != (iconv_t) -1);
94084c
+  inbuf = utf8_in;
94084c
+  inbytes = 3;
94084c
+  outbuf = ascii_out;
94084c
+  outbytes = 3;
94084c
+  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
94084c
+  TEST_VERIFY (n != -1);
94084c
+  *outbuf = '\0';
94084c
+  TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1);
94084c
+  TEST_VERIFY (iconv_close (cd) == 0);
94084c
+
94084c
+  return 0;
94084c
+}
94084c
+
94084c
+#include <support/test-driver.c>
94084c
diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in
94084c
new file mode 100644
94084c
index 0000000000000000..c31dcc2aa045ee61
94084c
--- /dev/null
94084c
+++ b/localedata/C.UTF-8.in
94084c
@@ -0,0 +1,157 @@
94084c
+? ; <U1>
94084c
+? ; <U2>
94084c
+? ; <U3>
94084c
+? ; <U4>
94084c
+? ; <U5>
94084c
+? ; <U6>
94084c
+? ; <U7>
94084c
+? ; <U8>
94084c
+? ; <UE>
94084c
+? ; <UF>
94084c
+? ; <U10>
94084c
+? ; <U11>
94084c
+? ; <U12>
94084c
+? ; <U13>
94084c
+? ; <U14>
94084c
+? ; <U15>
94084c
+? ; <U16>
94084c
+? ; <U17>
94084c
+? ; <U18>
94084c
+? ; <U19>
94084c
+? ; <U1A>
94084c
+? ; <U1B>
94084c
+? ; <U1C>
94084c
+? ; <U1D>
94084c
+? ; <U1E>
94084c
+? ; <U1F>
94084c
+! ; <U21>
94084c
+" ; <U22>
94084c
+# ; <U23>
94084c
+$ ; <U24>
94084c
+% ; <U25>
94084c
+& ; <U26>
94084c
+' ; <U27>
94084c
+) ; <U29>
94084c
+* ; <U2A>
94084c
++ ; <U2B>
94084c
+, ; <U2C>
94084c
+- ; <U2D>
94084c
+. ; <U2E>
94084c
+/ ; <U2F>
94084c
+0 ; <U30>
94084c
+1 ; <U31>
94084c
+2 ; <U32>
94084c
+3 ; <U33>
94084c
+4 ; <U34>
94084c
+5 ; <U35>
94084c
+6 ; <U36>
94084c
+7 ; <U37>
94084c
+8 ; <U38>
94084c
+9 ; <U39>
94084c
+< ; <U3C>
94084c
+= ; <U3D>
94084c
+> ; <U3E>
94084c
+? ; <U3F>
94084c
+@ ; <U40>
94084c
+A ; <U41>
94084c
+B ; <U42>
94084c
+C ; <U43>
94084c
+D ; <U44>
94084c
+E ; <U45>
94084c
+F ; <U46>
94084c
+G ; <U47>
94084c
+H ; <U48>
94084c
+I ; <U49>
94084c
+J ; <U4A>
94084c
+K ; <U4B>
94084c
+L ; <U4C>
94084c
+M ; <U4D>
94084c
+N ; <U4E>
94084c
+O ; <U4F>
94084c
+P ; <U50>
94084c
+Q ; <U51>
94084c
+R ; <U52>
94084c
+S ; <U53>
94084c
+T ; <U54>
94084c
+U ; <U55>
94084c
+V ; <U56>
94084c
+W ; <U57>
94084c
+X ; <U58>
94084c
+Y ; <U59>
94084c
+Z ; <U5A>
94084c
+[ ; <U5B>
94084c
+\ ; <U5C>
94084c
+] ; <U5D>
94084c
+^ ; <U5E>
94084c
+_ ; <U5F>
94084c
+` ; <U60>
94084c
+a ; <U61>
94084c
+b ; <U62>
94084c
+c ; <U63>
94084c
+d ; <U64>
94084c
+e ; <U65>
94084c
+f ; <U66>
94084c
+g ; <U67>
94084c
+h ; <U68>
94084c
+i ; <U69>
94084c
+j ; <U6A>
94084c
+k ; <U6B>
94084c
+l ; <U6C>
94084c
+m ; <U6D>
94084c
+n ; <U6E>
94084c
+o ; <U6F>
94084c
+p ; <U70>
94084c
+q ; <U71>
94084c
+r ; <U72>
94084c
+s ; <U73>
94084c
+t ; <U74>
94084c
+u ; <U75>
94084c
+v ; <U76>
94084c
+w ; <U77>
94084c
+x ; <U78>
94084c
+y ; <U79>
94084c
+z ; <U7A>
94084c
+{ ; <U7B>
94084c
+| ; <U7C>
94084c
+} ; <U7D>
94084c
+~ ; <U7E>
94084c
+ ; <U7F>
94084c
+€ ; <U80>
94084c
+ÿ ; <UFF>
94084c
+Ā ; <U100>
94084c
+࿿ ; <UFFF>
94084c
+က ; <U1000>
94084c
+� ; <UFFFD>
94084c
+ï¿¿ ; <UFFFF>
94084c
+𐀀 ; <U10000>
94084c
+🿿 ; <U1FFFF>
94084c
+𠀀 ; <U20000>
94084c
+𯿿 ; <U2FFFF>
94084c
+𰀀 ; <U30000>
94084c
+ğ¿¿¾ ; <U3FFFE>
94084c
+񀀀 ; <U40000>
94084c
+񏿿 ; <U4FFFF>
94084c
+񐀀 ; <U50000>
94084c
+񟿿 ; <U5FFFF>
94084c
+񠀀 ; <U60000>
94084c
+񯿿 ; <U6FFFF>
94084c
+񰀀 ; <U70000>
94084c
+ñ¿¿¿ ; <U7FFFF>
94084c
+򀀀 ; <U80000>
94084c
+򏿿 ; <U8FFFF>
94084c
+򐀀 ; <U90000>
94084c
+򟿿 ; <U9FFFF>
94084c
+򠀀 ; <UA0000>
94084c
+򯿿 ; <UAFFFF>
94084c
+򰀀 ; <UB0000>
94084c
+ò¿¿¿ ; <UBFFFF>
94084c
+󀀁 ; <UC0001>
94084c
+󏿌 ; <UCFFCC>
94084c
+󐀎 ; <UD000E>
94084c
+󟿿 ; <UDFFFF>
94084c
+󠀁 ; <UE0001>
94084c
+󯿿 ; <UEFFFF>
94084c
+󰀁 ; <UF0001>
94084c
+ó¿¿¿ ; <UFFFFF>
94084c
+􀀁 ; <U100001>
94084c
+􏿿 ; <U10FFFF>
94084c
diff --git a/localedata/Makefile b/localedata/Makefile
94084c
index 0341528b0407ae3b..c9dd5a954e8194cc 100644
94084c
--- a/localedata/Makefile
94084c
+++ b/localedata/Makefile
94084c
@@ -47,6 +47,7 @@ test-input := \
94084c
 	bg_BG.UTF-8 \
94084c
 	br_FR.UTF-8 \
94084c
 	bs_BA.UTF-8 \
94084c
+	C.UTF-8 \
94084c
 	ckb_IQ.UTF-8 \
94084c
 	cmn_TW.UTF-8 \
94084c
 	crh_UA.UTF-8 \
94084c
@@ -206,6 +207,7 @@ LOCALES := \
94084c
 	bg_BG.UTF-8 \
94084c
 	br_FR.UTF-8 \
94084c
 	bs_BA.UTF-8 \
94084c
+	C.UTF-8 \
94084c
 	ckb_IQ.UTF-8 \
94084c
 	cmn_TW.UTF-8 \
94084c
 	crh_UA.UTF-8 \
94084c
diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
94084c
index 34f7a7c3fe2b6526..546ce6cea16a8fdb 100644
94084c
--- a/localedata/SUPPORTED
94084c
+++ b/localedata/SUPPORTED
94084c
@@ -79,6 +79,7 @@ brx_IN/UTF-8 \
94084c
 bs_BA.UTF-8/UTF-8 \
94084c
 bs_BA/ISO-8859-2 \
94084c
 byn_ER/UTF-8 \
94084c
+C.UTF-8/UTF-8 \
94084c
 ca_AD.UTF-8/UTF-8 \
94084c
 ca_AD/ISO-8859-15 \
94084c
 ca_ES.UTF-8/UTF-8 \
94084c
diff --git a/localedata/locales/C b/localedata/locales/C
94084c
new file mode 100644
94084c
index 0000000000000000..ca801c79cf7e953e
94084c
--- /dev/null
94084c
+++ b/localedata/locales/C
94084c
@@ -0,0 +1,194 @@
94084c
+escape_char /
94084c
+comment_char %
94084c
+% Locale for C locale in UTF-8
94084c
+
94084c
+LC_IDENTIFICATION
94084c
+title      "C locale"
94084c
+source     ""
94084c
+address    ""
94084c
+contact    ""
94084c
+email      "bug-glibc-locales@gnu.org"
94084c
+tel        ""
94084c
+fax        ""
94084c
+language   ""
94084c
+territory  ""
94084c
+revision   "2.0"
94084c
+date       "2020-06-28"
94084c
+category  "i18n:2012";LC_IDENTIFICATION
94084c
+category  "i18n:2012";LC_CTYPE
94084c
+category  "i18n:2012";LC_COLLATE
94084c
+category  "i18n:2012";LC_TIME
94084c
+category  "i18n:2012";LC_NUMERIC
94084c
+category  "i18n:2012";LC_MONETARY
94084c
+category  "i18n:2012";LC_MESSAGES
94084c
+category  "i18n:2012";LC_PAPER
94084c
+category  "i18n:2012";LC_NAME
94084c
+category  "i18n:2012";LC_ADDRESS
94084c
+category  "i18n:2012";LC_TELEPHONE
94084c
+category  "i18n:2012";LC_MEASUREMENT
94084c
+END LC_IDENTIFICATION
94084c
+
94084c
+LC_CTYPE
94084c
+% Include only the i18n character type classes without any of the
94084c
+% transliteration that i18n uses by default.
94084c
+copy "i18n_ctype"
94084c
+
94084c
+% Include the neutral transliterations.  The builtin C and
94084c
+% POSIX locales have +1600 transliterations that are built into
94084c
+% the locales, and these are a superset of those.
94084c
+translit_start
94084c
+include "translit_neutral";""
94084c
+% We must use '?' for default_missing because the transliteration
94084c
+% framework includes it directly into the output and so it must
94084c
+% be compatible with ASCII if that is the target character set.
94084c
+default_missing <U003F>
94084c
+translit_end
94084c
+
94084c
+% Include the transliterations that can convert combined characters.
94084c
+% These are generally expected by users.
94084c
+translit_start
94084c
+include "translit_combining";""
94084c
+translit_end
94084c
+
94084c
+END LC_CTYPE
94084c
+
94084c
+LC_COLLATE
94084c
+% The keyword 'codepoint_collation' in any part of any LC_COLLATE
94084c
+% immediately discards all collation information and causes the
94084c
+% locale to use strcmp/wcscmp for collation comparison.  This is
94084c
+% exactly what is needed for C (ASCII) or C.UTF-8.
94084c
+codepoint_collation
94084c
+END LC_COLLATE
94084c
+
94084c
+LC_MONETARY
94084c
+
94084c
+% This is the 14652 i18n fdcc-set definition for the LC_MONETARY
94084c
+% category (except for the int_curr_symbol and currency_symbol, they are
94084c
+% empty in the 14652 i18n fdcc-set definition and also empty in
94084c
+% glibc/locale/C-monetary.c.).
94084c
+int_curr_symbol     ""
94084c
+currency_symbol     ""
94084c
+mon_decimal_point   "."
94084c
+mon_thousands_sep   ""
94084c
+mon_grouping        -1
94084c
+positive_sign       ""
94084c
+negative_sign       "-"
94084c
+int_frac_digits     -1
94084c
+frac_digits         -1
94084c
+p_cs_precedes       -1
94084c
+int_p_sep_by_space  -1
94084c
+p_sep_by_space      -1
94084c
+n_cs_precedes       -1
94084c
+int_n_sep_by_space  -1
94084c
+n_sep_by_space      -1
94084c
+p_sign_posn         -1
94084c
+n_sign_posn         -1
94084c
+%
94084c
+END LC_MONETARY
94084c
+
94084c
+LC_NUMERIC
94084c
+% This is the POSIX Locale definition for
94084c
+% the LC_NUMERIC category.
94084c
+%
94084c
+decimal_point   "."
94084c
+thousands_sep   ""
94084c
+grouping        -1
94084c
+END LC_NUMERIC
94084c
+
94084c
+LC_TIME
94084c
+% This is the POSIX Locale definition for the LC_TIME category with the
94084c
+% exception that time is per ISO 8601 and 24-hour.
94084c
+%
94084c
+% Abbreviated weekday names (%a)
94084c
+abday       "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat"
94084c
+
94084c
+% Full weekday names (%A)
94084c
+day         "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/
94084c
+            "Friday";"Saturday"
94084c
+
94084c
+% Abbreviated month names (%b)
94084c
+abmon       "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/
94084c
+            "Oct";"Nov";"Dec"
94084c
+
94084c
+% Full month names (%B)
94084c
+mon         "January";"February";"March";"April";"May";"June";"July";/
94084c
+            "August";"September";"October";"November";"December"
94084c
+
94084c
+% Week description, consists of three fields:
94084c
+% 1. Number of days in a week.
94084c
+% 2. Gregorian date that is a first weekday (19971130 for Sunday, 19971201 for Monday).
94084c
+% 3. The weekday number to be contained in the first week of the year.
94084c
+%
94084c
+% ISO 8601 conforming applications should use the values 7, 19971201 (a
94084c
+% Monday), and 4 (Thursday), respectively.
94084c
+week    7;19971201;4
94084c
+first_weekday	1
94084c
+first_workday	2
94084c
+
94084c
+% Appropriate date and time representation (%c)
94084c
+d_t_fmt "%a %b %e %H:%M:%S %Y"
94084c
+
94084c
+% Appropriate date representation (%x)
94084c
+d_fmt   "%m/%d/%y"
94084c
+
94084c
+% Appropriate time representation (%X)
94084c
+t_fmt   "%H:%M:%S"
94084c
+
94084c
+% Appropriate AM/PM time representation (%r)
94084c
+t_fmt_ampm "%I:%M:%S %p"
94084c
+
94084c
+% Equivalent of AM/PM (%p)
94084c
+am_pm	"AM";"PM"
94084c
+
94084c
+% Appropriate date representation (date(1))
94084c
+date_fmt	"%a %b %e %H:%M:%S %Z %Y"
94084c
+END LC_TIME
94084c
+
94084c
+LC_MESSAGES
94084c
+% This is the POSIX Locale definition for
94084c
+% the LC_MESSAGES category.
94084c
+%
94084c
+yesexpr "^[yY]"
94084c
+noexpr  "^[nN]"
94084c
+yesstr  "Yes"
94084c
+nostr   "No"
94084c
+END LC_MESSAGES
94084c
+
94084c
+LC_PAPER
94084c
+% This is the ISO/IEC 14652 "i18n" definition for
94084c
+% the LC_PAPER category.
94084c
+% (A4 paper, this is also used in the built in C/POSIX
94084c
+% locale in glibc/locale/C-paper.c)
94084c
+height   297
94084c
+width    210
94084c
+END LC_PAPER
94084c
+
94084c
+LC_NAME
94084c
+% This is the ISO/IEC 14652 "i18n" definition for
94084c
+% the LC_NAME category.
94084c
+% (also used in the built in C/POSIX locale in glibc/locale/C-name.c)
94084c
+name_fmt    "%p%t%g%t%m%t%f"
94084c
+END LC_NAME
94084c
+
94084c
+LC_ADDRESS
94084c
+% This is the ISO/IEC 14652 "i18n" definition for
94084c
+% the LC_ADDRESS category.
94084c
+% (also used in the built in C/POSIX locale in glibc/locale/C-address.c)
94084c
+postal_fmt    "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
94084c
+END LC_ADDRESS
94084c
+
94084c
+LC_TELEPHONE
94084c
+% This is the ISO/IEC 14652 "i18n" definition for
94084c
+% the LC_TELEPHONE category.
94084c
+% "+%c %a %l"
94084c
+tel_int_fmt    "+%c %a %l"
94084c
+% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c)
94084c
+END LC_TELEPHONE
94084c
+
94084c
+LC_MEASUREMENT
94084c
+% This is the ISO/IEC 14652 "i18n" definition for
94084c
+% the LC_MEASUREMENT category.
94084c
+% (same as in the built in C/POSIX locale in glibc/locale/C-measurement.c)
94084c
+%metric
94084c
+measurement    1
94084c
+END LC_MEASUREMENT
94084c
diff --git a/posix/Makefile b/posix/Makefile
94084c
index 059efb3cd2706cbe..a5229777eeb0e067 100644
94084c
--- a/posix/Makefile
94084c
+++ b/posix/Makefile
94084c
@@ -190,9 +190,19 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test
94084c
 	$(evaluate-test)
94084c
 endif
94084c
 
94084c
-LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \
94084c
-	   en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \
94084c
-	   cs_CZ.ISO-8859-2
94084c
+LOCALES := \
94084c
+	cs_CZ.ISO-8859-2 \
94084c
+	cs_CZ.UTF-8 \
94084c
+	C.UTF-8 \
94084c
+	da_DK.ISO-8859-1 \
94084c
+	de_DE.ISO-8859-1 \
94084c
+	de_DE.UTF-8 \
94084c
+	en_US.UTF-8 \
94084c
+	es_US.ISO-8859-1 \
94084c
+	es_US.UTF-8 \
94084c
+	ja_JP.EUC-JP \
94084c
+	tr_TR.UTF-8 \
94084c
+	# LOCALES
94084c
 include ../gen-locales.mk
94084c
 
94084c
 $(objpfx)bug-regex1.out: $(gen-locales)
94084c
diff --git a/posix/bug-regex1.c b/posix/bug-regex1.c
94084c
index 38eb543951862492..7e9f4ec430a95631 100644
94084c
--- a/posix/bug-regex1.c
94084c
+++ b/posix/bug-regex1.c
94084c
@@ -41,6 +41,26 @@ main (void)
94084c
 	puts (" -> OK");
94084c
     }
94084c
 
94084c
+  puts ("in C.UTF-8 locale");
94084c
+  setlocale (LC_ALL, "C.UTF-8");
94084c
+  s = re_compile_pattern ("[an\371]*n", 7, &regex);
94084c
+  if (s != NULL)
94084c
+    {
94084c
+      puts ("re_compile_pattern return non-NULL value");
94084c
+      result = 1;
94084c
+    }
94084c
+  else
94084c
+    {
94084c
+      match = re_match (&regex, "an", 2, 0, &regs);
94084c
+      if (match != 2)
94084c
+	{
94084c
+	  printf ("re_match returned %d, expected 2\n", match);
94084c
+	  result = 1;
94084c
+	}
94084c
+      else
94084c
+	puts (" -> OK");
94084c
+    }
94084c
+
94084c
   puts ("in de_DE.ISO-8859-1 locale");
94084c
   setlocale (LC_ALL, "de_DE.ISO-8859-1");
94084c
   s = re_compile_pattern ("[anù]*n", 7, &regex);
94084c
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
94084c
index b3fee0a7302c3263..e00ff60a14f994bf 100644
94084c
--- a/posix/bug-regex19.c
94084c
+++ b/posix/bug-regex19.c
94084c
@@ -25,6 +25,7 @@
94084c
 #include <string.h>
94084c
 #include <locale.h>
94084c
 #include <libc-diag.h>
94084c
+#include <support/support.h>
94084c
 
94084c
 #define BRE RE_SYNTAX_POSIX_BASIC
94084c
 #define ERE RE_SYNTAX_POSIX_EXTENDED
94084c
@@ -407,8 +408,8 @@ do_mb_tests (const struct test_s *test)
94084c
   return 0;
94084c
 }
94084c
 
94084c
-int
94084c
-main (void)
94084c
+static int
94084c
+do_test (void)
94084c
 {
94084c
   size_t i;
94084c
   int ret = 0;
94084c
@@ -417,20 +418,17 @@ main (void)
94084c
 
94084c
   for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
94084c
     {
94084c
-      if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
94084c
-	{
94084c
-	  puts ("setlocale de_DE.ISO-8859-1 failed");
94084c
-	  ret = 1;
94084c
-	}
94084c
+      xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
94084c
       ret |= do_one_test (&tests[i], "");
94084c
-      if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
94084c
-	{
94084c
-	  puts ("setlocale de_DE.UTF-8 failed");
94084c
-	  ret = 1;
94084c
-	}
94084c
+      xsetlocale (LC_ALL, "de_DE.UTF-8");
94084c
+      ret |= do_one_test (&tests[i], "UTF-8 ");
94084c
+      ret |= do_mb_tests (&tests[i]);
94084c
+      xsetlocale (LC_ALL, "C.UTF-8");
94084c
       ret |= do_one_test (&tests[i], "UTF-8 ");
94084c
       ret |= do_mb_tests (&tests[i]);
94084c
     }
94084c
 
94084c
   return ret;
94084c
 }
94084c
+
94084c
+#include <support/test-driver.c>
94084c
diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c
94084c
index 8d5ae11567889301..6475833c525176b2 100644
94084c
--- a/posix/bug-regex4.c
94084c
+++ b/posix/bug-regex4.c
94084c
@@ -32,8 +32,33 @@ main (void)
94084c
 
94084c
   memset (&regex, '\0', sizeof (regex));
94084c
 
94084c
+  printf ("INFO: Checking C.\n");
94084c
   setlocale (LC_ALL, "C");
94084c
 
94084c
+  s = re_compile_pattern ("ab[cde]", 7, &regex);
94084c
+  if (s != NULL)
94084c
+    {
94084c
+      puts ("re_compile_pattern returned non-NULL value");
94084c
+      result = 1;
94084c
+    }
94084c
+  else
94084c
+    {
94084c
+      match[0] = re_search_2 (&regex, "xyabez", 6, "", 0, 1, 5, NULL, 6);
94084c
+      match[1] = re_search_2 (&regex, NULL, 0, "abc", 3, 0, 3, NULL, 3);
94084c
+      match[2] = re_search_2 (&regex, "xya", 3, "bd", 2, 2, 3, NULL, 5);
94084c
+      if (match[0] != 2 || match[1] != 0 || match[2] != 2)
94084c
+	{
94084c
+	  printf ("re_search_2 returned %d,%d,%d, expected 2,0,2\n",
94084c
+		  match[0], match[1], match[2]);
94084c
+	  result = 1;
94084c
+	}
94084c
+      else
94084c
+	puts (" -> OK");
94084c
+    }
94084c
+
94084c
+  printf ("INFO: Checking C.UTF-8.\n");
94084c
+  setlocale (LC_ALL, "C.UTF-8");
94084c
+
94084c
   s = re_compile_pattern ("ab[cde]", 7, &regex);
94084c
   if (s != NULL)
94084c
     {
94084c
diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c
94084c
index 2bdf2126a49ee99b..0929b69b83c91e5e 100644
94084c
--- a/posix/bug-regex6.c
94084c
+++ b/posix/bug-regex6.c
94084c
@@ -30,7 +30,7 @@ main (int argc, char *argv[])
94084c
   regex_t re;
94084c
   regmatch_t mat[10];
94084c
   int i, j, ret = 0;
94084c
-  const char *locales[] = { "C", "de_DE.UTF-8" };
94084c
+  const char *locales[] = { "C", "C.UTF-8", "de_DE.UTF-8" };
94084c
   const char *string = "http://www.regex.com/pattern/matching.html#intro";
94084c
   regmatch_t expect[10] = {
94084c
     { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 },
94084c
diff --git a/posix/transbug.c b/posix/transbug.c
94084c
index d0983b4d44d04fd2..b240177cf72326ff 100644
94084c
--- a/posix/transbug.c
94084c
+++ b/posix/transbug.c
94084c
@@ -116,16 +116,32 @@ do_test (void)
94084c
   static const char lower[] = "[[:lower:]]+";
94084c
   static const char upper[] = "[[:upper:]]+";
94084c
   struct re_registers regs[4];
94084c
+  int result = 0;
94084c
 
94084c
+#define CHECK(exp) \
94084c
+  if (exp) { puts (#exp); result = 1; }
94084c
+
94084c
+  printf ("INFO: Checking C.\n");
94084c
   setlocale (LC_ALL, "C");
94084c
 
94084c
   (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
94084c
 
94084c
-  int result;
94084c
-#define CHECK(exp) \
94084c
-  if (exp) { puts (#exp); result = 1; }
94084c
+  result |= run_test (lower, regs);
94084c
+  result |= run_test (upper, &regs[2]);
94084c
+  if (! result)
94084c
+    {
94084c
+      CHECK (regs[0].start[0] != regs[2].start[0]);
94084c
+      CHECK (regs[0].end[0] != regs[2].end[0]);
94084c
+      CHECK (regs[1].start[0] != regs[3].start[0]);
94084c
+      CHECK (regs[1].end[0] != regs[3].end[0]);
94084c
+    }
94084c
+
94084c
+  printf ("INFO: Checking C.UTF-8.\n");
94084c
+  setlocale (LC_ALL, "C.UTF-8");
94084c
+
94084c
+  (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
94084c
 
94084c
-  result = run_test (lower, regs);
94084c
+  result |= run_test (lower, regs);
94084c
   result |= run_test (upper, &regs[2]);
94084c
   if (! result)
94084c
     {
94084c
diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input
94084c
index 67aac5aadafd8aeb..6ff5318032e0afb2 100644
94084c
--- a/posix/tst-fnmatch.input
94084c
+++ b/posix/tst-fnmatch.input
94084c
@@ -472,6 +472,397 @@ C		"\\"			"[Z-\\]]"	       0
94084c
 C		"]"			"[Z-\\]]"	       0
94084c
 C		"-"			"[Z-\\]]"	       NOMATCH
94084c
 
94084c
+# B.6 004(C)
94084c
+C.UTF-8		 "!#%+,-./01234567889"	"!#%+,-./01234567889"  0
94084c
+C.UTF-8		 ":;=@ABCDEFGHIJKLMNO"	":;=@ABCDEFGHIJKLMNO"  0
94084c
+C.UTF-8		 "PQRSTUVWXYZ]abcdefg"	"PQRSTUVWXYZ]abcdefg"  0
94084c
+C.UTF-8		 "hijklmnopqrstuvwxyz"	"hijklmnopqrstuvwxyz"  0
94084c
+C.UTF-8		 "^_{}~"		"^_{}~"		       0
94084c
+
94084c
+# B.6 005(C)
94084c
+C.UTF-8		 "\"$&'()"		"\\\"\\$\\&\\'\\(\\)"  0
94084c
+C.UTF-8		 "*?[\\`|"		"\\*\\?\\[\\\\\\`\\|"  0
94084c
+C.UTF-8		 "<>"			"\\<\\>"	       0
94084c
+
94084c
+# B.6 006(C)
94084c
+C.UTF-8		 "?*["			"[?*[][?*[][?*[]"      0
94084c
+C.UTF-8		 "a/b"			"?/b"		       0
94084c
+
94084c
+# B.6 007(C)
94084c
+C.UTF-8		 "a/b"			"a?b"		       0
94084c
+C.UTF-8		 "a/b"			"a/?"		       0
94084c
+C.UTF-8		 "aa/b"			"?/b"		       NOMATCH
94084c
+C.UTF-8		 "aa/b"			"a?b"		       NOMATCH
94084c
+C.UTF-8		 "a/bb"			"a/?"		       NOMATCH
94084c
+
94084c
+# B.6 009(C)
94084c
+C.UTF-8		 "abc"			"[abc]"		       NOMATCH
94084c
+C.UTF-8		 "x"			"[abc]"		       NOMATCH
94084c
+C.UTF-8		 "a"			"[abc]"		       0
94084c
+C.UTF-8		 "["			"[[abc]"	       0
94084c
+C.UTF-8		 "a"			"[][abc]"	       0
94084c
+C.UTF-8		 "a]"			"[]a]]"		       0
94084c
+
94084c
+# B.6 010(C)
94084c
+C.UTF-8		 "xyz"			"[!abc]"	       NOMATCH
94084c
+C.UTF-8		 "x"			"[!abc]"	       0
94084c
+C.UTF-8		 "a"			"[!abc]"	       NOMATCH
94084c
+
94084c
+# B.6 011(C)
94084c
+C.UTF-8		 "]"			"[][abc]"	       0
94084c
+C.UTF-8		 "abc]"			"[][abc]"	       NOMATCH
94084c
+C.UTF-8		 "[]abc"		"[][]abc"	       NOMATCH
94084c
+C.UTF-8		 "]"			"[!]]"		       NOMATCH
94084c
+C.UTF-8		 "aa]"			"[!]a]"		       NOMATCH
94084c
+C.UTF-8		 "]"			"[!a]"		       0
94084c
+C.UTF-8		 "]]"			"[!a]]"		       0
94084c
+
94084c
+# B.6 012(C)
94084c
+C.UTF-8		 "a"			"[[.a.]]"	       0
94084c
+C.UTF-8		 "-"			"[[.-.]]"	       0
94084c
+C.UTF-8		 "-"			"[[.-.][.].]]"	       0
94084c
+C.UTF-8		 "-"			"[[.].][.-.]]"	       0
94084c
+C.UTF-8		 "-"			"[[.-.][=u=]]"	       0
94084c
+C.UTF-8		 "-"			"[[.-.][:alpha:]]"     0
94084c
+C.UTF-8		 "a"			"[![.a.]]"	       NOMATCH
94084c
+
94084c
+# B.6 013(C)
94084c
+C.UTF-8		 "a"			"[[.b.]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[.b.][.c.]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[.b.][=b=]]"	       NOMATCH
94084c
+
94084c
+
94084c
+# B.6 015(C)
94084c
+C.UTF-8		 "a"			"[[=a=]]"	       0
94084c
+C.UTF-8		 "b"			"[[=a=]b]"	       0
94084c
+C.UTF-8		 "b"			"[[=a=][=b=]]"	       0
94084c
+C.UTF-8		 "a"			"[[=a=][=b=]]"	       0
94084c
+C.UTF-8		 "a"			"[[=a=][.b.]]"	       0
94084c
+C.UTF-8		 "a"			"[[=a=][:digit:]]"     0
94084c
+
94084c
+# B.6 016(C)
94084c
+C.UTF-8		 "="			"[[=a=]b]"	       NOMATCH
94084c
+C.UTF-8		 "]"			"[[=a=]b]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[=b=][=c=]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[=b=][.].]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[=b=][:digit:]]"     NOMATCH
94084c
+
94084c
+# B.6 017(C)
94084c
+C.UTF-8		 "a"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "a"			"[![:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "-"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "a]a"			"[[:alnum:]]a"	       NOMATCH
94084c
+C.UTF-8		 "-"			"[[:alnum:]-]"	       0
94084c
+C.UTF-8		 "aa"			"[[:alnum:]]a"	       0
94084c
+C.UTF-8		 "-"			"[![:alnum:]]"	       0
94084c
+C.UTF-8		 "]"			"[!][:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "["			"[![:alnum:][]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "b"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "c"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "d"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "e"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "f"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "g"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "h"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "i"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "j"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "k"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "l"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "m"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "n"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "o"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "p"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "q"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "r"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "s"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "u"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "v"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "w"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "x"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "y"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "z"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "A"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "B"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "C"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "D"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "E"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "F"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "G"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "H"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "I"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "J"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "K"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "L"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "M"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "N"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "O"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "P"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "Q"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "R"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "S"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "T"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "U"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "V"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "W"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "X"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "Y"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "Z"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "0"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "1"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "2"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "3"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "4"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "5"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "6"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "7"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "8"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "9"			"[[:alnum:]]"	       0
94084c
+C.UTF-8		 "!"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "#"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "%"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "+"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 ","			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "-"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "."			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "/"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 ":"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 ";"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "="			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "@"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "["			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "\\"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "]"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "^"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "_"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "{"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "}"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "~"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "\""			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "$"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "&"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "'"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "("			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 ")"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "*"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "?"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "`"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "|"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "<"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 ">"			"[[:alnum:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:cntrl:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:cntrl:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:lower:]]"	       0
94084c
+C.UTF-8		 "\t"			"[[:lower:]]"	       NOMATCH
94084c
+C.UTF-8		 "T"			"[[:lower:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:space:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:space:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:alpha:]]"	       0
94084c
+C.UTF-8		 "\t"			"[[:alpha:]]"	       NOMATCH
94084c
+C.UTF-8		 "0"			"[[:digit:]]"	       0
94084c
+C.UTF-8		 "\t"			"[[:digit:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:digit:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:print:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:print:]]"	       0
94084c
+C.UTF-8		 "T"			"[[:upper:]]"	       0
94084c
+C.UTF-8		 "\t"			"[[:upper:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:upper:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:blank:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:blank:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:graph:]]"	       NOMATCH
94084c
+C.UTF-8		 "t"			"[[:graph:]]"	       0
94084c
+C.UTF-8		 "."			"[[:punct:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:punct:]]"	       NOMATCH
94084c
+C.UTF-8		 "\t"			"[[:punct:]]"	       NOMATCH
94084c
+C.UTF-8		 "0"			"[[:xdigit:]]"	       0
94084c
+C.UTF-8		 "\t"			"[[:xdigit:]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[:xdigit:]]"	       0
94084c
+C.UTF-8		 "A"			"[[:xdigit:]]"	       0
94084c
+C.UTF-8		 "t"			"[[:xdigit:]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[alpha]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[alpha:]]"	       NOMATCH
94084c
+C.UTF-8		 "a]"			"[[alpha]]"	       0
94084c
+C.UTF-8		 "a]"			"[[alpha:]]"	       0
94084c
+C.UTF-8		 "a"			"[[:alpha:][.b.]]"     0
94084c
+C.UTF-8		 "a"			"[[:alpha:][=b=]]"     0
94084c
+C.UTF-8		 "a"			"[[:alpha:][:digit:]]" 0
94084c
+C.UTF-8		 "a"			"[[:digit:][:alpha:]]" 0
94084c
+
94084c
+# B.6 018(C)
94084c
+C.UTF-8		 "a"			"[a-c]"		       0
94084c
+C.UTF-8		 "b"			"[a-c]"		       0
94084c
+C.UTF-8		 "c"			"[a-c]"		       0
94084c
+C.UTF-8		 "a"			"[b-c]"		       NOMATCH
94084c
+C.UTF-8		 "d"			"[b-c]"		       NOMATCH
94084c
+C.UTF-8		 "B"			"[a-c]"		       NOMATCH
94084c
+C.UTF-8		 "b"			"[A-C]"		       NOMATCH
94084c
+C.UTF-8		 ""			"[a-c]"		       NOMATCH
94084c
+C.UTF-8		 "as"			"[a-ca-z]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[.a.]-c]"	       0
94084c
+C.UTF-8		 "a"			"[a-[.c.]]"	       0
94084c
+C.UTF-8		 "a"			"[[.a.]-[.c.]]"	       0
94084c
+C.UTF-8		 "b"			"[[.a.]-c]"	       0
94084c
+C.UTF-8		 "b"			"[a-[.c.]]"	       0
94084c
+C.UTF-8		 "b"			"[[.a.]-[.c.]]"	       0
94084c
+C.UTF-8		 "c"			"[[.a.]-c]"	       0
94084c
+C.UTF-8		 "c"			"[a-[.c.]]"	       0
94084c
+C.UTF-8		 "c"			"[[.a.]-[.c.]]"	       0
94084c
+C.UTF-8		 "d"			"[[.a.]-c]"	       NOMATCH
94084c
+C.UTF-8		 "d"			"[a-[.c.]]"	       NOMATCH
94084c
+C.UTF-8		 "d"			"[[.a.]-[.c.]]"	       NOMATCH
94084c
+
94084c
+# B.6 019(C)
94084c
+C.UTF-8		 "a"			"[c-a]"		       NOMATCH
94084c
+C.UTF-8		 "a"			"[[.c.]-a]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[c-[.a.]]"	       NOMATCH
94084c
+C.UTF-8		 "a"			"[[.c.]-[.a.]]"	       NOMATCH
94084c
+C.UTF-8		 "c"			"[c-a]"		       NOMATCH
94084c
+C.UTF-8		 "c"			"[[.c.]-a]"	       NOMATCH
94084c
+C.UTF-8		 "c"			"[c-[.a.]]"	       NOMATCH
94084c
+C.UTF-8		 "c"			"[[.c.]-[.a.]]"	       NOMATCH
94084c
+
94084c
+# B.6 020(C)
94084c
+C.UTF-8		 "a"			"[a-c0-9]"	       0
94084c
+C.UTF-8		 "d"			"[a-c0-9]"	       NOMATCH
94084c
+C.UTF-8		 "B"			"[a-c0-9]"	       NOMATCH
94084c
+
94084c
+# B.6 021(C)
94084c
+C.UTF-8		 "-"			"[-a]"		       0
94084c
+C.UTF-8		 "a"			"[-b]"		       NOMATCH
94084c
+C.UTF-8		 "-"			"[!-a]"		       NOMATCH
94084c
+C.UTF-8		 "a"			"[!-b]"		       0
94084c
+C.UTF-8		 "-"			"[a-c-0-9]"	       0
94084c
+C.UTF-8		 "b"			"[a-c-0-9]"	       0
94084c
+C.UTF-8		 "a:"			"a[0-9-a]"	       NOMATCH
94084c
+C.UTF-8		 "a:"			"a[09-a]"	       0
94084c
+
94084c
+# B.6 024(C)
94084c
+C.UTF-8		 ""			"*"		       0
94084c
+C.UTF-8		 "asd/sdf"		"*"		       0
94084c
+
94084c
+# B.6 025(C)
94084c
+C.UTF-8		 "as"			"[a-c][a-z]"	       0
94084c
+C.UTF-8		 "as"			"??"		       0
94084c
+
94084c
+# B.6 026(C)
94084c
+C.UTF-8		 "asd/sdf"		"as*df"		       0
94084c
+C.UTF-8		 "asd/sdf"		"as*"		       0
94084c
+C.UTF-8		 "asd/sdf"		"*df"		       0
94084c
+C.UTF-8		 "asd/sdf"		"as*dg"		       NOMATCH
94084c
+C.UTF-8		 "asdf"			"as*df"		       0
94084c
+C.UTF-8		 "asdf"			"as*df?"	       NOMATCH
94084c
+C.UTF-8		 "asdf"			"as*??"		       0
94084c
+C.UTF-8		 "asdf"			"a*???"		       0
94084c
+C.UTF-8		 "asdf"			"*????"		       0
94084c
+C.UTF-8		 "asdf"			"????*"		       0
94084c
+C.UTF-8		 "asdf"			"??*?"		       0
94084c
+
94084c
+# B.6 027(C)
94084c
+C.UTF-8		 "/"			"/"		       0
94084c
+C.UTF-8		 "/"			"/*"		       0
94084c
+C.UTF-8		 "/"			"*/"		       0
94084c
+C.UTF-8		 "/"			"/?"		       NOMATCH
94084c
+C.UTF-8		 "/"			"?/"		       NOMATCH
94084c
+C.UTF-8		 "/"			"?"		       0
94084c
+C.UTF-8		 "."			"?"		       0
94084c
+C.UTF-8		 "/."			"??"		       0
94084c
+C.UTF-8		 "/"			"[!a-c]"	       0
94084c
+C.UTF-8		 "."			"[!a-c]"	       0
94084c
+
94084c
+# B.6 029(C)
94084c
+C.UTF-8		 "/"			"/"		       0       PATHNAME
94084c
+C.UTF-8		 "//"			"//"		       0       PATHNAME
94084c
+C.UTF-8		 "/.a"			"/*"		       0       PATHNAME
94084c
+C.UTF-8		 "/.a"			"/?a"		       0       PATHNAME
94084c
+C.UTF-8		 "/.a"			"/[!a-z]a"	       0       PATHNAME
94084c
+C.UTF-8		 "/.a/.b"		"/*/?b"		       0       PATHNAME
94084c
+
94084c
+# B.6 030(C)
94084c
+C.UTF-8		 "/"			"?"		       NOMATCH PATHNAME
94084c
+C.UTF-8		 "/"			"*"		       NOMATCH PATHNAME
94084c
+C.UTF-8		 "a/b"			"a?b"		       NOMATCH PATHNAME
94084c
+C.UTF-8		 "/.a/.b"		"/*b"		       NOMATCH PATHNAME
94084c
+
94084c
+# B.6 031(C)
94084c
+C.UTF-8		 "/$"			"\\/\\$"	       0
94084c
+C.UTF-8		 "/["			"\\/\\["	       0
94084c
+C.UTF-8		 "/["			"\\/["		       0
94084c
+C.UTF-8		 "/[]"			"\\/\\[]"	       0
94084c
+
94084c
+# B.6 032(C)
94084c
+C.UTF-8		 "/$"			"\\/\\$"	       NOMATCH NOESCAPE
94084c
+C.UTF-8		 "/\\$"			"\\/\\$"	       NOMATCH NOESCAPE
94084c
+C.UTF-8		 "\\/\\$"		"\\/\\$"	       0       NOESCAPE
94084c
+
94084c
+# B.6 033(C)
94084c
+C.UTF-8		 ".asd"			".*"		       0       PERIOD
94084c
+C.UTF-8		 "/.asd"		"*"		       0       PERIOD
94084c
+C.UTF-8		 "/as/.df"		"*/?*f"		       0       PERIOD
94084c
+C.UTF-8		 "..asd"		".[!a-z]*"	       0       PERIOD
94084c
+
94084c
+# B.6 034(C)
94084c
+C.UTF-8		 ".asd"			"*"		       NOMATCH PERIOD
94084c
+C.UTF-8		 ".asd"			"?asd"		       NOMATCH PERIOD
94084c
+C.UTF-8		 ".asd"			"[!a-z]*"	       NOMATCH PERIOD
94084c
+
94084c
+# B.6 035(C)
94084c
+C.UTF-8		 "/."			"/."		       0       PATHNAME|PERIOD
94084c
+C.UTF-8		 "/.a./.b."		"/.*/.*"	       0       PATHNAME|PERIOD
94084c
+C.UTF-8		 "/.a./.b."		"/.??/.??"	       0       PATHNAME|PERIOD
94084c
+
94084c
+# B.6 036(C)
94084c
+C.UTF-8		 "/."			"*"		       NOMATCH PATHNAME|PERIOD
94084c
+C.UTF-8		 "/."			"/*"		       NOMATCH PATHNAME|PERIOD
94084c
+C.UTF-8		 "/."			"/?"		       NOMATCH PATHNAME|PERIOD
94084c
+C.UTF-8		 "/."			"/[!a-z]"	       NOMATCH PATHNAME|PERIOD
94084c
+C.UTF-8		 "/a./.b."		"/*/*"		       NOMATCH PATHNAME|PERIOD
94084c
+C.UTF-8		 "/a./.b."		"/??/???"	       NOMATCH PATHNAME|PERIOD
94084c
+
94084c
+# Some home-grown tests.
94084c
+C.UTF-8		"foobar"		"foo*[abc]z"	       NOMATCH
94084c
+C.UTF-8		"foobaz"		"foo*[abc][xyz]"       0
94084c
+C.UTF-8		"foobaz"		"foo?*[abc][xyz]"      0
94084c
+C.UTF-8		"foobaz"		"foo?*[abc][x/yz]"     0
94084c
+C.UTF-8		"foobaz"		"foo?*[abc]/[xyz]"     NOMATCH PATHNAME
94084c
+C.UTF-8		"a"			"a/"                   NOMATCH PATHNAME
94084c
+C.UTF-8		"a/"			"a"		       NOMATCH PATHNAME
94084c
+C.UTF-8		"//a"			"/a"		       NOMATCH PATHNAME
94084c
+C.UTF-8		"/a"			"//a"		       NOMATCH PATHNAME
94084c
+C.UTF-8		"az"			"[a-]z"		       0
94084c
+C.UTF-8		"bz"			"[ab-]z"	       0
94084c
+C.UTF-8		"cz"			"[ab-]z"	       NOMATCH
94084c
+C.UTF-8		"-z"			"[ab-]z"	       0
94084c
+C.UTF-8		"az"			"[-a]z"		       0
94084c
+C.UTF-8		"bz"			"[-ab]z"	       0
94084c
+C.UTF-8		"cz"			"[-ab]z"	       NOMATCH
94084c
+C.UTF-8		"-z"			"[-ab]z"	       0
94084c
+C.UTF-8		"\\"			"[\\\\-a]"	       0
94084c
+C.UTF-8		"_"			"[\\\\-a]"	       0
94084c
+C.UTF-8		"a"			"[\\\\-a]"	       0
94084c
+C.UTF-8		"-"			"[\\\\-a]"	       NOMATCH
94084c
+C.UTF-8		"\\"			"[\\]-a]"	       NOMATCH
94084c
+C.UTF-8		"_"			"[\\]-a]"	       0
94084c
+C.UTF-8		"a"			"[\\]-a]"	       0
94084c
+C.UTF-8		"]"			"[\\]-a]"	       0
94084c
+C.UTF-8		"-"			"[\\]-a]"	       NOMATCH
94084c
+C.UTF-8		"\\"			"[!\\\\-a]"	       NOMATCH
94084c
+C.UTF-8		"_"			"[!\\\\-a]"	       NOMATCH
94084c
+C.UTF-8		"a"			"[!\\\\-a]"	       NOMATCH
94084c
+C.UTF-8		"-"			"[!\\\\-a]"	       0
94084c
+C.UTF-8		"!"			"[\\!-]"	       0
94084c
+C.UTF-8		"-"			"[\\!-]"	       0
94084c
+C.UTF-8		"\\"			"[\\!-]"	       NOMATCH
94084c
+C.UTF-8		"Z"			"[Z-\\\\]"	       0
94084c
+C.UTF-8		"["			"[Z-\\\\]"	       0
94084c
+C.UTF-8		"\\"			"[Z-\\\\]"	       0
94084c
+C.UTF-8		"-"			"[Z-\\\\]"	       NOMATCH
94084c
+C.UTF-8		"Z"			"[Z-\\]]"	       0
94084c
+C.UTF-8		"["			"[Z-\\]]"	       0
94084c
+C.UTF-8		"\\"			"[Z-\\]]"	       0
94084c
+C.UTF-8		"]"			"[Z-\\]]"	       0
94084c
+C.UTF-8		"-"			"[Z-\\]]"	       NOMATCH
94084c
+
94084c
 # Following are tests outside the scope of IEEE 2003.2 since they are using
94084c
 # locales other than the C locale.  The main focus of the tests is on the
94084c
 # handling of ranges and the recognition of character (vs bytes).
94084c
@@ -677,7 +1068,6 @@ C		 "x/y"			"*"		       0       PATHNAME|LEADING_DIR
94084c
 C		 "x/y/z"		"*"		       0       PATHNAME|LEADING_DIR
94084c
 C		 "x"			"*x"		       0       PATHNAME|LEADING_DIR
94084c
 
94084c
-en_US.UTF-8	 "\366.csv"		"*.csv"                0
94084c
 C		 "x/y"			"*x"		       0       PATHNAME|LEADING_DIR
94084c
 C		 "x/y/z"		"*x"		       0       PATHNAME|LEADING_DIR
94084c
 C		 "x"			"x*"		       0       PATHNAME|LEADING_DIR
94084c
@@ -693,6 +1083,33 @@ C		 "x"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
 C		 "x/y"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
 C		 "x/y/z"		"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
 
94084c
+# Duplicate the "Test of GNU extensions." tests but for C.UTF-8.
94084c
+C.UTF-8		 "x"			"x"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"x"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"x"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"*x"		       0       PATHNAME|LEADING_DIR
94084c
+
94084c
+C.UTF-8		 "x/y"			"*x"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"*x"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"x*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"x*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"x*"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"a"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"a"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"a"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"x/y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"x/y"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"x/y"		       0       PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+C.UTF-8		 "x/y/z"		"x?y"		       NOMATCH PATHNAME|LEADING_DIR
94084c
+
94084c
+# Bug 14185
94084c
+en_US.UTF-8	 "\366.csv"		"*.csv"                0
94084c
+
94084c
 # ksh style matching.
94084c
 C		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
94084c
 C		"/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0     PATHNAME|EXTMATCH
94084c
@@ -822,3 +1239,133 @@ C		""			""		       0
94084c
 C		""			""		       0       EXTMATCH
94084c
 C		""			"*([abc])"	       0       EXTMATCH
94084c
 C		""			"?([abc])"	       0       EXTMATCH
94084c
+
94084c
+# Duplicate the "ksh style matching." tests for C.UTF-8.
94084c
+C.UTF-8		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
94084c
+C.UTF-8		"/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0     PATHNAME|EXTMATCH
94084c
+C.UTF-8		"12"			"[1-9]*([0-9])"        0       EXTMATCH
94084c
+C.UTF-8		"12abc"			"[1-9]*([0-9])"        NOMATCH EXTMATCH
94084c
+C.UTF-8		"1"			"[1-9]*([0-9])"	       0       EXTMATCH
94084c
+C.UTF-8		"07"			"+([0-7])"	       0       EXTMATCH
94084c
+C.UTF-8		"0377"			"+([0-7])"	       0       EXTMATCH
94084c
+C.UTF-8		"09"			"+([0-7])"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"paragraph"		"para@(chute|graph)"   0       EXTMATCH
94084c
+C.UTF-8		"paramour"		"para@(chute|graph)"   NOMATCH EXTMATCH
94084c
+C.UTF-8		"para991"		"para?([345]|99)1"     0       EXTMATCH
94084c
+C.UTF-8		"para381"		"para?([345]|99)1"     NOMATCH EXTMATCH
94084c
+C.UTF-8		"paragraph"		"para*([0-9])"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"para"			"para*([0-9])"	       0       EXTMATCH
94084c
+C.UTF-8		"para13829383746592"	"para*([0-9])"	       0       EXTMATCH
94084c
+C.UTF-8		"paragraph"		"para+([0-9])"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"para"			"para+([0-9])"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"para987346523"		"para+([0-9])"	       0       EXTMATCH
94084c
+C.UTF-8		"paragraph"		"para!(*.[0-9])"       0       EXTMATCH
94084c
+C.UTF-8		"para.38"		"para!(*.[0-9])"       0       EXTMATCH
94084c
+C.UTF-8		"para.graph"		"para!(*.[0-9])"       0       EXTMATCH
94084c
+C.UTF-8		"para39"		"para!(*.[0-9])"       0       EXTMATCH
94084c
+C.UTF-8		""			"*(0|1|3|5|7|9)"       0       EXTMATCH
94084c
+C.UTF-8		"137577991"		"*(0|1|3|5|7|9)"       0       EXTMATCH
94084c
+C.UTF-8		"2468"			"*(0|1|3|5|7|9)"       NOMATCH EXTMATCH
94084c
+C.UTF-8		"1358"			"*(0|1|3|5|7|9)"       NOMATCH EXTMATCH
94084c
+C.UTF-8		"file.c"		"*.c?(c)"	       0       EXTMATCH
94084c
+C.UTF-8		"file.C"		"*.c?(c)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"file.cc"		"*.c?(c)"	       0       EXTMATCH
94084c
+C.UTF-8		"file.ccc"		"*.c?(c)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"parse.y"		"!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
94084c
+C.UTF-8		"shell.c"		"!(*.c|*.h|Makefile.in|config*|README)" NOMATCH EXTMATCH
94084c
+C.UTF-8		"Makefile"		"!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
94084c
+C.UTF-8		"VMS.FILE;1"		"*\;[1-9]*([0-9])"     0       EXTMATCH
94084c
+C.UTF-8		"VMS.FILE;0"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
94084c
+C.UTF-8		"VMS.FILE;"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
94084c
+C.UTF-8		"VMS.FILE;139"		"*\;[1-9]*([0-9])"     0       EXTMATCH
94084c
+C.UTF-8		"VMS.FILE;1N"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
94084c
+C.UTF-8		"abcfefg"		"ab**(e|f)"	       0       EXTMATCH
94084c
+C.UTF-8		"abcfefg"		"ab**(e|f)g"	       0       EXTMATCH
94084c
+C.UTF-8		"ab"			"ab*+(e|f)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"abef"			"ab***ef"	       0       EXTMATCH
94084c
+C.UTF-8		"abef"			"ab**"		       0       EXTMATCH
94084c
+C.UTF-8		"fofo"			"*(f*(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"ffo"			"*(f*(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"foooofo"		"*(f*(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"foooofof"		"*(f*(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"fooofoofofooo"		"*(f*(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"foooofof"		"*(f+(o))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"xfoooofof"		"*(f*(o))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foooofofx"		"*(f*(o))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"ofxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
94084c
+C.UTF-8		"ofooofoofofooo"	"*(f*(o))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foooxfooxfoxfooox"	"*(f*(o)x)"	       0       EXTMATCH
94084c
+C.UTF-8		"foooxfooxofoxfooox"	"*(f*(o)x)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foooxfooxfxfooox"	"*(f*(o)x)"	       0       EXTMATCH
94084c
+C.UTF-8		"ofxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
94084c
+C.UTF-8		"ofoooxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
94084c
+C.UTF-8		"ofoooxoofxoofoooxoofxo" "*(*(of*(o)x)o)"      0       EXTMATCH
94084c
+C.UTF-8		"ofoooxoofxoofoooxoofxoo" "*(*(of*(o)x)o)"     0       EXTMATCH
94084c
+C.UTF-8		"ofoooxoofxoofoooxoofxofo" "*(*(of*(o)x)o)"    NOMATCH EXTMATCH
94084c
+C.UTF-8		"ofoooxoofxoofoooxoofxooofxofxo" "*(*(of*(o)x)o)" 0    EXTMATCH
94084c
+C.UTF-8		"aac"			"*(@(a))a@(c)"	       0       EXTMATCH
94084c
+C.UTF-8		"ac"			"*(@(a))a@(c)"	       0       EXTMATCH
94084c
+C.UTF-8		"c"			"*(@(a))a@(c)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"aaac"			"*(@(a))a@(c)"	       0       EXTMATCH
94084c
+C.UTF-8		"baaac"			"*(@(a))a@(c)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
94084c
+C.UTF-8		"abcd"			"@(ab|a*@(b))*(c)d"    0       EXTMATCH
94084c
+C.UTF-8		"acd"			"@(ab|a*(b))*(c)d"     0       EXTMATCH
94084c
+C.UTF-8		"abbcd"			"@(ab|a*(b))*(c)d"     0       EXTMATCH
94084c
+C.UTF-8		"effgz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
94084c
+C.UTF-8		"efgz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
94084c
+C.UTF-8		"egz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
94084c
+C.UTF-8		"egzefffgzbcdij"	"*(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
94084c
+C.UTF-8		"egz"			"@(b+(c)d|e+(f)g?|?(h)i@(j|k))" NOMATCH EXTMATCH
94084c
+C.UTF-8		"ofoofo"		"*(of+(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"oxfoxoxfox"		"*(oxf+(ox))"	       0       EXTMATCH
94084c
+C.UTF-8		"oxfoxfox"		"*(oxf+(ox))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"ofoofo"		"*(of+(o)|f)"	       0       EXTMATCH
94084c
+C.UTF-8		"foofoofo"		"@(foo|f|fo)*(f|of+(o))" 0     EXTMATCH
94084c
+C.UTF-8		"oofooofo"		"*(of|oof+(o))"	       0       EXTMATCH
94084c
+C.UTF-8		"fffooofoooooffoofffooofff" "*(*(f)*(o))"      0       EXTMATCH
94084c
+C.UTF-8		"fofoofoofofoo"		"*(fo|foo)"	       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"!(x)"		       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"!(x)*"		       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"!(foo)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foo"			"!(foo)*"	       0       EXTMATCH
94084c
+C.UTF-8		"foobar"		"!(foo)"	       0       EXTMATCH
94084c
+C.UTF-8		"foobar"		"!(foo)*"	       0       EXTMATCH
94084c
+C.UTF-8		"moo.cow"		"!(*.*).!(*.*)"	       0       EXTMATCH
94084c
+C.UTF-8		"mad.moo.cow"		"!(*.*).!(*.*)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"mucca.pazza"		"mu!(*(c))?.pa!(*(z))?" NOMATCH EXTMATCH
94084c
+C.UTF-8		"fff"			"!(f)"		       0       EXTMATCH
94084c
+C.UTF-8		"fff"			"*(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"fff"			"+(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"ooo"			"!(f)"		       0       EXTMATCH
94084c
+C.UTF-8		"ooo"			"*(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"ooo"			"+(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"!(f)"		       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"*(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"+(!(f))"	       0       EXTMATCH
94084c
+C.UTF-8		"f"			"!(f)"		       NOMATCH EXTMATCH
94084c
+C.UTF-8		"f"			"*(!(f))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"f"			"+(!(f))"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foot"			"@(!(z*)|*x)"	       0       EXTMATCH
94084c
+C.UTF-8		"zoot"			"@(!(z*)|*x)"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foox"			"@(!(z*)|*x)"	       0       EXTMATCH
94084c
+C.UTF-8		"zoox"			"@(!(z*)|*x)"	       0       EXTMATCH
94084c
+C.UTF-8		"foo"			"*(!(foo))"	       0       EXTMATCH
94084c
+C.UTF-8		"foob"			"!(foo)b*"	       NOMATCH EXTMATCH
94084c
+C.UTF-8		"foobb"			"!(foo)b*"	       0       EXTMATCH
94084c
+C.UTF-8		"["			"*([a[])"	       0       EXTMATCH
94084c
+C.UTF-8		"]"			"*([]a[])"	       0       EXTMATCH
94084c
+C.UTF-8		"a"			"*([]a[])"	       0       EXTMATCH
94084c
+C.UTF-8		"b"			"*([!]a[])"	       0       EXTMATCH
94084c
+C.UTF-8		"["			"*([!]a[]|[[])"	       0       EXTMATCH
94084c
+C.UTF-8		"]"			"*([!]a[]|[]])"	       0       EXTMATCH
94084c
+C.UTF-8		"["			"!([!]a[])"	       0       EXTMATCH
94084c
+C.UTF-8		"]"			"!([!]a[])"	       0       EXTMATCH
94084c
+C.UTF-8		")"			"*([)])"	       0       EXTMATCH
94084c
+C.UTF-8		"*"			"*([*(])"	       0       EXTMATCH
94084c
+C.UTF-8		"abcd"			"*!(|a)cd"	       0       EXTMATCH
94084c
+C.UTF-8		"ab/.a"			"+([abc])/*"	       NOMATCH EXTMATCH|PATHNAME|PERIOD
94084c
+C.UTF-8		""			""		       0
94084c
+C.UTF-8		""			""		       0       EXTMATCH
94084c
+C.UTF-8		""			"*([abc])"	       0       EXTMATCH
94084c
+C.UTF-8		""			"?([abc])"	       0       EXTMATCH
94084c
diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
94084c
index 84195fcd2ec153b8..da3f97799e37c607 100644
94084c
--- a/posix/tst-regcomp-truncated.c
94084c
+++ b/posix/tst-regcomp-truncated.c
94084c
@@ -37,6 +37,7 @@
94084c
 static const char locales[][17] =
94084c
   {
94084c
     "C",
94084c
+    "C.UTF-8",
94084c
     "en_US.UTF-8",
94084c
     "de_DE.ISO-8859-1",
94084c
   };
94084c
diff --git a/posix/tst-regex.c b/posix/tst-regex.c
94084c
index e7c2b05e8666a16e..531128de2a9176fa 100644
94084c
--- a/posix/tst-regex.c
94084c
+++ b/posix/tst-regex.c
94084c
@@ -32,6 +32,7 @@
94084c
 #include <sys/stat.h>
94084c
 #include <sys/types.h>
94084c
 #include <regex.h>
94084c
+#include <support/support.h>
94084c
 
94084c
 
94084c
 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
94084c
@@ -58,7 +59,7 @@ do_test (void)
94084c
   const char *file;
94084c
   int fd;
94084c
   struct stat st;
94084c
-  int result;
94084c
+  int result = 0;
94084c
   char *inmem;
94084c
   char *outmem;
94084c
   size_t inlen;
94084c
@@ -123,7 +124,7 @@ do_test (void)
94084c
 
94084c
   /* Run the actual tests.  All tests are run in a single-byte and a
94084c
      multi-byte locale.  */
94084c
-  result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
94084c
+  result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
94084c
   result |= test_expr ("G.ran", 2, 3);
94084c
   result |= test_expr ("G.\\{1\\}ran", 2, 3);
94084c
   result |= test_expr ("G.*ran", 3, 44);
94084c
@@ -143,19 +144,33 @@ do_test (void)
94084c
 static int
94084c
 test_expr (const char *expr, int expected, int expectedicase)
94084c
 {
94084c
-  int result;
94084c
+  int result = 0;
94084c
   char *inmem;
94084c
   char *outmem;
94084c
   size_t inlen;
94084c
   size_t outlen;
94084c
   char *uexpr;
94084c
 
94084c
-  /* First test: search with an UTF-8 locale.  */
94084c
-  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
94084c
-    error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
94084c
+  /* First test: search with the basic C.UTF-8 locale.  */
94084c
+  printf ("INFO: Testing C.UTF-8.\n");
94084c
+  xsetlocale (LC_ALL, "C.UTF-8");
94084c
 
94084c
   printf ("\nTest \"%s\" with multi-byte locale\n", expr);
94084c
-  result = run_test (expr, mem, memlen, 0, expected);
94084c
+  result |= run_test (expr, mem, memlen, 0, expected);
94084c
+  printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
94084c
+  result |= run_test (expr, mem, memlen, 1, expectedicase);
94084c
+  printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
94084c
+  result |= run_test_backwards (expr, mem, memlen, 0, expected);
94084c
+  printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
94084c
+	  expr);
94084c
+  result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
94084c
+
94084c
+  /* Second test: search with the de_DE.UTF-8 locale.  */
94084c
+  printf ("INFO: Testing de_DE.UTF-8.\n");
94084c
+  xsetlocale (LC_ALL, "de_DE.UTF-8");
94084c
+
94084c
+  printf ("\nTest \"%s\" with multi-byte locale\n", expr);
94084c
+  result |= run_test (expr, mem, memlen, 0, expected);
94084c
   printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
94084c
   result |= run_test (expr, mem, memlen, 1, expectedicase);
94084c
   printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
94084c
@@ -165,8 +180,8 @@ test_expr (const char *expr, int expected, int expectedicase)
94084c
   result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
94084c
 
94084c
   /* Second test: search with an ISO-8859-1 locale.  */
94084c
-  if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
94084c
-    error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
94084c
+  printf ("INFO: Testing de_DE.ISO-8859-1.\n");
94084c
+  xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
94084c
 
94084c
   inmem = (char *) expr;
94084c
   inlen = strlen (expr);