0b26f7
commit f5117c6504888fab5423282a4607c552b90fd3f9
0b26f7
Author: Carlos O'Donell <carlos@redhat.com>
0b26f7
Date:   Thu Jul 29 22:45:39 2021 -0400
0b26f7
0b26f7
    Add 'codepoint_collation' support for LC_COLLATE.
0b26f7
    
0b26f7
    Support a new directive 'codepoint_collation' in the LC_COLLATE
0b26f7
    section of a locale source file. This new directive causes all
0b26f7
    collation rules to be dropped and instead STRCMP (strcmp or
0b26f7
    wcscmp) is used for collation of the input character set. This
0b26f7
    is required to allow for a C.UTF-8 that contains zero collation
0b26f7
    rules (minimal size) and sorts using code point sorting.
0b26f7
    
0b26f7
    To date the only implementation of a locale with zero collation
0b26f7
    rules is the C/POSIX locale. The C/POSIX locale provides
0b26f7
    identity tables for _NL_COLLATE_COLLSEQMB and
0b26f7
    _NL_COLLATE_COLLSEQWC that map to ASCII even though it has zero
0b26f7
    rules. This has led to existing fnmatch, regexec, and regcomp
0b26f7
    implementations that require these tables. It is not correct
0b26f7
    to use these tables when nrules == 0, but the conservative fix
0b26f7
    is to provide these tables when nrules == 0. This assures that
0b26f7
    existing static applications using a new C.UTF-8 locale with
0b26f7
    'codepoint_collation' at least have functional range expressions
0b26f7
    with ASCII e.g. [0-9] or [a-z]. Such static applications would
0b26f7
    not have the fixes to fnmatch, regexec and regcomp that avoid
0b26f7
    the use of the tables when nrules == 0. Future fixes to fnmatch,
0b26f7
    regexec, and regcomp would allow range expressions to use the
0b26f7
    full set of code points for such ranges.
0b26f7
    
0b26f7
    Tested on x86_64 and i686 without regression.
0b26f7
    
0b26f7
    Reviewed-by: Florian Weimer <fweimer@redhat.com>
0b26f7
0b26f7
diff --git a/locale/C-collate-seq.c b/locale/C-collate-seq.c
0b26f7
new file mode 100644
0b26f7
index 0000000000000000..4fb82cb8357936b6
0b26f7
--- /dev/null
0b26f7
+++ b/locale/C-collate-seq.c
0b26f7
@@ -0,0 +1,100 @@
0b26f7
+/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
0b26f7
+   This file is part of the GNU C Library.
0b26f7
+
0b26f7
+   The GNU C Library is free software; you can redistribute it and/or
0b26f7
+   modify it under the terms of the GNU Lesser General Public
0b26f7
+   License as published by the Free Software Foundation; either
0b26f7
+   version 2.1 of the License, or (at your option) any later version.
0b26f7
+
0b26f7
+   The GNU C Library is distributed in the hope that it will be useful,
0b26f7
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
0b26f7
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0b26f7
+   Lesser General Public License for more details.
0b26f7
+
0b26f7
+   You should have received a copy of the GNU Lesser General Public
0b26f7
+   License along with the GNU C Library; if not, see
0b26f7
+   <https://www.gnu.org/licenses/>.  */
0b26f7
+
0b26f7
+#include <stdint.h>
0b26f7
+
0b26f7
+static const char collseqmb[] =
0b26f7
+{
0b26f7
+  '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
0b26f7
+  '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
0b26f7
+  '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
0b26f7
+  '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
0b26f7
+  '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
0b26f7
+  '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
0b26f7
+  '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
0b26f7
+  '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
0b26f7
+  '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
0b26f7
+  '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
0b26f7
+  '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
0b26f7
+  '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
0b26f7
+  '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
0b26f7
+  '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
0b26f7
+  '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
0b26f7
+  '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
0b26f7
+  '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
0b26f7
+  '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
0b26f7
+  '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
0b26f7
+  '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
0b26f7
+  '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
0b26f7
+  '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
0b26f7
+  '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
0b26f7
+  '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
0b26f7
+  '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
0b26f7
+  '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
0b26f7
+  '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
0b26f7
+  '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
0b26f7
+  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
0b26f7
+  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
0b26f7
+  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
0b26f7
+  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
0b26f7
+};
0b26f7
+
0b26f7
+/* This table must be 256 bytes in size. We index bytes into the
0b26f7
+   table to find the collation sequence.  */
0b26f7
+_Static_assert (sizeof (collseqmb) == 256);
0b26f7
+
0b26f7
+static const uint32_t collseqwc[] =
0b26f7
+{
0b26f7
+  8, 1, 8, 0x0, 0xff,
0b26f7
+  /* 1st-level table */
0b26f7
+  6 * sizeof (uint32_t),
0b26f7
+  /* 2nd-level table */
0b26f7
+  7 * sizeof (uint32_t),
0b26f7
+  /* 3rd-level table */
0b26f7
+  L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07',
0b26f7
+  L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f',
0b26f7
+  L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17',
0b26f7
+  L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f',
0b26f7
+  L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27',
0b26f7
+  L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f',
0b26f7
+  L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37',
0b26f7
+  L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f',
0b26f7
+  L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47',
0b26f7
+  L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f',
0b26f7
+  L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57',
0b26f7
+  L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f',
0b26f7
+  L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67',
0b26f7
+  L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f',
0b26f7
+  L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77',
0b26f7
+  L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f',
0b26f7
+  L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87',
0b26f7
+  L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f',
0b26f7
+  L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97',
0b26f7
+  L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f',
0b26f7
+  L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7',
0b26f7
+  L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf',
0b26f7
+  L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7',
0b26f7
+  L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf',
0b26f7
+  L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7',
0b26f7
+  L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf',
0b26f7
+  L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7',
0b26f7
+  L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf',
0b26f7
+  L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7',
0b26f7
+  L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef',
0b26f7
+  L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7',
0b26f7
+  L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff'
0b26f7
+};
0b26f7
diff --git a/locale/C-collate.c b/locale/C-collate.c
0b26f7
index 76d9373683314943..120ce0a40aeb9a0f 100644
0b26f7
--- a/locale/C-collate.c
0b26f7
+++ b/locale/C-collate.c
0b26f7
@@ -20,83 +20,7 @@
0b26f7
 #include <stdint.h>
0b26f7
 #include "localeinfo.h"
0b26f7
 
0b26f7
-static const char collseqmb[] =
0b26f7
-{
0b26f7
-  '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
0b26f7
-  '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
0b26f7
-  '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
0b26f7
-  '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
0b26f7
-  '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
0b26f7
-  '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
0b26f7
-  '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
0b26f7
-  '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
0b26f7
-  '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
0b26f7
-  '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
0b26f7
-  '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
0b26f7
-  '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
0b26f7
-  '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
0b26f7
-  '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
0b26f7
-  '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
0b26f7
-  '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
0b26f7
-  '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
0b26f7
-  '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
0b26f7
-  '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
0b26f7
-  '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
0b26f7
-  '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
0b26f7
-  '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
0b26f7
-  '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
0b26f7
-  '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
0b26f7
-  '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
0b26f7
-  '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
0b26f7
-  '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
0b26f7
-  '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
0b26f7
-  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
0b26f7
-  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
0b26f7
-  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
0b26f7
-  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
0b26f7
-};
0b26f7
-
0b26f7
-static const uint32_t collseqwc[] =
0b26f7
-{
0b26f7
-  8, 1, 8, 0x0, 0xff,
0b26f7
-  /* 1st-level table */
0b26f7
-  6 * sizeof (uint32_t),
0b26f7
-  /* 2nd-level table */
0b26f7
-  7 * sizeof (uint32_t),
0b26f7
-  /* 3rd-level table */
0b26f7
-  L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07',
0b26f7
-  L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f',
0b26f7
-  L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17',
0b26f7
-  L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f',
0b26f7
-  L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27',
0b26f7
-  L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f',
0b26f7
-  L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37',
0b26f7
-  L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f',
0b26f7
-  L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47',
0b26f7
-  L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f',
0b26f7
-  L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57',
0b26f7
-  L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f',
0b26f7
-  L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67',
0b26f7
-  L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f',
0b26f7
-  L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77',
0b26f7
-  L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f',
0b26f7
-  L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87',
0b26f7
-  L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f',
0b26f7
-  L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97',
0b26f7
-  L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f',
0b26f7
-  L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7',
0b26f7
-  L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf',
0b26f7
-  L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7',
0b26f7
-  L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf',
0b26f7
-  L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7',
0b26f7
-  L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf',
0b26f7
-  L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7',
0b26f7
-  L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf',
0b26f7
-  L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7',
0b26f7
-  L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef',
0b26f7
-  L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7',
0b26f7
-  L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff'
0b26f7
-};
0b26f7
+#include "C-collate-seq.c"
0b26f7
 
0b26f7
 const struct __locale_data _nl_C_LC_COLLATE attribute_hidden =
0b26f7
 {
0b26f7
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
0b26f7
index b6406b775d3a81ad..0f314e40c4305dea 100644
0b26f7
--- a/locale/programs/ld-collate.c
0b26f7
+++ b/locale/programs/ld-collate.c
0b26f7
@@ -24,6 +24,7 @@
0b26f7
 #include <wchar.h>
0b26f7
 #include <stdint.h>
0b26f7
 #include <sys/param.h>
0b26f7
+#include <array_length.h>
0b26f7
 
0b26f7
 #include "localedef.h"
0b26f7
 #include "charmap.h"
0b26f7
@@ -195,6 +196,9 @@ struct name_list
0b26f7
 /* The real definition of the struct for the LC_COLLATE locale.  */
0b26f7
 struct locale_collate_t
0b26f7
 {
0b26f7
+  /* Does the locale use code points to compare the encoding?  */
0b26f7
+  bool codepoint_collation;
0b26f7
+
0b26f7
   int col_weight_max;
0b26f7
   int cur_weight_max;
0b26f7
 
0b26f7
@@ -1510,6 +1514,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
0b26f7
 	  obstack_init (&collate->mempool);
0b26f7
 
0b26f7
 	  collate->col_weight_max = -1;
0b26f7
+	  collate->codepoint_collation = false;
0b26f7
 	}
0b26f7
       else
0b26f7
 	/* Reuse the copy_locale's data structures.  */
0b26f7
@@ -1568,6 +1573,10 @@ collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
0b26f7
       return;
0b26f7
     }
0b26f7
 
0b26f7
+  /* No data required.  */
0b26f7
+  if (collate->codepoint_collation)
0b26f7
+    return;
0b26f7
+
0b26f7
   /* If this assertion is hit change the type in `element_t'.  */
0b26f7
   assert (nrules <= sizeof (runp->used_in_level) * 8);
0b26f7
 
0b26f7
@@ -2092,6 +2101,10 @@ add_to_tablewc (uint32_t ch, struct element_t *runp)
0b26f7
     }
0b26f7
 }
0b26f7
 
0b26f7
+/* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
0b26f7
+   _NL_COLLATE_COLLSEQWC.  */
0b26f7
+#include "C-collate-seq.c"
0b26f7
+
0b26f7
 void
0b26f7
 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
0b26f7
 		const char *output_path)
0b26f7
@@ -2115,7 +2128,7 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
0b26f7
   add_locale_uint32 (&file, nrules);
0b26f7
 
0b26f7
   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
0b26f7
-  if (collate == NULL)
0b26f7
+  if (collate == NULL || collate->codepoint_collation)
0b26f7
     {
0b26f7
       size_t idx;
0b26f7
       for (idx = 1; idx < nelems; idx++)
0b26f7
@@ -2123,6 +2136,17 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
0b26f7
 	  /* The words have to be handled specially.  */
0b26f7
 	  if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
0b26f7
 	    add_locale_uint32 (&file, 0);
0b26f7
+	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
0b26f7
+		   && collate != NULL)
0b26f7
+	    /* A valid LC_COLLATE must have a code set name.  */
0b26f7
+	    add_locale_string (&file, charmap->code_set_name);
0b26f7
+	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
0b26f7
+		   && collate != NULL)
0b26f7
+	    add_locale_raw_data (&file, collseqmb, sizeof (collseqmb));
0b26f7
+	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
0b26f7
+		   && collate != NULL)
0b26f7
+	    add_locale_uint32_array (&file, collseqwc,
0b26f7
+				     array_length (collseqwc));
0b26f7
 	  else
0b26f7
 	    add_locale_empty (&file);
0b26f7
 	}
0b26f7
@@ -2672,6 +2696,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
0b26f7
 
0b26f7
       switch (nowtok)
0b26f7
 	{
0b26f7
+	case tok_codepoint_collation:
0b26f7
+	  collate->codepoint_collation = true;
0b26f7
+	  break;
0b26f7
+
0b26f7
 	case tok_copy:
0b26f7
 	  /* Allow copying other locales.  */
0b26f7
 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
0b26f7
@@ -3742,9 +3770,11 @@ error while adding equivalent collating symbol"));
0b26f7
 	  /* Next we assume `LC_COLLATE'.  */
0b26f7
 	  if (!ignore_content)
0b26f7
 	    {
0b26f7
-	      if (state == 0 && copy_locale == NULL)
0b26f7
+	      if (state == 0
0b26f7
+		  && copy_locale == NULL
0b26f7
+		  && !collate->codepoint_collation)
0b26f7
 		/* We must either see a copy statement or have
0b26f7
-		   ordering values.  */
0b26f7
+		   ordering values, or codepoint_collation.  */
0b26f7
 		lr_error (ldfile,
0b26f7
 			  _("%s: empty category description not allowed"),
0b26f7
 			  "LC_COLLATE");
0b26f7
diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
0b26f7
index bcded15ddb4c44bb..2e59eb9ac014134b 100644
0b26f7
--- a/locale/programs/locfile-kw.gperf
0b26f7
+++ b/locale/programs/locfile-kw.gperf
0b26f7
@@ -54,6 +54,7 @@ translit_end,           tok_translit_end,           0
0b26f7
 translit_ignore,        tok_translit_ignore,        0
0b26f7
 default_missing,        tok_default_missing,        0
0b26f7
 LC_COLLATE,             tok_lc_collate,             0
0b26f7
+codepoint_collation,    tok_codepoint_collation,    0
0b26f7
 coll_weight_max,        tok_coll_weight_max,        0
0b26f7
 section-symbol,         tok_section_symbol,         0
0b26f7
 collating-element,      tok_collating_element,      0
0b26f7
diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
0b26f7
index bc1cb8f0845852ad..fe6335692bd422cd 100644
0b26f7
--- a/locale/programs/locfile-kw.h
0b26f7
+++ b/locale/programs/locfile-kw.h
0b26f7
@@ -54,7 +54,7 @@
0b26f7
 #line 24 "locfile-kw.gperf"
0b26f7
 struct keyword_t ;
0b26f7
 
0b26f7
-#define TOTAL_KEYWORDS 178
0b26f7
+#define TOTAL_KEYWORDS 179
0b26f7
 #define MIN_WORD_LENGTH 3
0b26f7
 #define MAX_WORD_LENGTH 22
0b26f7
 #define MIN_HASH_VALUE 3
0b26f7
@@ -134,92 +134,92 @@ locfile_hash (register const char *str, register size_t len)
0b26f7
 #line 31 "locfile-kw.gperf"
0b26f7
       {"END",                    tok_end,                    0},
0b26f7
       {""}, {""},
0b26f7
-#line 70 "locfile-kw.gperf"
0b26f7
+#line 71 "locfile-kw.gperf"
0b26f7
       {"IGNORE",                 tok_ignore,                 0},
0b26f7
-#line 129 "locfile-kw.gperf"
0b26f7
+#line 130 "locfile-kw.gperf"
0b26f7
       {"LC_TIME",                tok_lc_time,                0},
0b26f7
 #line 30 "locfile-kw.gperf"
0b26f7
       {"LC_CTYPE",               tok_lc_ctype,               0},
0b26f7
       {""},
0b26f7
-#line 168 "locfile-kw.gperf"
0b26f7
+#line 169 "locfile-kw.gperf"
0b26f7
       {"LC_ADDRESS",             tok_lc_address,             0},
0b26f7
-#line 153 "locfile-kw.gperf"
0b26f7
+#line 154 "locfile-kw.gperf"
0b26f7
       {"LC_MESSAGES",            tok_lc_messages,            0},
0b26f7
-#line 161 "locfile-kw.gperf"
0b26f7
+#line 162 "locfile-kw.gperf"
0b26f7
       {"LC_NAME",                tok_lc_name,                0},
0b26f7
-#line 158 "locfile-kw.gperf"
0b26f7
+#line 159 "locfile-kw.gperf"
0b26f7
       {"LC_PAPER",               tok_lc_paper,               0},
0b26f7
-#line 186 "locfile-kw.gperf"
0b26f7
+#line 187 "locfile-kw.gperf"
0b26f7
       {"LC_MEASUREMENT",         tok_lc_measurement,         0},
0b26f7
 #line 56 "locfile-kw.gperf"
0b26f7
       {"LC_COLLATE",             tok_lc_collate,             0},
0b26f7
       {""},
0b26f7
-#line 188 "locfile-kw.gperf"
0b26f7
+#line 189 "locfile-kw.gperf"
0b26f7
       {"LC_IDENTIFICATION",      tok_lc_identification,      0},
0b26f7
-#line 201 "locfile-kw.gperf"
0b26f7
+#line 202 "locfile-kw.gperf"
0b26f7
       {"revision",               tok_revision,               0},
0b26f7
-#line 69 "locfile-kw.gperf"
0b26f7
+#line 70 "locfile-kw.gperf"
0b26f7
       {"UNDEFINED",              tok_undefined,              0},
0b26f7
-#line 125 "locfile-kw.gperf"
0b26f7
+#line 126 "locfile-kw.gperf"
0b26f7
       {"LC_NUMERIC",             tok_lc_numeric,             0},
0b26f7
-#line 82 "locfile-kw.gperf"
0b26f7
+#line 83 "locfile-kw.gperf"
0b26f7
       {"LC_MONETARY",            tok_lc_monetary,            0},
0b26f7
-#line 181 "locfile-kw.gperf"
0b26f7
+#line 182 "locfile-kw.gperf"
0b26f7
       {"LC_TELEPHONE",           tok_lc_telephone,           0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 75 "locfile-kw.gperf"
0b26f7
+#line 76 "locfile-kw.gperf"
0b26f7
       {"define",                 tok_define,                 0},
0b26f7
-#line 154 "locfile-kw.gperf"
0b26f7
+#line 155 "locfile-kw.gperf"
0b26f7
       {"yesexpr",                tok_yesexpr,                0},
0b26f7
-#line 141 "locfile-kw.gperf"
0b26f7
+#line 142 "locfile-kw.gperf"
0b26f7
       {"era_year",               tok_era_year,               0},
0b26f7
       {""},
0b26f7
 #line 54 "locfile-kw.gperf"
0b26f7
       {"translit_ignore",        tok_translit_ignore,        0},
0b26f7
-#line 156 "locfile-kw.gperf"
0b26f7
+#line 157 "locfile-kw.gperf"
0b26f7
       {"yesstr",                 tok_yesstr,                 0},
0b26f7
       {""},
0b26f7
-#line 89 "locfile-kw.gperf"
0b26f7
+#line 90 "locfile-kw.gperf"
0b26f7
       {"negative_sign",          tok_negative_sign,          0},
0b26f7
       {""},
0b26f7
-#line 137 "locfile-kw.gperf"
0b26f7
+#line 138 "locfile-kw.gperf"
0b26f7
       {"t_fmt",                  tok_t_fmt,                  0},
0b26f7
-#line 159 "locfile-kw.gperf"
0b26f7
+#line 160 "locfile-kw.gperf"
0b26f7
       {"height",                 tok_height,                 0},
0b26f7
       {""}, {""},
0b26f7
 #line 52 "locfile-kw.gperf"
0b26f7
       {"translit_start",         tok_translit_start,         0},
0b26f7
-#line 136 "locfile-kw.gperf"
0b26f7
+#line 137 "locfile-kw.gperf"
0b26f7
       {"d_fmt",                  tok_d_fmt,                  0},
0b26f7
       {""},
0b26f7
 #line 53 "locfile-kw.gperf"
0b26f7
       {"translit_end",           tok_translit_end,           0},
0b26f7
-#line 94 "locfile-kw.gperf"
0b26f7
+#line 95 "locfile-kw.gperf"
0b26f7
       {"n_cs_precedes",          tok_n_cs_precedes,          0},
0b26f7
-#line 144 "locfile-kw.gperf"
0b26f7
+#line 145 "locfile-kw.gperf"
0b26f7
       {"era_t_fmt",              tok_era_t_fmt,              0},
0b26f7
 #line 39 "locfile-kw.gperf"
0b26f7
       {"space",                  tok_space,                  0},
0b26f7
-#line 72 "locfile-kw.gperf"
0b26f7
-      {"reorder-end",            tok_reorder_end,            0},
0b26f7
 #line 73 "locfile-kw.gperf"
0b26f7
+      {"reorder-end",            tok_reorder_end,            0},
0b26f7
+#line 74 "locfile-kw.gperf"
0b26f7
       {"reorder-sections-after", tok_reorder_sections_after, 0},
0b26f7
       {""},
0b26f7
-#line 142 "locfile-kw.gperf"
0b26f7
+#line 143 "locfile-kw.gperf"
0b26f7
       {"era_d_fmt",              tok_era_d_fmt,              0},
0b26f7
-#line 189 "locfile-kw.gperf"
0b26f7
+#line 190 "locfile-kw.gperf"
0b26f7
       {"title",                  tok_title,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 149 "locfile-kw.gperf"
0b26f7
+#line 150 "locfile-kw.gperf"
0b26f7
       {"timezone",               tok_timezone,               0},
0b26f7
       {""},
0b26f7
-#line 74 "locfile-kw.gperf"
0b26f7
+#line 75 "locfile-kw.gperf"
0b26f7
       {"reorder-sections-end",   tok_reorder_sections_end,   0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 95 "locfile-kw.gperf"
0b26f7
+#line 96 "locfile-kw.gperf"
0b26f7
       {"n_sep_by_space",         tok_n_sep_by_space,         0},
0b26f7
       {""}, {""},
0b26f7
-#line 100 "locfile-kw.gperf"
0b26f7
+#line 101 "locfile-kw.gperf"
0b26f7
       {"int_n_cs_precedes",      tok_int_n_cs_precedes,      0},
0b26f7
       {""}, {""}, {""},
0b26f7
 #line 26 "locfile-kw.gperf"
0b26f7
@@ -233,147 +233,147 @@ locfile_hash (register const char *str, register size_t len)
0b26f7
       {"print",                  tok_print,                  0},
0b26f7
 #line 44 "locfile-kw.gperf"
0b26f7
       {"xdigit",                 tok_xdigit,                 0},
0b26f7
-#line 110 "locfile-kw.gperf"
0b26f7
+#line 111 "locfile-kw.gperf"
0b26f7
       {"duo_n_cs_precedes",      tok_duo_n_cs_precedes,      0},
0b26f7
-#line 127 "locfile-kw.gperf"
0b26f7
+#line 128 "locfile-kw.gperf"
0b26f7
       {"thousands_sep",          tok_thousands_sep,          0},
0b26f7
-#line 197 "locfile-kw.gperf"
0b26f7
+#line 198 "locfile-kw.gperf"
0b26f7
       {"territory",              tok_territory,              0},
0b26f7
 #line 36 "locfile-kw.gperf"
0b26f7
       {"digit",                  tok_digit,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 92 "locfile-kw.gperf"
0b26f7
+#line 93 "locfile-kw.gperf"
0b26f7
       {"p_cs_precedes",          tok_p_cs_precedes,          0},
0b26f7
       {""}, {""},
0b26f7
-#line 62 "locfile-kw.gperf"
0b26f7
+#line 63 "locfile-kw.gperf"
0b26f7
       {"script",                 tok_script,                 0},
0b26f7
 #line 29 "locfile-kw.gperf"
0b26f7
       {"include",                tok_include,                0},
0b26f7
       {""},
0b26f7
-#line 78 "locfile-kw.gperf"
0b26f7
+#line 79 "locfile-kw.gperf"
0b26f7
       {"else",                   tok_else,                   0},
0b26f7
-#line 184 "locfile-kw.gperf"
0b26f7
+#line 185 "locfile-kw.gperf"
0b26f7
       {"int_select",             tok_int_select,             0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 132 "locfile-kw.gperf"
0b26f7
+#line 133 "locfile-kw.gperf"
0b26f7
       {"week",                   tok_week,                   0},
0b26f7
 #line 33 "locfile-kw.gperf"
0b26f7
       {"upper",                  tok_upper,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 194 "locfile-kw.gperf"
0b26f7
+#line 195 "locfile-kw.gperf"
0b26f7
       {"tel",                    tok_tel,                    0},
0b26f7
-#line 93 "locfile-kw.gperf"
0b26f7
+#line 94 "locfile-kw.gperf"
0b26f7
       {"p_sep_by_space",         tok_p_sep_by_space,         0},
0b26f7
-#line 160 "locfile-kw.gperf"
0b26f7
+#line 161 "locfile-kw.gperf"
0b26f7
       {"width",                  tok_width,                  0},
0b26f7
       {""},
0b26f7
-#line 98 "locfile-kw.gperf"
0b26f7
+#line 99 "locfile-kw.gperf"
0b26f7
       {"int_p_cs_precedes",      tok_int_p_cs_precedes,      0},
0b26f7
       {""}, {""},
0b26f7
 #line 41 "locfile-kw.gperf"
0b26f7
       {"punct",                  tok_punct,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 101 "locfile-kw.gperf"
0b26f7
+#line 102 "locfile-kw.gperf"
0b26f7
       {"int_n_sep_by_space",     tok_int_n_sep_by_space,     0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 108 "locfile-kw.gperf"
0b26f7
+#line 109 "locfile-kw.gperf"
0b26f7
       {"duo_p_cs_precedes",      tok_duo_p_cs_precedes,      0},
0b26f7
 #line 48 "locfile-kw.gperf"
0b26f7
       {"charconv",               tok_charconv,               0},
0b26f7
       {""},
0b26f7
 #line 47 "locfile-kw.gperf"
0b26f7
       {"class",                  tok_class,                  0},
0b26f7
-#line 114 "locfile-kw.gperf"
0b26f7
-      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
0b26f7
 #line 115 "locfile-kw.gperf"
0b26f7
+      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
0b26f7
+#line 116 "locfile-kw.gperf"
0b26f7
       {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0},
0b26f7
-#line 111 "locfile-kw.gperf"
0b26f7
+#line 112 "locfile-kw.gperf"
0b26f7
       {"duo_n_sep_by_space",     tok_duo_n_sep_by_space,     0},
0b26f7
-#line 119 "locfile-kw.gperf"
0b26f7
+#line 120 "locfile-kw.gperf"
0b26f7
       {"duo_int_n_sign_posn",    tok_duo_int_n_sign_posn,    0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 58 "locfile-kw.gperf"
0b26f7
+#line 59 "locfile-kw.gperf"
0b26f7
       {"section-symbol",         tok_section_symbol,         0},
0b26f7
-#line 185 "locfile-kw.gperf"
0b26f7
+#line 186 "locfile-kw.gperf"
0b26f7
       {"int_prefix",             tok_int_prefix,             0},
0b26f7
       {""}, {""}, {""}, {""},
0b26f7
 #line 42 "locfile-kw.gperf"
0b26f7
       {"graph",                  tok_graph,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 99 "locfile-kw.gperf"
0b26f7
+#line 100 "locfile-kw.gperf"
0b26f7
       {"int_p_sep_by_space",     tok_int_p_sep_by_space,     0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 112 "locfile-kw.gperf"
0b26f7
-      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
0b26f7
 #line 113 "locfile-kw.gperf"
0b26f7
+      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
0b26f7
+#line 114 "locfile-kw.gperf"
0b26f7
       {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0},
0b26f7
-#line 109 "locfile-kw.gperf"
0b26f7
+#line 110 "locfile-kw.gperf"
0b26f7
       {"duo_p_sep_by_space",     tok_duo_p_sep_by_space,     0},
0b26f7
-#line 118 "locfile-kw.gperf"
0b26f7
+#line 119 "locfile-kw.gperf"
0b26f7
       {"duo_int_p_sign_posn",    tok_duo_int_p_sign_posn,    0},
0b26f7
-#line 157 "locfile-kw.gperf"
0b26f7
+#line 158 "locfile-kw.gperf"
0b26f7
       {"nostr",                  tok_nostr,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 140 "locfile-kw.gperf"
0b26f7
+#line 141 "locfile-kw.gperf"
0b26f7
       {"era",                    tok_era,                    0},
0b26f7
       {""},
0b26f7
-#line 84 "locfile-kw.gperf"
0b26f7
+#line 85 "locfile-kw.gperf"
0b26f7
       {"currency_symbol",        tok_currency_symbol,        0},
0b26f7
       {""},
0b26f7
-#line 167 "locfile-kw.gperf"
0b26f7
+#line 168 "locfile-kw.gperf"
0b26f7
       {"name_ms",                tok_name_ms,                0},
0b26f7
-#line 165 "locfile-kw.gperf"
0b26f7
-      {"name_mrs",               tok_name_mrs,               0},
0b26f7
 #line 166 "locfile-kw.gperf"
0b26f7
+      {"name_mrs",               tok_name_mrs,               0},
0b26f7
+#line 167 "locfile-kw.gperf"
0b26f7
       {"name_miss",              tok_name_miss,              0},
0b26f7
-#line 83 "locfile-kw.gperf"
0b26f7
+#line 84 "locfile-kw.gperf"
0b26f7
       {"int_curr_symbol",        tok_int_curr_symbol,        0},
0b26f7
-#line 190 "locfile-kw.gperf"
0b26f7
+#line 191 "locfile-kw.gperf"
0b26f7
       {"source",                 tok_source,                 0},
0b26f7
-#line 164 "locfile-kw.gperf"
0b26f7
+#line 165 "locfile-kw.gperf"
0b26f7
       {"name_mr",                tok_name_mr,                0},
0b26f7
-#line 163 "locfile-kw.gperf"
0b26f7
+#line 164 "locfile-kw.gperf"
0b26f7
       {"name_gen",               tok_name_gen,               0},
0b26f7
-#line 202 "locfile-kw.gperf"
0b26f7
+#line 203 "locfile-kw.gperf"
0b26f7
       {"date",                   tok_date,                   0},
0b26f7
       {""}, {""},
0b26f7
-#line 191 "locfile-kw.gperf"
0b26f7
+#line 192 "locfile-kw.gperf"
0b26f7
       {"address",                tok_address,                0},
0b26f7
-#line 162 "locfile-kw.gperf"
0b26f7
+#line 163 "locfile-kw.gperf"
0b26f7
       {"name_fmt",               tok_name_fmt,               0},
0b26f7
 #line 32 "locfile-kw.gperf"
0b26f7
       {"copy",                   tok_copy,                   0},
0b26f7
-#line 103 "locfile-kw.gperf"
0b26f7
+#line 104 "locfile-kw.gperf"
0b26f7
       {"int_n_sign_posn",        tok_int_n_sign_posn,        0},
0b26f7
       {""}, {""},
0b26f7
-#line 131 "locfile-kw.gperf"
0b26f7
+#line 132 "locfile-kw.gperf"
0b26f7
       {"day",                    tok_day,                    0},
0b26f7
-#line 105 "locfile-kw.gperf"
0b26f7
+#line 106 "locfile-kw.gperf"
0b26f7
       {"duo_currency_symbol",    tok_duo_currency_symbol,    0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 150 "locfile-kw.gperf"
0b26f7
+#line 151 "locfile-kw.gperf"
0b26f7
       {"date_fmt",               tok_date_fmt,               0},
0b26f7
-#line 64 "locfile-kw.gperf"
0b26f7
+#line 65 "locfile-kw.gperf"
0b26f7
       {"order_end",              tok_order_end,              0},
0b26f7
-#line 117 "locfile-kw.gperf"
0b26f7
+#line 118 "locfile-kw.gperf"
0b26f7
       {"duo_n_sign_posn",        tok_duo_n_sign_posn,        0},
0b26f7
       {""},
0b26f7
-#line 170 "locfile-kw.gperf"
0b26f7
+#line 171 "locfile-kw.gperf"
0b26f7
       {"country_name",           tok_country_name,           0},
0b26f7
-#line 71 "locfile-kw.gperf"
0b26f7
+#line 72 "locfile-kw.gperf"
0b26f7
       {"reorder-after",          tok_reorder_after,          0},
0b26f7
       {""}, {""},
0b26f7
-#line 155 "locfile-kw.gperf"
0b26f7
+#line 156 "locfile-kw.gperf"
0b26f7
       {"noexpr",                 tok_noexpr,                 0},
0b26f7
 #line 50 "locfile-kw.gperf"
0b26f7
       {"tolower",                tok_tolower,                0},
0b26f7
-#line 198 "locfile-kw.gperf"
0b26f7
+#line 199 "locfile-kw.gperf"
0b26f7
       {"audience",               tok_audience,               0},
0b26f7
       {""}, {""}, {""},
0b26f7
 #line 49 "locfile-kw.gperf"
0b26f7
       {"toupper",                tok_toupper,                0},
0b26f7
-#line 68 "locfile-kw.gperf"
0b26f7
+#line 69 "locfile-kw.gperf"
0b26f7
       {"position",               tok_position,               0},
0b26f7
       {""},
0b26f7
 #line 40 "locfile-kw.gperf"
0b26f7
@@ -381,196 +381,197 @@ locfile_hash (register const char *str, register size_t len)
0b26f7
       {""},
0b26f7
 #line 27 "locfile-kw.gperf"
0b26f7
       {"comment_char",           tok_comment_char,           0},
0b26f7
-#line 88 "locfile-kw.gperf"
0b26f7
+#line 89 "locfile-kw.gperf"
0b26f7
       {"positive_sign",          tok_positive_sign,          0},
0b26f7
       {""}, {""}, {""}, {""},
0b26f7
-#line 61 "locfile-kw.gperf"
0b26f7
+#line 62 "locfile-kw.gperf"
0b26f7
       {"symbol-equivalence",     tok_symbol_equivalence,     0},
0b26f7
       {""},
0b26f7
-#line 102 "locfile-kw.gperf"
0b26f7
+#line 103 "locfile-kw.gperf"
0b26f7
       {"int_p_sign_posn",        tok_int_p_sign_posn,        0},
0b26f7
-#line 175 "locfile-kw.gperf"
0b26f7
+#line 176 "locfile-kw.gperf"
0b26f7
       {"country_car",            tok_country_car,            0},
0b26f7
       {""}, {""},
0b26f7
-#line 104 "locfile-kw.gperf"
0b26f7
+#line 105 "locfile-kw.gperf"
0b26f7
       {"duo_int_curr_symbol",    tok_duo_int_curr_symbol,    0},
0b26f7
       {""}, {""},
0b26f7
-#line 135 "locfile-kw.gperf"
0b26f7
+#line 136 "locfile-kw.gperf"
0b26f7
       {"d_t_fmt",                tok_d_t_fmt,                0},
0b26f7
       {""}, {""},
0b26f7
-#line 116 "locfile-kw.gperf"
0b26f7
+#line 117 "locfile-kw.gperf"
0b26f7
       {"duo_p_sign_posn",        tok_duo_p_sign_posn,        0},
0b26f7
-#line 187 "locfile-kw.gperf"
0b26f7
+#line 188 "locfile-kw.gperf"
0b26f7
       {"measurement",            tok_measurement,            0},
0b26f7
-#line 176 "locfile-kw.gperf"
0b26f7
+#line 177 "locfile-kw.gperf"
0b26f7
       {"country_isbn",           tok_country_isbn,           0},
0b26f7
 #line 37 "locfile-kw.gperf"
0b26f7
       {"outdigit",               tok_outdigit,               0},
0b26f7
       {""}, {""},
0b26f7
-#line 143 "locfile-kw.gperf"
0b26f7
+#line 144 "locfile-kw.gperf"
0b26f7
       {"era_d_t_fmt",            tok_era_d_t_fmt,            0},
0b26f7
       {""}, {""}, {""},
0b26f7
 #line 34 "locfile-kw.gperf"
0b26f7
       {"lower",                  tok_lower,                  0},
0b26f7
-#line 183 "locfile-kw.gperf"
0b26f7
+#line 184 "locfile-kw.gperf"
0b26f7
       {"tel_dom_fmt",            tok_tel_dom_fmt,            0},
0b26f7
-#line 171 "locfile-kw.gperf"
0b26f7
+#line 172 "locfile-kw.gperf"
0b26f7
       {"country_post",           tok_country_post,           0},
0b26f7
-#line 148 "locfile-kw.gperf"
0b26f7
+#line 149 "locfile-kw.gperf"
0b26f7
       {"cal_direction",          tok_cal_direction,          0},
0b26f7
-      {""},
0b26f7
-#line 139 "locfile-kw.gperf"
0b26f7
+#line 57 "locfile-kw.gperf"
0b26f7
+      {"codepoint_collation",    tok_codepoint_collation,    0},
0b26f7
+#line 140 "locfile-kw.gperf"
0b26f7
       {"t_fmt_ampm",             tok_t_fmt_ampm,             0},
0b26f7
-#line 91 "locfile-kw.gperf"
0b26f7
+#line 92 "locfile-kw.gperf"
0b26f7
       {"frac_digits",            tok_frac_digits,            0},
0b26f7
       {""}, {""},
0b26f7
-#line 177 "locfile-kw.gperf"
0b26f7
+#line 178 "locfile-kw.gperf"
0b26f7
       {"lang_name",              tok_lang_name,              0},
0b26f7
-#line 90 "locfile-kw.gperf"
0b26f7
+#line 91 "locfile-kw.gperf"
0b26f7
       {"int_frac_digits",        tok_int_frac_digits,        0},
0b26f7
       {""},
0b26f7
-#line 121 "locfile-kw.gperf"
0b26f7
+#line 122 "locfile-kw.gperf"
0b26f7
       {"uno_valid_to",           tok_uno_valid_to,           0},
0b26f7
-#line 126 "locfile-kw.gperf"
0b26f7
+#line 127 "locfile-kw.gperf"
0b26f7
       {"decimal_point",          tok_decimal_point,          0},
0b26f7
       {""},
0b26f7
-#line 133 "locfile-kw.gperf"
0b26f7
+#line 134 "locfile-kw.gperf"
0b26f7
       {"abmon",                  tok_abmon,                  0},
0b26f7
       {""}, {""}, {""}, {""},
0b26f7
-#line 107 "locfile-kw.gperf"
0b26f7
+#line 108 "locfile-kw.gperf"
0b26f7
       {"duo_frac_digits",        tok_duo_frac_digits,        0},
0b26f7
-#line 182 "locfile-kw.gperf"
0b26f7
+#line 183 "locfile-kw.gperf"
0b26f7
       {"tel_int_fmt",            tok_tel_int_fmt,            0},
0b26f7
-#line 123 "locfile-kw.gperf"
0b26f7
+#line 124 "locfile-kw.gperf"
0b26f7
       {"duo_valid_to",           tok_duo_valid_to,           0},
0b26f7
-#line 146 "locfile-kw.gperf"
0b26f7
+#line 147 "locfile-kw.gperf"
0b26f7
       {"first_weekday",          tok_first_weekday,          0},
0b26f7
       {""},
0b26f7
-#line 130 "locfile-kw.gperf"
0b26f7
+#line 131 "locfile-kw.gperf"
0b26f7
       {"abday",                  tok_abday,                  0},
0b26f7
       {""},
0b26f7
-#line 200 "locfile-kw.gperf"
0b26f7
+#line 201 "locfile-kw.gperf"
0b26f7
       {"abbreviation",           tok_abbreviation,           0},
0b26f7
-#line 147 "locfile-kw.gperf"
0b26f7
+#line 148 "locfile-kw.gperf"
0b26f7
       {"first_workday",          tok_first_workday,          0},
0b26f7
       {""}, {""},
0b26f7
-#line 97 "locfile-kw.gperf"
0b26f7
+#line 98 "locfile-kw.gperf"
0b26f7
       {"n_sign_posn",            tok_n_sign_posn,            0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 145 "locfile-kw.gperf"
0b26f7
+#line 146 "locfile-kw.gperf"
0b26f7
       {"alt_digits",             tok_alt_digits,             0},
0b26f7
       {""}, {""},
0b26f7
-#line 128 "locfile-kw.gperf"
0b26f7
+#line 129 "locfile-kw.gperf"
0b26f7
       {"grouping",               tok_grouping,               0},
0b26f7
       {""},
0b26f7
 #line 45 "locfile-kw.gperf"
0b26f7
       {"blank",                  tok_blank,                  0},
0b26f7
       {""}, {""},
0b26f7
-#line 196 "locfile-kw.gperf"
0b26f7
+#line 197 "locfile-kw.gperf"
0b26f7
       {"language",               tok_language,               0},
0b26f7
-#line 120 "locfile-kw.gperf"
0b26f7
+#line 121 "locfile-kw.gperf"
0b26f7
       {"uno_valid_from",         tok_uno_valid_from,         0},
0b26f7
       {""},
0b26f7
-#line 199 "locfile-kw.gperf"
0b26f7
+#line 200 "locfile-kw.gperf"
0b26f7
       {"application",            tok_application,            0},
0b26f7
       {""},
0b26f7
-#line 80 "locfile-kw.gperf"
0b26f7
+#line 81 "locfile-kw.gperf"
0b26f7
       {"elifndef",               tok_elifndef,               0},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 122 "locfile-kw.gperf"
0b26f7
+#line 123 "locfile-kw.gperf"
0b26f7
       {"duo_valid_from",         tok_duo_valid_from,         0},
0b26f7
-#line 57 "locfile-kw.gperf"
0b26f7
+#line 58 "locfile-kw.gperf"
0b26f7
       {"coll_weight_max",        tok_coll_weight_max,        0},
0b26f7
       {""},
0b26f7
-#line 79 "locfile-kw.gperf"
0b26f7
+#line 80 "locfile-kw.gperf"
0b26f7
       {"elifdef",                tok_elifdef,                0},
0b26f7
-#line 67 "locfile-kw.gperf"
0b26f7
+#line 68 "locfile-kw.gperf"
0b26f7
       {"backward",               tok_backward,               0},
0b26f7
-#line 106 "locfile-kw.gperf"
0b26f7
+#line 107 "locfile-kw.gperf"
0b26f7
       {"duo_int_frac_digits",    tok_duo_int_frac_digits,    0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 96 "locfile-kw.gperf"
0b26f7
+#line 97 "locfile-kw.gperf"
0b26f7
       {"p_sign_posn",            tok_p_sign_posn,            0},
0b26f7
       {""},
0b26f7
-#line 203 "locfile-kw.gperf"
0b26f7
+#line 204 "locfile-kw.gperf"
0b26f7
       {"category",               tok_category,               0},
0b26f7
       {""}, {""}, {""}, {""},
0b26f7
-#line 134 "locfile-kw.gperf"
0b26f7
+#line 135 "locfile-kw.gperf"
0b26f7
       {"mon",                    tok_mon,                    0},
0b26f7
       {""},
0b26f7
-#line 124 "locfile-kw.gperf"
0b26f7
+#line 125 "locfile-kw.gperf"
0b26f7
       {"conversion_rate",        tok_conversion_rate,        0},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 63 "locfile-kw.gperf"
0b26f7
+#line 64 "locfile-kw.gperf"
0b26f7
       {"order_start",            tok_order_start,            0},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 178 "locfile-kw.gperf"
0b26f7
+#line 179 "locfile-kw.gperf"
0b26f7
       {"lang_ab",                tok_lang_ab,                0},
0b26f7
-#line 180 "locfile-kw.gperf"
0b26f7
+#line 181 "locfile-kw.gperf"
0b26f7
       {"lang_lib",               tok_lang_lib,               0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 192 "locfile-kw.gperf"
0b26f7
+#line 193 "locfile-kw.gperf"
0b26f7
       {"contact",                tok_contact,                0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 173 "locfile-kw.gperf"
0b26f7
+#line 174 "locfile-kw.gperf"
0b26f7
       {"country_ab3",            tok_country_ab3,            0},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 193 "locfile-kw.gperf"
0b26f7
+#line 194 "locfile-kw.gperf"
0b26f7
       {"email",                  tok_email,                  0},
0b26f7
-#line 172 "locfile-kw.gperf"
0b26f7
+#line 173 "locfile-kw.gperf"
0b26f7
       {"country_ab2",            tok_country_ab2,            0},
0b26f7
       {""}, {""}, {""},
0b26f7
 #line 55 "locfile-kw.gperf"
0b26f7
       {"default_missing",        tok_default_missing,        0},
0b26f7
       {""}, {""},
0b26f7
-#line 195 "locfile-kw.gperf"
0b26f7
+#line 196 "locfile-kw.gperf"
0b26f7
       {"fax",                    tok_fax,                    0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 174 "locfile-kw.gperf"
0b26f7
+#line 175 "locfile-kw.gperf"
0b26f7
       {"country_num",            tok_country_num,            0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
 #line 51 "locfile-kw.gperf"
0b26f7
       {"map",                    tok_map,                    0},
0b26f7
-#line 65 "locfile-kw.gperf"
0b26f7
+#line 66 "locfile-kw.gperf"
0b26f7
       {"from",                   tok_from,                   0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 86 "locfile-kw.gperf"
0b26f7
+#line 87 "locfile-kw.gperf"
0b26f7
       {"mon_thousands_sep",      tok_mon_thousands_sep,      0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""},
0b26f7
-#line 81 "locfile-kw.gperf"
0b26f7
+#line 82 "locfile-kw.gperf"
0b26f7
       {"endif",                  tok_endif,                  0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 151 "locfile-kw.gperf"
0b26f7
+#line 152 "locfile-kw.gperf"
0b26f7
       {"alt_mon",                tok_alt_mon,                0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 76 "locfile-kw.gperf"
0b26f7
+#line 77 "locfile-kw.gperf"
0b26f7
       {"undef",                  tok_undef,                  0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 59 "locfile-kw.gperf"
0b26f7
+#line 60 "locfile-kw.gperf"
0b26f7
       {"collating-element",      tok_collating_element,      0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 152 "locfile-kw.gperf"
0b26f7
+#line 153 "locfile-kw.gperf"
0b26f7
       {"ab_alt_mon",             tok_ab_alt_mon,             0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 66 "locfile-kw.gperf"
0b26f7
+#line 67 "locfile-kw.gperf"
0b26f7
       {"forward",                tok_forward,                0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 85 "locfile-kw.gperf"
0b26f7
+#line 86 "locfile-kw.gperf"
0b26f7
       {"mon_decimal_point",      tok_mon_decimal_point,      0},
0b26f7
       {""}, {""},
0b26f7
-#line 169 "locfile-kw.gperf"
0b26f7
+#line 170 "locfile-kw.gperf"
0b26f7
       {"postal_fmt",             tok_postal_fmt,             0},
0b26f7
       {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 60 "locfile-kw.gperf"
0b26f7
+#line 61 "locfile-kw.gperf"
0b26f7
       {"collating-symbol",       tok_collating_symbol,       0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
@@ -583,15 +584,15 @@ locfile_hash (register const char *str, register size_t len)
0b26f7
 #line 38 "locfile-kw.gperf"
0b26f7
       {"alnum",                  tok_alnum,                  0},
0b26f7
       {""},
0b26f7
-#line 87 "locfile-kw.gperf"
0b26f7
+#line 88 "locfile-kw.gperf"
0b26f7
       {"mon_grouping",           tok_mon_grouping,           0},
0b26f7
       {""},
0b26f7
-#line 179 "locfile-kw.gperf"
0b26f7
+#line 180 "locfile-kw.gperf"
0b26f7
       {"lang_term",              tok_lang_term,              0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
-#line 77 "locfile-kw.gperf"
0b26f7
+#line 78 "locfile-kw.gperf"
0b26f7
       {"ifdef",                  tok_ifdef,                  0},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
@@ -599,7 +600,7 @@ locfile_hash (register const char *str, register size_t len)
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
0b26f7
       {""}, {""}, {""}, {""},
0b26f7
-#line 138 "locfile-kw.gperf"
0b26f7
+#line 139 "locfile-kw.gperf"
0b26f7
       {"am_pm",                  tok_am_pm,                  0}
0b26f7
     };
0b26f7
 
0b26f7
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
0b26f7
index 414ad3076223e971..f57d594e8d25c06f 100644
0b26f7
--- a/locale/programs/locfile-token.h
0b26f7
+++ b/locale/programs/locfile-token.h
0b26f7
@@ -91,6 +91,7 @@ enum token_t
0b26f7
   tok_translit_ignore,
0b26f7
   tok_default_missing,
0b26f7
   tok_lc_collate,
0b26f7
+  tok_codepoint_collation,
0b26f7
   tok_coll_weight_max,
0b26f7
   tok_section_symbol,
0b26f7
   tok_collating_element,