diff --git a/SOURCES/glibc-rh1704868-1.patch b/SOURCES/glibc-rh1704868-1.patch new file mode 100644 index 0000000..324aaf1 --- /dev/null +++ b/SOURCES/glibc-rh1704868-1.patch @@ -0,0 +1,1365 @@ +commit 91927b7c76437db860cd86a7714476b56bb39d07 +Author: Arjun Shankar +Date: Tue Jul 7 20:31:48 2020 +0200 + + Rewrite iconv option parsing [BZ #19519] + + This commit replaces string manipulation during `iconv_open' and iconv_prog + option parsing with a structured, flag based conversion specification. In + doing so, it alters the internal `__gconv_open' interface and accordingly + adjusts its uses. + + This change fixes several hangs in the iconv program and therefore includes + a new test to exercise iconv_prog options that originally led to these hangs. + It also includes a new regression test for option handling in the iconv + function. + + Reviewed-by: Florian Weimer + Reviewed-by: Siddhesh Poyarekar + Reviewed-by: Carlos O'Donell + +diff --git a/iconv/Makefile b/iconv/Makefile +index d71319b39e772fde..d09b8ac842731780 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -26,7 +26,7 @@ headers = iconv.h gconv.h + routines = iconv_open iconv iconv_close \ + gconv_open gconv gconv_close gconv_db gconv_conf \ + gconv_builtin gconv_simple gconv_trans gconv_cache +-routines += gconv_dl ++routines += gconv_dl gconv_charset + + vpath %.c ../locale/programs ../intl + +@@ -43,7 +43,8 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + CFLAGS-linereader.c += -DNO_TRANSLITERATION + CFLAGS-simple-hash.c += -I../locale + +-tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 ++tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \ ++ tst-iconv-opt + + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv +@@ -60,6 +61,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out ++tests-special += 
$(objpfx)tst-iconv_prog.out + endif + + # Make a copy of the file because gconv module names are constructed +@@ -78,6 +80,13 @@ endif + + include ../Rules + ++ifeq ($(run-built-tests),yes) ++LOCALES := en_US.UTF-8 ++include ../gen-locales.mk ++ ++$(objpfx)tst-iconv-opt.out: $(gen-locales) ++endif ++ + $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) + $(do-install-program) + +@@ -92,3 +101,8 @@ $(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig + cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \ + rm -f $$tmp) > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ ++ '$(run-program-env)' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/Versions b/iconv/Versions +index 60ab10a277588515..8a5f4cf780b18925 100644 +--- a/iconv/Versions ++++ b/iconv/Versions +@@ -6,6 +6,7 @@ libc { + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; ++ __gconv_open; __gconv_create_spec; + + # function used by the gconv modules + __gconv_transliterate; +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +new file mode 100644 +index 0000000000000000..6ccd0773ccb6cd27 +--- /dev/null ++++ b/iconv/gconv_charset.c +@@ -0,0 +1,218 @@ ++/* Charset name normalization. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "gconv_int.h" ++#include "gconv_charset.h" ++ ++ ++/* This function returns a pointer to the last suffix in a conversion code ++ string. Valid suffixes matched by this function are of the form: '/' or ',' ++ followed by arbitrary text that doesn't contain '/' or ','. It does not ++ edit the string in any way. The caller is expected to parse the suffix and ++ remove it (by e.g. truncating the string) before the next call. */ ++static char * ++find_suffix (char *s) ++{ ++ /* The conversion code is in the form of a triplet, separated by '/' chars. ++ The third component of the triplet contains suffixes. If we don't have two ++ slashes, we don't have a suffix. */ ++ ++ int slash_count = 0; ++ char *suffix_term = NULL; ++ ++ for (int i = 0; s[i] != '\0'; i++) ++ switch (s[i]) ++ { ++ case '/': ++ slash_count++; ++ /* Fallthrough */ ++ case ',': ++ suffix_term = &s[i]; ++ } ++ ++ if (slash_count >= 2) ++ return suffix_term; ++ ++ return NULL; ++} ++ ++ ++struct gconv_parsed_code ++{ ++ char *code; ++ bool translit; ++ bool ignore; ++}; ++ ++ ++/* This function parses an iconv_open encoding PC.CODE, strips any suffixes ++ (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it. */ ++static void ++gconv_parse_code (struct gconv_parsed_code *pc) ++{ ++ pc->translit = false; ++ pc->ignore = false; ++ ++ while (1) ++ { ++ /* First drop any trailing whitespaces and separators. 
*/ ++ size_t len = strlen (pc->code); ++ while ((len > 0) ++ && (isspace (pc->code[len - 1]) ++ || pc->code[len - 1] == ',' ++ || pc->code[len - 1] == '/')) ++ len--; ++ ++ pc->code[len] = '\0'; ++ ++ if (len == 0) ++ return; ++ ++ char * suffix = find_suffix (pc->code); ++ if (suffix == NULL) ++ { ++ /* At this point, we have processed and removed all suffixes from the ++ code and what remains of the code is suffix free. */ ++ return; ++ } ++ else ++ { ++ /* A suffix is processed from the end of the code array going ++ backwards, one suffix at a time. The suffix is an index into the ++ code character array and points to: one past the end of the code ++ and any unprocessed suffixes, and to the beginning of the suffix ++ currently being processed during this iteration. We must process ++ this suffix and then drop it from the code by terminating the ++ preceding text with NULL. ++ ++ We want to allow and recognize suffixes such as: ++ ++ "/TRANSLIT" i.e. single suffix ++ "//TRANSLIT" i.e. single suffix and multiple separators ++ "//TRANSLIT/IGNORE" i.e. suffixes separated by "/" ++ "/TRANSLIT//IGNORE" i.e. suffixes separated by "//" ++ "//IGNORE,TRANSLIT" i.e. suffixes separated by "," ++ "//IGNORE," i.e. trailing "," ++ "//TRANSLIT/" i.e. trailing "/" ++ "//TRANSLIT//" i.e. trailing "//" ++ "/" i.e. empty suffix. ++ ++ Unknown suffixes are silently discarded and ignored. */ ++ ++ if ((__strcasecmp_l (suffix, ++ GCONV_TRIPLE_SEPARATOR ++ GCONV_TRANSLIT_SUFFIX, ++ _nl_C_locobj_ptr) == 0) ++ || (__strcasecmp_l (suffix, ++ GCONV_SUFFIX_SEPARATOR ++ GCONV_TRANSLIT_SUFFIX, ++ _nl_C_locobj_ptr) == 0)) ++ pc->translit = true; ++ ++ if ((__strcasecmp_l (suffix, ++ GCONV_TRIPLE_SEPARATOR ++ GCONV_IGNORE_ERRORS_SUFFIX, ++ _nl_C_locobj_ptr) == 0) ++ || (__strcasecmp_l (suffix, ++ GCONV_SUFFIX_SEPARATOR ++ GCONV_IGNORE_ERRORS_SUFFIX, ++ _nl_C_locobj_ptr) == 0)) ++ pc->ignore = true; ++ ++ /* We just processed this suffix. 
We can now drop it from the ++ code string by truncating it at the suffix's position. */ ++ suffix[0] = '\0'; ++ } ++ } ++} ++ ++ ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. conv_spec must be allocated and freed by the caller. */ ++struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode) ++{ ++ struct gconv_parsed_code pfc, ptc; ++ struct gconv_spec *ret = NULL; ++ ++ pfc.code = __strdup (fromcode); ++ ptc.code = __strdup (tocode); ++ ++ if ((pfc.code == NULL) ++ || (ptc.code == NULL)) ++ goto out; ++ ++ gconv_parse_code (&pfc); ++ gconv_parse_code (&ptc); ++ ++ /* We ignore suffixes in the fromcode because that is how the current ++ implementation has always handled them. Only suffixes in the tocode are ++ processed and handled. The reality is that invalid input in the input ++ character set should only be ignored if the fromcode specifies IGNORE. ++ The current implementation ignores invalid input in the input character ++ set if the tocode contains IGNORE. We preserve this behavior for ++ backwards compatibility. In the future we may split the handling of ++ IGNORE to allow a finer grained specification of ignoring invalid input ++ and/or ignoring invalid output. 
*/ ++ conv_spec->translit = ptc.translit; ++ conv_spec->ignore = ptc.ignore; ++ ++ /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might ++ be able to add one or two trailing '/' characters if necessary. */ ++ conv_spec->fromcode = malloc (strlen (fromcode) + 3); ++ if (conv_spec->fromcode == NULL) ++ goto out; ++ ++ conv_spec->tocode = malloc (strlen (tocode) + 3); ++ if (conv_spec->tocode == NULL) ++ { ++ free (conv_spec->fromcode); ++ conv_spec->fromcode = NULL; ++ goto out; ++ } ++ ++ /* Strip unrecognized characters and ensure that the code has two '/' ++ characters as per conversion code triplet specification. */ ++ strip (conv_spec->fromcode, pfc.code); ++ strip (conv_spec->tocode, ptc.code); ++ ret = conv_spec; ++ ++out: ++ free (pfc.code); ++ free (ptc.code); ++ ++ return ret; ++} ++libc_hidden_def (__gconv_create_spec) +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index 123e2a62cefdc017..b85d80313030b649 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -19,9 +19,68 @@ + + #include + #include ++#include ++#include ++#include ++#include ++#include "gconv_int.h" + + +-static void ++/* An iconv encoding is in the form of a triplet, with parts separated by ++ a '/' character. The first part is the standard name, the second part is ++ the character set, and the third part is the error handler. If the first ++ part is sufficient to identify both the standard and the character set ++ then the second part can be empty e.g. UTF-8//. If the first part is not ++ sufficient to identify both the standard and the character set then the ++ second part is required e.g. ISO-10646/UTF8/. If neither the first or ++ second parts are provided e.g. //, then the current locale is used. ++ The actual values used in the first and second parts are not entirely ++ relevant to the implementation. 
The values themselves are used in a hash ++ table to lookup modules and so the naming convention of the first two parts ++ is somewhat arbitrary and only helps locate the entries in the cache. ++ The third part is the error handler and is comprised of a ',' or '/' ++ separated list of suffixes. Currently, we support "TRANSLIT" for ++ transliteration and "IGNORE" for ignoring conversion errors due to ++ unrecognized input characters. */ ++#define GCONV_TRIPLE_SEPARATOR "/" ++#define GCONV_SUFFIX_SEPARATOR "," ++#define GCONV_TRANSLIT_SUFFIX "TRANSLIT" ++#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" ++ ++ ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. */ ++struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode); ++libc_hidden_proto (__gconv_create_spec) ++ ++ ++/* This function frees all heap memory allocated by __gconv_create_spec. 
*/ ++static void __attribute__ ((unused)) ++gconv_destroy_spec (struct gconv_spec *conv_spec) ++{ ++ free (conv_spec->fromcode); ++ free (conv_spec->tocode); ++ return; ++} ++ ++ ++/* This function copies in-order, characters from the source 's' that are ++ either alpha-numeric or one of these: "_-.,:/" - into the destination ++ 'wp' while dropping all other characters. In the process, it converts all ++ alphabetical characters to upper case. It then appends up to two '/' ++ characters so that the total number of '/'es in the destination is 2. */ ++static inline void __attribute__ ((unused, always_inline)) + strip (char *wp, const char *s) + { + int slash_count = 0; +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 3742557caed05c9a..4748e9b1fa3b5426 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -92,6 +92,15 @@ struct gconv_module + }; + + ++/* The specification of the conversion that needs to be performed. */ ++struct gconv_spec ++{ ++ char *fromcode; ++ char *tocode; ++ bool translit; ++ bool ignore; ++}; ++ + /* Flags for `gconv_open'. */ + enum + { +@@ -154,10 +163,12 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden) + }) + + +-/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */ +-extern int __gconv_open (const char *toset, const char *fromset, +- __gconv_t *handle, int flags) +- attribute_hidden; ++/* Return in *HANDLE, a descriptor for the transformation. The function expects ++ the specification of the transformation in the structure pointed to by ++ CONV_SPEC. It only reads *CONV_SPEC and does not take ownership of it. */ ++extern int __gconv_open (struct gconv_spec *conv_spec, ++ __gconv_t *handle, int flags); ++libc_hidden_proto (__gconv_open) + + /* Free resources associated with transformation descriptor CD. 
*/ + extern int __gconv_close (__gconv_t cd) +diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c +index f739561f6edba8a8..002faa111a0b9016 100644 +--- a/iconv/gconv_open.c ++++ b/iconv/gconv_open.c +@@ -27,7 +27,7 @@ + + + int +-__gconv_open (const char *toset, const char *fromset, __gconv_t *handle, ++__gconv_open (struct gconv_spec *conv_spec, __gconv_t *handle, + int flags) + { + struct __gconv_step *steps; +@@ -36,77 +36,38 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + size_t cnt = 0; + int res; + int conv_flags = 0; +- const char *errhand; +- const char *ignore; + bool translit = false; ++ char *tocode, *fromcode; + + /* Find out whether any error handling method is specified. */ +- errhand = strchr (toset, '/'); +- if (errhand != NULL) +- errhand = strchr (errhand + 1, '/'); +- if (__glibc_likely (errhand != NULL)) +- { +- if (*++errhand == '\0') +- errhand = NULL; +- else +- { +- /* Make copy without the error handling description. */ +- char *newtoset = (char *) alloca (errhand - toset + 1); +- char *tok; +- char *ptr = NULL /* Work around a bogus warning */; +- +- newtoset[errhand - toset] = '\0'; +- toset = memcpy (newtoset, toset, errhand - toset); ++ translit = conv_spec->translit; + +- /* Find the appropriate transliteration handlers. */ +- tok = strdupa (errhand); ++ if (conv_spec->ignore) ++ conv_flags |= __GCONV_IGNORE_ERRORS; + +- tok = __strtok_r (tok, ",", &ptr); +- while (tok != NULL) +- { +- if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0) +- translit = true; +- else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0) +- /* Set the flag to ignore all errors. */ +- conv_flags |= __GCONV_IGNORE_ERRORS; +- +- tok = __strtok_r (NULL, ",", &ptr); +- } +- } +- } +- +- /* For the source character set we ignore the error handler specification. +- XXX Is this really always the best? 
*/ +- ignore = strchr (fromset, '/'); +- if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL +- && *++ignore != '\0') +- { +- char *newfromset = (char *) alloca (ignore - fromset + 1); +- +- newfromset[ignore - fromset] = '\0'; +- fromset = memcpy (newfromset, fromset, ignore - fromset); +- } ++ tocode = conv_spec->tocode; ++ fromcode = conv_spec->fromcode; + + /* If the string is empty define this to mean the charset of the + currently selected locale. */ +- if (strcmp (toset, "//") == 0) ++ if (strcmp (tocode, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; +- toset = dest = (char *) alloca (len + 3); ++ tocode = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } +- if (strcmp (fromset, "//") == 0) ++ if (strcmp (fromcode, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; +- fromset = dest = (char *) alloca (len + 3); ++ fromcode = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + +- res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); ++ res = __gconv_find_transform (tocode, fromcode, &steps, &nsteps, flags); + if (res == __GCONV_OK) + { + /* Allocate room for handle. */ +@@ -205,3 +166,4 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + *handle = result; + return res; + } ++libc_hidden_def (__gconv_open) +diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c +index 9f5c32c02096254a..59d1ef4f07ed1022 100644 +--- a/iconv/iconv_open.c ++++ b/iconv/iconv_open.c +@@ -31,49 +31,15 @@ + iconv_t + iconv_open (const char *tocode, const char *fromcode) + { +- /* Normalize the name. We remove all characters beside alpha-numeric, +- '_', '-', '/', '.', and ':'. 
*/ +- size_t tocode_len = strlen (tocode) + 3; +- char *tocode_conv; +- bool tocode_usealloca = __libc_use_alloca (tocode_len); +- if (tocode_usealloca) +- tocode_conv = (char *) alloca (tocode_len); +- else +- { +- tocode_conv = (char *) malloc (tocode_len); +- if (tocode_conv == NULL) +- return (iconv_t) -1; +- } +- strip (tocode_conv, tocode); +- tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' +- ? upstr (tocode_conv, tocode) : tocode_conv); ++ __gconv_t cd; ++ struct gconv_spec conv_spec; + +- size_t fromcode_len = strlen (fromcode) + 3; +- char *fromcode_conv; +- bool fromcode_usealloca = __libc_use_alloca (fromcode_len); +- if (fromcode_usealloca) +- fromcode_conv = (char *) alloca (fromcode_len); +- else +- { +- fromcode_conv = (char *) malloc (fromcode_len); +- if (fromcode_conv == NULL) +- { +- if (! tocode_usealloca) +- free (tocode_conv); +- return (iconv_t) -1; +- } +- } +- strip (fromcode_conv, fromcode); +- fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' +- ? upstr (fromcode_conv, fromcode) : fromcode_conv); ++ if (__gconv_create_spec (&conv_spec, fromcode, tocode) == NULL) ++ return (iconv_t) -1; + +- __gconv_t cd; +- int res = __gconv_open (tocode, fromcode, &cd, 0); ++ int res = __gconv_open (&conv_spec, &cd, 0); + +- if (! fromcode_usealloca) +- free (fromcode_conv); +- if (! tocode_usealloca) +- free (tocode_conv); ++ gconv_destroy_spec (&conv_spec); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 52e9d3f3ddec3b2e..552efac81660e82a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -39,6 +39,7 @@ + #include + #include "iconv_prog.h" + #include "iconvconfig.h" ++#include "gconv_charset.h" + + /* Get libc version number. 
*/ + #include "../version.h" +@@ -118,8 +119,7 @@ main (int argc, char *argv[]) + { + int status = EXIT_SUCCESS; + int remaining; +- iconv_t cd; +- const char *orig_to_code; ++ __gconv_t cd; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; + +@@ -139,39 +139,6 @@ main (int argc, char *argv[]) + exit (EXIT_SUCCESS); + } + +- /* If we have to ignore errors make sure we use the appropriate name for +- the to-character-set. */ +- orig_to_code = to_code; +- if (omit_invalid) +- { +- const char *errhand = strchrnul (to_code, '/'); +- int nslash = 2; +- char *newp; +- char *cp; +- +- if (*errhand == '/') +- { +- --nslash; +- errhand = strchrnul (errhand + 1, '/'); +- +- if (*errhand == '/') +- { +- --nslash; +- errhand = strchr (errhand, '\0'); +- } +- } +- +- newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); +- cp = mempcpy (newp, to_code, errhand - to_code); +- while (nslash-- > 0) +- *cp++ = '/'; +- if (cp[-1] != '/') +- *cp++ = ','; +- memcpy (cp, "IGNORE", sizeof ("IGNORE")); +- +- to_code = newp; +- } +- + /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f + can be file names of charmaps. In this case iconv will have to read + those charmaps and use them to do the conversion. But there are +@@ -184,10 +151,10 @@ main (int argc, char *argv[]) + file. */ + from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); + +- if (strchr (orig_to_code, '/') != NULL) ++ if (strchr (to_code, '/') != NULL) + /* The to-name might be a charmap file name. Try reading the + file. */ +- to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); ++ to_charmap = charmap_read (to_code, /*0, 1,*/1, 0, 0, 0); + + + /* At this point we have to handle two cases. 
The first one is +@@ -201,9 +168,25 @@ main (int argc, char *argv[]) + argc, remaining, argv, output_file); + else + { ++ struct gconv_spec conv_spec; ++ int res; ++ ++ if (__gconv_create_spec (&conv_spec, from_code, to_code) == NULL) ++ { ++ error (EXIT_FAILURE, errno, ++ _("failed to start conversion processing")); ++ exit (1); ++ } ++ ++ if (omit_invalid) ++ conv_spec.ignore = true; ++ + /* Let's see whether we have these coded character sets. */ +- cd = iconv_open (to_code, from_code); +- if (cd == (iconv_t) -1) ++ res = __gconv_open (&conv_spec, &cd, 0); ++ ++ gconv_destroy_spec (&conv_spec); ++ ++ if (res != __GCONV_OK) + { + if (errno == EINVAL) + { +@@ -221,7 +204,7 @@ main (int argc, char *argv[]) + const char *from_pretty = + (from_code[0] ? from_code : nl_langinfo (CODESET)); + const char *to_pretty = +- (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); ++ (to_code[0] ? to_code : nl_langinfo (CODESET)); + + if (from_wrong) + { +diff --git a/iconv/tst-iconv-opt.c b/iconv/tst-iconv-opt.c +new file mode 100644 +index 0000000000000000..669d812a6a9b8749 +--- /dev/null ++++ b/iconv/tst-iconv-opt.c +@@ -0,0 +1,347 @@ ++/* Test iconv's TRANSLIT and IGNORE option handling ++ ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* Run one iconv test. Arguments: ++ to: destination character set and options ++ from: source character set ++ input: input string to be converted ++ exp_in: expected number of bytes consumed ++ exp_ret: expected return value (error or number of irreversible conversions) ++ exp_out: expected output string ++ exp_err: expected value of `errno' after iconv returns. */ ++static void ++test_iconv (const char *to, const char *from, char *input, size_t exp_in, ++ size_t exp_ret, const char *exp_out, int exp_err) ++{ ++ iconv_t cd; ++ char outbuf[500]; ++ size_t inlen, outlen; ++ char *inptr, *outptr; ++ size_t n; ++ ++ cd = iconv_open (to, from); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ ++ inlen = strlen (input); ++ outlen = sizeof (outbuf); ++ inptr = input; ++ outptr = outbuf; ++ ++ errno = 0; ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ ++ TEST_COMPARE (n, exp_ret); ++ TEST_VERIFY (inptr == input + exp_in); ++ TEST_COMPARE (errno, exp_err); ++ TEST_COMPARE_BLOB (outbuf, outptr - outbuf, exp_out, strlen (exp_out)); ++ TEST_VERIFY (iconv_close (cd) == 0); ++} ++ ++ ++/* We test option parsing by converting UTF-8 inputs to ASCII under various ++ option combinations. The UTF-8 inputs fall into three categories: ++ - ASCII-only, ++ - non-ASCII, ++ - non-ASCII with invalid UTF-8 characters. */ ++ ++/* 1. */ ++char ascii[] = "Just some ASCII text"; ++ ++/* 2. Valid UTF-8 input and some corresponding expected outputs with various ++ options. The two non-ASCII characters below are accented alphabets: ++ an `a' then an `o'. */ ++char utf8[] = "UTF-8 text with \u00E1 couple \u00F3f non-ASCII characters"; ++char u2a[] = "UTF-8 text with "; ++char u2a_translit[] = "UTF-8 text with a couple of non-ASCII characters"; ++char u2a_ignore[] = "UTF-8 text with couple f non-ASCII characters"; ++ ++/* 3. Invalid UTF-8 input and some corresponding expected outputs. \xff is ++ invalid UTF-8. 
It's followed by some valid but non-ASCII UTF-8. */ ++char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7"; ++char iu2a[] = "Invalid UTF-8 "; ++char iu2a_ignore[] = "Invalid UTF-8 text"; ++char iu2a_both[] = "Invalid UTF-8 [|text|]"; ++ ++/* 4. Another invalid UTF-8 input and corresponding expected outputs. This time ++ the valid non-ASCII UTF-8 characters appear before the invalid \xff. */ ++char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext"; ++char ju2a[] = "Invalid "; ++char ju2a_translit[] = "Invalid [|UTF-8|] "; ++char ju2a_ignore[] = "Invalid UTF-8 text"; ++char ju2a_both[] = "Invalid [|UTF-8|] text"; ++ ++/* We also test option handling for character set names that have the form ++ "A/B". In this test, we test conversions "ISO-10646/UTF-8", and either ++ ISO-8859-1 or ASCII. */ ++ ++/* 5. Accented 'A' and 'a' characters in ISO-8859-1 and UTF-8, and an ++ equivalent ASCII transliteration. */ ++char iso8859_1_a[] = {0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* Accented A's. */ ++ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* Accented a's. */ ++ 0x00}; ++char utf8_a[] = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5" ++ "\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5"; ++char ascii_a[] = "AAAAAAaaaaaa"; ++ ++/* 6. An invalid ASCII string where [0] is invalid and [1] is '~'. */ ++char iascii [] = {0x80, '~', '\0'}; ++char empty[] = ""; ++char ia2u_ignore[] = "~"; ++ ++static int ++do_test (void) ++{ ++ xsetlocale (LC_ALL, "en_US.UTF-8"); ++ ++ ++ /* 0. iconv_open should gracefully fail for invalid character sets. */ ++ ++ TEST_VERIFY (iconv_open ("INVALID", "UTF-8") == (iconv_t) -1); ++ TEST_VERIFY (iconv_open ("UTF-8", "INVALID") == (iconv_t) -1); ++ TEST_VERIFY (iconv_open ("INVALID", "INVALID") == (iconv_t) -1); ++ ++ ++ /* 1. 
ASCII-only UTF-8 input should convert to ASCII with no changes: */ ++ ++ test_iconv ("ASCII", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//TRANSLIT//", "UTF-8", ascii, strlen (ascii), 0, ascii, ++ 0); ++ test_iconv ("ASCII//IGNORE", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//IGNORE//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ ++ ++ /* 2. Valid UTF-8 input with non-ASCII characters: */ ++ ++ /* EILSEQ when converted to ASCII. */ ++ test_iconv ("ASCII", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, EILSEQ); ++ ++ /* Converted without error with TRANSLIT enabled. */ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, u2a_translit, ++ 0); ++ ++ /* EILSEQ with IGNORE enabled. Non-ASCII chars dropped from output. */ ++ test_iconv ("ASCII//IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, transliterated without error. We test ++ four combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ ++ /* Misspellings of TRANSLIT and IGNORE are ignored, but conversion still ++ works while respecting any other correctly spelled options. 
*/ ++ ++ test_iconv ("ASCII//T", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//TRANSLITERATE", "UTF-8", utf8, strlen (u2a), (size_t) -1, ++ u2a, EILSEQ); ++ test_iconv ("ASCII//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//IGNORED", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//TRANSLITERATE//IGNORED", "UTF-8", utf8, strlen (u2a), ++ (size_t) -1, u2a, EILSEQ); ++ test_iconv ("ASCII//IGNORED,TRANSLITERATE", "UTF-8", utf8, strlen (u2a), ++ (size_t) -1, u2a, EILSEQ); ++ test_iconv ("ASCII//T//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ ++ test_iconv ("ASCII//TRANSLIT//I", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//I//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//IGNORED,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//TRANSLIT,IGNORED", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ ++ test_iconv ("ASCII//IGNORE,T", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ test_iconv ("ASCII//T,IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLITERATE//IGNORE", "UTF-8", utf8, strlen (utf8), ++ (size_t) -1, u2a_ignore, EILSEQ); ++ test_iconv ("ASCII//IGNORE//TRANSLITERATE", "UTF-8", utf8, strlen (utf8), ++ (size_t) -1, u2a_ignore, EILSEQ); ++ ++ ++ /* 3. Invalid UTF-8 followed by some valid non-ASCII UTF-8 characters: */ ++ ++ /* EILSEQ; output is truncated at the first invalid UTF-8 character. */ ++ test_iconv ("ASCII", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, iu2a, ++ EILSEQ); ++ ++ /* With TRANSLIT enabled: EILSEQ; output still truncated at the first invalid ++ UTF-8 character. 
*/ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, ++ iu2a, EILSEQ); ++ ++ /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and ++ valid UTF-8 non-ASCII characters. */ ++ test_iconv ("ASCII//IGNORE", "UTF-8", iutf8, strlen (iutf8), (size_t) -1, ++ iu2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 ++ characters and transliterates valid non-ASCII UTF-8 characters. We test ++ four combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ ++ ++ /* 4. Invalid UTF-8 with valid non-ASCII UTF-8 chars appearing first: */ ++ ++ /* EILSEQ; output is truncated at the first non-ASCII character. */ ++ test_iconv ("ASCII", "UTF-8", jutf8, strlen (ju2a), (size_t) -1, ju2a, ++ EILSEQ); ++ ++ /* With TRANSLIT enabled: EILSEQ; output now truncated at the first invalid ++ UTF-8 character. */ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", jutf8, strlen (jutf8) - 5, ++ (size_t) -1, ju2a_translit, EILSEQ); ++ test_iconv ("ASCII//translit", "UTF-8", jutf8, strlen (jutf8) - 5, ++ (size_t) -1, ju2a_translit, EILSEQ); ++ ++ /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and ++ valid UTF-8 non-ASCII characters. 
*/ ++ test_iconv ("ASCII//IGNORE", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, ++ ju2a_ignore, EILSEQ); ++ test_iconv ("ASCII//ignore", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, ++ ju2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 ++ characters and transliterates valid non-ASCII UTF-8 characters. We test ++ several combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//translit,ignore", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Trailing whitespace and separators should be ignored. */ ++ test_iconv ("ASCII//IGNORE,TRANSLIT ", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT/", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT//", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT,,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT /,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ ++ /* TRANSLIT or IGNORE suffixes in fromcode should be ignored. 
*/ ++ test_iconv ("ASCII", "UTF-8//TRANSLIT", jutf8, strlen (ju2a), (size_t) -1, ++ ju2a, EILSEQ); ++ test_iconv ("ASCII", "UTF-8//IGNORE", jutf8, strlen (ju2a), (size_t) -1, ++ ju2a, EILSEQ); ++ test_iconv ("ASCII", "UTF-8//TRANSLIT,IGNORE", jutf8, strlen (ju2a), ++ (size_t) -1, ju2a, EILSEQ); ++ ++ ++ /* 5. Charset names of the form "A/B/": */ ++ ++ /* ISO-8859-1 is converted to UTF-8 without needing transliteration. */ ++ test_iconv ("ISO-10646/UTF-8", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT/IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ ++ /* UTF-8 with accented A's is converted to ASCII with transliteration. */ ++ test_iconv ("ASCII", "ISO-10646/UTF-8", utf8_a, ++ 0, (size_t) -1, empty, EILSEQ); ++ test_iconv ("ASCII//IGNORE", "ISO-10646/UTF-8", utf8_a, ++ strlen (utf8_a), (size_t) -1, empty, EILSEQ); ++ test_iconv ("ASCII//TRANSLIT", "ISO-10646/UTF-8", utf8_a, ++ strlen (utf8_a), 12, ascii_a, 0); ++ ++ /* Invalid ASCII is converted to UTF-8 only with IGNORE. 
*/ ++ test_iconv ("ISO-10646/UTF-8", "ASCII", iascii, strlen (empty), (size_t) -1, ++ empty, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ASCII", iascii, strlen (empty), ++ (size_t) -1, empty, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/IGNORE", "ASCII", iascii, strlen (iascii), ++ (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following three ++ inputs: */ ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT/IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT,IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ ++ return 0; ++} ++ ++#include +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +new file mode 100644 +index 0000000000000000..8298136b7f45d855 +--- /dev/null ++++ b/iconv/tst-iconv_prog.sh +@@ -0,0 +1,280 @@ ++#!/bin/bash ++# Test for some known iconv(1) hangs from bug 19519, and miscellaneous ++# iconv(1) program error conditions. ++# Copyright (C) 2020 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. 
++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++codir=$1 ++test_wrapper_env="$2" ++run_program_env="$3" ++ ++# We have to have some directories in the library path. ++LIBPATH=$codir:$codir/iconvdata ++ ++# How to start the iconv(1) program. $from is not defined/expanded yet. ++ICONV=' ++$codir/elf/ld.so --library-path $LIBPATH --inhibit-rpath ${from}.so ++$codir/iconv/iconv_prog ++' ++ICONV="$test_wrapper_env $run_program_env $ICONV" ++ ++# List of known hangs; ++# Gathered by running an exhaustive 2 byte input search against glibc-2.28 ++hangarray=( ++"\x00\x23;-c;ANSI_X3.110;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa1;-c;ARMSCII-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa1;-c;ASMO_449;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;BIG5;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BIG5HKSCS;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BRF;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BS_4730;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1250;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;CP1251;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1252;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1253;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1254;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1255;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1257;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1258;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CP932;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CSA_Z243.4-1985-1;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CSA_Z243.4-1985-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DEC-MCS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DIN_66003;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DS_2089;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-AT-DE;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-AT-DE-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-CA-FR;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-DK-NO;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-DK-NO-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-ES;UTF-8//TRANSLIT//IGNORE" 
++"\x00\x41;-c;EBCDIC-ES-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-ES-S;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FI-SE;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FI-SE-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FR;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-IS-FRISS;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-IT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-PT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-UK;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-US;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ES;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ES2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-CN;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JISX0213;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JP;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JP-MS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-KR;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-TW;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GB18030;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GB_1988-80;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GBK;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GOST_19768-74;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK7;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK7-OLD;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK-CCITT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-GREEK8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-ROMAN8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-ROMAN9;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-THAI8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-TURKISH8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM038;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM1004;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;IBM1008;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM1046;UTF-8//TRANSLIT//IGNORE" ++"\x00\x51;-c;IBM1132;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa0;-c;IBM1133;UTF-8//TRANSLIT//IGNORE" ++"\x00\xce;-c;IBM1137;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" ++"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" ++# These are known hangs 
that are yet to be fixed: ++# "\x00\x0f;-c;IBM1364;UTF-8" ++# "\x00\x0f;-c;IBM1371;UTF-8" ++# "\x00\x0f;-c;IBM1388;UTF-8" ++# "\x00\x0f;-c;IBM1390;UTF-8" ++# "\x00\x0f;-c;IBM1399;UTF-8" ++"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM281;UTF-8//TRANSLIT//IGNORE" ++"\x00\x57;-c;IBM290;UTF-8//TRANSLIT//IGNORE" ++"\x00\x45;-c;IBM420;UTF-8//TRANSLIT//IGNORE" ++"\x00\x68;-c;IBM423;UTF-8//TRANSLIT//IGNORE" ++"\x00\x70;-c;IBM424;UTF-8//TRANSLIT//IGNORE" ++"\x00\x53;-c;IBM4517;UTF-8//TRANSLIT//IGNORE" ++"\x00\x53;-c;IBM4899;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa5;-c;IBM4909;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;IBM4971;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM803;UTF-8//TRANSLIT//IGNORE" ++"\x00\x91;-c;IBM851;UTF-8//TRANSLIT//IGNORE" ++"\x00\x9b;-c;IBM856;UTF-8//TRANSLIT//IGNORE" ++"\x00\xd5;-c;IBM857;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM864;UTF-8//TRANSLIT//IGNORE" ++"\x00\x94;-c;IBM868;UTF-8//TRANSLIT//IGNORE" ++"\x00\x94;-c;IBM869;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM874;UTF-8//TRANSLIT//IGNORE" ++"\x00\x6a;-c;IBM875;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM880;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM891;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM903;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM904;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM905;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM9066;UTF-8//TRANSLIT//IGNORE" ++"\x00\x48;-c;IBM918;UTF-8//TRANSLIT//IGNORE" ++"\x00\x57;-c;IBM930;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM932;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM933;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM935;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM937;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM939;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM943;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS-8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS-CYRILLIC;UTF-8//TRANSLIT//IGNORE" 
++"\x00\xec;-c;ISIRI-3342;UTF-8//TRANSLIT//IGNORE" ++"\x00\xec;-c;ISO_10367-BOX;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-CN;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-CN-EXT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP-3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-KR;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_2033;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5427;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5427-EXT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5428;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa4;-c;ISO_6937;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa0;-c;ISO_6937-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-11;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa5;-c;ISO-8859-3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-6;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-7;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;ISO-IR-197;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;ISO-IR-209;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JIS_C6220-1969-RO;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JIS_C6229-1984-B;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JOHAB;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JUS_I.B1.002;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;KOI-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x88;-c;KOI8-T;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;KSC5636;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;LATIN-GREEK;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;LATIN-GREEK-1;UTF-8//TRANSLIT//IGNORE" ++"\x00\xf6;-c;MAC-IS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;MSZ_7795.3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NATS-DANO;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NATS-SEFI;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NC_NC00-10;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NF_Z_62-010;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NF_Z_62-010_1973;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NS_4551-1;UTF-8//TRANSLIT//IGNORE" 
++"\xff\xff;-c;NS_4551-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;PT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;PT2;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;RK1048;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;SEN_850200_B;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;SEN_850200_C;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;Shift_JISX0213;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;SJIS;UTF-8//TRANSLIT//IGNORE" ++"\x00\x23;-c;T.61-8BIT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;TIS-620;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;TSCII;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;UHC;UTF-8//TRANSLIT//IGNORE" ++"\x00\xd8;-c;UNICODE;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;UTF-16;UTF-8//TRANSLIT//IGNORE" ++"\xdc\x00;-c;UTF-16BE;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;UTF-16LE;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;UTF-7;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" ++) ++ ++# List of option combinations that *should* lead to an error ++errorarray=( ++# Converting from/to invalid character sets should cause error ++"\x00\x00;;INVALID;INVALID" ++"\x00\x00;;INVALID;UTF-8" ++"\x00\x00;;UTF-8;INVALID" ++) ++ ++# Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret ++execute_test () ++{ ++ eval PROG=\"$ICONV\" ++ echo -en "$twobyte" \ ++ | timeout -k 4 3 $PROG $c -f $from -t "$to" &>/dev/null ++ ret=$? 
++} ++ ++check_hangtest_result () ++{ ++ if [ "$ret" -eq "124" ] || [ "$ret" -eq "137" ]; then # timeout/hang ++ result="HANG" ++ else ++ if [ "$ret" -eq "139" ]; then # segfault ++ result="SEGFAULT" ++ else ++ if [ "$ret" -gt "127" ]; then # unexpected error ++ result="UNEXPECTED" ++ else ++ result="OK" ++ fi ++ fi ++ fi ++ ++ echo -n "$result: from: \"$from\", to: \"$to\"," ++ echo " input \"$twobyte\", flags \"$c\"" ++ ++ if [ "$result" != "OK" ]; then ++ exit 1 ++ fi ++} ++ ++for hangcommand in "${hangarray[@]}"; do ++ twobyte="$(echo "$hangcommand" | cut -d";" -f 1)" ++ c="$(echo "$hangcommand" | cut -d";" -f 2)" ++ from="$(echo "$hangcommand" | cut -d";" -f 3)" ++ to="$(echo "$hangcommand" | cut -d";" -f 4)" ++ execute_test ++ check_hangtest_result ++done ++ ++check_errtest_result () ++{ ++ if [ "$ret" -eq "1" ]; then # we errored out as expected ++ result="PASS" ++ else ++ result="FAIL" ++ fi ++ echo -n "$result: from: \"$from\", to: \"$to\"," ++ echo " input \"$twobyte\", flags \"$c\", return code $ret" ++ ++ if [ "$result" != "PASS" ]; then ++ exit 1 ++ fi ++} ++ ++for errorcommand in "${errorarray[@]}"; do ++ twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" ++ c="$(echo "$errorcommand" | cut -d";" -f 2)" ++ from="$(echo "$errorcommand" | cut -d";" -f 3)" ++ to="$(echo "$errorcommand" | cut -d";" -f 4)" ++ execute_test ++ check_errtest_result ++done +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index 25f47c5bd3b0ea04..ed48fc8d3e96c7ba 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -1120,11 +1120,16 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, + outcharset = encoding; + + # ifdef _LIBC +- /* We always want to use transliteration. 
*/ +- outcharset = norm_add_slashes (outcharset, "TRANSLIT"); +- charset = norm_add_slashes (charset, ""); +- int r = __gconv_open (outcharset, charset, &convd->conv, +- GCONV_AVOID_NOCONV); ++ ++ struct gconv_spec conv_spec ++ = { .fromcode = norm_add_slashes (charset, ""), ++ .tocode = norm_add_slashes (outcharset, ""), ++ /* We always want to use transliteration. */ ++ .translit = true, ++ .ignore = false ++ }; ++ int r = __gconv_open (&conv_spec, &convd->conv, ++ GCONV_AVOID_NOCONV); + if (__builtin_expect (r != __GCONV_OK, 0)) + { + /* If the output encoding is the same there is diff --git a/SOURCES/glibc-rh1704868-2.patch b/SOURCES/glibc-rh1704868-2.patch new file mode 100644 index 0000000..50b7ad7 --- /dev/null +++ b/SOURCES/glibc-rh1704868-2.patch @@ -0,0 +1,235 @@ +commit 7d4ec75e111291851620c6aa2c4460647b7fd50d +Author: Arjun Shankar +Date: Fri Sep 25 14:47:06 2020 +0200 + + intl: Handle translation output codesets with suffixes [BZ #26383] + + Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not + handle cases where the output codeset for translations (via the `gettext' + family of functions) might have a caller specified encoding suffix such as + TRANSLIT or IGNORE. This led to a regression where translations did not + work when the codeset had a suffix. + + This commit fixes the above issue by parsing any suffixes passed to + __dcigettext and adds two new test-cases to intl/tst-codeset.c to + verify correct behaviour. The iconv-internal function __gconv_create_spec + and the static iconv-internal function gconv_destroy_spec are now visible + internally within glibc and used in intl/dcigettext.c. 
+ +diff --git a/iconv/Versions b/iconv/Versions +index 8a5f4cf780b18925..d51af52fa34b8793 100644 +--- a/iconv/Versions ++++ b/iconv/Versions +@@ -6,7 +6,9 @@ libc { + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; +- __gconv_open; __gconv_create_spec; ++ ++ # functions used elsewhere in glibc ++ __gconv_open; __gconv_create_spec; __gconv_destroy_spec; + + # function used by the gconv modules + __gconv_transliterate; +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +index 6ccd0773ccb6cd27..4ba0aa99f5dae7f7 100644 +--- a/iconv/gconv_charset.c ++++ b/iconv/gconv_charset.c +@@ -216,3 +216,13 @@ out: + return ret; + } + libc_hidden_def (__gconv_create_spec) ++ ++ ++void ++__gconv_destroy_spec (struct gconv_spec *conv_spec) ++{ ++ free (conv_spec->fromcode); ++ free (conv_spec->tocode); ++ return; ++} ++libc_hidden_def (__gconv_destroy_spec) +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index b85d80313030b649..4b98073389bd8707 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -48,33 +48,6 @@ + #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" + + +-/* This function accepts the charset names of the source and destination of the +- conversion and populates *conv_spec with an equivalent conversion +- specification that may later be used by __gconv_open. The charset names +- might contain options in the form of suffixes that alter the conversion, +- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring +- and truncating any suffix options in fromcode, and processing and truncating +- any suffix options in tocode. Supported suffix options ("TRANSLIT" or +- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec +- to be set to true. Unrecognized suffix options are silently discarded. If +- the function succeeds, it returns conv_spec back to the caller. It returns +- NULL upon failure. 
*/ +-struct gconv_spec * +-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, +- const char *tocode); +-libc_hidden_proto (__gconv_create_spec) +- +- +-/* This function frees all heap memory allocated by __gconv_create_spec. */ +-static void __attribute__ ((unused)) +-gconv_destroy_spec (struct gconv_spec *conv_spec) +-{ +- free (conv_spec->fromcode); +- free (conv_spec->tocode); +- return; +-} +- +- + /* This function copies in-order, characters from the source 's' that are + either alpha-numeric or one in one of these: "_-.,:/" - into the destination + 'wp' while dropping all other characters. In the process, it converts all +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 4748e9b1fa3b5426..8067a341b0903e1b 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -170,6 +170,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, + __gconv_t *handle, int flags); + libc_hidden_proto (__gconv_open) + ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. */ ++extern struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode); ++libc_hidden_proto (__gconv_create_spec) ++ ++/* This function frees all heap memory allocated by __gconv_create_spec. 
*/ ++extern void ++__gconv_destroy_spec (struct gconv_spec *conv_spec); ++libc_hidden_proto (__gconv_destroy_spec) ++ + /* Free resources associated with transformation descriptor CD. */ + extern int __gconv_close (__gconv_t cd) + attribute_hidden; +diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c +index 59d1ef4f07ed1022..46da33bca6c24af0 100644 +--- a/iconv/iconv_open.c ++++ b/iconv/iconv_open.c +@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) + + int res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 552efac81660e82a..e26e9d02ca4121a7 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -184,7 +184,7 @@ main (int argc, char *argv[]) + /* Let's see whether we have these coded character sets. */ + res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (res != __GCONV_OK) + { +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index ed48fc8d3e96c7ba..7ebe67b4ac2113e9 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -1121,15 +1121,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, + + # ifdef _LIBC + +- struct gconv_spec conv_spec +- = { .fromcode = norm_add_slashes (charset, ""), +- .tocode = norm_add_slashes (outcharset, ""), +- /* We always want to use transliteration. */ +- .translit = true, +- .ignore = false +- }; ++ struct gconv_spec conv_spec; ++ ++ __gconv_create_spec (&conv_spec, charset, outcharset); ++ ++ /* We always want to use transliteration. 
*/ ++ conv_spec.translit = true; ++ + int r = __gconv_open (&conv_spec, &convd->conv, + GCONV_AVOID_NOCONV); ++ ++ __gconv_destroy_spec (&conv_spec); ++ + if (__builtin_expect (r != __GCONV_OK, 0)) + { + /* If the output encoding is the same there is +diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c +index e71382aeeeca477b..52e4aaa6ffd3afdb 100644 +--- a/intl/tst-codeset.c ++++ b/intl/tst-codeset.c +@@ -22,13 +22,11 @@ + #include + #include + #include ++#include + + static int + do_test (void) + { +- char *s; +- int result = 0; +- + unsetenv ("LANGUAGE"); + unsetenv ("OUTPUT_CHARSET"); + setlocale (LC_ALL, "de_DE.ISO-8859-1"); +@@ -36,25 +34,21 @@ do_test (void) + bindtextdomain ("codeset", OBJPFX "domaindir"); + + /* Here we expect output in ISO-8859-1. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\344se")) +- { +- printf ("call 1 returned: %s\n", s); +- result = 1; +- } ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + ++ /* Here we expect output in UTF-8. */ + bind_textdomain_codeset ("codeset", "UTF-8"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); + +- /* Here we expect output in UTF-8. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\303\244se")) +- { +- printf ("call 2 returned: %s\n", s); +- result = 1; +- } +- +- return result; ++ /* `a with umlaut' is transliterated to `ae'. */ ++ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ /* Transliteration also works by default even if not set. 
*/ ++ bind_textdomain_codeset ("codeset", "ASCII"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ return 0; + } + +-#define TEST_FUNCTION do_test () +-#include "../test-skeleton.c" ++#include diff --git a/SOURCES/glibc-rh1704868-3.patch b/SOURCES/glibc-rh1704868-3.patch new file mode 100644 index 0000000..c5b79c2 --- /dev/null +++ b/SOURCES/glibc-rh1704868-3.patch @@ -0,0 +1,77 @@ +commit 9a99c682144bdbd40792ebf822fe9264e0376fb5 +Author: Arjun Shankar +Date: Wed Nov 4 12:19:38 2020 +0100 + + iconv: Accept redundant shift sequences in IBM1364 [BZ #26224] + + The IBM1364, IBM1371, IBM1388, IBM1390 and IBM1399 character sets + share converter logic (iconvdata/ibm1364.c) which would reject + redundant shift sequences when processing input in these character + sets. This led to a hang in the iconv program (CVE-2020-27618). + + This commit adjusts the converter to ignore redundant shift sequences + and adds test cases for iconv_prog hangs that would be triggered upon + their rejection. This brings the implementation in line with other + converters that also ignore redundant shift sequences (e.g. IBM930 + etc., fixed in commit 692de4b3960d). 
+ + Reviewed-by: Carlos O'Donell + +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +index 8298136b7f45d855..d8db7b335c1fcca2 100644 +--- a/iconv/tst-iconv_prog.sh ++++ b/iconv/tst-iconv_prog.sh +@@ -102,12 +102,16 @@ hangarray=( + "\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" + "\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" + "\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" +-# These are known hangs that are yet to be fixed: +-# "\x00\x0f;-c;IBM1364;UTF-8" +-# "\x00\x0f;-c;IBM1371;UTF-8" +-# "\x00\x0f;-c;IBM1388;UTF-8" +-# "\x00\x0f;-c;IBM1390;UTF-8" +-# "\x00\x0f;-c;IBM1399;UTF-8" ++"\x00\x0f;-c;IBM1364;UTF-8" ++"\x0e\x0e;-c;IBM1364;UTF-8" ++"\x00\x0f;-c;IBM1371;UTF-8" ++"\x0e\x0e;-c;IBM1371;UTF-8" ++"\x00\x0f;-c;IBM1388;UTF-8" ++"\x0e\x0e;-c;IBM1388;UTF-8" ++"\x00\x0f;-c;IBM1390;UTF-8" ++"\x0e\x0e;-c;IBM1390;UTF-8" ++"\x00\x0f;-c;IBM1399;UTF-8" ++"\x0e\x0e;-c;IBM1399;UTF-8" + "\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" + "\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" + "\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" +diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 517fe60813be0472..ecc3f8ddddbdbc8c 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -158,24 +158,14 @@ enum + \ + if (__builtin_expect (ch, 0) == SO) \ + { \ +- /* Shift OUT, change to DBCS converter. */ \ +- if (curcs == db) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- break; \ +- } \ ++ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ + curcs = db; \ + ++inptr; \ + continue; \ + } \ + if (__builtin_expect (ch, 0) == SI) \ + { \ +- /* Shift IN, change to SBCS converter. */ \ +- if (curcs == sb) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- break; \ +- } \ ++ /* Shift IN, change to SBCS converter (redundant escape okay). 
*/ \ + curcs = sb; \ + ++inptr; \ + continue; \ diff --git a/SOURCES/glibc-rh1704868-4.patch b/SOURCES/glibc-rh1704868-4.patch new file mode 100644 index 0000000..7bfb219 --- /dev/null +++ b/SOURCES/glibc-rh1704868-4.patch @@ -0,0 +1,66 @@ +commit cce35a50c1de0cec5cd1f6c18979ff6ee3ea1dd1 +Author: Arjun Shankar +Date: Mon Nov 11 14:57:23 2019 +0100 + + support: Add xsetlocale function + +diff --git a/support/Makefile b/support/Makefile +index 37d5dcc92a5c6dee..6afaa6836c944398 100644 +--- a/support/Makefile ++++ b/support/Makefile +@@ -148,6 +148,7 @@ libsupport-routines = \ + xrealloc \ + xrecvfrom \ + xsendto \ ++ xsetlocale \ + xsetsockopt \ + xsigaction \ + xsignal \ +diff --git a/support/support.h b/support/support.h +index 61a10c34982134ff..97d142e9b6f68188 100644 +--- a/support/support.h ++++ b/support/support.h +@@ -91,6 +91,7 @@ char *xasprintf (const char *format, ...) + __attribute__ ((format (printf, 1, 2), malloc)); + char *xstrdup (const char *); + char *xstrndup (const char *, size_t); ++char *xsetlocale (int category, const char *locale); + + /* These point to the TOP of the source/build tree, not your (or + support's) subdirectory. */ +diff --git a/support/xsetlocale.c b/support/xsetlocale.c +new file mode 100644 +index 0000000000000000..063ed4b0d63af884 +--- /dev/null ++++ b/support/xsetlocale.c +@@ -0,0 +1,30 @@ ++/* setlocale with error checking. ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++ ++char * ++xsetlocale (int category, const char *locale) ++{ ++ char *p = setlocale (category, locale); ++ if (p == NULL) ++ FAIL_EXIT1 ("error: setlocale (%d, \"%s\")\n", category, locale); ++ return p; ++} diff --git a/SOURCES/glibc-rh1704868-5.patch b/SOURCES/glibc-rh1704868-5.patch new file mode 100644 index 0000000..1ffdb97 --- /dev/null +++ b/SOURCES/glibc-rh1704868-5.patch @@ -0,0 +1,35 @@ +The patch (glibc-rh1704868-1.patch) for commit 91927b7c7643 +(Rewrite iconv option parsing) contains a test that depends on +commit 513aaa0d782f (Add Transliterations for Unicode Misc. +Mathematical Symbols-A/B), which is not applied in RHEL-8. This +patch edits the test so as not to depend on the unapplied patch +and its additional transliterations. + +diff --git a/iconv/tst-iconv-opt.c b/iconv/tst-iconv-opt.c +index 669d812a6a9b8749..21e6d887501450a7 100644 +--- a/iconv/tst-iconv-opt.c ++++ b/iconv/tst-iconv-opt.c +@@ -82,18 +82,18 @@ char u2a_ignore[] = "UTF-8 text with couple f non-ASCII characters"; + + /* 3. Invalid UTF-8 input and some corresponding expected outputs. \xff is + invalid UTF-8. It's followed by some valid but non-ASCII UTF-8. */ +-char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7"; ++char iutf8[] = "Invalid UTF-8 \xff\u00B7text\u00B7"; + char iu2a[] = "Invalid UTF-8 "; + char iu2a_ignore[] = "Invalid UTF-8 text"; +-char iu2a_both[] = "Invalid UTF-8 [|text|]"; ++char iu2a_both[] = "Invalid UTF-8 .text."; + + /* 4. Another invalid UTF-8 input and corresponding expected outputs. This time + the valid non-ASCII UTF-8 characters appear before the invalid \xff. 
*/ +-char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext"; ++char jutf8[] = "Invalid \u00B7UTF-8\u00B7 \xfftext"; + char ju2a[] = "Invalid "; +-char ju2a_translit[] = "Invalid [|UTF-8|] "; ++char ju2a_translit[] = "Invalid .UTF-8. "; + char ju2a_ignore[] = "Invalid UTF-8 text"; +-char ju2a_both[] = "Invalid [|UTF-8|] text"; ++char ju2a_both[] = "Invalid .UTF-8. text"; + + /* We also test option handling for character set names that have the form + "A/B". In this test, we test conversions "ISO-10646/UTF-8", and either diff --git a/SOURCES/glibc-rh1893662-1.patch b/SOURCES/glibc-rh1893662-1.patch new file mode 100644 index 0000000..f225f64 --- /dev/null +++ b/SOURCES/glibc-rh1893662-1.patch @@ -0,0 +1,47 @@ +commit 558251bd8785760ad40fcbfeaaee5d27fa5b0fe4 +Author: Szabolcs Nagy +Date: Thu Oct 22 17:55:01 2020 +0100 + + aarch64: Fix DT_AARCH64_VARIANT_PCS handling [BZ #26798] + + The variant PCS support was ineffective because in the common case + linkmap->l_mach.plt == 0 but then the symbol table flags were ignored + and normal lazy binding was used instead of resolving the relocs early. + (This was a misunderstanding about how GOT[1] is setup by the linker.) + + In practice this mainly affects SVE calls when the vector length is + more than 128 bits, then the top bits of the argument registers get + clobbered during lazy binding. + + Fixes bug 26798. + +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index b39eae4acf4086ee..3fd3c8a265d012b1 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -391,13 +391,6 @@ elf_machine_lazy_rel (struct link_map *map, + /* Check for unexpected PLT reloc type. */ + if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) + { +- if (map->l_mach.plt == 0) +- { +- /* Prelinking. */ +- *reloc_addr += l_addr; +- return; +- } +- + if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL)) + { + /* Check the symbol table for variant PCS symbols. 
*/ +@@ -421,7 +414,10 @@ elf_machine_lazy_rel (struct link_map *map, + } + } + +- *reloc_addr = map->l_mach.plt; ++ if (map->l_mach.plt == 0) ++ *reloc_addr += l_addr; ++ else ++ *reloc_addr = map->l_mach.plt; + } + else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) + { diff --git a/SOURCES/glibc-rh1893662-2.patch b/SOURCES/glibc-rh1893662-2.patch new file mode 100644 index 0000000..e421ade --- /dev/null +++ b/SOURCES/glibc-rh1893662-2.patch @@ -0,0 +1,349 @@ +commit e156dabc766d6f6f99ce9402999eae380a3ec1f2 +Author: Szabolcs Nagy +Date: Mon Oct 26 15:48:01 2020 +0000 + + aarch64: Add variant PCS lazy binding test [BZ #26798] + + This test fails without bug 26798 fixed because some integer registers + likely get clobbered by lazy binding and variant PCS only allows x16 + and x17 to be clobbered at call time. + + The test requires binutils 2.32.1 or newer for handling variant PCS + symbols. SVE registers are not covered by this test, to avoid the + complexity of handling multiple compile- and runtime feature support + cases. 
+ +(Trivial textual conflicts due to lack of PAC and BTI support) + +# Conflicts: +# sysdeps/aarch64/Makefile +# sysdeps/aarch64/configure +# sysdeps/aarch64/configure.ac + +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index 94baaf52dda4b801..3ec78fefc6dd5797 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -3,6 +3,13 @@ long-double-fcts = yes + ifeq ($(subdir),elf) + sysdep-dl-routines += tlsdesc dl-tlsdesc + gen-as-const-headers += dl-link.sym ++ ++ifeq (yes,$(aarch64-variant-pcs)) ++tests += tst-vpcs ++modules-names += tst-vpcs-mod ++LDFLAGS-tst-vpcs-mod.so = -Wl,-z,lazy ++$(objpfx)tst-vpcs: $(objpfx)tst-vpcs-mod.so ++endif + endif + + ifeq ($(subdir),csu) +diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure +index 5bd355a6917df365..f78a79338aba1e34 100644 +--- a/sysdeps/aarch64/configure ++++ b/sysdeps/aarch64/configure +@@ -172,3 +172,43 @@ else + config_vars="$config_vars + default-abi = lp64" + fi ++ ++# Check if binutils supports variant PCS symbols. ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for variant PCS support" >&5 ++$as_echo_n "checking for variant PCS support... " >&6; } ++if ${libc_cv_aarch64_variant_pcs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat > conftest.S <&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } \ ++ && { ac_try='$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ libc_cv_aarch64_variant_pcs=yes ++ fi ++ rm -rf conftest.* ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_variant_pcs" >&5 ++$as_echo "$libc_cv_aarch64_variant_pcs" >&6; } ++config_vars="$config_vars ++aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs" +diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac +index 7851dd4dac345b2a..7f13bfb93b60bfd7 100644 +--- a/sysdeps/aarch64/configure.ac ++++ b/sysdeps/aarch64/configure.ac +@@ -20,3 +20,25 @@ if test $libc_cv_aarch64_be = yes; then + else + LIBC_CONFIG_VAR([default-abi], [lp64]) + fi ++ ++# Check if binutils supports variant PCS symbols. ++AC_CACHE_CHECK([for variant PCS support], [libc_cv_aarch64_variant_pcs], [dnl ++ cat > conftest.S <. */ ++ ++ .variant_pcs vpcs_call ++ .global vpcs_call ++ .type vpcs_call, %function ++vpcs_call: ++ .cfi_startproc ++ hint 34 /* bti c. */ ++ ++ /* Save register state to *x0. */ ++ stp x0, x1, [x0] ++ stp x2, x3, [x0, 16] ++ stp x4, x5, [x0, 32] ++ stp x6, x7, [x0, 48] ++ stp x8, x9, [x0, 64] ++ stp x10, x11, [x0, 80] ++ stp x12, x13, [x0, 96] ++ stp x14, x15, [x0, 112] ++ stp x16, x17, [x0, 128] ++ stp x18, x19, [x0, 144] ++ stp x20, x21, [x0, 160] ++ stp x22, x23, [x0, 176] ++ stp x24, x25, [x0, 192] ++ stp x26, x27, [x0, 208] ++ stp x28, x29, [x0, 224] ++ mov x1, sp ++ stp x30, x1, [x0, 240] ++ stp q0, q1, [x0, 256] ++ stp q2, q3, [x0, 288] ++ stp q4, q5, [x0, 320] ++ stp q6, q7, [x0, 352] ++ stp q8, q9, [x0, 384] ++ stp q10, q11, [x0, 416] ++ stp q12, q13, [x0, 448] ++ stp q14, q15, [x0, 480] ++ stp q16, q17, [x0, 512] ++ stp q18, q19, [x0, 544] ++ stp q20, q21, [x0, 576] ++ stp q22, q23, [x0, 608] ++ stp q24, q25, [x0, 640] ++ stp q26, q27, [x0, 672] ++ stp q28, q29, [x0, 704] ++ stp q30, q31, [x0, 736] ++ ret ++ .cfi_endproc ++ .size vpcs_call, .-vpcs_call ++ ++ .global vpcs_call_regs ++ .type vpcs_call_regs, %function ++vpcs_call_regs: ++ .cfi_startproc ++ hint 34 /* 
bti c. */ ++ ++ stp x29, x30, [sp, -160]! ++ mov x29, sp ++ ++ /* Save callee-saved registers. */ ++ stp x19, x20, [sp, 16] ++ stp x21, x22, [sp, 32] ++ stp x23, x24, [sp, 48] ++ stp x25, x26, [sp, 64] ++ stp x27, x28, [sp, 80] ++ stp d8, d9, [sp, 96] ++ stp d10, d11, [sp, 112] ++ stp d12, d13, [sp, 128] ++ stp d14, d15, [sp, 144] ++ ++ /* Initialize most registers from *x1, and save x0, x1, x29, x30, ++ and sp (== x29), so *x1 contains the register state. */ ++ stp x0, x1, [x1] ++ str x29, [x1, 232] ++ ldp x2, x3, [x1, 16] ++ ldp x4, x5, [x1, 32] ++ ldp x6, x7, [x1, 48] ++ ldp x8, x9, [x1, 64] ++ ldp x10, x11, [x1, 80] ++ ldp x12, x13, [x1, 96] ++ ldp x14, x15, [x1, 112] ++ ldp x16, x17, [x1, 128] ++ ldp x18, x19, [x1, 144] ++ ldp x20, x21, [x1, 160] ++ ldp x22, x23, [x1, 176] ++ ldp x24, x25, [x1, 192] ++ ldp x26, x27, [x1, 208] ++ ldr x28, [x1, 224] ++ /* Skip x29, x30, sp. */ ++ ldp q0, q1, [x1, 256] ++ ldp q2, q3, [x1, 288] ++ ldp q4, q5, [x1, 320] ++ ldp q6, q7, [x1, 352] ++ ldp q8, q9, [x1, 384] ++ ldp q10, q11, [x1, 416] ++ ldp q12, q13, [x1, 448] ++ ldp q14, q15, [x1, 480] ++ ldp q16, q17, [x1, 512] ++ ldp q18, q19, [x1, 544] ++ ldp q20, q21, [x1, 576] ++ ldp q22, q23, [x1, 608] ++ ldp q24, q25, [x1, 640] ++ ldp q26, q27, [x1, 672] ++ ldp q28, q29, [x1, 704] ++ ldp q30, q31, [x1, 736] ++ ++ /* Emulate a BL using B, but save x30 before the branch. */ ++ adr x30, .L_return_addr ++ stp x30, x29, [x1, 240] ++ b vpcs_call ++.L_return_addr: ++ ++ /* Restore callee-saved registers. 
*/ ++ ldp x19, x20, [sp, 16] ++ ldp x21, x22, [sp, 32] ++ ldp x23, x24, [sp, 48] ++ ldp x25, x26, [sp, 64] ++ ldp x27, x28, [sp, 80] ++ ldp d8, d9, [sp, 96] ++ ldp d10, d11, [sp, 112] ++ ldp d12, d13, [sp, 128] ++ ldp d14, d15, [sp, 144] ++ ++ ldp x29, x30, [sp], 160 ++ ret ++ .cfi_endproc ++ .size vpcs_call_regs, .-vpcs_call_regs +diff --git a/sysdeps/aarch64/tst-vpcs.c b/sysdeps/aarch64/tst-vpcs.c +new file mode 100644 +index 0000000000000000..92a701eb7cdea8ac +--- /dev/null ++++ b/sysdeps/aarch64/tst-vpcs.c +@@ -0,0 +1,78 @@ ++/* Test that variant PCS calls don't clobber registers with lazy binding. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++struct regs ++{ ++ uint64_t x[32]; ++ union { ++ long double q[32]; ++ uint64_t u[64]; ++ } v; ++}; ++ ++/* Gives the registers in the caller and callee around a variant PCS call. ++ Most registers are initialized from BEFORE in the caller so they can ++ have values that likely show clobbers. Register state extensions such ++ as SVE is not covered here, only the base registers. 
*/ ++void vpcs_call_regs (struct regs *after, struct regs *before); ++ ++static int ++do_test (void) ++{ ++ struct regs before, after; ++ int err = 0; ++ ++ unsigned char *p = (unsigned char *)&before; ++ for (int i = 0; i < sizeof before; i++) ++ p[i] = i & 0xff; ++ ++ vpcs_call_regs (&after, &before); ++ ++ for (int i = 0; i < 32; i++) ++ if (before.x[i] != after.x[i]) ++ { ++ if (i == 16 || i == 17) ++ /* Variant PCS allows clobbering x16 and x17. */ ++ continue; ++ err++; ++ printf ("x%d: before: 0x%016llx after: 0x%016llx\n", ++ i, ++ (unsigned long long)before.x[i], ++ (unsigned long long)after.x[i]); ++ } ++ for (int i = 0; i < 64; i++) ++ if (before.v.u[i] != after.v.u[i]) ++ { ++ err++; ++ printf ("v%d: before: 0x%016llx %016llx after: 0x%016llx %016llx\n", ++ i/2, ++ (unsigned long long)before.v.u[2*(i/2)+1], ++ (unsigned long long)before.v.u[2*(i/2)], ++ (unsigned long long)after.v.u[2*(i/2)+1], ++ (unsigned long long)after.v.u[2*(i/2)]); ++ } ++ if (err) ++ FAIL_EXIT1 ("The variant PCS call clobbered %d registers.\n", err); ++ return 0; ++} ++ ++#include diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index 7f8c632..c5c8610 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.28 %define glibcversion 2.28 -%define glibcrelease 138%{?dist} +%define glibcrelease 140%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -523,6 +523,13 @@ Patch386: glibc-rh1868106-5.patch Patch387: glibc-rh1868106-6.patch Patch388: glibc-rh1856398.patch Patch389: glibc-rh1880670-2.patch +Patch390: glibc-rh1704868-1.patch +Patch391: glibc-rh1704868-2.patch +Patch392: glibc-rh1704868-3.patch +Patch393: glibc-rh1704868-4.patch +Patch394: glibc-rh1704868-5.patch +Patch395: glibc-rh1893662-1.patch +Patch396: glibc-rh1893662-2.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2434,6 +2441,13 @@ fi 
%files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared %changelog +* Tue Nov 10 2020 Carlos O'Donell - 2.28-140 +- Fix calling lazily-bound SVE-using functions on AArch64 (#1893662) + +* Tue Nov 10 2020 Arjun Shankar - 2.28-139 +- CVE-2016-10228, CVE-2020-27618: Fix infinite loops in iconv (#1704868, + #1894669) + * Fri Nov 6 2020 Florian Weimer - 2.28-138 - Avoid comments after %%endif in the RPM spec file (#1894340)