| commit 7d4ec75e111291851620c6aa2c4460647b7fd50d |
| Author: Arjun Shankar <arjun@redhat.com> |
| Date: Fri Sep 25 14:47:06 2020 +0200 |
| |
| intl: Handle translation output codesets with suffixes [BZ #26383] |
| |
| Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not |
| handle cases where the output codeset for translations (via the `gettext' |
| family of functions) might have a caller-specified encoding suffix such as |
| TRANSLIT or IGNORE. This led to a regression where translations did not |
| work when the codeset had a suffix. |
| |
| This commit fixes the above issue by parsing any suffixes passed to |
| __dcigettext and adds two new test cases to intl/tst-codeset.c to |
| verify correct behaviour. The iconv-internal functions __gconv_create_spec |
| and gconv_destroy_spec (the latter formerly static, now renamed |
| __gconv_destroy_spec) are now visible internally within glibc and used in |
| intl/dcigettext.c. |
| |
| diff --git a/iconv/Versions b/iconv/Versions |
| index 8a5f4cf780b18925..d51af52fa34b8793 100644 |
| |
| |
| @@ -6,7 +6,9 @@ libc { |
| GLIBC_PRIVATE { |
| # functions shared with iconv program |
| __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; |
| - __gconv_open; __gconv_create_spec; |
| + |
| + # functions used elsewhere in glibc |
| + __gconv_open; __gconv_create_spec; __gconv_destroy_spec; |
| |
| # function used by the gconv modules |
| __gconv_transliterate; |
| diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c |
| index 6ccd0773ccb6cd27..4ba0aa99f5dae7f7 100644 |
| |
| |
| @@ -216,3 +216,13 @@ out: |
| return ret; |
| } |
| libc_hidden_def (__gconv_create_spec) |
| + |
| + |
| +void |
| +__gconv_destroy_spec (struct gconv_spec *conv_spec) |
| +{ |
| + free (conv_spec->fromcode); |
| + free (conv_spec->tocode); |
| + return; |
| +} |
| +libc_hidden_def (__gconv_destroy_spec) |
| diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h |
| index b85d80313030b649..4b98073389bd8707 100644 |
| |
| |
| @@ -48,33 +48,6 @@ |
| #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" |
| |
| |
| -/* This function accepts the charset names of the source and destination of the |
| - conversion and populates *conv_spec with an equivalent conversion |
| - specification that may later be used by __gconv_open. The charset names |
| - might contain options in the form of suffixes that alter the conversion, |
| - e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring |
| - and truncating any suffix options in fromcode, and processing and truncating |
| - any suffix options in tocode. Supported suffix options ("TRANSLIT" or |
| - "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec |
| - to be set to true. Unrecognized suffix options are silently discarded. If |
| - the function succeeds, it returns conv_spec back to the caller. It returns |
| - NULL upon failure. */ |
| -struct gconv_spec * |
| -__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, |
| - const char *tocode); |
| -libc_hidden_proto (__gconv_create_spec) |
| - |
| - |
| -/* This function frees all heap memory allocated by __gconv_create_spec. */ |
| -static void __attribute__ ((unused)) |
| -gconv_destroy_spec (struct gconv_spec *conv_spec) |
| -{ |
| - free (conv_spec->fromcode); |
| - free (conv_spec->tocode); |
| - return; |
| -} |
| - |
| - |
| /* This function copies in-order, characters from the source 's' that are |
| either alpha-numeric or one in one of these: "_-.,:/" - into the destination |
| 'wp' while dropping all other characters. In the process, it converts all |
| diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h |
| index 4748e9b1fa3b5426..8067a341b0903e1b 100644 |
| |
| |
| @@ -170,6 +170,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, |
| __gconv_t *handle, int flags); |
| libc_hidden_proto (__gconv_open) |
| |
| +/* This function accepts the charset names of the source and destination of the |
| + conversion and populates *conv_spec with an equivalent conversion |
| + specification that may later be used by __gconv_open. The charset names |
| + might contain options in the form of suffixes that alter the conversion, |
| + e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring |
| + and truncating any suffix options in fromcode, and processing and truncating |
| + any suffix options in tocode. Supported suffix options ("TRANSLIT" or |
| + "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec |
| + to be set to true. Unrecognized suffix options are silently discarded. If |
| + the function succeeds, it returns conv_spec back to the caller. It returns |
| + NULL upon failure. */ |
| +extern struct gconv_spec * |
| +__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, |
| + const char *tocode); |
| +libc_hidden_proto (__gconv_create_spec) |
| + |
| +/* This function frees all heap memory allocated by __gconv_create_spec. */ |
| +extern void |
| +__gconv_destroy_spec (struct gconv_spec *conv_spec); |
| +libc_hidden_proto (__gconv_destroy_spec) |
| + |
| /* Free resources associated with transformation descriptor CD. */ |
| extern int __gconv_close (__gconv_t cd) |
| attribute_hidden; |
| diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c |
| index 59d1ef4f07ed1022..46da33bca6c24af0 100644 |
| |
| |
| @@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) |
| |
| int res = __gconv_open (&conv_spec, &cd, 0); |
| |
| - gconv_destroy_spec (&conv_spec); |
| + __gconv_destroy_spec (&conv_spec); |
| |
| if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) |
| { |
| diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c |
| index 552efac81660e82a..e26e9d02ca4121a7 100644 |
| |
| |
| @@ -184,7 +184,7 @@ main (int argc, char *argv[]) |
| /* Let's see whether we have these coded character sets. */ |
| res = __gconv_open (&conv_spec, &cd, 0); |
| |
| - gconv_destroy_spec (&conv_spec); |
| + __gconv_destroy_spec (&conv_spec); |
| |
| if (res != __GCONV_OK) |
| { |
| diff --git a/intl/dcigettext.c b/intl/dcigettext.c |
| index ed48fc8d3e96c7ba..7ebe67b4ac2113e9 100644 |
| |
| |
| @@ -1121,15 +1121,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, |
| |
| # ifdef _LIBC |
| |
| - struct gconv_spec conv_spec |
| - = { .fromcode = norm_add_slashes (charset, ""), |
| - .tocode = norm_add_slashes (outcharset, ""), |
| - /* We always want to use transliteration. */ |
| - .translit = true, |
| - .ignore = false |
| - }; |
| + struct gconv_spec conv_spec; |
| + |
| + __gconv_create_spec (&conv_spec, charset, outcharset); |
| + |
| + /* We always want to use transliteration. */ |
| + conv_spec.translit = true; |
| + |
| int r = __gconv_open (&conv_spec, &convd->conv, |
| GCONV_AVOID_NOCONV); |
| + |
| + __gconv_destroy_spec (&conv_spec); |
| + |
| if (__builtin_expect (r != __GCONV_OK, 0)) |
| { |
| /* If the output encoding is the same there is |
| diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c |
| index e71382aeeeca477b..52e4aaa6ffd3afdb 100644 |
| |
| |
| @@ -22,13 +22,11 @@ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| +#include <support/check.h> |
| |
| static int |
| do_test (void) |
| { |
| - char *s; |
| - int result = 0; |
| - |
| unsetenv ("LANGUAGE"); |
| unsetenv ("OUTPUT_CHARSET"); |
| setlocale (LC_ALL, "de_DE.ISO-8859-1"); |
| @@ -36,25 +34,21 @@ do_test (void) |
| bindtextdomain ("codeset", OBJPFX "domaindir"); |
| |
| /* Here we expect output in ISO-8859-1. */ |
| - s = gettext ("cheese"); |
| - if (strcmp (s, "K\344se")) |
| - { |
| - printf ("call 1 returned: %s\n", s); |
| - result = 1; |
| - } |
| + TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); |
| |
| + /* Here we expect output in UTF-8. */ |
| bind_textdomain_codeset ("codeset", "UTF-8"); |
| + TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); |
| |
| - /* Here we expect output in UTF-8. */ |
| - s = gettext ("cheese"); |
| - if (strcmp (s, "K\303\244se")) |
| - { |
| - printf ("call 2 returned: %s\n", s); |
| - result = 1; |
| - } |
| - |
| - return result; |
| + /* `a with umlaut' is transliterated to `ae'. */ |
| + bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); |
| + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); |
| + |
| + /* Transliteration also works by default even if not set. */ |
| + bind_textdomain_codeset ("codeset", "ASCII"); |
| + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); |
| + |
| + return 0; |
| } |
| |
| -#define TEST_FUNCTION do_test () |
| -#include "../test-skeleton.c" |
| +#include <support/test-driver.c> |