d5474c
commit 7d4ec75e111291851620c6aa2c4460647b7fd50d
d5474c
Author: Arjun Shankar <arjun@redhat.com>
d5474c
Date:   Fri Sep 25 14:47:06 2020 +0200
d5474c
d5474c
    intl: Handle translation output codesets with suffixes [BZ #26383]
d5474c
    
d5474c
    Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not
d5474c
    handle cases where the output codeset for translations (via the `gettext'
d5474c
    family of functions) might have a caller-specified encoding suffix such as
d5474c
    TRANSLIT or IGNORE.  This led to a regression where translations did not
d5474c
    work when the codeset had a suffix.
d5474c
    
d5474c
    This commit fixes the above issue by parsing any suffixes passed to
d5474c
    __dcigettext and adds two new test-cases to intl/tst-codeset.c to
d5474c
    verify correct behaviour.  The iconv-internal function __gconv_create_spec
d5474c
    and the static iconv-internal function gconv_destroy_spec (now renamed to
d5474c
    __gconv_destroy_spec) are now visible internally within glibc and used in
d5474c
    intl/dcigettext.c.
d5474c
d5474c
diff --git a/iconv/Versions b/iconv/Versions
d5474c
index 8a5f4cf780b18925..d51af52fa34b8793 100644
d5474c
--- a/iconv/Versions
d5474c
+++ b/iconv/Versions
d5474c
@@ -6,7 +6,9 @@ libc {
d5474c
   GLIBC_PRIVATE {
d5474c
     # functions shared with iconv program
d5474c
     __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
d5474c
-    __gconv_open; __gconv_create_spec;
d5474c
+
d5474c
+    # functions used elsewhere in glibc
d5474c
+    __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
d5474c
 
d5474c
     # function used by the gconv modules
d5474c
     __gconv_transliterate;
d5474c
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
d5474c
index 6ccd0773ccb6cd27..4ba0aa99f5dae7f7 100644
d5474c
--- a/iconv/gconv_charset.c
d5474c
+++ b/iconv/gconv_charset.c
d5474c
@@ -216,3 +216,13 @@ out:
d5474c
   return ret;
d5474c
 }
d5474c
 libc_hidden_def (__gconv_create_spec)
d5474c
+
d5474c
+
d5474c
+void
d5474c
+__gconv_destroy_spec (struct gconv_spec *conv_spec)
d5474c
+{
d5474c
+  free (conv_spec->fromcode);
d5474c
+  free (conv_spec->tocode);
d5474c
+  return;
d5474c
+}
d5474c
+libc_hidden_def (__gconv_destroy_spec)
d5474c
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
d5474c
index b85d80313030b649..4b98073389bd8707 100644
d5474c
--- a/iconv/gconv_charset.h
d5474c
+++ b/iconv/gconv_charset.h
d5474c
@@ -48,33 +48,6 @@
d5474c
 #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
d5474c
 
d5474c
 
d5474c
-/* This function accepts the charset names of the source and destination of the
d5474c
-   conversion and populates *conv_spec with an equivalent conversion
d5474c
-   specification that may later be used by __gconv_open.  The charset names
d5474c
-   might contain options in the form of suffixes that alter the conversion,
d5474c
-   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
d5474c
-   and truncating any suffix options in fromcode, and processing and truncating
d5474c
-   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
d5474c
-   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
d5474c
-   to be set to true.  Unrecognized suffix options are silently discarded.  If
d5474c
-   the function succeeds, it returns conv_spec back to the caller.  It returns
d5474c
-   NULL upon failure.  */
d5474c
-struct gconv_spec *
d5474c
-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
d5474c
-                     const char *tocode);
d5474c
-libc_hidden_proto (__gconv_create_spec)
d5474c
-
d5474c
-
d5474c
-/* This function frees all heap memory allocated by __gconv_create_spec.  */
d5474c
-static void __attribute__ ((unused))
d5474c
-gconv_destroy_spec (struct gconv_spec *conv_spec)
d5474c
-{
d5474c
-  free (conv_spec->fromcode);
d5474c
-  free (conv_spec->tocode);
d5474c
-  return;
d5474c
-}
d5474c
-
d5474c
-
d5474c
 /* This function copies in-order, characters from the source 's' that are
d5474c
    either alpha-numeric or one in one of these: "_-.,:/" - into the destination
d5474c
    'wp' while dropping all other characters.  In the process, it converts all
d5474c
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
d5474c
index 4748e9b1fa3b5426..8067a341b0903e1b 100644
d5474c
--- a/iconv/gconv_int.h
d5474c
+++ b/iconv/gconv_int.h
d5474c
@@ -170,6 +170,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
d5474c
                          __gconv_t *handle, int flags);
d5474c
 libc_hidden_proto (__gconv_open)
d5474c
 
d5474c
+/* This function accepts the charset names of the source and destination of the
d5474c
+   conversion and populates *conv_spec with an equivalent conversion
d5474c
+   specification that may later be used by __gconv_open.  The charset names
d5474c
+   might contain options in the form of suffixes that alter the conversion,
d5474c
+   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
d5474c
+   and truncating any suffix options in fromcode, and processing and truncating
d5474c
+   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
d5474c
+   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
d5474c
+   to be set to true.  Unrecognized suffix options are silently discarded.  If
d5474c
+   the function succeeds, it returns conv_spec back to the caller.  It returns
d5474c
+   NULL upon failure.  */
d5474c
+extern struct gconv_spec *
d5474c
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
d5474c
+                     const char *tocode);
d5474c
+libc_hidden_proto (__gconv_create_spec)
d5474c
+
d5474c
+/* This function frees all heap memory allocated by __gconv_create_spec.  */
d5474c
+extern void
d5474c
+__gconv_destroy_spec (struct gconv_spec *conv_spec);
d5474c
+libc_hidden_proto (__gconv_destroy_spec)
d5474c
+
d5474c
 /* Free resources associated with transformation descriptor CD.  */
d5474c
 extern int __gconv_close (__gconv_t cd)
d5474c
      attribute_hidden;
d5474c
diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
d5474c
index 59d1ef4f07ed1022..46da33bca6c24af0 100644
d5474c
--- a/iconv/iconv_open.c
d5474c
+++ b/iconv/iconv_open.c
d5474c
@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
d5474c
 
d5474c
   int res = __gconv_open (&conv_spec, &cd, 0);
d5474c
 
d5474c
-  gconv_destroy_spec (&conv_spec);
d5474c
+  __gconv_destroy_spec (&conv_spec);
d5474c
 
d5474c
   if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
d5474c
     {
d5474c
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
d5474c
index 552efac81660e82a..e26e9d02ca4121a7 100644
d5474c
--- a/iconv/iconv_prog.c
d5474c
+++ b/iconv/iconv_prog.c
d5474c
@@ -184,7 +184,7 @@ main (int argc, char *argv[])
d5474c
       /* Let's see whether we have these coded character sets.  */
d5474c
       res = __gconv_open (&conv_spec, &cd, 0);
d5474c
 
d5474c
-      gconv_destroy_spec (&conv_spec);
d5474c
+      __gconv_destroy_spec (&conv_spec);
d5474c
 
d5474c
       if (res != __GCONV_OK)
d5474c
 	{
d5474c
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
d5474c
index ed48fc8d3e96c7ba..7ebe67b4ac2113e9 100644
d5474c
--- a/intl/dcigettext.c
d5474c
+++ b/intl/dcigettext.c
d5474c
@@ -1121,15 +1121,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
d5474c
 
d5474c
 # ifdef _LIBC
d5474c
 
d5474c
-		      struct gconv_spec conv_spec
d5474c
-		        = { .fromcode = norm_add_slashes (charset, ""),
d5474c
-		            .tocode = norm_add_slashes (outcharset, ""),
d5474c
-		            /* We always want to use transliteration.  */
d5474c
-		            .translit = true,
d5474c
-		            .ignore = false
d5474c
-		          };
d5474c
+		      struct gconv_spec conv_spec;
d5474c
+
d5474c
+                      __gconv_create_spec (&conv_spec, charset, outcharset);
d5474c
+
d5474c
+		      /* We always want to use transliteration.  */
d5474c
+                      conv_spec.translit = true;
d5474c
+
d5474c
 		      int r = __gconv_open (&conv_spec, &convd->conv,
d5474c
 		                            GCONV_AVOID_NOCONV);
d5474c
+
d5474c
+                      __gconv_destroy_spec (&conv_spec);
d5474c
+
d5474c
 		      if (__builtin_expect (r != __GCONV_OK, 0))
d5474c
 			{
d5474c
 			  /* If the output encoding is the same there is
d5474c
diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
d5474c
index e71382aeeeca477b..52e4aaa6ffd3afdb 100644
d5474c
--- a/intl/tst-codeset.c
d5474c
+++ b/intl/tst-codeset.c
d5474c
@@ -22,13 +22,11 @@
d5474c
 #include <stdio.h>
d5474c
 #include <stdlib.h>
d5474c
 #include <string.h>
d5474c
+#include <support/check.h>
d5474c
 
d5474c
 static int
d5474c
 do_test (void)
d5474c
 {
d5474c
-  char *s;
d5474c
-  int result = 0;
d5474c
-
d5474c
   unsetenv ("LANGUAGE");
d5474c
   unsetenv ("OUTPUT_CHARSET");
d5474c
   setlocale (LC_ALL, "de_DE.ISO-8859-1");
d5474c
@@ -36,25 +34,21 @@ do_test (void)
d5474c
   bindtextdomain ("codeset", OBJPFX "domaindir");
d5474c
 
d5474c
   /* Here we expect output in ISO-8859-1.  */
d5474c
-  s = gettext ("cheese");
d5474c
-  if (strcmp (s, "K\344se"))
d5474c
-    {
d5474c
-      printf ("call 1 returned: %s\n", s);
d5474c
-      result = 1;
d5474c
-    }
d5474c
+  TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
d5474c
 
d5474c
+  /* Here we expect output in UTF-8.  */
d5474c
   bind_textdomain_codeset ("codeset", "UTF-8");
d5474c
+  TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
d5474c
 
d5474c
-  /* Here we expect output in UTF-8.  */
d5474c
-  s = gettext ("cheese");
d5474c
-  if (strcmp (s, "K\303\244se"))
d5474c
-    {
d5474c
-      printf ("call 2 returned: %s\n", s);
d5474c
-      result = 1;
d5474c
-    }
d5474c
-
d5474c
-  return result;
d5474c
+  /* `a with umlaut' is transliterated to `ae'.  */
d5474c
+  bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
d5474c
+  TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
d5474c
+
d5474c
+  /* Transliteration also works by default even if not set.  */
d5474c
+  bind_textdomain_codeset ("codeset", "ASCII");
d5474c
+  TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
d5474c
+
d5474c
+  return 0;
d5474c
 }
d5474c
 
d5474c
-#define TEST_FUNCTION do_test ()
d5474c
-#include "../test-skeleton.c"
d5474c
+#include <support/test-driver.c>