94084c
commit 2ab8b74567dc0a9a3c98696e6444881997dd6c49
94084c
Author: Carlos O'Donell <carlos@redhat.com>
94084c
Date:   Thu Feb 3 16:51:59 2022 -0500
94084c
94084c
    localedef: Update LC_MONETARY handling (Bug 28845)
94084c
    
94084c
    ISO C17, POSIX Issue 7, and ISO 30112 all allow the char*
94084c
    types to be empty strings i.e. "", integer or char values to
94084c
    be -1 or CHAR_MAX respectively, with the exception of
94084c
    decimal_point which must be non-empty in ISO C. Note that
94084c
    the defaults for mon_grouping vary, but are functionally
94084c
    equivalent e.g. "\177" (no further grouping required) vs.
94084c
    "" (no grouping defined for all groups).
94084c
    
94084c
    We include a broad comment talking about harmonizing ISO C,
94084c
    POSIX, ISO 30112, and the default C/POSIX locale for glibc.
94084c
    
94084c
    We reorder all settings based on locale/categories.def order.
94084c
    
94084c
    We soften all missing definitions from errors to warnings when
94084c
    defaults exist.
94084c
    
94084c
    Given that ISO C, POSIX and ISO 30112 allow the empty string
94084c
    we change LC_MONETARY handling of mon_decimal_point to allow
94084c
    the empty string.  If mon_decimal_point is not defined at all
94084c
    then we pick the existing legacy glibc default value of
94084c
    <U002E> i.e. ".".
94084c
    
94084c
    We also set the default for mon_thousands_sep_wc at the
94084c
    same time as mon_thousands_sep, but this is not a change in
94084c
    behaviour, it is always either a matching value or L'\0',
94084c
    but if in the future we change the default to a non-empty
94084c
    string we would need to update both at the same time.
94084c
    
94084c
    Tested on x86_64 and i686 without regressions.
94084c
    Tested with install-locale-archive target.
94084c
    Tested with install-locale-files target.
94084c
    
94084c
    Reviewed-by: DJ Delorie <dj@redhat.com>
94084c
94084c
diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c
94084c
index 9b9a55bb4766dfcf..17a972e1a7516aa5 100644
94084c
--- a/locale/programs/ld-monetary.c
94084c
+++ b/locale/programs/ld-monetary.c
94084c
@@ -197,21 +197,105 @@ No definition for %s category found"), "LC_MONETARY");
94084c
 	}
94084c
     }
94084c
 
94084c
+  /* Generally speaking there are 3 standards that define the default,
94084c
+     warning, and error behaviour of LC_MONETARY.  They are ISO/IEC TR 30112,
94084c
+     ISO/IEC 9899:2018 (ISO C17), and POSIX.1-2017.  Within 30112 we have the
94084c
+     definition of a standard i18n FDCC-set, which for LC_MONETARY has the
94084c
+     following default values:
94084c
+	int_curr_symbol		""
94084c
+	currency_symbol		""
94084c
+	mon_decimal_point	"<U002C>" i.e. ","
94084c
+	mon_thousands_sep	""
94084c
+	mon_grouping		"\177" i.e. CHAR_MAX
94084c
+	positive_sign		""
94084c
+	negative_sign		"<U002E>" i.e. "."
94084c
+	int_frac_digits		-1
94084c
+	frac_digits		-1
94084c
+	p_cs_precedes		-1
94084c
+	p_sep_by_space		-1
94084c
+	n_cs_precedes		-1
94084c
+	n_sep_by_space		-1
94084c
+	p_sign_posn		-1
94084c
+	n_sign_posn		-1
94084c
+    Under 30112 a keyword that is not provided implies an empty string ""
94084c
+    for string values or a -1 for integer values, and indicates the value
94084c
+    is unspecified with no default implied.  No errors are considered.
94084c
+    The exception is mon_grouping which is a string with a terminating
94084c
+    CHAR_MAX.
94084c
+    For POSIX Issue 7 we have:
94084c
+    https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html
94084c
+    and again values not provided default to "" or -1, and indicate the value
94084c
+    is not available to the locale.  The exception is mon_grouping which is
94084c
+    a string with a terminating CHAR_MAX.  For the POSIX locale the values of
94084c
+    LC_MONETARY should be:
94084c
+	int_curr_symbol		""
94084c
+	currency_symbol		""
94084c
+	mon_decimal_point	""
94084c
+	mon_thousands_sep	""
94084c
+	mon_grouping		"\177" i.e. CHAR_MAX
94084c
+	positive_sign		""
94084c
+	negative_sign		""
94084c
+	int_frac_digits		-1
94084c
+	frac_digits		-1
94084c
+	p_cs_precedes		-1
94084c
+	p_sep_by_space		-1
94084c
+	n_cs_precedes		-1
94084c
+	n_sep_by_space		-1
94084c
+	p_sign_posn		-1
94084c
+	n_sign_posn		-1
94084c
+	int_p_cs_precedes	-1
94084c
+	int_p_sep_by_space	-1
94084c
+	int_n_cs_precedes	-1
94084c
+	int_n_sep_by_space	-1
94084c
+	int_p_sign_posn		-1
94084c
+	int_n_sign_posn		-1
94084c
+    Like with 30112, POSIX also considers no error if the keywords are
94084c
+    missing, only that if the category as a whole is missing the referencing
94084c
+    of the category results in unspecified behaviour.
94084c
+    For ISO C17 there is no default value provided, but the localeconv
94084c
+    specification in 7.11.2.1 admits that members of char * type may point
94084c
+    to "" to indicate a value is not available or is of length zero.
94084c
+    The exception is decimal_point (not mon_decimal_point) which must be a
94084c
+    defined non-empty string.  The values of char, which are generally
94084c
+    mapped to integer values in 30112 and POSIX, must be non-negative
94084c
+    numbers that map to CHAR_MAX when a value is not available in the
94084c
+    locale.
94084c
+    In ISO C17 for the "C" locale all values are empty strings "", or
94084c
+    CHAR_MAX, with the exception of decimal_point which is "." (defined
94084c
+    in LC_NUMERIC).  ISO C17 makes no exception for mon_grouping like
94084c
+    30112 and POSIX, but a value of "" is functionally equivalent to
94084c
+    "\177" since neither defines a grouping (though the latter terminates
94084c
+    the grouping).
94084c
+
94084c
+    Lastly, we must consider the legacy C/POSIX locale that is implemented
94084c
+    as a builtin in glibc and whether a default value mapping to the
94084c
+    C/POSIX locale may benefit the user from a compatibility perspective.
94084c
+
94084c
+    Thus given 30112, POSIX, ISO C, and the builtin C/POSIX locale we
94084c
+    need to pick appropriate defaults below.   */
94084c
+
94084c
+  /* The members of LC_MONETARY are handled in the order of their definition
94084c
+     in locale/categories.def.  Please keep them in that order.  */
94084c
+
94084c
+  /* The purpose of TEST_ELEM is to define a default value for the fields
94084c
+     in the category if the field was not defined in the category.  If the
94084c
+     category was present but we didn't see a definition for the field then
94084c
+     we also issue a warning, otherwise the only warning you get is the one
94084c
+     earlier when a default category is created (completely missing category).
94084c
+     This missing field warning is glibc-specific since no standard requires
94084c
+     this warning, but we consider it valuable to print a warning for all
94084c
+     missing fields in the category.  */
94084c
 #define TEST_ELEM(cat, initval) \
94084c
   if (monetary->cat == NULL)						      \
94084c
     {									      \
94084c
       if (! nothing)							      \
94084c
-	record_error (0, 0, _("%s: field `%s' not defined"),		      \
94084c
-		      "LC_MONETARY", #cat);				      \
94084c
+	record_warning (_("%s: field `%s' not defined"),		      \
94084c
+			"LC_MONETARY", #cat);				      \
94084c
       monetary->cat = initval;						      \
94084c
     }
94084c
 
94084c
+  /* Keyword: int_curr_symbol.  */
94084c
   TEST_ELEM (int_curr_symbol, "");
94084c
-  TEST_ELEM (currency_symbol, "");
94084c
-  TEST_ELEM (mon_thousands_sep, "");
94084c
-  TEST_ELEM (positive_sign, "");
94084c
-  TEST_ELEM (negative_sign, "");
94084c
-
94084c
   /* The international currency symbol must come from ISO 4217.  */
94084c
   if (monetary->int_curr_symbol != NULL)
94084c
     {
94084c
@@ -248,41 +332,63 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"),
94084c
 	}
94084c
     }
94084c
 
94084c
-  /* The decimal point must not be empty.  This is not said explicitly
94084c
-     in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
94084c
-     != "".  */
94084c
+  /* Keyword: currency_symbol */
94084c
+  TEST_ELEM (currency_symbol, "");
94084c
+
94084c
+  /* Keyword: mon_decimal_point */
94084c
+  /* ISO C17 7.11.2.1.3 explicitly allows mon_decimal_point to be the
94084c
+     empty string e.g. "".  This indicates the value is not available in the
94084c
+     current locale or is of zero length.  However, if the value was never
94084c
+     defined then we issue a warning and use a glibc-specific default.  ISO
94084c
+     30112 in the i18n FDCC-Set uses <U002C> ",", and POSIX Issue 7 in the
94084c
+     POSIX locale uses "".  It is specific to glibc that the default is <U002E>
94084c
+     "."; we retain this existing behaviour for backwards compatibility.  */
94084c
   if (monetary->mon_decimal_point == NULL)
94084c
     {
94084c
       if (! nothing)
94084c
-	record_error (0, 0, _("%s: field `%s' not defined"),
94084c
-		      "LC_MONETARY", "mon_decimal_point");
94084c
+	record_warning (_("%s: field `%s' not defined, using defaults"),
94084c
+			"LC_MONETARY", "mon_decimal_point");
94084c
       monetary->mon_decimal_point = ".";
94084c
       monetary->mon_decimal_point_wc = L'.';
94084c
     }
94084c
-  else if (monetary->mon_decimal_point[0] == '\0' && ! be_quiet && ! nothing)
94084c
+
94084c
+  /* Keyword: mon_thousands_sep */
94084c
+  if (monetary->mon_thousands_sep == NULL)
94084c
     {
94084c
-      record_error (0, 0, _("\
94084c
-%s: value for field `%s' must not be an empty string"),
94084c
-		    "LC_MONETARY", "mon_decimal_point");
94084c
+      if (! nothing)
94084c
+	record_warning (_("%s: field `%s' not defined, using defaults"),
94084c
+			"LC_MONETARY", "mon_thousands_sep");
94084c
+      monetary->mon_thousands_sep = "";
94084c
+      monetary->mon_thousands_sep_wc = L'\0';
94084c
     }
94084c
 
94084c
+  /* Keyword: mon_grouping */
94084c
   if (monetary->mon_grouping_len == 0)
94084c
     {
94084c
       if (! nothing)
94084c
-	record_error (0, 0, _("%s: field `%s' not defined"),
94084c
-		      "LC_MONETARY", "mon_grouping");
94084c
-
94084c
+	record_warning (_("%s: field `%s' not defined"),
94084c
+			"LC_MONETARY", "mon_grouping");
94084c
+      /* Missing entries are given 1 element in their bytearray with
94084c
+	 a value of CHAR_MAX which indicates that "No further grouping
94084c
+	 is to be performed" (functionally equivalent to ISO C's "C"
94084c
+	 locale default of ""). */
94084c
       monetary->mon_grouping = (char *) "\177";
94084c
       monetary->mon_grouping_len = 1;
94084c
     }
94084c
 
94084c
+  /* Keyword: positive_sign */
94084c
+  TEST_ELEM (positive_sign, "");
94084c
+
94084c
+  /* Keyword: negative_sign */
94084c
+  TEST_ELEM (negative_sign, "");
94084c
+
94084c
 #undef TEST_ELEM
94084c
 #define TEST_ELEM(cat, min, max, initval) \
94084c
   if (monetary->cat == -2)						      \
94084c
     {									      \
94084c
        if (! nothing)							      \
94084c
-	 record_error (0, 0, _("%s: field `%s' not defined"),		      \
94084c
-		       "LC_MONETARY", #cat);				      \
94084c
+	 record_warning (_("%s: field `%s' not defined"),		      \
94084c
+			 "LC_MONETARY", #cat);				      \
94084c
        monetary->cat = initval;						      \
94084c
     }									      \
94084c
   else if ((monetary->cat < min || monetary->cat > max)			      \
94084c
@@ -301,16 +407,11 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"),
94084c
   TEST_ELEM (p_sign_posn, -1, 4, -1);
94084c
   TEST_ELEM (n_sign_posn, -1, 4, -1);
94084c
 
94084c
-  /* The non-POSIX.2 extensions are optional.  */
94084c
-  if (monetary->duo_int_curr_symbol == NULL)
94084c
-    monetary->duo_int_curr_symbol = monetary->int_curr_symbol;
94084c
-  if (monetary->duo_currency_symbol == NULL)
94084c
-    monetary->duo_currency_symbol = monetary->currency_symbol;
94084c
-
94084c
-  if (monetary->duo_int_frac_digits == -2)
94084c
-    monetary->duo_int_frac_digits = monetary->int_frac_digits;
94084c
-  if (monetary->duo_frac_digits == -2)
94084c
-    monetary->duo_frac_digits = monetary->frac_digits;
94084c
+  /* Keyword: crncystr */
94084c
+  monetary->crncystr = (char *) xmalloc (strlen (monetary->currency_symbol)
94084c
+					 + 2);
94084c
+  monetary->crncystr[0] = monetary->p_cs_precedes ? '-' : '+';
94084c
+  strcpy (&monetary->crncystr[1], monetary->currency_symbol);
94084c
 
94084c
 #undef TEST_ELEM
94084c
 #define TEST_ELEM(cat, alt, min, max) \
94084c
@@ -328,6 +429,17 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"),
94084c
   TEST_ELEM (int_p_sign_posn, p_sign_posn, -1, 4);
94084c
   TEST_ELEM (int_n_sign_posn, n_sign_posn, -1, 4);
94084c
 
94084c
+  /* The non-POSIX.2 extensions are optional.  */
94084c
+  if (monetary->duo_int_curr_symbol == NULL)
94084c
+    monetary->duo_int_curr_symbol = monetary->int_curr_symbol;
94084c
+  if (monetary->duo_currency_symbol == NULL)
94084c
+    monetary->duo_currency_symbol = monetary->currency_symbol;
94084c
+
94084c
+  if (monetary->duo_int_frac_digits == -2)
94084c
+    monetary->duo_int_frac_digits = monetary->int_frac_digits;
94084c
+  if (monetary->duo_frac_digits == -2)
94084c
+    monetary->duo_frac_digits = monetary->frac_digits;
94084c
+
94084c
   TEST_ELEM (duo_p_cs_precedes, p_cs_precedes, -1, 1);
94084c
   TEST_ELEM (duo_p_sep_by_space, p_sep_by_space, -1, 2);
94084c
   TEST_ELEM (duo_n_cs_precedes, n_cs_precedes, -1, 1);
94084c
@@ -350,17 +462,15 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"),
94084c
   if (monetary->duo_valid_to == 0)
94084c
     monetary->duo_valid_to = 99991231;
94084c
 
94084c
+  /* Keyword: conversion_rate */
94084c
   if (monetary->conversion_rate[0] == 0)
94084c
     {
94084c
       monetary->conversion_rate[0] = 1;
94084c
       monetary->conversion_rate[1] = 1;
94084c
     }
94084c
 
94084c
-  /* Create the crncystr entry.  */
94084c
-  monetary->crncystr = (char *) xmalloc (strlen (monetary->currency_symbol)
94084c
-					 + 2);
94084c
-  monetary->crncystr[0] = monetary->p_cs_precedes ? '-' : '+';
94084c
-  strcpy (&monetary->crncystr[1], monetary->currency_symbol);
94084c
+  /* A value for mon_decimal_point_wc was set when
94084c
+     mon_decimal_point was set, likewise for mon_thousands_sep_wc.  */
94084c
 }
94084c
 
94084c