2be09a
commit f411207a833d0c49578ebe7062aee3660813ed5f
2be09a
Author: Nikita Popov <npv1310@gmail.com>
2be09a
Date:   Tue Nov 2 13:21:42 2021 +0500
2be09a
2be09a
    gconv: Do not emit spurious NUL character in ISO-2022-JP-3 (bug 28524)
2be09a
    
2be09a
    Bugfix 27256 has introduced another issue:
2be09a
    In conversion from ISO-2022-JP-3 encoding, it is possible
2be09a
    to force iconv to emit an extra NUL character on internal state reset.
2be09a
    To do this, it is sufficient to feed iconv an escape sequence
2be09a
    which switches active character set.
2be09a
    The simplified check 'data->__statep->__count != ASCII_set'
2be09a
    introduced by the aforementioned bugfix picks that case and
2be09a
    behaves as if a '\0' character had been queued, thus emitting it.
2be09a
    
2be09a
    To eliminate this issue, these steps are taken:
2be09a
    * Restore original condition
2be09a
    '(data->__statep->__count & ~7) != ASCII_set'.
2be09a
    It is necessary since bits 0-2 may contain
2be09a
    the number of buffered input characters.
2be09a
    * Check that queued character is not NUL.
2be09a
    A similar step is taken for the main conversion loop.
2be09a
    
2be09a
    The bundled test case follows this logic:
2be09a
    * Try to convert ISO-2022-JP-3 escape sequence
2be09a
    switching active character set
2be09a
    * Reset internal state by providing NULL as input buffer
2be09a
    * Ensure that nothing has been converted.
2be09a
    
2be09a
    Signed-off-by: Nikita Popov <npv1310@gmail.com>
2be09a
    (cherry picked from commit ff012870b2c02a62598c04daa1e54632e020fd7d)
2be09a
2be09a
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
2be09a
index c216f959df1413f8..d5507a048c6a6508 100644
2be09a
--- a/iconvdata/Makefile
2be09a
+++ b/iconvdata/Makefile
2be09a
@@ -1,4 +1,5 @@
2be09a
 # Copyright (C) 1997-2021 Free Software Foundation, Inc.
2be09a
+# Copyright (C) The GNU Toolchain Authors.
2be09a
 # This file is part of the GNU C Library.
2be09a
 
2be09a
 # The GNU C Library is free software; you can redistribute it and/or
2be09a
@@ -74,7 +75,7 @@ ifeq (yes,$(build-shared))
2be09a
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
2be09a
 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
2be09a
 	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
2be09a
-	bug-iconv13 bug-iconv14
2be09a
+	bug-iconv13 bug-iconv14 bug-iconv15
2be09a
 ifeq ($(have-thread-library),yes)
2be09a
 tests += bug-iconv3
2be09a
 endif
2be09a
@@ -327,6 +328,8 @@ $(objpfx)bug-iconv12.out: $(addprefix $(objpfx), $(gconv-modules)) \
2be09a
 			  $(addprefix $(objpfx),$(modules.so))
2be09a
 $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
2be09a
 			  $(addprefix $(objpfx),$(modules.so))
2be09a
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
2be09a
+			  $(addprefix $(objpfx),$(modules.so))
2be09a
 
2be09a
 $(objpfx)iconv-test.out: run-iconv-test.sh \
2be09a
 			 $(addprefix $(objpfx), $(gconv-modules)) \
2be09a
diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
2be09a
new file mode 100644
2be09a
index 0000000000000000..cc04bd0313a68786
2be09a
--- /dev/null
2be09a
+++ b/iconvdata/bug-iconv15.c
2be09a
@@ -0,0 +1,60 @@
2be09a
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
2be09a
+   may emit spurious NUL character on state reset.
2be09a
+   Copyright (C) The GNU Toolchain Authors.
2be09a
+   This file is part of the GNU C Library.
2be09a
+
2be09a
+   The GNU C Library is free software; you can redistribute it and/or
2be09a
+   modify it under the terms of the GNU Lesser General Public
2be09a
+   License as published by the Free Software Foundation; either
2be09a
+   version 2.1 of the License, or (at your option) any later version.
2be09a
+
2be09a
+   The GNU C Library is distributed in the hope that it will be useful,
2be09a
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
2be09a
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
2be09a
+   Lesser General Public License for more details.
2be09a
+
2be09a
+   You should have received a copy of the GNU Lesser General Public
2be09a
+   License along with the GNU C Library; if not, see
2be09a
+   <https://www.gnu.org/licenses/>.  */
2be09a
+
2be09a
+#include <stddef.h>
2be09a
+#include <iconv.h>
2be09a
+#include <support/check.h>
2be09a
+
2be09a
+static int
2be09a
+do_test (void)
2be09a
+{
2be09a
+  char in[] = "\x1b(I";
2be09a
+  char *inbuf = in;
2be09a
+  size_t inleft = sizeof (in) - 1;
2be09a
+  char out[1];
2be09a
+  char *outbuf = out;
2be09a
+  size_t outleft = sizeof (out);
2be09a
+  iconv_t cd;
2be09a
+
2be09a
+  cd = iconv_open ("UTF8", "ISO-2022-JP-3");
2be09a
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
2be09a
+
2be09a
+  /* First call to iconv should alter internal state.
2be09a
+     Now, JISX0201_Kana_set is selected and
2be09a
+     state value != ASCII_set.  */
2be09a
+  TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
2be09a
+
2be09a
+  /* No bytes should have been added to
2be09a
+     the output buffer at this point.  */
2be09a
+  TEST_VERIFY (outbuf == out);
2be09a
+  TEST_VERIFY (outleft == sizeof (out));
2be09a
+
2be09a
+  /* Second call shall emit spurious NUL character in unpatched glibc.  */
2be09a
+  TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
2be09a
+
2be09a
+  /* No characters are expected to be produced.  */
2be09a
+  TEST_VERIFY (outbuf == out);
2be09a
+  TEST_VERIFY (outleft == sizeof (out));
2be09a
+
2be09a
+  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
2be09a
+
2be09a
+  return 0;
2be09a
+}
2be09a
+
2be09a
+#include <support/test-driver.c>
2be09a
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
2be09a
index c8ba88cdc9fe9200..5fc0c0f7397935fe 100644
2be09a
--- a/iconvdata/iso-2022-jp-3.c
2be09a
+++ b/iconvdata/iso-2022-jp-3.c
2be09a
@@ -1,5 +1,6 @@
2be09a
 /* Conversion module for ISO-2022-JP-3.
2be09a
    Copyright (C) 1998-2021 Free Software Foundation, Inc.
2be09a
+   Copyright (C) The GNU Toolchain Authors.
2be09a
    This file is part of the GNU C Library.
2be09a
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
2be09a
    and Bruno Haible <bruno@clisp.org>, 2002.
2be09a
@@ -81,20 +82,31 @@ enum
2be09a
    the output state to the initial state.  This has to be done during the
2be09a
    flushing.  */
2be09a
 #define EMIT_SHIFT_TO_INIT \
2be09a
-  if (data->__statep->__count != ASCII_set)			      \
2be09a
+  if ((data->__statep->__count & ~7) != ASCII_set)			      \
2be09a
     {									      \
2be09a
       if (FROM_DIRECTION)						      \
2be09a
 	{								      \
2be09a
-	  if (__glibc_likely (outbuf + 4 <= outend))			      \
2be09a
+	  uint32_t ch = data->__statep->__count >> 6;			      \
2be09a
+									      \
2be09a
+	  if (__glibc_unlikely (ch != 0))				      \
2be09a
 	    {								      \
2be09a
-	      /* Write out the last character.  */			      \
2be09a
-	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
2be09a
-	      outbuf += sizeof (uint32_t);				      \
2be09a
-	      data->__statep->__count = ASCII_set;			\
2be09a
+	      if (__glibc_likely (outbuf + 4 <= outend))		      \
2be09a
+		{							      \
2be09a
+		  /* Write out the last character.  */			      \
2be09a
+		  put32u (outbuf, ch);					      \
2be09a
+		  outbuf += 4;						      \
2be09a
+		  data->__statep->__count &= 7;				      \
2be09a
+		  data->__statep->__count |= ASCII_set;			      \
2be09a
+		}							      \
2be09a
+	      else							      \
2be09a
+		/* We don't have enough room in the output buffer.  */	      \
2be09a
+		status = __GCONV_FULL_OUTPUT;				      \
2be09a
 	    }								      \
2be09a
 	  else								      \
2be09a
-	    /* We don't have enough room in the output buffer.  */	      \
2be09a
-	    status = __GCONV_FULL_OUTPUT;				      \
2be09a
+	    {								      \
2be09a
+	      data->__statep->__count &= 7;				      \
2be09a
+	      data->__statep->__count |= ASCII_set;			      \
2be09a
+	    }								      \
2be09a
 	}								      \
2be09a
       else								      \
2be09a
 	{								      \