4df9e3
commit ff012870b2c02a62598c04daa1e54632e020fd7d
4df9e3
Author: Nikita Popov <npv1310@gmail.com>
4df9e3
Date:   Tue Nov 2 13:21:42 2021 +0500
4df9e3
4df9e3
    gconv: Do not emit spurious NUL character in ISO-2022-JP-3 (bug 28524)
4df9e3
    
4df9e3
    Bugfix 27256 has introduced another issue:
4df9e3
    In conversion from ISO-2022-JP-3 encoding, it is possible
4df9e3
    to force iconv to emit an extra NUL character on internal state reset.
4df9e3
    To do this, it is sufficient to feed iconv an escape sequence
4df9e3
    which switches the active character set.
4df9e3
    The simplified check 'data->__statep->__count != ASCII_set'
4df9e3
    introduced by the aforementioned bugfix picks that case and
4df9e3
    behaves as if a '\0' character had been queued, thus emitting it.
4df9e3
    
4df9e3
    To eliminate this issue, these steps are taken:
4df9e3
    * Restore original condition
4df9e3
    '(data->__statep->__count & ~7) != ASCII_set'.
4df9e3
    It is necessary since bits 0-2 may contain
4df9e3
    number of buffered input characters.
4df9e3
    * Check that queued character is not NUL.
4df9e3
    A similar step is taken for the main conversion loop.
4df9e3
    
4df9e3
    The bundled test case uses the following logic:
4df9e3
    * Try to convert ISO-2022-JP-3 escape sequence
4df9e3
    switching active character set
4df9e3
    * Reset internal state by providing NULL as input buffer
4df9e3
    * Ensure that nothing has been converted.
4df9e3
    
4df9e3
    Signed-off-by: Nikita Popov <npv1310@gmail.com>
4df9e3
4df9e3
Conflicts:
4df9e3
	iconvdata/Makefile
4df9e3
	  (Copyright header.  Usual test backporting differences.)
4df9e3
	iconvdata/iso-2022-jp-3.c
4df9e3
	  (Copyright header.)
4df9e3
4df9e3
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
4df9e3
index 95e5fb8f722a513b..646e2ccd11478646 100644
4df9e3
--- a/iconvdata/Makefile
4df9e3
+++ b/iconvdata/Makefile
4df9e3
@@ -1,4 +1,5 @@
4df9e3
-# Copyright (C) 1997-2018 Free Software Foundation, Inc.
4df9e3
+# Copyright (C) 1997-2021 Free Software Foundation, Inc.
4df9e3
+# Copyright (C) The GNU Toolchain Authors.
4df9e3
 # This file is part of the GNU C Library.
4df9e3
 
4df9e3
 # The GNU C Library is free software; you can redistribute it and/or
4df9e3
@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules))
4df9e3
 ifeq (yes,$(build-shared))
4df9e3
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
4df9e3
 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
4df9e3
-	bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14
4df9e3
+	bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 \
4df9e3
+	bug-iconv15
4df9e3
 ifeq ($(have-thread-library),yes)
4df9e3
 tests += bug-iconv3
4df9e3
 endif
4df9e3
@@ -321,6 +323,8 @@ $(objpfx)bug-iconv12.out: $(addprefix $(objpfx), $(gconv-modules)) \
4df9e3
 			  $(addprefix $(objpfx),$(modules.so))
4df9e3
 $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
4df9e3
 			  $(addprefix $(objpfx),$(modules.so))
4df9e3
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
4df9e3
+			  $(addprefix $(objpfx),$(modules.so))
4df9e3
 
4df9e3
 $(objpfx)iconv-test.out: run-iconv-test.sh \
4df9e3
 			 $(addprefix $(objpfx), $(gconv-modules)) \
4df9e3
diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
4df9e3
new file mode 100644
4df9e3
index 0000000000000000..cc04bd0313a68786
4df9e3
--- /dev/null
4df9e3
+++ b/iconvdata/bug-iconv15.c
4df9e3
@@ -0,0 +1,60 @@
4df9e3
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
4df9e3
+   may emit spurious NUL character on state reset.
4df9e3
+   Copyright (C) The GNU Toolchain Authors.
4df9e3
+   This file is part of the GNU C Library.
4df9e3
+
4df9e3
+   The GNU C Library is free software; you can redistribute it and/or
4df9e3
+   modify it under the terms of the GNU Lesser General Public
4df9e3
+   License as published by the Free Software Foundation; either
4df9e3
+   version 2.1 of the License, or (at your option) any later version.
4df9e3
+
4df9e3
+   The GNU C Library is distributed in the hope that it will be useful,
4df9e3
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
4df9e3
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
4df9e3
+   Lesser General Public License for more details.
4df9e3
+
4df9e3
+   You should have received a copy of the GNU Lesser General Public
4df9e3
+   License along with the GNU C Library; if not, see
4df9e3
+   <https://www.gnu.org/licenses/>.  */
4df9e3
+
4df9e3
+#include <stddef.h>
4df9e3
+#include <iconv.h>
4df9e3
+#include <support/check.h>
4df9e3
+
4df9e3
+static int
4df9e3
+do_test (void)
4df9e3
+{
4df9e3
+  char in[] = "\x1b(I";
4df9e3
+  char *inbuf = in;
4df9e3
+  size_t inleft = sizeof (in) - 1;  /* Omit the terminating NUL.  */
4df9e3
+  char out[1];
4df9e3
+  char *outbuf = out;
4df9e3
+  size_t outleft = sizeof (out);
4df9e3
+  iconv_t cd;
4df9e3
+
4df9e3
+  cd = iconv_open ("UTF8", "ISO-2022-JP-3");
4df9e3
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
4df9e3
+
4df9e3
+  /* The input consists solely of an escape sequence, so the first
4df9e3
+     call to iconv should only alter the internal state: JISX0201_Kana_set
4df9e3
+     is now selected and the state value != ASCII_set.  */
4df9e3
+  TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
4df9e3
+
4df9e3
+  /* No bytes should have been added to
4df9e3
+     the output buffer at this point.  */
4df9e3
+  TEST_VERIFY (outbuf == out);
4df9e3
+  TEST_VERIFY (outleft == sizeof (out));
4df9e3
+
4df9e3
+  /* This state reset emits a spurious NUL character in unpatched glibc.  */
4df9e3
+  TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
4df9e3
+
4df9e3
+  /* No characters are expected to be produced.  */
4df9e3
+  TEST_VERIFY (outbuf == out);
4df9e3
+  TEST_VERIFY (outleft == sizeof (out));
4df9e3
+
4df9e3
+  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
4df9e3
+
4df9e3
+  return 0;
4df9e3
+}
4df9e3
+
4df9e3
+#include <support/test-driver.c>
4df9e3
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
4df9e3
index 047fab8e8dfbde7e..a2b33b171e56392a 100644
4df9e3
--- a/iconvdata/iso-2022-jp-3.c
4df9e3
+++ b/iconvdata/iso-2022-jp-3.c
4df9e3
@@ -1,5 +1,6 @@
4df9e3
 /* Conversion module for ISO-2022-JP-3.
4df9e3
-   Copyright (C) 1998-2018 Free Software Foundation, Inc.
4df9e3
+   Copyright (C) 1998-2021 Free Software Foundation, Inc.
4df9e3
+   Copyright (C) The GNU Toolchain Authors.
4df9e3
    This file is part of the GNU C Library.
4df9e3
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
4df9e3
    and Bruno Haible <bruno@clisp.org>, 2002.
4df9e3
@@ -81,20 +82,31 @@ enum
4df9e3
    the output state to the initial state.  This has to be done during the
4df9e3
    flushing.  */
4df9e3
 #define EMIT_SHIFT_TO_INIT \
4df9e3
-  if (data->__statep->__count != ASCII_set)			      \
4df9e3
+  if ((data->__statep->__count & ~7) != ASCII_set)			      \
4df9e3
     {									      \
4df9e3
       if (FROM_DIRECTION)						      \
4df9e3
 	{								      \
4df9e3
-	  if (__glibc_likely (outbuf + 4 <= outend))			      \
4df9e3
+	  uint32_t ch = data->__statep->__count >> 6;			      \
4df9e3
+									      \
4df9e3
+	  if (__glibc_unlikely (ch != 0))				      \
4df9e3
 	    {								      \
4df9e3
-	      /* Write out the last character.  */			      \
4df9e3
-	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
4df9e3
-	      outbuf += sizeof (uint32_t);				      \
4df9e3
-	      data->__statep->__count = ASCII_set;			\
4df9e3
+	      if (__glibc_likely (outbuf + 4 <= outend))		      \
4df9e3
+		{							      \
4df9e3
+		  /* Write out the last character.  */			      \
4df9e3
+		  put32u (outbuf, ch);					      \
4df9e3
+		  outbuf += 4;						      \
4df9e3
+		  data->__statep->__count &= 7;				      \
4df9e3
+		  data->__statep->__count |= ASCII_set;			      \
4df9e3
+		}							      \
4df9e3
+	      else							      \
4df9e3
+		/* We don't have enough room in the output buffer.  */	      \
4df9e3
+		status = __GCONV_FULL_OUTPUT;				      \
4df9e3
 	    }								      \
4df9e3
 	  else								      \
4df9e3
-	    /* We don't have enough room in the output buffer.  */	      \
4df9e3
-	    status = __GCONV_FULL_OUTPUT;				      \
4df9e3
+	    {								      \
4df9e3
+	      data->__statep->__count &= 7;				      \
4df9e3
+	      data->__statep->__count |= ASCII_set;			      \
4df9e3
+	    }								      \
4df9e3
 	}								      \
4df9e3
       else								      \
4df9e3
 	{								      \