|
|
db1ad8 |
commit 7d88c6142c6efc160c0ee5e4f85cde382c072888
|
|
|
db1ad8 |
Author: Florian Weimer <fweimer@redhat.com>
|
|
|
db1ad8 |
Date: Wed Jan 27 13:36:12 2021 +0100
|
|
|
db1ad8 |
|
|
|
db1ad8 |
gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256)
|
|
|
db1ad8 |
|
|
|
db1ad8 |
The conversion loop to the internal encoding does not follow
|
|
|
db1ad8 |
the interface contract that __GCONV_FULL_OUTPUT is only returned
|
|
|
db1ad8 |
after the internal wchar_t buffer has been filled completely. This
|
|
|
db1ad8 |
is enforced by the first of the two asserts in iconv/skeleton.c:
|
|
|
db1ad8 |
|
|
|
db1ad8 |
/* We must run out of output buffer space in this
|
|
|
db1ad8 |
rerun. */
|
|
|
db1ad8 |
assert (outbuf == outerr);
|
|
|
db1ad8 |
assert (nstatus == __GCONV_FULL_OUTPUT);
|
|
|
db1ad8 |
|
|
|
db1ad8 |
This commit solves this issue by queuing, in the state variable, a
|
|
|
db1ad8 |
second wide character which cannot be written immediately, like
|
|
|
db1ad8 |
other converters already do (e.g., BIG5-HKSCS or TSCII).
|
|
|
db1ad8 |
|
|
|
db1ad8 |
Reported-by: Tavis Ormandy <taviso@gmail.com>
|
|
|
db1ad8 |
|
|
|
db1ad8 |
Conflicts:
|
|
|
db1ad8 |
iconvdata/Makefile
|
|
|
db1ad8 |
(Usual differences in backported tests.)
|
|
|
db1ad8 |
|
|
|
db1ad8 |
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
|
|
|
db1ad8 |
index a47a4c07cd2e3d1b..32656ad31d9b434b 100644
|
|
|
db1ad8 |
--- a/iconvdata/Makefile
|
|
|
db1ad8 |
+++ b/iconvdata/Makefile
|
|
|
db1ad8 |
@@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules))
|
|
|
db1ad8 |
ifeq (yes,$(build-shared))
|
|
|
db1ad8 |
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
|
|
|
db1ad8 |
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
|
|
|
db1ad8 |
- bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13
|
|
|
db1ad8 |
+ bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14
|
|
|
db1ad8 |
ifeq ($(have-thread-library),yes)
|
|
|
db1ad8 |
tests += bug-iconv3
|
|
|
db1ad8 |
endif
|
|
|
db1ad8 |
@@ -316,6 +316,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
|
|
|
db1ad8 |
$(addprefix $(objpfx),$(modules.so))
|
|
|
db1ad8 |
$(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
|
|
|
db1ad8 |
$(addprefix $(objpfx),$(modules.so))
|
|
|
db1ad8 |
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
|
|
|
db1ad8 |
+ $(addprefix $(objpfx),$(modules.so))
|
|
|
db1ad8 |
|
|
|
db1ad8 |
$(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
|
|
|
db1ad8 |
$(addprefix $(objpfx),$(modules.so)) \
|
|
|
db1ad8 |
diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
|
|
|
db1ad8 |
new file mode 100644
|
|
|
db1ad8 |
index 0000000000000000..902f140fa949cbac
|
|
|
db1ad8 |
--- /dev/null
|
|
|
db1ad8 |
+++ b/iconvdata/bug-iconv14.c
|
|
|
db1ad8 |
@@ -0,0 +1,127 @@
|
|
|
db1ad8 |
+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
|
|
|
db1ad8 |
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
|
db1ad8 |
+ This file is part of the GNU C Library.
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ The GNU C Library is free software; you can redistribute it and/or
|
|
|
db1ad8 |
+ modify it under the terms of the GNU Lesser General Public
|
|
|
db1ad8 |
+ License as published by the Free Software Foundation; either
|
|
|
db1ad8 |
+ version 2.1 of the License, or (at your option) any later version.
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
|
db1ad8 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
db1ad8 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
db1ad8 |
+ Lesser General Public License for more details.
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ You should have received a copy of the GNU Lesser General Public
|
|
|
db1ad8 |
+ License along with the GNU C Library; if not, see
|
|
|
db1ad8 |
+ <https://www.gnu.org/licenses/>. */
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+#include <iconv.h>
|
|
|
db1ad8 |
+#include <string.h>
|
|
|
db1ad8 |
+#include <errno.h>
|
|
|
db1ad8 |
+#include <support/check.h>
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+/* Use an escape sequence to return to the initial state. */
|
|
|
db1ad8 |
+static void
|
|
|
db1ad8 |
+with_escape_sequence (void)
|
|
|
db1ad8 |
+{
|
|
|
db1ad8 |
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
|
|
|
db1ad8 |
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ char in[] = "\e$(O+D\e(B";
|
|
|
db1ad8 |
+ char *inbuf = in;
|
|
|
db1ad8 |
+ size_t inleft = strlen (in);
|
|
|
db1ad8 |
+ char out[3]; /* Space for one output character. */
|
|
|
db1ad8 |
+ char *outbuf;
|
|
|
db1ad8 |
+ size_t outleft;
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
|
|
|
db1ad8 |
+ TEST_COMPARE (errno, E2BIG);
|
|
|
db1ad8 |
+ TEST_COMPARE (inleft, 3);
|
|
|
db1ad8 |
+ TEST_COMPARE (inbuf - in, strlen (in) - 3);
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ /* Return to the initial shift state, producing the pending
|
|
|
db1ad8 |
+ character. */
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (inleft, 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (inbuf - in, strlen (in));
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ /* Nothing should be flushed the second time. */
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out));
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv_close (c), 0);
|
|
|
db1ad8 |
+}
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+/* Use an explicit flush to return to the initial state. */
|
|
|
db1ad8 |
+static void
|
|
|
db1ad8 |
+with_flush (void)
|
|
|
db1ad8 |
+{
|
|
|
db1ad8 |
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
|
|
|
db1ad8 |
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ char in[] = "\e$(O+D";
|
|
|
db1ad8 |
+ char *inbuf = in;
|
|
|
db1ad8 |
+ size_t inleft = strlen (in);
|
|
|
db1ad8 |
+ char out[3]; /* Space for one output character. */
|
|
|
db1ad8 |
+ char *outbuf;
|
|
|
db1ad8 |
+ size_t outleft;
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
|
|
|
db1ad8 |
+ TEST_COMPARE (errno, E2BIG);
|
|
|
db1ad8 |
+ TEST_COMPARE (inleft, 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (inbuf - in, strlen (in));
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ /* Flush the pending character. */
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 2);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ /* Nothing should be flushed the second time. */
|
|
|
db1ad8 |
+ outbuf = out;
|
|
|
db1ad8 |
+ outleft = sizeof (out);
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (outleft, sizeof (out));
|
|
|
db1ad8 |
+ TEST_COMPARE (outbuf - out, 0);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
|
|
db1ad8 |
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+ TEST_COMPARE (iconv_close (c), 0);
|
|
|
db1ad8 |
+}
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+static int
|
|
|
db1ad8 |
+do_test (void)
|
|
|
db1ad8 |
+{
|
|
|
db1ad8 |
+ with_escape_sequence ();
|
|
|
db1ad8 |
+ with_flush ();
|
|
|
db1ad8 |
+ return 0;
|
|
|
db1ad8 |
+}
|
|
|
db1ad8 |
+
|
|
|
db1ad8 |
+#include <support/test-driver.c>
|
|
|
db1ad8 |
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
|
|
|
db1ad8 |
index de259580c3f378bb..047fab8e8dfbde7e 100644
|
|
|
db1ad8 |
--- a/iconvdata/iso-2022-jp-3.c
|
|
|
db1ad8 |
+++ b/iconvdata/iso-2022-jp-3.c
|
|
|
db1ad8 |
@@ -67,23 +67,34 @@ enum
|
|
|
db1ad8 |
CURRENT_SEL_MASK = 7 << 3
|
|
|
db1ad8 |
};
|
|
|
db1ad8 |
|
|
|
db1ad8 |
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
|
|
|
db1ad8 |
- also contains the last two bytes to be output, shifted by 6 bits, and a
|
|
|
db1ad8 |
- one-bit indicator whether they must be preceded by the shift sequence,
|
|
|
db1ad8 |
- in bit 22. */
|
|
|
db1ad8 |
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
|
|
|
db1ad8 |
+ state also contains the last two bytes to be output, shifted by 6
|
|
|
db1ad8 |
+ bits, and a one-bit indicator whether they must be preceded by the
|
|
|
db1ad8 |
+ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
|
|
|
db1ad8 |
+ conversion, COUNT may also contain a non-zero pending wide
|
|
|
db1ad8 |
+ character, shifted by six bits. This happens for certain inputs in
|
|
|
db1ad8 |
+ JISX0213_1_2004_set and JISX0213_2_set if the second wide character
|
|
|
db1ad8 |
+ in a combining sequence cannot be written because the buffer is
|
|
|
db1ad8 |
+ full. */
|
|
|
db1ad8 |
|
|
|
db1ad8 |
/* Since this is a stateful encoding we have to provide code which resets
|
|
|
db1ad8 |
the output state to the initial state. This has to be done during the
|
|
|
db1ad8 |
flushing. */
|
|
|
db1ad8 |
#define EMIT_SHIFT_TO_INIT \
|
|
|
db1ad8 |
- if ((data->__statep->__count & ~7) != ASCII_set) \
|
|
|
db1ad8 |
+ if (data->__statep->__count != ASCII_set) \
|
|
|
db1ad8 |
{ \
|
|
|
db1ad8 |
if (FROM_DIRECTION) \
|
|
|
db1ad8 |
{ \
|
|
|
db1ad8 |
- /* It's easy, we don't have to emit anything, we just reset the \
|
|
|
db1ad8 |
- state for the input. */ \
|
|
|
db1ad8 |
- data->__statep->__count &= 7; \
|
|
|
db1ad8 |
- data->__statep->__count |= ASCII_set; \
|
|
|
db1ad8 |
+ if (__glibc_likely (outbuf + 4 <= outend)) \
|
|
|
db1ad8 |
+ { \
|
|
|
db1ad8 |
+ /* Write out the last character. */ \
|
|
|
db1ad8 |
+ *((uint32_t *) outbuf) = data->__statep->__count >> 6; \
|
|
|
db1ad8 |
+ outbuf += sizeof (uint32_t); \
|
|
|
db1ad8 |
+ data->__statep->__count = ASCII_set; \
|
|
|
db1ad8 |
+ } \
|
|
|
db1ad8 |
+ else \
|
|
|
db1ad8 |
+ /* We don't have enough room in the output buffer. */ \
|
|
|
db1ad8 |
+ status = __GCONV_FULL_OUTPUT; \
|
|
|
db1ad8 |
} \
|
|
|
db1ad8 |
else \
|
|
|
db1ad8 |
{ \
|
|
|
db1ad8 |
@@ -151,7 +162,21 @@ enum
|
|
|
db1ad8 |
#define LOOPFCT FROM_LOOP
|
|
|
db1ad8 |
#define BODY \
|
|
|
db1ad8 |
{ \
|
|
|
db1ad8 |
- uint32_t ch = *inptr; \
|
|
|
db1ad8 |
+ uint32_t ch; \
|
|
|
db1ad8 |
+ \
|
|
|
db1ad8 |
+ /* Output any pending character. */ \
|
|
|
db1ad8 |
+ ch = set >> 6; \
|
|
|
db1ad8 |
+ if (__glibc_unlikely (ch != 0)) \
|
|
|
db1ad8 |
+ { \
|
|
|
db1ad8 |
+ put32 (outptr, ch); \
|
|
|
db1ad8 |
+ outptr += 4; \
|
|
|
db1ad8 |
+ /* Remove the pending character, but preserve state bits. */ \
|
|
|
db1ad8 |
+ set &= (1 << 6) - 1; \
|
|
|
db1ad8 |
+ continue; \
|
|
|
db1ad8 |
+ } \
|
|
|
db1ad8 |
+ \
|
|
|
db1ad8 |
+ /* Otherwise read the next input byte. */ \
|
|
|
db1ad8 |
+ ch = *inptr; \
|
|
|
db1ad8 |
\
|
|
|
db1ad8 |
/* Recognize escape sequences. */ \
|
|
|
db1ad8 |
if (__glibc_unlikely (ch == ESC)) \
|
|
|
db1ad8 |
@@ -297,21 +322,25 @@ enum
|
|
|
db1ad8 |
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
|
|
|
db1ad8 |
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
|
|
|
db1ad8 |
\
|
|
|
db1ad8 |
+ inptr += 2; \
|
|
|
db1ad8 |
+ \
|
|
|
db1ad8 |
+ put32 (outptr, u1); \
|
|
|
db1ad8 |
+ outptr += 4; \
|
|
|
db1ad8 |
+ \
|
|
|
db1ad8 |
/* See whether we have room for two characters. */ \
|
|
|
db1ad8 |
- if (outptr + 8 <= outend) \
|
|
|
db1ad8 |
+ if (outptr + 4 <= outend) \
|
|
|
db1ad8 |
{ \
|
|
|
db1ad8 |
- inptr += 2; \
|
|
|
db1ad8 |
- put32 (outptr, u1); \
|
|
|
db1ad8 |
- outptr += 4; \
|
|
|
db1ad8 |
put32 (outptr, u2); \
|
|
|
db1ad8 |
outptr += 4; \
|
|
|
db1ad8 |
continue; \
|
|
|
db1ad8 |
} \
|
|
|
db1ad8 |
- else \
|
|
|
db1ad8 |
- { \
|
|
|
db1ad8 |
- result = __GCONV_FULL_OUTPUT; \
|
|
|
db1ad8 |
- break; \
|
|
|
db1ad8 |
- } \
|
|
|
db1ad8 |
+ \
|
|
|
db1ad8 |
+ /* Otherwise store only the first character now, and \
|
|
|
db1ad8 |
+ put the second one into the queue. */ \
|
|
|
db1ad8 |
+ set |= u2 << 6; \
|
|
|
db1ad8 |
+ /* Tell the caller why we terminate the loop. */ \
|
|
|
db1ad8 |
+ result = __GCONV_FULL_OUTPUT; \
|
|
|
db1ad8 |
+ break; \
|
|
|
db1ad8 |
} \
|
|
|
db1ad8 |
\
|
|
|
db1ad8 |
inptr += 2; \
|