| commit 7d88c6142c6efc160c0ee5e4f85cde382c072888 |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Wed Jan 27 13:36:12 2021 +0100 |
| |
| gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256) |
| |
| The conversion loop to the internal encoding does not follow |
| the interface contract that __GCONV_FULL_OUTPUT is only returned |
| after the internal wchar_t buffer has been filled completely. This |
| is enforced by the first of the two asserts in iconv/skeleton.c: |
| |
| /* We must run out of output buffer space in this |
| rerun. */ |
| assert (outbuf == outerr); |
| assert (nstatus == __GCONV_FULL_OUTPUT); |
| |
| This commit solves this issue by queuing, in the state variable, a |
| second wide character which cannot be written immediately, like |
| other converters already do (e.g., BIG5-HKSCS or TSCII). |
| |
| Reported-by: Tavis Ormandy <taviso@gmail.com> |
| |
| Conflicts: |
| iconvdata/Makefile |
| (Usual differences in backported tests.) |
| |
| diff --git a/iconvdata/Makefile b/iconvdata/Makefile |
| index a47a4c07cd2e3d1b..32656ad31d9b434b 100644 |
| |
| |
| @@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules)) |
| ifeq (yes,$(build-shared)) |
| tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ |
| tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ |
| - bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 |
| + bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 |
| ifeq ($(have-thread-library),yes) |
| tests += bug-iconv3 |
| endif |
| @@ -316,6 +316,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ |
| $(addprefix $(objpfx),$(modules.so)) |
| $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ |
| $(addprefix $(objpfx),$(modules.so)) |
| +$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \ |
| + $(addprefix $(objpfx),$(modules.so)) |
| |
| $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ |
| $(addprefix $(objpfx),$(modules.so)) \ |
| diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c |
| new file mode 100644 |
| index 0000000000000000..902f140fa949cbac |
| |
| |
| @@ -0,0 +1,127 @@ |
| +/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256). |
| + Copyright (C) 2021 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <https://www.gnu.org/licenses/>. */ |
| + |
| +#include <iconv.h> |
| +#include <string.h> |
| +#include <errno.h> |
| +#include <support/check.h> |
| + |
| +/* The trailing ESC ( B returns to the initial state, flushing the queue. */ |
| +static void |
| +with_escape_sequence (void) |
| +{ |
| + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); |
| + TEST_VERIFY_EXIT (c != (iconv_t) -1); |
| + |
| + char in[] = "\e$(O+D\e(B"; |
| + char *inbuf = in; |
| + size_t inleft = strlen (in); |
| + char out[3]; /* Room for one 2-byte UTF-8 character, but not two. */ |
| + char *outbuf; |
| + size_t outleft; |
| + |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); |
| + TEST_COMPARE (errno, E2BIG); |
| + TEST_COMPARE (inleft, 3); |
| + TEST_COMPARE (inbuf - in, strlen (in) - 3); |
| + TEST_COMPARE (outleft, sizeof (out) - 2); |
| + TEST_COMPARE (outbuf - out, 2); |
| + TEST_COMPARE (out[0] & 0xff, 0xc3); |
| + TEST_COMPARE (out[1] & 0xff, 0xa6); |
| + |
| + /* Consume the remaining ESC ( B: this returns to the initial shift |
| + state and produces the pending character (U+0300 as UTF-8). */ |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0); |
| + TEST_COMPARE (inleft, 0); |
| + TEST_COMPARE (inbuf - in, strlen (in)); |
| + TEST_COMPARE (outleft, sizeof (out) - 2); |
| + TEST_COMPARE (outbuf - out, 2); |
| + TEST_COMPARE (out[0] & 0xff, 0xcc); |
| + TEST_COMPARE (out[1] & 0xff, 0x80); |
| + |
| + /* An explicit flush now writes nothing; out keeps its old contents. */ |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); |
| + TEST_COMPARE (outleft, sizeof (out)); |
| + TEST_COMPARE (outbuf - out, 0); |
| + TEST_COMPARE (out[0] & 0xff, 0xcc); |
| + TEST_COMPARE (out[1] & 0xff, 0x80); |
| + |
| + TEST_COMPARE (iconv_close (c), 0); |
| +} |
| + |
| +/* Input ends after the combining pair; flush with a NULL input buffer. */ |
| +static void |
| +with_flush (void) |
| +{ |
| + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); |
| + TEST_VERIFY_EXIT (c != (iconv_t) -1); |
| + |
| + char in[] = "\e$(O+D"; |
| + char *inbuf = in; |
| + size_t inleft = strlen (in); |
| + char out[3]; /* Room for one 2-byte UTF-8 character, but not two. */ |
| + char *outbuf; |
| + size_t outleft; |
| + |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); |
| + TEST_COMPARE (errno, E2BIG); |
| + TEST_COMPARE (inleft, 0); |
| + TEST_COMPARE (inbuf - in, strlen (in)); |
| + TEST_COMPARE (outleft, sizeof (out) - 2); |
| + TEST_COMPARE (outbuf - out, 2); |
| + TEST_COMPARE (out[0] & 0xff, 0xc3); |
| + TEST_COMPARE (out[1] & 0xff, 0xa6); |
| + |
| + /* Flush the pending character (U+0300 as UTF-8) with a NULL input. */ |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); |
| + TEST_COMPARE (outleft, sizeof (out) - 2); |
| + TEST_COMPARE (outbuf - out, 2); |
| + TEST_COMPARE (out[0] & 0xff, 0xcc); |
| + TEST_COMPARE (out[1] & 0xff, 0x80); |
| + |
| + /* A second flush must write nothing; out keeps its old contents. */ |
| + outbuf = out; |
| + outleft = sizeof (out); |
| + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); |
| + TEST_COMPARE (outleft, sizeof (out)); |
| + TEST_COMPARE (outbuf - out, 0); |
| + TEST_COMPARE (out[0] & 0xff, 0xcc); |
| + TEST_COMPARE (out[1] & 0xff, 0x80); |
| + |
| + TEST_COMPARE (iconv_close (c), 0); |
| +} |
| + |
| +static int |
| +do_test (void) |
| +{ |
| + with_escape_sequence (); /* Queue emptied by an escape sequence. */ |
| + with_flush (); /* Queue emptied by an explicit flush. */ |
| + return 0; |
| +} |
| + |
| +#include <support/test-driver.c> |
| diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c |
| index de259580c3f378bb..047fab8e8dfbde7e 100644 |
| |
| |
| @@ -67,23 +67,34 @@ enum |
| CURRENT_SEL_MASK = 7 << 3 |
| }; |
| |
| -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state |
| - also contains the last two bytes to be output, shifted by 6 bits, and a |
| - one-bit indicator whether they must be preceded by the shift sequence, |
| - in bit 22. */ |
| +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the |
| + state also contains the last two bytes to be output, shifted by 6 |
| + bits, and a one-bit indicator whether they must be preceded by the |
| + shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4 |
| + conversion, COUNT may also contain a non-zero pending wide |
| + character, shifted by six bits. This happens for certain inputs in |
| + JISX0213_1_2004_set and JISX0213_2_set if the second wide character |
| + in a combining sequence cannot be written because the buffer is |
| + full. */ |
| |
| /* Since this is a stateful encoding we have to provide code which resets |
| the output state to the initial state. This has to be done during the |
| flushing. */ |
| #define EMIT_SHIFT_TO_INIT \ |
| - if ((data->__statep->__count & ~7) != ASCII_set) \ |
| + if (data->__statep->__count != ASCII_set) \ |
| { \ |
| if (FROM_DIRECTION) \ |
| { \ |
| - /* It's easy, we don't have to emit anything, we just reset the \ |
| - state for the input. */ \ |
| - data->__statep->__count &= 7; \ |
| - data->__statep->__count |= ASCII_set; \ |
| + if (__glibc_likely (outbuf + 4 <= outend)) \ |
| + { \ |
| + /* Write out the last character. */ \ |
| + *((uint32_t *) outbuf) = data->__statep->__count >> 6; \ |
| + outbuf += sizeof (uint32_t); \ |
| + data->__statep->__count = ASCII_set; \ |
| + } \ |
| + else \ |
| + /* We don't have enough room in the output buffer. */ \ |
| + status = __GCONV_FULL_OUTPUT; \ |
| } \ |
| else \ |
| { \ |
| @@ -151,7 +162,21 @@ enum |
| #define LOOPFCT FROM_LOOP |
| #define BODY \ |
| { \ |
| - uint32_t ch = *inptr; \ |
| + uint32_t ch; \ |
| + \ |
| + /* Output any pending character. */ \ |
| + ch = set >> 6; \ |
| + if (__glibc_unlikely (ch != 0)) \ |
| + { \ |
| + put32 (outptr, ch); \ |
| + outptr += 4; \ |
| + /* Remove the pending character, but preserve state bits. */ \ |
| + set &= (1 << 6) - 1; \ |
| + continue; \ |
| + } \ |
| + \ |
| + /* Otherwise read the next input byte. */ \ |
| + ch = *inptr; \ |
| \ |
| /* Recognize escape sequences. */ \ |
| if (__glibc_unlikely (ch == ESC)) \ |
| @@ -297,21 +322,25 @@ enum |
| uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ |
| uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ |
| \ |
| + inptr += 2; \ |
| + \ |
| + put32 (outptr, u1); \ |
| + outptr += 4; \ |
| + \ |
| /* See whether we have room for two characters. */ \ |
| - if (outptr + 8 <= outend) \ |
| + if (outptr + 4 <= outend) \ |
| { \ |
| - inptr += 2; \ |
| - put32 (outptr, u1); \ |
| - outptr += 4; \ |
| put32 (outptr, u2); \ |
| outptr += 4; \ |
| continue; \ |
| } \ |
| - else \ |
| - { \ |
| - result = __GCONV_FULL_OUTPUT; \ |
| - break; \ |
| - } \ |
| + \ |
| + /* Otherwise store only the first character now, and \ |
| + put the second one into the queue. */ \ |
| + set |= u2 << 6; \ |
| + /* Tell the caller why we terminate the loop. */ \ |
| + result = __GCONV_FULL_OUTPUT; \ |
| + break; \ |
| } \ |
| \ |
| inptr += 2; \ |