From 72085b30bf30867360c4aa77bd43de5e1788d875 Mon Sep 17 00:00:00 2001 From: Ondrej Dubaj Date: Tue, 24 Mar 2020 09:22:47 +0100 Subject: [PATCH] Bugfix and optimize archive_wstring_append_from_mbs() The cal to mbrtowc() or mbtowc() should read up to mbs_length bytes and not wcs_length. This avoids out-of-bounds reads. mbrtowc() and mbtowc() return (size_t)-1 wit errno EILSEQ when they encounter an invalid multibyte character and (size_t)-2 when they they encounter an incomplete multibyte character. As we return failure and all our callers error out it makes no sense to continue parsing mbs. As we allocate `len` wchars at the beginning and each wchar has at least one byte, there will never be need to grow the buffer, so the code can be left out. On the other hand, we are always allocatng more memory than we need. As long as wcs_length == mbs_length == len we can omit wcs_length. We keep the old code commented if we decide to save memory and use autoexpanding wcs_length in the future. --- libarchive/archive_string.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index 5ae09b6..d7541dc 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -590,7 +590,7 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest, * No single byte will be more than one wide character, * so this length estimate will always be big enough. */ - size_t wcs_length = len; + //size_t wcs_length = len; size_t mbs_length = len; const char *mbs = p; wchar_t *wcs; @@ -599,7 +599,11 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest, memset(&shift_state, 0, sizeof(shift_state)); #endif - if (NULL == archive_wstring_ensure(dest, dest->length + wcs_length + 1)) + /* + * As we decided to have wcs_length == mbs_length == len + * we can use len here instead of wcs_length + */ + if (NULL == archive_wstring_ensure(dest, dest->length + len + 1)) return (-1); wcs = dest->s + dest->length; /* @@ -608,6 +612,12 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest, * multi bytes. */ while (*mbs && mbs_length > 0) { + /* + * The buffer we allocated is always big enough. + * Keep this code path in a comment if we decide to choose + * smaller wcs_length in the future + */ +/* if (wcs_length == 0) { dest->length = wcs - dest->s; dest->s[dest->length] = L'\0'; @@ -617,24 +627,20 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest, return (-1); wcs = dest->s + dest->length; } +*/ #if HAVE_MBRTOWC - r = mbrtowc(wcs, mbs, wcs_length, &shift_state); + r = mbrtowc(wcs, mbs, mbs_length, &shift_state); #else - r = mbtowc(wcs, mbs, wcs_length); + r = mbtowc(wcs, mbs, mbs_length); #endif if (r == (size_t)-1 || r == (size_t)-2) { ret_val = -1; - if (errno == EILSEQ) { - ++mbs; - --mbs_length; - continue; - } else - break; + break; } if (r == 0 || r > mbs_length) break; wcs++; - wcs_length--; + //wcs_length--; mbs += r; mbs_length -= r; } -- 2.24.1