Blob Blame History Raw
From 72085b30bf30867360c4aa77bd43de5e1788d875 Mon Sep 17 00:00:00 2001
From: Ondrej Dubaj <odubaj@redhat.com>
Date: Tue, 24 Mar 2020 09:22:47 +0100
Subject: [PATCH] Bugfix and optimize archive_wstring_append_from_mbs()

The cal to mbrtowc() or mbtowc() should read up to mbs_length
bytes and not wcs_length. This avoids out-of-bounds reads.

mbrtowc() and mbtowc() return (size_t)-1 wit errno EILSEQ when
they encounter an invalid multibyte character and (size_t)-2 when
they they encounter an incomplete multibyte character. As we return
failure and all our callers error out it makes no sense to continue
parsing mbs.

As we allocate `len` wchars at the beginning and each wchar has
at least one byte, there will never be need to grow the buffer,
so the code can be left out. On the other hand, we are always
allocatng more memory than we need.

As long as wcs_length == mbs_length == len we can omit wcs_length.
We keep the old code commented if we decide to save memory and
use autoexpanding wcs_length in the future.
---
 libarchive/archive_string.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
index 5ae09b6..d7541dc 100644
--- a/libarchive/archive_string.c
+++ b/libarchive/archive_string.c
@@ -590,7 +590,7 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
 	 * No single byte will be more than one wide character,
 	 * so this length estimate will always be big enough.
 	 */
-	size_t wcs_length = len;
+	//size_t wcs_length = len;
 	size_t mbs_length = len;
 	const char *mbs = p;
 	wchar_t *wcs;
@@ -599,7 +599,11 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
 
 	memset(&shift_state, 0, sizeof(shift_state));
 #endif
-	if (NULL == archive_wstring_ensure(dest, dest->length + wcs_length + 1))
+	/*
+	 * As we decided to have wcs_length == mbs_length == len
+	 * we can use len here instead of wcs_length
+	 */
+	if (NULL == archive_wstring_ensure(dest, dest->length + len + 1))
 		return (-1);
 	wcs = dest->s + dest->length;
 	/*
@@ -608,6 +612,12 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
 	 * multi bytes.
 	 */
 	while (*mbs && mbs_length > 0) {
+		/*
+		 * The buffer we allocated is always big enough.
+		 * Keep this code path in a comment if we decide to choose
+		 * smaller wcs_length in the future
+		 */
+/*
 		if (wcs_length == 0) {
 			dest->length = wcs - dest->s;
 			dest->s[dest->length] = L'\0';
@@ -617,24 +627,20 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
 				return (-1);
 			wcs = dest->s + dest->length;
 		}
+*/
 #if HAVE_MBRTOWC
-		r = mbrtowc(wcs, mbs, wcs_length, &shift_state);
+		r = mbrtowc(wcs, mbs, mbs_length, &shift_state);
 #else
-		r = mbtowc(wcs, mbs, wcs_length);
+		r = mbtowc(wcs, mbs, mbs_length);
 #endif
 		if (r == (size_t)-1 || r == (size_t)-2) {
 			ret_val = -1;
-			if (errno == EILSEQ) {
-				++mbs;
-				--mbs_length;
-				continue;
-			} else
-				break;
+			break;
 		}
 		if (r == 0 || r > mbs_length)
 			break;
 		wcs++;
-		wcs_length--;
+		//wcs_length--;
 		mbs += r;
 		mbs_length -= r;
 	}
-- 
2.24.1