|
|
03181a |
From 72085b30bf30867360c4aa77bd43de5e1788d875 Mon Sep 17 00:00:00 2001
|
|
|
03181a |
From: Ondrej Dubaj <odubaj@redhat.com>
|
|
|
03181a |
Date: Tue, 24 Mar 2020 09:22:47 +0100
|
|
|
03181a |
Subject: [PATCH] Bugfix and optimize archive_wstring_append_from_mbs()
|
|
|
03181a |
|
|
|
03181a |
The call to mbrtowc() or mbtowc() should read up to mbs_length
|
|
|
03181a |
bytes and not wcs_length. This avoids out-of-bounds reads.
|
|
|
03181a |
|
|
|
03181a |
mbrtowc() and mbtowc() return (size_t)-1 with errno EILSEQ when
|
|
|
03181a |
they encounter an invalid multibyte character and (size_t)-2 when
|
|
|
03181a |
they encounter an incomplete multibyte character. As we return
|
|
|
03181a |
failure and all our callers error out it makes no sense to continue
|
|
|
03181a |
parsing mbs.
|
|
|
03181a |
|
|
|
03181a |
As we allocate `len` wchars at the beginning and each wchar has
|
|
|
03181a |
at least one byte, there will never be a need to grow the buffer,
|
|
|
03181a |
so the code can be left out. On the other hand, we are always
|
|
|
03181a |
allocating more memory than we need.
|
|
|
03181a |
|
|
|
03181a |
As long as wcs_length == mbs_length == len we can omit wcs_length.
|
|
|
03181a |
We keep the old code commented if we decide to save memory and
|
|
|
03181a |
use autoexpanding wcs_length in the future.
|
|
|
03181a |
---
|
|
|
03181a |
libarchive/archive_string.c | 28 +++++++++++++++++-----------
|
|
|
03181a |
1 file changed, 17 insertions(+), 11 deletions(-)
|
|
|
03181a |
|
|
|
03181a |
diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
|
|
|
03181a |
index 5ae09b6..d7541dc 100644
|
|
|
03181a |
--- a/libarchive/archive_string.c
|
|
|
03181a |
+++ b/libarchive/archive_string.c
|
|
|
03181a |
@@ -590,7 +590,7 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
|
|
03181a |
* No single byte will be more than one wide character,
|
|
|
03181a |
* so this length estimate will always be big enough.
|
|
|
03181a |
*/
|
|
|
03181a |
- size_t wcs_length = len;
|
|
|
03181a |
+ //size_t wcs_length = len;
|
|
|
03181a |
size_t mbs_length = len;
|
|
|
03181a |
const char *mbs = p;
|
|
|
03181a |
wchar_t *wcs;
|
|
|
03181a |
@@ -599,7 +599,11 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
|
|
03181a |
|
|
|
03181a |
memset(&shift_state, 0, sizeof(shift_state));
|
|
|
03181a |
#endif
|
|
|
03181a |
- if (NULL == archive_wstring_ensure(dest, dest->length + wcs_length + 1))
|
|
|
03181a |
+ /*
|
|
|
03181a |
+ * As we decided to have wcs_length == mbs_length == len
|
|
|
03181a |
+ * we can use len here instead of wcs_length
|
|
|
03181a |
+ */
|
|
|
03181a |
+ if (NULL == archive_wstring_ensure(dest, dest->length + len + 1))
|
|
|
03181a |
return (-1);
|
|
|
03181a |
wcs = dest->s + dest->length;
|
|
|
03181a |
/*
|
|
|
03181a |
@@ -608,6 +612,12 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
|
|
03181a |
* multi bytes.
|
|
|
03181a |
*/
|
|
|
03181a |
while (*mbs && mbs_length > 0) {
|
|
|
03181a |
+ /*
|
|
|
03181a |
+ * The buffer we allocated is always big enough.
|
|
|
03181a |
+ * Keep this code path in a comment if we decide to choose
|
|
|
03181a |
+ * smaller wcs_length in the future
|
|
|
03181a |
+ */
|
|
|
03181a |
+/*
|
|
|
03181a |
if (wcs_length == 0) {
|
|
|
03181a |
dest->length = wcs - dest->s;
|
|
|
03181a |
dest->s[dest->length] = L'\0';
|
|
|
03181a |
@@ -617,24 +627,20 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
|
|
03181a |
return (-1);
|
|
|
03181a |
wcs = dest->s + dest->length;
|
|
|
03181a |
}
|
|
|
03181a |
+*/
|
|
|
03181a |
#if HAVE_MBRTOWC
|
|
|
03181a |
- r = mbrtowc(wcs, mbs, wcs_length, &shift_state);
|
|
|
03181a |
+ r = mbrtowc(wcs, mbs, mbs_length, &shift_state);
|
|
|
03181a |
#else
|
|
|
03181a |
- r = mbtowc(wcs, mbs, wcs_length);
|
|
|
03181a |
+ r = mbtowc(wcs, mbs, mbs_length);
|
|
|
03181a |
#endif
|
|
|
03181a |
if (r == (size_t)-1 || r == (size_t)-2) {
|
|
|
03181a |
ret_val = -1;
|
|
|
03181a |
- if (errno == EILSEQ) {
|
|
|
03181a |
- ++mbs;
|
|
|
03181a |
- --mbs_length;
|
|
|
03181a |
- continue;
|
|
|
03181a |
- } else
|
|
|
03181a |
- break;
|
|
|
03181a |
+ break;
|
|
|
03181a |
}
|
|
|
03181a |
if (r == 0 || r > mbs_length)
|
|
|
03181a |
break;
|
|
|
03181a |
wcs++;
|
|
|
03181a |
- wcs_length--;
|
|
|
03181a |
+ //wcs_length--;
|
|
|
03181a |
mbs += r;
|
|
|
03181a |
mbs_length -= r;
|
|
|
03181a |
}
|
|
|
03181a |
--
|
|
|
03181a |
2.24.1
|
|
|
03181a |
|