From 7a4129ad1075b54e902af703d2582ffb79b99c49 Mon Sep 17 00:00:00 2001
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Date: Tue, 24 Nov 2015 13:47:16 +1300
Subject: [PATCH 5/9] CVE-2015-5330: Fix handling of unicode near string
endings
Until now next_codepoint_ext() and next_codepoint_handle_ext() were
using strnlen(str, 5) to determine how much string they should try to
decode. This ended up looking past the end of the string when it was not
null terminated and the final character looked like a multi-byte encoding.
The fix is to let the caller say how long the string can be.
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
---
lib/util/charset/charset.h | 9 +++++----
lib/util/charset/codepoints.c | 24 ++++++++++++++++--------
lib/util/charset/util_str.c | 3 ++-
lib/util/charset/util_unistr.c | 3 ++-
4 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index e4297e4..060f1cf 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -171,15 +171,16 @@ smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic,
charset_t from, charset_t to);
const char *charset_name(struct smb_iconv_handle *ic, charset_t ch);
-codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
- size_t *size);
+codepoint_t next_codepoint_ext(const char *str, size_t len,
+ charset_t src_charset, size_t *size);
codepoint_t next_codepoint(const char *str, size_t *size);
ssize_t push_codepoint(char *str, codepoint_t c);
/* codepoints */
codepoint_t next_codepoint_handle_ext(struct smb_iconv_handle *ic,
- const char *str, charset_t src_charset,
- size_t *size);
+ const char *str, size_t len,
+ charset_t src_charset,
+ size_t *size);
codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic,
const char *str, size_t *size);
ssize_t push_codepoint_handle(struct smb_iconv_handle *ic,
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
index 0984164..542eeae 100644
--- a/lib/util/charset/codepoints.c
+++ b/lib/util/charset/codepoints.c
@@ -319,7 +319,8 @@ smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic,
*/
_PUBLIC_ codepoint_t next_codepoint_handle_ext(
struct smb_iconv_handle *ic,
- const char *str, charset_t src_charset,
+ const char *str, size_t len,
+ charset_t src_charset,
size_t *bytes_consumed)
{
/* it cannot occupy more than 4 bytes in UTF16 format */
@@ -339,7 +340,7 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
* we assume that no multi-byte character can take more than 5 bytes.
* This is OK as we only support codepoints up to 1M (U+100000)
*/
- ilen_orig = strnlen(str, 5);
+ ilen_orig = MIN(len, 5);
ilen = ilen_orig;
descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
@@ -395,9 +396,16 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
return INVALID_CODEPOINT if the next character cannot be converted
*/
_PUBLIC_ codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic,
- const char *str, size_t *size)
+ const char *str, size_t *size)
{
- return next_codepoint_handle_ext(ic, str, CH_UNIX, size);
+ /*
+ * We assume that no multi-byte character can take more than 5 bytes
+ * thus avoiding walking all the way down a long string. This is OK as
+ * Unicode codepoints only go up to (U+10ffff), which can always be
+ * encoded in 4 bytes or less.
+ */
+ return next_codepoint_handle_ext(ic, str, strnlen(str, 5), CH_UNIX,
+ size);
}
/*
@@ -459,11 +467,11 @@ _PUBLIC_ ssize_t push_codepoint_handle(struct smb_iconv_handle *ic,
return 5 - olen;
}
-_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
- size_t *size)
+_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, size_t len,
+ charset_t src_charset, size_t *size)
{
- return next_codepoint_handle_ext(get_iconv_handle(), str,
- src_charset, size);
+ return next_codepoint_handle_ext(get_iconv_handle(), str, len,
+ src_charset, size);
}
_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index d2e6cbb..2653bfc 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -210,7 +210,8 @@ _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
while (*s) {
size_t c_size;
- codepoint_t c = next_codepoint_handle_ext(ic, s, src_charset, &c_size);
+ codepoint_t c = next_codepoint_handle_ext(ic, s, strnlen(s, 5),
+ src_charset, &c_size);
s += c_size;
switch (dst_charset) {
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index e4ae650..f299269 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -112,7 +112,8 @@ _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
while (n-- && *src) {
size_t c_size;
- codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
+ codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
+ CH_UNIX, &c_size);
src += c_size;
c = toupper_m(c);
--
2.5.0
From 382a9146a88b7aac7db4c64519b3da5611c817ef Mon Sep 17 00:00:00 2001
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Date: Tue, 24 Nov 2015 13:49:09 +1300
Subject: [PATCH 6/9] CVE-2015-5330: strupper_talloc_n_handle(): properly count
characters
When a codepoint eats more than one byte we really want to know,
especially if the string is not NUL terminated.
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
---
lib/util/charset/util_unistr.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index f299269..2cc8718 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -110,11 +110,12 @@ _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
return NULL;
}
- while (n-- && *src) {
+ while (n && *src) {
size_t c_size;
codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
CH_UNIX, &c_size);
src += c_size;
+ n -= c_size;
c = toupper_m(c);
--
2.5.0
From f317c31922a9ee8ae5ee9c0895a72ee6828d2c81 Mon Sep 17 00:00:00 2001
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Date: Tue, 24 Nov 2015 13:54:09 +1300
Subject: [PATCH 7/9] CVE-2015-5330: next_codepoint_handle_ext: don't
short-circuit UTF16 low bytes
UTF16 contains zero bytes when it is encoding ASCII (for example), so we
can't assume the absense of the 0x80 bit means a one byte encoding. No
current callers use UTF16.
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
---
lib/util/charset/codepoints.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
index 542eeae..19d084f 100644
--- a/lib/util/charset/codepoints.c
+++ b/lib/util/charset/codepoints.c
@@ -331,7 +331,10 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
size_t olen;
char *outbuf;
- if ((str[0] & 0x80) == 0) {
+
+ if (((str[0] & 0x80) == 0) && (src_charset == CH_DOS ||
+ src_charset == CH_UNIX ||
+ src_charset == CH_UTF8)) {
*bytes_consumed = 1;
return (codepoint_t)str[0];
}
--
2.5.0