6c52e2
From 7a4129ad1075b54e902af703d2582ffb79b99c49 Mon Sep 17 00:00:00 2001
6c52e2
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Date: Tue, 24 Nov 2015 13:47:16 +1300
6c52e2
Subject: [PATCH 5/9] CVE-2015-5330: Fix handling of unicode near string
6c52e2
 endings
6c52e2
6c52e2
Until now next_codepoint_ext() and next_codepoint_handle_ext() were
6c52e2
using strnlen(str, 5) to determine how much string they should try to
6c52e2
decode. This ended up looking past the end of the string when it was not
6c52e2
NUL terminated and the final character looked like a multi-byte encoding.
6c52e2
The fix is to let the caller say how long the string can be.
6c52e2
6c52e2
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
6c52e2
6c52e2
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
6c52e2
Reviewed-by: Ralph Boehme <slow@samba.org>
6c52e2
---
6c52e2
 lib/util/charset/charset.h     |  9 +++++----
6c52e2
 lib/util/charset/codepoints.c  | 24 ++++++++++++++++--------
6c52e2
 lib/util/charset/util_str.c    |  3 ++-
6c52e2
 lib/util/charset/util_unistr.c |  3 ++-
6c52e2
 4 files changed, 25 insertions(+), 14 deletions(-)
6c52e2
6c52e2
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
6c52e2
index e4297e4..060f1cf 100644
6c52e2
--- a/lib/util/charset/charset.h
6c52e2
+++ b/lib/util/charset/charset.h
6c52e2
@@ -171,15 +171,16 @@ smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic,
6c52e2
 			    charset_t from, charset_t to);
6c52e2
 const char *charset_name(struct smb_iconv_handle *ic, charset_t ch);
6c52e2
 
6c52e2
-codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
6c52e2
-			       size_t *size);
6c52e2
+codepoint_t next_codepoint_ext(const char *str, size_t len,
6c52e2
+			       charset_t src_charset, size_t *size);
6c52e2
 codepoint_t next_codepoint(const char *str, size_t *size);
6c52e2
 ssize_t push_codepoint(char *str, codepoint_t c);
6c52e2
 
6c52e2
 /* codepoints */
6c52e2
 codepoint_t next_codepoint_handle_ext(struct smb_iconv_handle *ic,
6c52e2
-			    const char *str, charset_t src_charset,
6c52e2
-			    size_t *size);
6c52e2
+				      const char *str, size_t len,
6c52e2
+				      charset_t src_charset,
6c52e2
+				      size_t *size);
6c52e2
 codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic,
6c52e2
 			    const char *str, size_t *size);
6c52e2
 ssize_t push_codepoint_handle(struct smb_iconv_handle *ic,
6c52e2
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
6c52e2
index 0984164..542eeae 100644
6c52e2
--- a/lib/util/charset/codepoints.c
6c52e2
+++ b/lib/util/charset/codepoints.c
6c52e2
@@ -319,7 +319,8 @@ smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic,
6c52e2
  */
6c52e2
 _PUBLIC_ codepoint_t next_codepoint_handle_ext(
6c52e2
 			struct smb_iconv_handle *ic,
6c52e2
-			const char *str, charset_t src_charset,
6c52e2
+			const char *str, size_t len,
6c52e2
+			charset_t src_charset,
6c52e2
 			size_t *bytes_consumed)
6c52e2
 {
6c52e2
 	/* it cannot occupy more than 4 bytes in UTF16 format */
6c52e2
@@ -339,7 +340,7 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
6c52e2
 	 * we assume that no multi-byte character can take more than 5 bytes.
6c52e2
 	 * This is OK as we only support codepoints up to 1M (U+100000)
6c52e2
 	 */
6c52e2
-	ilen_orig = strnlen(str, 5);
6c52e2
+	ilen_orig = MIN(len, 5);
6c52e2
 	ilen = ilen_orig;
6c52e2
 
6c52e2
 	descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
6c52e2
@@ -395,9 +396,16 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
6c52e2
   return INVALID_CODEPOINT if the next character cannot be converted
6c52e2
 */
6c52e2
 _PUBLIC_ codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic,
6c52e2
-				    const char *str, size_t *size)
6c52e2
+					   const char *str, size_t *size)
6c52e2
 {
6c52e2
-	return next_codepoint_handle_ext(ic, str, CH_UNIX, size);
6c52e2
+	/*
6c52e2
+	 * We assume that no multi-byte character can take more than 5 bytes
6c52e2
+	 * thus avoiding walking all the way down a long string. This is OK as
6c52e2
+	 * Unicode codepoints only go up to (U+10ffff), which can always be
6c52e2
+	 * encoded in 4 bytes or less.
6c52e2
+	 */
6c52e2
+	return next_codepoint_handle_ext(ic, str, strnlen(str, 5), CH_UNIX,
6c52e2
+					 size);
6c52e2
 }
6c52e2
 
6c52e2
 /*
6c52e2
@@ -459,11 +467,11 @@ _PUBLIC_ ssize_t push_codepoint_handle(struct smb_iconv_handle *ic,
6c52e2
 	return 5 - olen;
6c52e2
 }
6c52e2
 
6c52e2
-_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
6c52e2
-					size_t *size)
6c52e2
+_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, size_t len,
6c52e2
+					charset_t src_charset, size_t *size)
6c52e2
 {
6c52e2
-	return next_codepoint_handle_ext(get_iconv_handle(), str,
6c52e2
-					      src_charset, size);
6c52e2
+	return next_codepoint_handle_ext(get_iconv_handle(), str, len,
6c52e2
+					 src_charset, size);
6c52e2
 }
6c52e2
 
6c52e2
 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
6c52e2
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
6c52e2
index d2e6cbb..2653bfc 100644
6c52e2
--- a/lib/util/charset/util_str.c
6c52e2
+++ b/lib/util/charset/util_str.c
6c52e2
@@ -210,7 +210,8 @@ _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
6c52e2
 
6c52e2
 	while (*s) {
6c52e2
 		size_t c_size;
6c52e2
-		codepoint_t c = next_codepoint_handle_ext(ic, s, src_charset, &c_size);
6c52e2
+		codepoint_t c = next_codepoint_handle_ext(ic, s, strnlen(s, 5),
6c52e2
+							  src_charset, &c_size);
6c52e2
 		s += c_size;
6c52e2
 
6c52e2
 		switch (dst_charset) {
6c52e2
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
6c52e2
index e4ae650..f299269 100644
6c52e2
--- a/lib/util/charset/util_unistr.c
6c52e2
+++ b/lib/util/charset/util_unistr.c
6c52e2
@@ -112,7 +112,8 @@ _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
6c52e2
 
6c52e2
 	while (n-- && *src) {
6c52e2
 		size_t c_size;
6c52e2
-		codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
6c52e2
+		codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
6c52e2
+							  CH_UNIX, &c_size);
6c52e2
 		src += c_size;
6c52e2
 
6c52e2
 		c = toupper_m(c);
6c52e2
-- 
6c52e2
2.5.0
6c52e2
6c52e2
6c52e2
From 382a9146a88b7aac7db4c64519b3da5611c817ef Mon Sep 17 00:00:00 2001
6c52e2
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Date: Tue, 24 Nov 2015 13:49:09 +1300
6c52e2
Subject: [PATCH 6/9] CVE-2015-5330: strupper_talloc_n_handle(): properly count
6c52e2
 characters
6c52e2
6c52e2
When a codepoint eats more than one byte we really want to know,
6c52e2
especially if the string is not NUL terminated.
6c52e2
6c52e2
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
6c52e2
6c52e2
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
6c52e2
Reviewed-by: Ralph Boehme <slow@samba.org>
6c52e2
---
6c52e2
 lib/util/charset/util_unistr.c | 3 ++-
6c52e2
 1 file changed, 2 insertions(+), 1 deletion(-)
6c52e2
6c52e2
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
6c52e2
index f299269..2cc8718 100644
6c52e2
--- a/lib/util/charset/util_unistr.c
6c52e2
+++ b/lib/util/charset/util_unistr.c
6c52e2
@@ -110,11 +110,12 @@ _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
6c52e2
 		return NULL;
6c52e2
 	}
6c52e2
 
6c52e2
-	while (n-- && *src) {
6c52e2
+	while (n && *src) {
6c52e2
 		size_t c_size;
6c52e2
 		codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
6c52e2
 							  CH_UNIX, &c_size);
6c52e2
 		src += c_size;
6c52e2
+		n -= c_size;
6c52e2
 
6c52e2
 		c = toupper_m(c);
6c52e2
 
6c52e2
-- 
6c52e2
2.5.0
6c52e2
6c52e2
6c52e2
From f317c31922a9ee8ae5ee9c0895a72ee6828d2c81 Mon Sep 17 00:00:00 2001
6c52e2
From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Date: Tue, 24 Nov 2015 13:54:09 +1300
6c52e2
Subject: [PATCH 7/9] CVE-2015-5330: next_codepoint_handle_ext: don't
6c52e2
 short-circuit UTF16 low bytes
6c52e2
6c52e2
UTF16 contains zero bytes when it is encoding ASCII (for example), so we
6c52e2
can't assume the absence of the 0x80 bit means a one byte encoding. No
6c52e2
current callers use UTF16.
6c52e2
6c52e2
Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
6c52e2
6c52e2
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
6c52e2
Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
6c52e2
Reviewed-by: Ralph Boehme <slow@samba.org>
6c52e2
---
6c52e2
 lib/util/charset/codepoints.c | 5 ++++-
6c52e2
 1 file changed, 4 insertions(+), 1 deletion(-)
6c52e2
6c52e2
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
6c52e2
index 542eeae..19d084f 100644
6c52e2
--- a/lib/util/charset/codepoints.c
6c52e2
+++ b/lib/util/charset/codepoints.c
6c52e2
@@ -331,7 +331,10 @@ _PUBLIC_ codepoint_t next_codepoint_handle_ext(
6c52e2
 	size_t olen;
6c52e2
 	char *outbuf;
6c52e2
 
6c52e2
-	if ((str[0] & 0x80) == 0) {
6c52e2
+
6c52e2
+	if (((str[0] & 0x80) == 0) && (src_charset == CH_DOS ||
6c52e2
+				       src_charset == CH_UNIX ||
6c52e2
+				       src_charset == CH_UTF8)) {
6c52e2
 		*bytes_consumed = 1;
6c52e2
 		return (codepoint_t)str[0];
6c52e2
 	}
6c52e2
-- 
6c52e2
2.5.0
6c52e2
6c52e2
6c52e2