Blame SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch

eceae4
From f0370bfa9c47575d4e47c94e443aa91983683a43 Mon Sep 17 00:00:00 2001
eceae4
From: Erik Larsson <mechie@users.sourceforge.net>
eceae4
Date: Tue, 12 Apr 2016 17:02:40 +0200
eceae4
Subject: [PATCH 2/2] unistr.c: Unify the two defines NOREVBOM and
eceae4
 ALLOW_BROKEN_SURROGATES.
eceae4
eceae4
In the mailing list discussion we came to the conclusion that there
eceae4
doesn't seem to be any reason to keep these declarations separate since
eceae4
they address the same issue, namely libntfs-3g's tolerance for bad
eceae4
Unicode data in filenames and other UTF-16 strings in the file system,
eceae4
so merge the two defines into the new define ALLOW_BROKEN_UNICODE.
eceae4
---
eceae4
 libntfs-3g/unistr.c | 58 +++++++++++++++++++++++------------------------------
eceae4
 1 file changed, 25 insertions(+), 33 deletions(-)
eceae4
eceae4
diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
eceae4
index 71802aa..753acc0 100644
eceae4
--- a/libntfs-3g/unistr.c
eceae4
+++ b/libntfs-3g/unistr.c
eceae4
@@ -59,12 +59,11 @@
eceae4
 #include "logging.h"
eceae4
 #include "misc.h"
eceae4
 
eceae4
-#define NOREVBOM 0  /* JPA rejecting U+FFFE and U+FFFF, open to debate */
eceae4
-
eceae4
-#ifndef ALLOW_BROKEN_SURROGATES
eceae4
-/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
eceae4
-#define ALLOW_BROKEN_SURROGATES 1
eceae4
-#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
eceae4
+#ifndef ALLOW_BROKEN_UNICODE
eceae4
+/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
eceae4
+ * open to debate. */
eceae4
+#define ALLOW_BROKEN_UNICODE 1
eceae4
+#endif /* !defined(ALLOW_BROKEN_UNICODE) */
eceae4
 
eceae4
 /*
eceae4
  * IMPORTANT
eceae4
@@ -468,7 +467,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
eceae4
 				surrog = FALSE;
eceae4
 				count += 4;
eceae4
 			} else {
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 				/* The first UTF-16 unit of a surrogate pair has
eceae4
 				 * a value between 0xd800 and 0xdc00. It can be
eceae4
 				 * encoded as an individual UTF-8 sequence if we
eceae4
@@ -481,7 +480,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
eceae4
 				continue;
eceae4
 #else
eceae4
 				goto fail;
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 			}
eceae4
 		} else
eceae4
 			if (c < 0x80)
eceae4
@@ -492,15 +491,13 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
eceae4
 				count += 3;
eceae4
 			else if (c < 0xdc00)
eceae4
 				surrog = TRUE;
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 			else if (c < 0xe000)
eceae4
 				count += 3;
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
-#if NOREVBOM
eceae4
-			else if ((c >= 0xe000) && (c < 0xfffe))
eceae4
-#else
eceae4
 			else if (c >= 0xe000)
eceae4
-#endif
eceae4
+#else
eceae4
+			else if ((c >= 0xe000) && (c < 0xfffe))
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 				count += 3;
eceae4
 			else 
eceae4
 				goto fail;
eceae4
@@ -510,11 +507,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
eceae4
 		}
eceae4
 	}
eceae4
 	if (surrog) 
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 		count += 3; /* ending with a single surrogate */
eceae4
 #else
eceae4
 		goto fail;
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 
eceae4
 	ret = count;
eceae4
 out:
eceae4
@@ -576,7 +573,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
eceae4
 				*t++ = 0x80 + (c & 63);
eceae4
 				halfpair = 0;
eceae4
 			} else {
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 				/* The first UTF-16 unit of a surrogate pair has
eceae4
 				 * a value between 0xd800 and 0xdc00. It can be
eceae4
 				 * encoded as an individual UTF-8 sequence if we
eceae4
@@ -591,7 +588,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
eceae4
 				continue;
eceae4
 #else
eceae4
 				goto fail;
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 			}
eceae4
 		} else if (c < 0x80) {
eceae4
 			*t++ = c;
eceae4
@@ -605,13 +602,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
eceae4
 		        	*t++ = 0x80 | (c & 0x3f);
eceae4
 			} else if (c < 0xdc00)
eceae4
 				halfpair = c;
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 			else if (c < 0xe000) {
eceae4
 				*t++ = 0xe0 | (c >> 12);
eceae4
 				*t++ = 0x80 | ((c >> 6) & 0x3f);
eceae4
 				*t++ = 0x80 | (c & 0x3f);
eceae4
 			}
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 			else if (c >= 0xe000) {
eceae4
 				*t++ = 0xe0 | (c >> 12);
eceae4
 				*t++ = 0x80 | ((c >> 6) & 0x3f);
eceae4
@@ -620,13 +617,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
eceae4
 				goto fail;
eceae4
 	        }
eceae4
 	}
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 	if (halfpair) { /* ending with a single surrogate */
eceae4
 		*t++ = 0xe0 | (halfpair >> 12);
eceae4
 		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
eceae4
 		*t++ = 0x80 | (halfpair & 0x3f);
eceae4
 	}
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 	*t = '\0';
eceae4
 	
eceae4
 #if defined(__APPLE__) || defined(__DARWIN__)
eceae4
@@ -748,21 +745,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
eceae4
 			    | ((u32)(s[1] & 0x3F) << 6)
eceae4
 			    | ((u32)(s[2] & 0x3F));
eceae4
 			/* Check valid ranges */
eceae4
-#if NOREVBOM
eceae4
+#if ALLOW_BROKEN_UNICODE
eceae4
 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
 			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
-			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
eceae4
-				return 3;
eceae4
-#else
eceae4
-			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
eceae4
-#if ALLOW_BROKEN_SURROGATES
eceae4
-			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
eceae4
-#endif /* ALLOW_BROKEN_SURROGATES */
eceae4
 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
eceae4
 				return 3;
eceae4
-#endif
eceae4
+#else
eceae4
+			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
eceae4
+			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
eceae4
+				return 3;
eceae4
+#endif /* ALLOW_BROKEN_UNICODE */
eceae4
 		}
eceae4
 		goto fail;
eceae4
 					/* four-byte */
eceae4
-- 
eceae4
2.10.2
eceae4