Blame SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch

70298e
From f0370bfa9c47575d4e47c94e443aa91983683a43 Mon Sep 17 00:00:00 2001
70298e
From: Erik Larsson <mechie@users.sourceforge.net>
70298e
Date: Tue, 12 Apr 2016 17:02:40 +0200
70298e
Subject: [PATCH 2/2] unistr.c: Unify the two defines NOREVBOM and
70298e
 ALLOW_BROKEN_SURROGATES.
70298e
70298e
In the mailing list discussion we came to the conclusion that there
70298e
doesn't seem to be any reason to keep these declarations separate since
70298e
they address the same issue, namely libntfs-3g's tolerance for bad
70298e
Unicode data in filenames and other UTF-16 strings in the file system.
70298e
So merge the two defines into a single new define, ALLOW_BROKEN_UNICODE.
70298e
---
70298e
 libntfs-3g/unistr.c | 58 +++++++++++++++++++++++------------------------------
70298e
 1 file changed, 25 insertions(+), 33 deletions(-)
70298e
70298e
diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
70298e
index 71802aa..753acc0 100644
70298e
--- a/libntfs-3g/unistr.c
70298e
+++ b/libntfs-3g/unistr.c
70298e
@@ -59,12 +59,11 @@
70298e
 #include "logging.h"
70298e
 #include "misc.h"
70298e
 
70298e
-#define NOREVBOM 0  /* JPA rejecting U+FFFE and U+FFFF, open to debate */
70298e
-
70298e
-#ifndef ALLOW_BROKEN_SURROGATES
70298e
-/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
70298e
-#define ALLOW_BROKEN_SURROGATES 1
70298e
-#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
70298e
+#ifndef ALLOW_BROKEN_UNICODE
70298e
+/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
70298e
+ * open to debate. */
70298e
+#define ALLOW_BROKEN_UNICODE 1
70298e
+#endif /* !defined(ALLOW_BROKEN_UNICODE) */
70298e
 
70298e
 /*
70298e
  * IMPORTANT
70298e
@@ -468,7 +467,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
70298e
 				surrog = FALSE;
70298e
 				count += 4;
70298e
 			} else {
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 				/* The first UTF-16 unit of a surrogate pair has
70298e
 				 * a value between 0xd800 and 0xdc00. It can be
70298e
 				 * encoded as an individual UTF-8 sequence if we
70298e
@@ -481,7 +480,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
70298e
 				continue;
70298e
 #else
70298e
 				goto fail;
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 			}
70298e
 		} else
70298e
 			if (c < 0x80)
70298e
@@ -492,15 +491,13 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
70298e
 				count += 3;
70298e
 			else if (c < 0xdc00)
70298e
 				surrog = TRUE;
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 			else if (c < 0xe000)
70298e
 				count += 3;
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
-#if NOREVBOM
70298e
-			else if ((c >= 0xe000) && (c < 0xfffe))
70298e
-#else
70298e
 			else if (c >= 0xe000)
70298e
-#endif
70298e
+#else
70298e
+			else if ((c >= 0xe000) && (c < 0xfffe))
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 				count += 3;
70298e
 			else 
70298e
 				goto fail;
70298e
@@ -510,11 +507,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
70298e
 		}
70298e
 	}
70298e
 	if (surrog) 
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 		count += 3; /* ending with a single surrogate */
70298e
 #else
70298e
 		goto fail;
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 
70298e
 	ret = count;
70298e
 out:
70298e
@@ -576,7 +573,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
70298e
 				*t++ = 0x80 + (c & 63);
70298e
 				halfpair = 0;
70298e
 			} else {
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 				/* The first UTF-16 unit of a surrogate pair has
70298e
 				 * a value between 0xd800 and 0xdc00. It can be
70298e
 				 * encoded as an individual UTF-8 sequence if we
70298e
@@ -591,7 +588,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
70298e
 				continue;
70298e
 #else
70298e
 				goto fail;
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 			}
70298e
 		} else if (c < 0x80) {
70298e
 			*t++ = c;
70298e
@@ -605,13 +602,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
70298e
 		        	*t++ = 0x80 | (c & 0x3f);
70298e
 			} else if (c < 0xdc00)
70298e
 				halfpair = c;
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 			else if (c < 0xe000) {
70298e
 				*t++ = 0xe0 | (c >> 12);
70298e
 				*t++ = 0x80 | ((c >> 6) & 0x3f);
70298e
 				*t++ = 0x80 | (c & 0x3f);
70298e
 			}
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 			else if (c >= 0xe000) {
70298e
 				*t++ = 0xe0 | (c >> 12);
70298e
 				*t++ = 0x80 | ((c >> 6) & 0x3f);
70298e
@@ -620,13 +617,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
70298e
 				goto fail;
70298e
 	        }
70298e
 	}
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 	if (halfpair) { /* ending with a single surrogate */
70298e
 		*t++ = 0xe0 | (halfpair >> 12);
70298e
 		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
70298e
 		*t++ = 0x80 | (halfpair & 0x3f);
70298e
 	}
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 	*t = '\0';
70298e
 	
70298e
 #if defined(__APPLE__) || defined(__DARWIN__)
70298e
@@ -748,21 +745,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
70298e
 			    | ((u32)(s[1] & 0x3F) << 6)
70298e
 			    | ((u32)(s[2] & 0x3F));
70298e
 			/* Check valid ranges */
70298e
-#if NOREVBOM
70298e
+#if ALLOW_BROKEN_UNICODE
70298e
 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
 			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
-			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
70298e
-				return 3;
70298e
-#else
70298e
-			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
70298e
-#if ALLOW_BROKEN_SURROGATES
70298e
-			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
70298e
-#endif /* ALLOW_BROKEN_SURROGATES */
70298e
 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
70298e
 				return 3;
70298e
-#endif
70298e
+#else
70298e
+			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
70298e
+			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
70298e
+				return 3;
70298e
+#endif /* ALLOW_BROKEN_UNICODE */
70298e
 		}
70298e
 		goto fail;
70298e
 					/* four-byte */
70298e
-- 
70298e
2.10.2
70298e