From 70298e38197823fbaccf2d45d3be2ada147d9815 Mon Sep 17 00:00:00 2001
From: CentOS Sources <bugs@centos.org>
Date: Aug 01 2017 03:32:46 +0000
Subject: import libguestfs-winsupport-7.2-2.el7


---

diff --git a/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch b/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
new file mode 100644
index 0000000..7412dc4
--- /dev/null
+++ b/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
@@ -0,0 +1,172 @@
+From d9c61dd60ec484909f70b7a916ada3a93af94b60 Mon Sep 17 00:00:00 2001
+From: Erik Larsson <mechie@users.sourceforge.net>
+Date: Fri, 8 Apr 2016 05:39:48 +0200
+Subject: [PATCH 1/2] unistr.c: Enable encoding broken UTF-16 into broken
+ UTF-8, A.K.A. WTF-8.
+
+Windows filenames may contain invalid UTF-16 sequences (specifically
+broken surrogate pairs), which cannot be converted to UTF-8 if we do
+strict conversion.
+
+This patch enables encoding broken UTF-16 into similarly broken UTF-8 by
+encoding any surrogate character that doesn't have a match into a separate
+3-byte UTF-8 sequence.
+
+This is "sort of" valid UTF-8, but not valid Unicode since the code
+points used for surrogate pair encoding are not supposed to occur in a
+valid Unicode string... but on the other hand the source UTF-16 data is
+also broken, so we aren't really making things any worse.
+
+This format is sometimes referred to as WTF-8 (Wobbly Transformation
+Format, 8-bit encoding) and is a common solution to represent broken
+UTF-16 as UTF-8.
+
+It is a lossless round-trip conversion, i.e. converting from broken
+UTF-16 to "WTF-8" and back to UTF-16 yields the same broken UTF-16
+sequence. Because of this property it enables accessing these files
+by filename through ntfs-3g and the ntfsprogs (e.g. ls -la works as
+expected).
+
+To disable this behaviour you can pass the preprocessor/compiler flag
+'-DALLOW_BROKEN_SURROGATES=0' when building ntfs-3g.
+---
+ libntfs-3g/unistr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 65 insertions(+), 2 deletions(-)
+
+diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
+index 7f278cd..71802aa 100644
+--- a/libntfs-3g/unistr.c
++++ b/libntfs-3g/unistr.c
+@@ -61,6 +61,11 @@
+ 
+ #define NOREVBOM 0  /* JPA rejecting U+FFFE and U+FFFF, open to debate */
+ 
++#ifndef ALLOW_BROKEN_SURROGATES
++/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
++#define ALLOW_BROKEN_SURROGATES 1
++#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
++
+ /*
+  * IMPORTANT
+  * =========
+@@ -462,8 +467,22 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 			if ((c >= 0xdc00) && (c < 0xe000)) {
+ 				surrog = FALSE;
+ 				count += 4;
+-			} else 
++			} else {
++#if ALLOW_BROKEN_SURROGATES
++				/* The first UTF-16 unit of a surrogate pair has
++				 * a value between 0xd800 and 0xdc00. It can be
++				 * encoded as an individual UTF-8 sequence if we
++				 * cannot combine it with the next UTF-16 unit
++				 * as a surrogate pair. */
++				surrog = FALSE;
++				count += 3;
++
++				--i;
++				continue;
++#else
+ 				goto fail;
++#endif /* ALLOW_BROKEN_SURROGATES */
++			}
+ 		} else
+ 			if (c < 0x80)
+ 				count++;
+@@ -473,6 +492,10 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 				count += 3;
+ 			else if (c < 0xdc00)
+ 				surrog = TRUE;
++#if ALLOW_BROKEN_SURROGATES
++			else if (c < 0xe000)
++				count += 3;
++#endif /* ALLOW_BROKEN_SURROGATES */
+ #if NOREVBOM
+ 			else if ((c >= 0xe000) && (c < 0xfffe))
+ #else
+@@ -487,7 +510,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 		}
+ 	}
+ 	if (surrog) 
++#if ALLOW_BROKEN_SURROGATES
++		count += 3; /* ending with a single surrogate */
++#else
+ 		goto fail;
++#endif /* ALLOW_BROKEN_SURROGATES */
+ 
+ 	ret = count;
+ out:
+@@ -548,8 +575,24 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 				*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
+ 				*t++ = 0x80 + (c & 63);
+ 				halfpair = 0;
+-			} else 
++			} else {
++#if ALLOW_BROKEN_SURROGATES
++				/* The first UTF-16 unit of a surrogate pair has
++				 * a value between 0xd800 and 0xdc00. It can be
++				 * encoded as an individual UTF-8 sequence if we
++				 * cannot combine it with the next UTF-16 unit
++				 * as a surrogate pair. */
++				*t++ = 0xe0 | (halfpair >> 12);
++				*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
++				*t++ = 0x80 | (halfpair & 0x3f);
++				halfpair = 0;
++
++				--i;
++				continue;
++#else
+ 				goto fail;
++#endif /* ALLOW_BROKEN_SURROGATES */
++			}
+ 		} else if (c < 0x80) {
+ 			*t++ = c;
+ 	    	} else {
+@@ -562,6 +605,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 		        	*t++ = 0x80 | (c & 0x3f);
+ 			} else if (c < 0xdc00)
+ 				halfpair = c;
++#if ALLOW_BROKEN_SURROGATES
++			else if (c < 0xe000) {
++				*t++ = 0xe0 | (c >> 12);
++				*t++ = 0x80 | ((c >> 6) & 0x3f);
++				*t++ = 0x80 | (c & 0x3f);
++			}
++#endif /* ALLOW_BROKEN_SURROGATES */
+ 			else if (c >= 0xe000) {
+ 				*t++ = 0xe0 | (c >> 12);
+ 				*t++ = 0x80 | ((c >> 6) & 0x3f);
+@@ -570,6 +620,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 				goto fail;
+ 	        }
+ 	}
++#if ALLOW_BROKEN_SURROGATES
++	if (halfpair) { /* ending with a single surrogate */
++		*t++ = 0xe0 | (halfpair >> 12);
++		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
++		*t++ = 0x80 | (halfpair & 0x3f);
++	}
++#endif /* ALLOW_BROKEN_SURROGATES */
+ 	*t = '\0';
+ 	
+ #if defined(__APPLE__) || defined(__DARWIN__)
+@@ -693,10 +750,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
+ 			/* Check valid ranges */
+ #if NOREVBOM
+ 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
++#if ALLOW_BROKEN_SURROGATES
++			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
++#endif /* ALLOW_BROKEN_SURROGATES */
+ 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
+ 				return 3;
+ #else
+ 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
++#if ALLOW_BROKEN_SURROGATES
++			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
++#endif /* ALLOW_BROKEN_SURROGATES */
+ 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
+ 				return 3;
+ #endif
+-- 
+2.10.2
+
diff --git a/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch b/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
new file mode 100644
index 0000000..3467837
--- /dev/null
+++ b/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
@@ -0,0 +1,170 @@
+From f0370bfa9c47575d4e47c94e443aa91983683a43 Mon Sep 17 00:00:00 2001
+From: Erik Larsson <mechie@users.sourceforge.net>
+Date: Tue, 12 Apr 2016 17:02:40 +0200
+Subject: [PATCH 2/2] unistr.c: Unify the two defines NOREVBOM and
+ ALLOW_BROKEN_SURROGATES.
+
+In the mailing list discussion we came to the conclusion that there
+doesn't seem to be any reason to keep these declarations separate since
+they address the same issue, namely libntfs-3g's tolerance for bad
+Unicode data in filenames and other UTF-16 strings in the file system,
+so merge the two defines into the new define ALLOW_BROKEN_UNICODE.
+---
+ libntfs-3g/unistr.c | 58 +++++++++++++++++++++++------------------------------
+ 1 file changed, 25 insertions(+), 33 deletions(-)
+
+diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
+index 71802aa..753acc0 100644
+--- a/libntfs-3g/unistr.c
++++ b/libntfs-3g/unistr.c
+@@ -59,12 +59,11 @@
+ #include "logging.h"
+ #include "misc.h"
+ 
+-#define NOREVBOM 0  /* JPA rejecting U+FFFE and U+FFFF, open to debate */
+-
+-#ifndef ALLOW_BROKEN_SURROGATES
+-/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
+-#define ALLOW_BROKEN_SURROGATES 1
+-#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
++#ifndef ALLOW_BROKEN_UNICODE
++/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
++ * open to debate. */
++#define ALLOW_BROKEN_UNICODE 1
++#endif /* !defined(ALLOW_BROKEN_UNICODE) */
+ 
+ /*
+  * IMPORTANT
+@@ -468,7 +467,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 				surrog = FALSE;
+ 				count += 4;
+ 			} else {
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 				/* The first UTF-16 unit of a surrogate pair has
+ 				 * a value between 0xd800 and 0xdc00. It can be
+ 				 * encoded as an individual UTF-8 sequence if we
+@@ -481,7 +480,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 				continue;
+ #else
+ 				goto fail;
+-#endif /* ALLOW_BROKEN_SURROGATES */
++#endif /* ALLOW_BROKEN_UNICODE */
+ 			}
+ 		} else
+ 			if (c < 0x80)
+@@ -492,15 +491,13 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 				count += 3;
+ 			else if (c < 0xdc00)
+ 				surrog = TRUE;
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 			else if (c < 0xe000)
+ 				count += 3;
+-#endif /* ALLOW_BROKEN_SURROGATES */
+-#if NOREVBOM
+-			else if ((c >= 0xe000) && (c < 0xfffe))
+-#else
+ 			else if (c >= 0xe000)
+-#endif
++#else
++			else if ((c >= 0xe000) && (c < 0xfffe))
++#endif /* ALLOW_BROKEN_UNICODE */
+ 				count += 3;
+ 			else 
+ 				goto fail;
+@@ -510,11 +507,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
+ 		}
+ 	}
+ 	if (surrog) 
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 		count += 3; /* ending with a single surrogate */
+ #else
+ 		goto fail;
+-#endif /* ALLOW_BROKEN_SURROGATES */
++#endif /* ALLOW_BROKEN_UNICODE */
+ 
+ 	ret = count;
+ out:
+@@ -576,7 +573,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 				*t++ = 0x80 + (c & 63);
+ 				halfpair = 0;
+ 			} else {
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 				/* The first UTF-16 unit of a surrogate pair has
+ 				 * a value between 0xd800 and 0xdc00. It can be
+ 				 * encoded as an individual UTF-8 sequence if we
+@@ -591,7 +588,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 				continue;
+ #else
+ 				goto fail;
+-#endif /* ALLOW_BROKEN_SURROGATES */
++#endif /* ALLOW_BROKEN_UNICODE */
+ 			}
+ 		} else if (c < 0x80) {
+ 			*t++ = c;
+@@ -605,13 +602,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 		        	*t++ = 0x80 | (c & 0x3f);
+ 			} else if (c < 0xdc00)
+ 				halfpair = c;
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 			else if (c < 0xe000) {
+ 				*t++ = 0xe0 | (c >> 12);
+ 				*t++ = 0x80 | ((c >> 6) & 0x3f);
+ 				*t++ = 0x80 | (c & 0x3f);
+ 			}
+-#endif /* ALLOW_BROKEN_SURROGATES */
++#endif /* ALLOW_BROKEN_UNICODE */
+ 			else if (c >= 0xe000) {
+ 				*t++ = 0xe0 | (c >> 12);
+ 				*t++ = 0x80 | ((c >> 6) & 0x3f);
+@@ -620,13 +617,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
+ 				goto fail;
+ 	        }
+ 	}
+-#if ALLOW_BROKEN_SURROGATES
++#if ALLOW_BROKEN_UNICODE
+ 	if (halfpair) { /* ending with a single surrogate */
+ 		*t++ = 0xe0 | (halfpair >> 12);
+ 		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
+ 		*t++ = 0x80 | (halfpair & 0x3f);
+ 	}
+-#endif /* ALLOW_BROKEN_SURROGATES */
++#endif /* ALLOW_BROKEN_UNICODE */
+ 	*t = '\0';
+ 	
+ #if defined(__APPLE__) || defined(__DARWIN__)
+@@ -748,21 +745,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
+ 			    | ((u32)(s[1] & 0x3F) << 6)
+ 			    | ((u32)(s[2] & 0x3F));
+ 			/* Check valid ranges */
+-#if NOREVBOM
++#if ALLOW_BROKEN_UNICODE
+ 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
+-#if ALLOW_BROKEN_SURROGATES
+ 			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
+-#endif /* ALLOW_BROKEN_SURROGATES */
+-			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
+-				return 3;
+-#else
+-			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
+-#if ALLOW_BROKEN_SURROGATES
+-			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
+-#endif /* ALLOW_BROKEN_SURROGATES */
+ 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
+ 				return 3;
+-#endif
++#else
++			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
++			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
++				return 3;
++#endif /* ALLOW_BROKEN_UNICODE */
+ 		}
+ 		goto fail;
+ 					/* four-byte */
+-- 
+2.10.2
+
diff --git a/SPECS/libguestfs-winsupport.spec b/SPECS/libguestfs-winsupport.spec
index 4f53382..3b54c1c 100644
--- a/SPECS/libguestfs-winsupport.spec
+++ b/SPECS/libguestfs-winsupport.spec
@@ -5,7 +5,7 @@
 
 Name:           libguestfs-winsupport
 Version:        7.2
-Release:        1%{?dist}
+Release:        2%{?dist}
 Summary:        Add support for Windows guests to virt-v2v and virt-p2v
 
 URL:            http://www.ntfs-3g.org/
@@ -21,6 +21,9 @@ Source0:        http://tuxera.com/opensource/ntfs-3g_ntfsprogs-%{ntfs_version}.t
 
 Patch0:         ntfs-3g_ntfsprogs-2011.10.9-RC-ntfsck-unsupported-return-0.patch
 Patch1:         CVE-2015-3202.patch
+# https://bugzilla.redhat.com/show_bug.cgi?id=1301593#c8
+Patch2:         0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
+Patch3:         0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
 
 BuildRequires:  libtool, libattr-devel
 BuildRequires:  libconfig-devel, libgcrypt-devel, gnutls-devel, libuuid-devel
@@ -35,6 +38,8 @@ virt-v2v and virt-p2v programs.
 %setup -q -n ntfs-3g_ntfsprogs-%{ntfs_version}
 %patch0 -p1 -b .unsupported
 %patch1 -p1 -b .cve
+%patch2 -p1
+%patch3 -p1
 
 
 %build
@@ -96,6 +101,11 @@ popd
 
 
 %changelog
+* Wed Feb 22 2017 Richard W.M. Jones <rjones@redhat.com> - 7.2-2
+- Fix for handling guest filenames with invalid or incomplete
+  multibyte or wide characters
+  resolves: rhbz#1301593
+
 * Tue Jul 07 2015 Richard W.M. Jones <rjones@redhat.com> - 7.2-1
 - Rebase and rebuild for RHEL 7.2
   resolves: rhbz#1240278