From 70298e38197823fbaccf2d45d3be2ada147d9815 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 01 2017 03:32:46 +0000 Subject: import libguestfs-winsupport-7.2-2.el7 --- diff --git a/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch b/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch new file mode 100644 index 0000000..7412dc4 --- /dev/null +++ b/SOURCES/0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch @@ -0,0 +1,172 @@ +From d9c61dd60ec484909f70b7a916ada3a93af94b60 Mon Sep 17 00:00:00 2001 +From: Erik Larsson +Date: Fri, 8 Apr 2016 05:39:48 +0200 +Subject: [PATCH 1/2] unistr.c: Enable encoding broken UTF-16 into broken + UTF-8, A.K.A. WTF-8. + +Windows filenames may contain invalid UTF-16 sequences (specifically +broken surrogate pairs), which cannot be converted to UTF-8 if we do +strict conversion. + +This patch enables encoding broken UTF-16 into similarly broken UTF-8 by +encoding any surrogate character that doesn't have a match into a separate +3-byte UTF-8 sequence. + +This is "sort of" valid UTF-8, but not valid Unicode since the code +points used for surrogate pair encoding are not supposed to occur in a +valid Unicode string... but on the other hand the source UTF-16 data is +also broken, so we aren't really making things any worse. + +This format is sometimes referred to as WTF-8 (Wobbly Transformation +Format, 8-bit encoding) and is a common solution to represent broken +UTF-16 as UTF-8. + +It is a lossless round-trip conversion, i.e. converting from broken +UTF-16 to "WTF-8" and back to UTF-16 yields the same broken UTF-16 +sequence. Because of this property it enables accessing these files +by filename through ntfs-3g and the ntfsprogs (e.g. ls -la works as +expected). + +To disable this behaviour you can pass the preprocessor/compiler flag +'-DALLOW_BROKEN_SURROGATES=0' when building ntfs-3g.
+--- + libntfs-3g/unistr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 65 insertions(+), 2 deletions(-) + +diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c +index 7f278cd..71802aa 100644 +--- a/libntfs-3g/unistr.c ++++ b/libntfs-3g/unistr.c +@@ -61,6 +61,11 @@ + + #define NOREVBOM 0 /* JPA rejecting U+FFFE and U+FFFF, open to debate */ + ++#ifndef ALLOW_BROKEN_SURROGATES ++/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */ ++#define ALLOW_BROKEN_SURROGATES 1 ++#endif /* !defined(ALLOW_BROKEN_SURROGATES) */ ++ + /* + * IMPORTANT + * ========= +@@ -462,8 +467,22 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + if ((c >= 0xdc00) && (c < 0xe000)) { + surrog = FALSE; + count += 4; +- } else ++ } else { ++#if ALLOW_BROKEN_SURROGATES ++ /* The first UTF-16 unit of a surrogate pair has ++ * a value between 0xd800 and 0xdc00. It can be ++ * encoded as an individual UTF-8 sequence if we ++ * cannot combine it with the next UTF-16 unit ++ * unit as a surrogate pair. 
*/ ++ surrog = FALSE; ++ count += 3; ++ ++ --i; ++ continue; ++#else + goto fail; ++#endif /* ALLOW_BROKEN_SURROGATES */ ++ } + } else + if (c < 0x80) + count++; +@@ -473,6 +492,10 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + count += 3; + else if (c < 0xdc00) + surrog = TRUE; ++#if ALLOW_BROKEN_SURROGATES ++ else if (c < 0xe000) ++ count += 3; ++#endif /* ALLOW_BROKEN_SURROGATES */ + #if NOREVBOM + else if ((c >= 0xe000) && (c < 0xfffe)) + #else +@@ -487,7 +510,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + } + } + if (surrog) ++#if ALLOW_BROKEN_SURROGATES ++ count += 3; /* ending with a single surrogate */ ++#else + goto fail; ++#endif /* ALLOW_BROKEN_SURROGATES */ + + ret = count; + out: +@@ -548,8 +575,24 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + *t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4); + *t++ = 0x80 + (c & 63); + halfpair = 0; +- } else ++ } else { ++#if ALLOW_BROKEN_SURROGATES ++ /* The first UTF-16 unit of a surrogate pair has ++ * a value between 0xd800 and 0xdc00. It can be ++ * encoded as an individual UTF-8 sequence if we ++ * cannot combine it with the next UTF-16 unit ++ * unit as a surrogate pair. 
*/ ++ *t++ = 0xe0 | (halfpair >> 12); ++ *t++ = 0x80 | ((halfpair >> 6) & 0x3f); ++ *t++ = 0x80 | (halfpair & 0x3f); ++ halfpair = 0; ++ ++ --i; ++ continue; ++#else + goto fail; ++#endif /* ALLOW_BROKEN_SURROGATES */ ++ } + } else if (c < 0x80) { + *t++ = c; + } else { +@@ -562,6 +605,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + *t++ = 0x80 | (c & 0x3f); + } else if (c < 0xdc00) + halfpair = c; ++#if ALLOW_BROKEN_SURROGATES ++ else if (c < 0xe000) { ++ *t++ = 0xe0 | (c >> 12); ++ *t++ = 0x80 | ((c >> 6) & 0x3f); ++ *t++ = 0x80 | (c & 0x3f); ++ } ++#endif /* ALLOW_BROKEN_SURROGATES */ + else if (c >= 0xe000) { + *t++ = 0xe0 | (c >> 12); + *t++ = 0x80 | ((c >> 6) & 0x3f); +@@ -570,6 +620,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + goto fail; + } + } ++#if ALLOW_BROKEN_SURROGATES ++ if (halfpair) { /* ending with a single surrogate */ ++ *t++ = 0xe0 | (halfpair >> 12); ++ *t++ = 0x80 | ((halfpair >> 6) & 0x3f); ++ *t++ = 0x80 | (halfpair & 0x3f); ++ } ++#endif /* ALLOW_BROKEN_SURROGATES */ + *t = '\0'; + + #if defined(__APPLE__) || defined(__DARWIN__) +@@ -693,10 +750,16 @@ static int utf8_to_unicode(u32 *wc, const char *s) + /* Check valid ranges */ + #if NOREVBOM + if (((*wc >= 0x800) && (*wc <= 0xD7FF)) ++#if ALLOW_BROKEN_SURROGATES ++ || ((*wc >= 0xD800) && (*wc <= 0xDFFF)) ++#endif /* ALLOW_BROKEN_SURROGATES */ + || ((*wc >= 0xe000) && (*wc <= 0xFFFD))) + return 3; + #else + if (((*wc >= 0x800) && (*wc <= 0xD7FF)) ++#if ALLOW_BROKEN_SURROGATES ++ || ((*wc >= 0xD800) && (*wc <= 0xDFFF)) ++#endif /* ALLOW_BROKEN_SURROGATES */ + || ((*wc >= 0xe000) && (*wc <= 0xFFFF))) + return 3; + #endif +-- +2.10.2 + diff --git a/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch b/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch new file mode 100644 index 0000000..3467837 --- /dev/null +++ b/SOURCES/0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch @@ 
-0,0 +1,170 @@ +From f0370bfa9c47575d4e47c94e443aa91983683a43 Mon Sep 17 00:00:00 2001 +From: Erik Larsson +Date: Tue, 12 Apr 2016 17:02:40 +0200 +Subject: [PATCH 2/2] unistr.c: Unify the two defines NOREVBOM and + ALLOW_BROKEN_SURROGATES. + +In the mailing list discussion we came to the conclusion that there +doesn't seem to be any reason to keep these declarations separate since +they address the same issue, namely libntfs-3g's tolerance for bad +Unicode data in filenames and other UTF-16 strings in the file system, +so merge the two defines into the new define ALLOW_BROKEN_UNICODE. +--- + libntfs-3g/unistr.c | 58 +++++++++++++++++++++++------------------------------ + 1 file changed, 25 insertions(+), 33 deletions(-) + +diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c +index 71802aa..753acc0 100644 +--- a/libntfs-3g/unistr.c ++++ b/libntfs-3g/unistr.c +@@ -59,12 +59,11 @@ + #include "logging.h" + #include "misc.h" + +-#define NOREVBOM 0 /* JPA rejecting U+FFFE and U+FFFF, open to debate */ +- +-#ifndef ALLOW_BROKEN_SURROGATES +-/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */ +-#define ALLOW_BROKEN_SURROGATES 1 +-#endif /* !defined(ALLOW_BROKEN_SURROGATES) */ ++#ifndef ALLOW_BROKEN_UNICODE ++/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default, ++ * open to debate. */ ++#define ALLOW_BROKEN_UNICODE 1 ++#endif /* !defined(ALLOW_BROKEN_UNICODE) */ + + /* + * IMPORTANT +@@ -468,7 +467,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + surrog = FALSE; + count += 4; + } else { +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + /* The first UTF-16 unit of a surrogate pair has + * a value between 0xd800 and 0xdc00. 
It can be + * encoded as an individual UTF-8 sequence if we +@@ -481,7 +480,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + continue; + #else + goto fail; +-#endif /* ALLOW_BROKEN_SURROGATES */ ++#endif /* ALLOW_BROKEN_UNICODE */ + } + } else + if (c < 0x80) +@@ -492,15 +491,13 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + count += 3; + else if (c < 0xdc00) + surrog = TRUE; +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + else if (c < 0xe000) + count += 3; +-#endif /* ALLOW_BROKEN_SURROGATES */ +-#if NOREVBOM +- else if ((c >= 0xe000) && (c < 0xfffe)) +-#else + else if (c >= 0xe000) +-#endif ++#else ++ else if ((c >= 0xe000) && (c < 0xfffe)) ++#endif /* ALLOW_BROKEN_UNICODE */ + count += 3; + else + goto fail; +@@ -510,11 +507,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l + } + } + if (surrog) +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + count += 3; /* ending with a single surrogate */ + #else + goto fail; +-#endif /* ALLOW_BROKEN_SURROGATES */ ++#endif /* ALLOW_BROKEN_UNICODE */ + + ret = count; + out: +@@ -576,7 +573,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + *t++ = 0x80 + (c & 63); + halfpair = 0; + } else { +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + /* The first UTF-16 unit of a surrogate pair has + * a value between 0xd800 and 0xdc00. 
It can be + * encoded as an individual UTF-8 sequence if we +@@ -591,7 +588,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + continue; + #else + goto fail; +-#endif /* ALLOW_BROKEN_SURROGATES */ ++#endif /* ALLOW_BROKEN_UNICODE */ + } + } else if (c < 0x80) { + *t++ = c; +@@ -605,13 +602,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + *t++ = 0x80 | (c & 0x3f); + } else if (c < 0xdc00) + halfpair = c; +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + else if (c < 0xe000) { + *t++ = 0xe0 | (c >> 12); + *t++ = 0x80 | ((c >> 6) & 0x3f); + *t++ = 0x80 | (c & 0x3f); + } +-#endif /* ALLOW_BROKEN_SURROGATES */ ++#endif /* ALLOW_BROKEN_UNICODE */ + else if (c >= 0xe000) { + *t++ = 0xe0 | (c >> 12); + *t++ = 0x80 | ((c >> 6) & 0x3f); +@@ -620,13 +617,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, + goto fail; + } + } +-#if ALLOW_BROKEN_SURROGATES ++#if ALLOW_BROKEN_UNICODE + if (halfpair) { /* ending with a single surrogate */ + *t++ = 0xe0 | (halfpair >> 12); + *t++ = 0x80 | ((halfpair >> 6) & 0x3f); + *t++ = 0x80 | (halfpair & 0x3f); + } +-#endif /* ALLOW_BROKEN_SURROGATES */ ++#endif /* ALLOW_BROKEN_UNICODE */ + *t = '\0'; + + #if defined(__APPLE__) || defined(__DARWIN__) +@@ -748,21 +745,16 @@ static int utf8_to_unicode(u32 *wc, const char *s) + | ((u32)(s[1] & 0x3F) << 6) + | ((u32)(s[2] & 0x3F)); + /* Check valid ranges */ +-#if NOREVBOM ++#if ALLOW_BROKEN_UNICODE + if (((*wc >= 0x800) && (*wc <= 0xD7FF)) +-#if ALLOW_BROKEN_SURROGATES + || ((*wc >= 0xD800) && (*wc <= 0xDFFF)) +-#endif /* ALLOW_BROKEN_SURROGATES */ +- || ((*wc >= 0xe000) && (*wc <= 0xFFFD))) +- return 3; +-#else +- if (((*wc >= 0x800) && (*wc <= 0xD7FF)) +-#if ALLOW_BROKEN_SURROGATES +- || ((*wc >= 0xD800) && (*wc <= 0xDFFF)) +-#endif /* ALLOW_BROKEN_SURROGATES */ + || ((*wc >= 0xe000) && (*wc <= 0xFFFF))) + return 3; +-#endif ++#else ++ if (((*wc >= 0x800) && (*wc <= 0xD7FF)) ++ || ((*wc >= 0xe000) && (*wc 
<= 0xFFFD))) ++ return 3; ++#endif /* ALLOW_BROKEN_UNICODE */ + } + goto fail; + /* four-byte */ +-- +2.10.2 + diff --git a/SPECS/libguestfs-winsupport.spec b/SPECS/libguestfs-winsupport.spec index 4f53382..3b54c1c 100644 --- a/SPECS/libguestfs-winsupport.spec +++ b/SPECS/libguestfs-winsupport.spec @@ -5,7 +5,7 @@ Name: libguestfs-winsupport Version: 7.2 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Add support for Windows guests to virt-v2v and virt-p2v URL: http://www.ntfs-3g.org/ @@ -21,6 +21,9 @@ Source0: http://tuxera.com/opensource/ntfs-3g_ntfsprogs-%{ntfs_version}.t Patch0: ntfs-3g_ntfsprogs-2011.10.9-RC-ntfsck-unsupported-return-0.patch Patch1: CVE-2015-3202.patch +# https://bugzilla.redhat.com/show_bug.cgi?id=1301593#c8 +Patch2: 0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch +Patch3: 0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch BuildRequires: libtool, libattr-devel BuildRequires: libconfig-devel, libgcrypt-devel, gnutls-devel, libuuid-devel @@ -35,6 +38,8 @@ virt-v2v and virt-p2v programs. %setup -q -n ntfs-3g_ntfsprogs-%{ntfs_version} %patch0 -p1 -b .unsupported %patch1 -p1 -b .cve +%patch2 -p1 +%patch3 -p1 %build @@ -96,6 +101,11 @@ popd %changelog +* Wed Feb 22 2017 Richard W.M. Jones - 7.2-2 +- Fix for handling guest filenames with invalid or incomplete + multibyte or wide characters + resolves: rhbz#1301593 + * Tue Jul 07 2015 Richard W.M. Jones - 7.2-1 - Rebase and rebuild for RHEL 7.2 resolves: rhbz#1240278