Tree - rpms/expat - CentOS Git server

rpms / expat

Blame SOURCES/expat-2.1.0-Protect-against-malicious-namespace-declarations.patch

Blob History Raw

		4cc7ea	`commit f9ba0addb8e60cb72f8a8524fb4379d741cd4e7d`
		4cc7ea	`Author: Tomas Korbar <tkorbar@redhat.com>`
		4cc7ea	`Date: Mon Mar 21 12:42:50 2022 +0100`
		4cc7ea
		4cc7ea	`Protect against malicious namespace declarations`
		4cc7ea
		4cc7ea	`diff --git a/lib/xmlparse.c b/lib/xmlparse.c`
		4cc7ea	`index 87d1a98..4bfb860 100644`
		4cc7ea	`--- a/lib/xmlparse.c`
		4cc7ea	`+++ b/lib/xmlparse.c`
		4cc7ea	`@@ -679,8 +679,7 @@ XML_ParserCreate(const XML_Char *encodingName)`
		4cc7ea	`XML_Parser XMLCALL`
		4cc7ea	`XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)`
		4cc7ea	`{`
		4cc7ea	`- XML_Char tmp[2];`
		4cc7ea	`- *tmp = nsSep;`
		4cc7ea	`+ XML_Char tmp[2] = {nsSep, 0};`
		4cc7ea	`return XML_ParserCreate_MM(encodingName, NULL, tmp);`
		4cc7ea	`}`
		4cc7ea
		4cc7ea	`@@ -1044,8 +1043,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser,`
		4cc7ea	`would be otherwise.`
		4cc7ea	`*/`
		4cc7ea	`if (ns) {`
		4cc7ea	`- XML_Char tmp[2];`
		4cc7ea	`- *tmp = namespaceSeparator;`
		4cc7ea	`+ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};`
		4cc7ea	`parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);`
		4cc7ea	`}`
		4cc7ea	`else {`
		4cc7ea	`@@ -3173,6 +3171,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc,`
		4cc7ea	`return XML_ERROR_NONE;`
		4cc7ea	`}`
		4cc7ea
		4cc7ea	`+static XML_Bool`
		4cc7ea	`+is_rfc3986_uri_char(XML_Char candidate) {`
		4cc7ea	`+ // For the RFC 3986 ANBF grammar see`
		4cc7ea	`+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A`
		4cc7ea	`+`
		4cc7ea	`+ switch (candidate) {`
		4cc7ea	`+ // From rule "ALPHA" (uppercase half)`
		4cc7ea	`+ case 'A':`
		4cc7ea	`+ case 'B':`
		4cc7ea	`+ case 'C':`
		4cc7ea	`+ case 'D':`
		4cc7ea	`+ case 'E':`
		4cc7ea	`+ case 'F':`
		4cc7ea	`+ case 'G':`
		4cc7ea	`+ case 'H':`
		4cc7ea	`+ case 'I':`
		4cc7ea	`+ case 'J':`
		4cc7ea	`+ case 'K':`
		4cc7ea	`+ case 'L':`
		4cc7ea	`+ case 'M':`
		4cc7ea	`+ case 'N':`
		4cc7ea	`+ case 'O':`
		4cc7ea	`+ case 'P':`
		4cc7ea	`+ case 'Q':`
		4cc7ea	`+ case 'R':`
		4cc7ea	`+ case 'S':`
		4cc7ea	`+ case 'T':`
		4cc7ea	`+ case 'U':`
		4cc7ea	`+ case 'V':`
		4cc7ea	`+ case 'W':`
		4cc7ea	`+ case 'X':`
		4cc7ea	`+ case 'Y':`
		4cc7ea	`+ case 'Z':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "ALPHA" (lowercase half)`
		4cc7ea	`+ case 'a':`
		4cc7ea	`+ case 'b':`
		4cc7ea	`+ case 'c':`
		4cc7ea	`+ case 'd':`
		4cc7ea	`+ case 'e':`
		4cc7ea	`+ case 'f':`
		4cc7ea	`+ case 'g':`
		4cc7ea	`+ case 'h':`
		4cc7ea	`+ case 'i':`
		4cc7ea	`+ case 'j':`
		4cc7ea	`+ case 'k':`
		4cc7ea	`+ case 'l':`
		4cc7ea	`+ case 'm':`
		4cc7ea	`+ case 'n':`
		4cc7ea	`+ case 'o':`
		4cc7ea	`+ case 'p':`
		4cc7ea	`+ case 'q':`
		4cc7ea	`+ case 'r':`
		4cc7ea	`+ case 's':`
		4cc7ea	`+ case 't':`
		4cc7ea	`+ case 'u':`
		4cc7ea	`+ case 'v':`
		4cc7ea	`+ case 'w':`
		4cc7ea	`+ case 'x':`
		4cc7ea	`+ case 'y':`
		4cc7ea	`+ case 'z':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "DIGIT"`
		4cc7ea	`+ case '0':`
		4cc7ea	`+ case '1':`
		4cc7ea	`+ case '2':`
		4cc7ea	`+ case '3':`
		4cc7ea	`+ case '4':`
		4cc7ea	`+ case '5':`
		4cc7ea	`+ case '6':`
		4cc7ea	`+ case '7':`
		4cc7ea	`+ case '8':`
		4cc7ea	`+ case '9':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "pct-encoded"`
		4cc7ea	`+ case '%':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "unreserved"`
		4cc7ea	`+ case '-':`
		4cc7ea	`+ case '.':`
		4cc7ea	`+ case '_':`
		4cc7ea	`+ case '~':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "gen-delims"`
		4cc7ea	`+ case ':':`
		4cc7ea	`+ case '/':`
		4cc7ea	`+ case '?':`
		4cc7ea	`+ case '#':`
		4cc7ea	`+ case '[':`
		4cc7ea	`+ case ']':`
		4cc7ea	`+ case '@':`
		4cc7ea	`+`
		4cc7ea	`+ // From rule "sub-delims"`
		4cc7ea	`+ case '!':`
		4cc7ea	`+ case '$':`
		4cc7ea	`+ case '&':`
		4cc7ea	`+ case '\'':`
		4cc7ea	`+ case '(':`
		4cc7ea	`+ case ')':`
		4cc7ea	`+ case '*':`
		4cc7ea	`+ case '+':`
		4cc7ea	`+ case ',':`
		4cc7ea	`+ case ';':`
		4cc7ea	`+ case '=':`
		4cc7ea	`+ return XML_TRUE;`
		4cc7ea	`+`
		4cc7ea	`+ default:`
		4cc7ea	`+ return XML_FALSE;`
		4cc7ea	`+ }`
		4cc7ea	`+}`
		4cc7ea	`+`
		4cc7ea	`/* addBinding() overwrites the value of prefix->binding without checking.`
		4cc7ea	`Therefore one must keep track of the old value outside of addBinding().`
		4cc7ea	`*/`
		4cc7ea	`@@ -3233,6 +3342,29 @@ addBinding(XML_Parser parser, PREFIX prefix, const ATTRIBUTE_ID attId,`
		4cc7ea	`if (!mustBeXML && isXMLNS`
		4cc7ea	`&& (len > xmlnsLen \|\| uri[len] != xmlnsNamespace[len]))`
		4cc7ea	`isXMLNS = XML_FALSE;`
		4cc7ea	`+`
		4cc7ea	`+ // NOTE: While Expat does not validate namespace URIs against RFC 3986`
		4cc7ea	`+ // today (and is not REQUIRED to do so with regard to the XML 1.0`
		4cc7ea	`+ // namespaces specification) we have to at least make sure, that`
		4cc7ea	`+ // the application on top of Expat (that is likely splitting expanded`
		4cc7ea	`+ // element names ("qualified names") of form`
		4cc7ea	`+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces`
		4cc7ea	`+ // in its element handler code) cannot be confused by an attacker`
		4cc7ea	`+ // putting additional namespace separator characters into namespace`
		4cc7ea	`+ // declarations. That would be ambiguous and not to be expected.`
		4cc7ea	`+ //`
		4cc7ea	`+ // While the HTML API docs of function XML_ParserCreateNS have been`
		4cc7ea	`+ // advising against use of a namespace separator character that can`
		4cc7ea	`+ // appear in a URI for >20 years now, some widespread applications`
		4cc7ea	`+ // are using URI characters (':' (colon) in particular) for a`
		4cc7ea	`+ // namespace separator, in practice. To keep these applications`
		4cc7ea	`+ // functional, we only reject namespaces URIs containing the`
		4cc7ea	`+ // application-chosen namespace separator if the chosen separator`
		4cc7ea	`+ // is a non-URI character with regard to RFC 3986.`
		4cc7ea	`+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)`
		4cc7ea	`+ && ! is_rfc3986_uri_char(uri[len])) {`
		4cc7ea	`+ return XML_ERROR_SYNTAX;`
		4cc7ea	`+ }`
		4cc7ea	`}`
		4cc7ea	`isXML = isXML && len == xmlLen;`
		4cc7ea	`isXMLNS = isXMLNS && len == xmlnsLen;`
		4cc7ea	`diff --git a/tests/runtests.c b/tests/runtests.c`
		4cc7ea	`index b99e375..86f8b18 100644`
		4cc7ea	`--- a/tests/runtests.c`
		4cc7ea	`+++ b/tests/runtests.c`
		4cc7ea	`@@ -31,6 +31,13 @@`
		4cc7ea
		4cc7ea	`static XML_Parser parser;`
		4cc7ea
		4cc7ea	`+#ifndef UNUSED_P`
		4cc7ea	`+# ifdef __GNUC__`
		4cc7ea	`+# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))`
		4cc7ea	`+# else`
		4cc7ea	`+# define UNUSED_P(p) UNUSED_ ## p`
		4cc7ea	`+# endif`
		4cc7ea	`+#endif`
		4cc7ea
		4cc7ea	`static void`
		4cc7ea	`basic_setup(void)`
		4cc7ea	`@@ -40,6 +47,10 @@ basic_setup(void)`
		4cc7ea	`fail("Parser not created.");`
		4cc7ea	`}`
		4cc7ea
		4cc7ea	`+static void`
		4cc7ea	`+dummy_end_element(void UNUSED_P(userData), const XML_Char UNUSED_P(name))`
		4cc7ea	`+{}`
		4cc7ea	`+`
		4cc7ea	`static void`
		4cc7ea	`basic_teardown(void)`
		4cc7ea	`{`
		4cc7ea	`@@ -1499,6 +1510,138 @@ START_TEST(test_ns_unbound_prefix_on_element)`
		4cc7ea	`}`
		4cc7ea	`END_TEST`
		4cc7ea
		4cc7ea	`+START_TEST(test_ns_separator_in_uri) {`
		4cc7ea	`+ struct test_case {`
		4cc7ea	`+ enum XML_Status expectedStatus;`
		4cc7ea	`+ const char *doc;`
		4cc7ea	`+ XML_Char namesep;`
		4cc7ea	`+ };`
		4cc7ea	`+ struct test_case cases[] = {`
		4cc7ea	`+ {XML_STATUS_OK, "<doc xmlns='one_two' />", '\n'},`
		4cc7ea	`+ {XML_STATUS_ERROR, "<doc xmlns='one two' />", '\n'},`
		4cc7ea	`+ {XML_STATUS_OK, "<doc xmlns='one:two' />", ':'},`
		4cc7ea	`+ };`
		4cc7ea	`+`
		4cc7ea	`+ size_t i = 0;`
		4cc7ea	`+ size_t failCount = 0;`
		4cc7ea	`+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {`
		4cc7ea	`+ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);`
		4cc7ea	`+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);`
		4cc7ea	`+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),`
		4cc7ea	`+ /isFinal/ XML_TRUE)`
		4cc7ea	`+ != cases[i].expectedStatus) {`
		4cc7ea	`+ failCount++;`
		4cc7ea	`+ }`
		4cc7ea	`+ XML_ParserFree(parser);`
		4cc7ea	`+ }`
		4cc7ea	`+`
		4cc7ea	`+ if (failCount) {`
		4cc7ea	`+ fail("Namespace separator handling is broken");`
		4cc7ea	`+ }`
		4cc7ea	`+}`
		4cc7ea	`+END_TEST`
		4cc7ea	`+`
		4cc7ea	`+`
		4cc7ea	`+START_TEST(test_utf8_in_start_tags) {`
		4cc7ea	`+ struct test_case {`
		4cc7ea	`+ bool goodName;`
		4cc7ea	`+ bool goodNameStart;`
		4cc7ea	`+ const char *tagName;`
		4cc7ea	`+ };`
		4cc7ea	`+`
		4cc7ea	`+ // The idea with the tests below is this:`
		4cc7ea	`+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences`
		4cc7ea	`+ // go to isNever and are hence not a concern.`
		4cc7ea	`+ //`
		4cc7ea	`+ // We start with a character that is a valid name character`
		4cc7ea	`+ // (or even name-start character, see XML 1.0r4 spec) and then we flip`
		4cc7ea	`+ // single bits at places where (1) the result leaves the UTF-8 encoding space`
		4cc7ea	`+ // and (2) we stay in the same n-byte sequence family.`
		4cc7ea	`+ //`
		4cc7ea	`+ // The flipped bits are highlighted in angle brackets in comments,`
		4cc7ea	`+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped`
		4cc7ea	`+ // the most significant bit to 1 to leave UTF-8 encoding space.`
		4cc7ea	`+ struct test_case cases[] = {`
		4cc7ea	`+ // 1-byte UTF-8: [0xxx xxxx]`
		4cc7ea	`+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'`
		4cc7ea	`+ {false, false, "\xBA"}, // [<1>011 1010]`
		4cc7ea	`+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'`
		4cc7ea	`+ {false, false, "\xB9"}, // [<1>011 1001]`
		4cc7ea	`+`
		4cc7ea	`+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]`
		4cc7ea	`+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =`
		4cc7ea	`+ // Arabic small waw U+06E5`
		4cc7ea	`+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]`
		4cc7ea	`+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]`
		4cc7ea	`+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]`
		4cc7ea	`+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =`
		4cc7ea	`+ // combining char U+0301`
		4cc7ea	`+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]`
		4cc7ea	`+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]`
		4cc7ea	`+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]`
		4cc7ea	`+`
		4cc7ea	`+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]`
		4cc7ea	`+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =`
		4cc7ea	`+ // Devanagari Letter A U+0905`
		4cc7ea	`+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]`
		4cc7ea	`+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]`
		4cc7ea	`+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]`
		4cc7ea	`+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]`
		4cc7ea	`+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]`
		4cc7ea	`+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =`
		4cc7ea	`+ // combining char U+0901`
		4cc7ea	`+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]`
		4cc7ea	`+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]`
		4cc7ea	`+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]`
		4cc7ea	`+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]`
		4cc7ea	`+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]`
		4cc7ea	`+ };`
		4cc7ea	`+ const bool atNameStart[] = {true, false};`
		4cc7ea	`+`
		4cc7ea	`+ size_t i = 0;`
		4cc7ea	`+ char doc[1024];`
		4cc7ea	`+ size_t failCount = 0;`
		4cc7ea	`+`
		4cc7ea	`+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {`
		4cc7ea	`+ size_t j = 0;`
		4cc7ea	`+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {`
		4cc7ea	`+ const bool expectedSuccess`
		4cc7ea	`+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;`
		4cc7ea	`+ sprintf(doc, "<%s%s>`
		4cc7ea	`+ XML_Parser parser = XML_ParserCreate(NULL);`
		4cc7ea	`+`
		4cc7ea	`+ const enum XML_Status status`
		4cc7ea	`+ = XML_Parse(parser, doc, (int)strlen(doc), /isFinal=/XML_FALSE);`
		4cc7ea	`+`
		4cc7ea	`+ bool success = true;`
		4cc7ea	`+ if ((status == XML_STATUS_OK) != expectedSuccess) {`
		4cc7ea	`+ success = false;`
		4cc7ea	`+ }`
		4cc7ea	`+ if ((status == XML_STATUS_ERROR)`
		4cc7ea	`+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {`
		4cc7ea	`+ success = false;`
		4cc7ea	`+ }`
		4cc7ea	`+`
		4cc7ea	`+ if (! success) {`
		4cc7ea	`+ fprintf(`
		4cc7ea	`+ stderr,`
		4cc7ea	`+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",`
		4cc7ea	`+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",`
		4cc7ea	`+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));`
		4cc7ea	`+ failCount++;`
		4cc7ea	`+ }`
		4cc7ea	`+`
		4cc7ea	`+ XML_ParserFree(parser);`
		4cc7ea	`+`
		4cc7ea	`+ }`
		4cc7ea	`+`
		4cc7ea	`+ if (failCount > 0) {`
		4cc7ea	`+ fail("UTF-8 regression detected");`
		4cc7ea	`+ }`
		4cc7ea	`+}`
		4cc7ea	`+}`
		4cc7ea	`+END_TEST`
		4cc7ea	`+`
		4cc7ea	`static Suite *`
		4cc7ea	`make_suite(void)`
		4cc7ea	`{`
		4cc7ea	`@@ -1569,6 +1712,7 @@ make_suite(void)`
		4cc7ea	`tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes);`
		4cc7ea	`tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute);`
		4cc7ea	`tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element);`
		4cc7ea	`+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);`
		4cc7ea
		4cc7ea	`#ifdef XML_DTD`
		4cc7ea	`tcase_add_test(tc_basic,`

rpms / expat

Source Code

Blame SOURCES/expat-2.1.0-Protect-against-malicious-namespace-declarations.patch