|
|
8ad338 |
commit fd5473ef5873048eadef344a1f16f71ad8eefe99
|
|
|
8ad338 |
Author: Tomas Korbar <tkorbar@redhat.com>
|
|
|
8ad338 |
Date: Mon Mar 14 12:17:41 2022 +0100
|
|
|
8ad338 |
|
|
|
8ad338 |
Protect against malicious namespace declarations
|
|
|
8ad338 |
|
|
|
8ad338 |
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
|
|
|
8ad338 |
index 581b9a4..6f3510b 100644
|
|
|
8ad338 |
--- a/lib/xmlparse.c
|
|
|
8ad338 |
+++ b/lib/xmlparse.c
|
|
|
8ad338 |
@@ -661,8 +661,7 @@ XML_ParserCreate(const XML_Char *encodingName)
|
|
|
8ad338 |
XML_Parser XMLCALL
|
|
|
8ad338 |
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
|
|
|
8ad338 |
{
|
|
|
8ad338 |
- XML_Char tmp[2];
|
|
|
8ad338 |
- *tmp = nsSep;
|
|
|
8ad338 |
+ XML_Char tmp[2] = {nsSep, 0};
|
|
|
8ad338 |
return XML_ParserCreate_MM(encodingName, NULL, tmp);
|
|
|
8ad338 |
}
|
|
|
8ad338 |
|
|
|
8ad338 |
@@ -1288,8 +1287,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser,
|
|
|
8ad338 |
would be otherwise.
|
|
|
8ad338 |
*/
|
|
|
8ad338 |
if (parser->m_ns) {
|
|
|
8ad338 |
- XML_Char tmp[2];
|
|
|
8ad338 |
- *tmp = parser->m_namespaceSeparator;
|
|
|
8ad338 |
+ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
|
|
|
8ad338 |
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
|
|
|
8ad338 |
}
|
|
|
8ad338 |
else {
|
|
|
8ad338 |
@@ -3640,6 +3638,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
|
|
|
8ad338 |
return XML_ERROR_NONE;
|
|
|
8ad338 |
}
|
|
|
8ad338 |
|
|
|
8ad338 |
+static XML_Bool
|
|
|
8ad338 |
+is_rfc3986_uri_char(XML_Char candidate) {
|
|
|
8ad338 |
+ // For the RFC 3986 ABNF grammar see
|
|
|
8ad338 |
+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ switch (candidate) {
|
|
|
8ad338 |
+ // From rule "ALPHA" (uppercase half)
|
|
|
8ad338 |
+ case 'A':
|
|
|
8ad338 |
+ case 'B':
|
|
|
8ad338 |
+ case 'C':
|
|
|
8ad338 |
+ case 'D':
|
|
|
8ad338 |
+ case 'E':
|
|
|
8ad338 |
+ case 'F':
|
|
|
8ad338 |
+ case 'G':
|
|
|
8ad338 |
+ case 'H':
|
|
|
8ad338 |
+ case 'I':
|
|
|
8ad338 |
+ case 'J':
|
|
|
8ad338 |
+ case 'K':
|
|
|
8ad338 |
+ case 'L':
|
|
|
8ad338 |
+ case 'M':
|
|
|
8ad338 |
+ case 'N':
|
|
|
8ad338 |
+ case 'O':
|
|
|
8ad338 |
+ case 'P':
|
|
|
8ad338 |
+ case 'Q':
|
|
|
8ad338 |
+ case 'R':
|
|
|
8ad338 |
+ case 'S':
|
|
|
8ad338 |
+ case 'T':
|
|
|
8ad338 |
+ case 'U':
|
|
|
8ad338 |
+ case 'V':
|
|
|
8ad338 |
+ case 'W':
|
|
|
8ad338 |
+ case 'X':
|
|
|
8ad338 |
+ case 'Y':
|
|
|
8ad338 |
+ case 'Z':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "ALPHA" (lowercase half)
|
|
|
8ad338 |
+ case 'a':
|
|
|
8ad338 |
+ case 'b':
|
|
|
8ad338 |
+ case 'c':
|
|
|
8ad338 |
+ case 'd':
|
|
|
8ad338 |
+ case 'e':
|
|
|
8ad338 |
+ case 'f':
|
|
|
8ad338 |
+ case 'g':
|
|
|
8ad338 |
+ case 'h':
|
|
|
8ad338 |
+ case 'i':
|
|
|
8ad338 |
+ case 'j':
|
|
|
8ad338 |
+ case 'k':
|
|
|
8ad338 |
+ case 'l':
|
|
|
8ad338 |
+ case 'm':
|
|
|
8ad338 |
+ case 'n':
|
|
|
8ad338 |
+ case 'o':
|
|
|
8ad338 |
+ case 'p':
|
|
|
8ad338 |
+ case 'q':
|
|
|
8ad338 |
+ case 'r':
|
|
|
8ad338 |
+ case 's':
|
|
|
8ad338 |
+ case 't':
|
|
|
8ad338 |
+ case 'u':
|
|
|
8ad338 |
+ case 'v':
|
|
|
8ad338 |
+ case 'w':
|
|
|
8ad338 |
+ case 'x':
|
|
|
8ad338 |
+ case 'y':
|
|
|
8ad338 |
+ case 'z':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "DIGIT"
|
|
|
8ad338 |
+ case '0':
|
|
|
8ad338 |
+ case '1':
|
|
|
8ad338 |
+ case '2':
|
|
|
8ad338 |
+ case '3':
|
|
|
8ad338 |
+ case '4':
|
|
|
8ad338 |
+ case '5':
|
|
|
8ad338 |
+ case '6':
|
|
|
8ad338 |
+ case '7':
|
|
|
8ad338 |
+ case '8':
|
|
|
8ad338 |
+ case '9':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "pct-encoded"
|
|
|
8ad338 |
+ case '%':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "unreserved"
|
|
|
8ad338 |
+ case '-':
|
|
|
8ad338 |
+ case '.':
|
|
|
8ad338 |
+ case '_':
|
|
|
8ad338 |
+ case '~':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "gen-delims"
|
|
|
8ad338 |
+ case ':':
|
|
|
8ad338 |
+ case '/':
|
|
|
8ad338 |
+ case '?':
|
|
|
8ad338 |
+ case '#':
|
|
|
8ad338 |
+ case '[':
|
|
|
8ad338 |
+ case ']':
|
|
|
8ad338 |
+ case '@':
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // From rule "sub-delims"
|
|
|
8ad338 |
+ case '!':
|
|
|
8ad338 |
+ case '$':
|
|
|
8ad338 |
+ case '&':
|
|
|
8ad338 |
+ case '\'':
|
|
|
8ad338 |
+ case '(':
|
|
|
8ad338 |
+ case ')':
|
|
|
8ad338 |
+ case '*':
|
|
|
8ad338 |
+ case '+':
|
|
|
8ad338 |
+ case ',':
|
|
|
8ad338 |
+ case ';':
|
|
|
8ad338 |
+ case '=':
|
|
|
8ad338 |
+ return XML_TRUE;
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ default:
|
|
|
8ad338 |
+ return XML_FALSE;
|
|
|
8ad338 |
+ }
|
|
|
8ad338 |
+}
|
|
|
8ad338 |
+
|
|
|
8ad338 |
/* addBinding() overwrites the value of prefix->binding without checking.
|
|
|
8ad338 |
Therefore one must keep track of the old value outside of addBinding().
|
|
|
8ad338 |
*/
|
|
|
8ad338 |
@@ -3700,6 +3809,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
|
|
|
8ad338 |
if (!mustBeXML && isXMLNS
|
|
|
8ad338 |
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
|
|
|
8ad338 |
isXMLNS = XML_FALSE;
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
|
|
|
8ad338 |
+ // today (and is not REQUIRED to do so with regard to the XML 1.0
|
|
|
8ad338 |
+ // namespaces specification) we have to at least make sure that
|
|
|
8ad338 |
+ // the application on top of Expat (that is likely splitting expanded
|
|
|
8ad338 |
+ // element names ("qualified names") of form
|
|
|
8ad338 |
+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
|
|
|
8ad338 |
+ // in its element handler code) cannot be confused by an attacker
|
|
|
8ad338 |
+ // putting additional namespace separator characters into namespace
|
|
|
8ad338 |
+ // declarations. That would be ambiguous and not to be expected.
|
|
|
8ad338 |
+ //
|
|
|
8ad338 |
+ // While the HTML API docs of function XML_ParserCreateNS have been
|
|
|
8ad338 |
+ // advising against use of a namespace separator character that can
|
|
|
8ad338 |
+ // appear in a URI for >20 years now, some widespread applications
|
|
|
8ad338 |
+ // are using URI characters (':' (colon) in particular) for a
|
|
|
8ad338 |
+ // namespace separator, in practice. To keep these applications
|
|
|
8ad338 |
+ // functional, we only reject namespace URIs containing the
|
|
|
8ad338 |
+ // application-chosen namespace separator if the chosen separator
|
|
|
8ad338 |
+ // is a non-URI character with regard to RFC 3986.
|
|
|
8ad338 |
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
|
|
|
8ad338 |
+ && ! is_rfc3986_uri_char(uri[len])) {
|
|
|
8ad338 |
+ return XML_ERROR_SYNTAX;
|
|
|
8ad338 |
+ }
|
|
|
8ad338 |
}
|
|
|
8ad338 |
isXML = isXML && len == xmlLen;
|
|
|
8ad338 |
isXMLNS = isXMLNS && len == xmlnsLen;
|
|
|
8ad338 |
diff --git a/tests/runtests.c b/tests/runtests.c
|
|
|
8ad338 |
index ecc6f47..eabd55d 100644
|
|
|
8ad338 |
--- a/tests/runtests.c
|
|
|
8ad338 |
+++ b/tests/runtests.c
|
|
|
8ad338 |
@@ -7950,6 +7950,38 @@ START_TEST(test_ns_double_colon_doctype)
|
|
|
8ad338 |
}
|
|
|
8ad338 |
END_TEST
|
|
|
8ad338 |
|
|
|
8ad338 |
+START_TEST(test_ns_separator_in_uri) {
|
|
|
8ad338 |
+ struct test_case {
|
|
|
8ad338 |
+ enum XML_Status expectedStatus;
|
|
|
8ad338 |
+ const char *doc;
|
|
|
8ad338 |
+ XML_Char namesep;
|
|
|
8ad338 |
+ };
|
|
|
8ad338 |
+ struct test_case cases[] = {
|
|
|
8ad338 |
+ {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
|
|
|
8ad338 |
+ {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
|
|
|
8ad338 |
+ {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
|
|
|
8ad338 |
+ };
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ size_t i = 0;
|
|
|
8ad338 |
+ size_t failCount = 0;
|
|
|
8ad338 |
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
|
8ad338 |
+ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
|
|
|
8ad338 |
+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
|
|
|
8ad338 |
+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
|
|
|
8ad338 |
+ /*isFinal*/ XML_TRUE)
|
|
|
8ad338 |
+ != cases[i].expectedStatus) {
|
|
|
8ad338 |
+ failCount++;
|
|
|
8ad338 |
+ }
|
|
|
8ad338 |
+ XML_ParserFree(parser);
|
|
|
8ad338 |
+ }
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+ if (failCount) {
|
|
|
8ad338 |
+ fail("Namespace separator handling is broken");
|
|
|
8ad338 |
+ }
|
|
|
8ad338 |
+}
|
|
|
8ad338 |
+END_TEST
|
|
|
8ad338 |
+
|
|
|
8ad338 |
+
|
|
|
8ad338 |
/* Control variable; the number of times duff_allocator() will successfully allocate */
|
|
|
8ad338 |
#define ALLOC_ALWAYS_SUCCEED (-1)
|
|
|
8ad338 |
#define REALLOC_ALWAYS_SUCCEED (-1)
|
|
|
8ad338 |
@@ -12290,6 +12322,7 @@ make_suite(void)
|
|
|
8ad338 |
tcase_add_test(tc_namespace, test_ns_utf16_doctype);
|
|
|
8ad338 |
tcase_add_test(tc_namespace, test_ns_invalid_doctype);
|
|
|
8ad338 |
tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
|
|
|
8ad338 |
+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);
|
|
|
8ad338 |
|
|
|
8ad338 |
suite_add_tcase(s, tc_misc);
|
|
|
8ad338 |
tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
|