Blame SOURCES/expat-2.2.10-Protect-against-malicious-namespace-declarations.patch

4e0c08
commit 5c47ae80738d0985babf06a023b3845169682064
4e0c08
Author: Tomas Korbar <tkorbar@redhat.com>
4e0c08
Date:   Mon Mar 14 10:22:37 2022 +0100
4e0c08
4e0c08
    Protect against malicious namespace declarations
4e0c08
4e0c08
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
4e0c08
index 5c3f573..901abbf 100644
4e0c08
--- a/lib/xmlparse.c
4e0c08
+++ b/lib/xmlparse.c
4e0c08
@@ -638,8 +638,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
4e0c08
 
4e0c08
 XML_Parser XMLCALL
4e0c08
 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
4e0c08
-  XML_Char tmp[2];
4e0c08
-  *tmp = nsSep;
4e0c08
+  XML_Char tmp[2] = {nsSep, 0};
4e0c08
   return XML_ParserCreate_MM(encodingName, NULL, tmp);
4e0c08
 }
4e0c08
 
4e0c08
@@ -1253,8 +1252,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
4e0c08
      would be otherwise.
4e0c08
   */
4e0c08
   if (parser->m_ns) {
4e0c08
-    XML_Char tmp[2];
4e0c08
-    *tmp = parser->m_namespaceSeparator;
4e0c08
+    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
4e0c08
     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
4e0c08
   } else {
4e0c08
     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
4e0c08
@@ -3526,6 +3524,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
4e0c08
   return XML_ERROR_NONE;
4e0c08
 }
4e0c08
 
4e0c08
+static XML_Bool
4e0c08
+is_rfc3986_uri_char(XML_Char candidate) {
4e0c08
+  // For the RFC 3986 ABNF grammar see
4e0c08
+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4e0c08
+
4e0c08
+  switch (candidate) {
4e0c08
+  // From rule "ALPHA" (uppercase half)
4e0c08
+  case 'A':
4e0c08
+  case 'B':
4e0c08
+  case 'C':
4e0c08
+  case 'D':
4e0c08
+  case 'E':
4e0c08
+  case 'F':
4e0c08
+  case 'G':
4e0c08
+  case 'H':
4e0c08
+  case 'I':
4e0c08
+  case 'J':
4e0c08
+  case 'K':
4e0c08
+  case 'L':
4e0c08
+  case 'M':
4e0c08
+  case 'N':
4e0c08
+  case 'O':
4e0c08
+  case 'P':
4e0c08
+  case 'Q':
4e0c08
+  case 'R':
4e0c08
+  case 'S':
4e0c08
+  case 'T':
4e0c08
+  case 'U':
4e0c08
+  case 'V':
4e0c08
+  case 'W':
4e0c08
+  case 'X':
4e0c08
+  case 'Y':
4e0c08
+  case 'Z':
4e0c08
+
4e0c08
+  // From rule "ALPHA" (lowercase half)
4e0c08
+  case 'a':
4e0c08
+  case 'b':
4e0c08
+  case 'c':
4e0c08
+  case 'd':
4e0c08
+  case 'e':
4e0c08
+  case 'f':
4e0c08
+  case 'g':
4e0c08
+  case 'h':
4e0c08
+  case 'i':
4e0c08
+  case 'j':
4e0c08
+  case 'k':
4e0c08
+  case 'l':
4e0c08
+  case 'm':
4e0c08
+  case 'n':
4e0c08
+  case 'o':
4e0c08
+  case 'p':
4e0c08
+  case 'q':
4e0c08
+  case 'r':
4e0c08
+  case 's':
4e0c08
+  case 't':
4e0c08
+  case 'u':
4e0c08
+  case 'v':
4e0c08
+  case 'w':
4e0c08
+  case 'x':
4e0c08
+  case 'y':
4e0c08
+  case 'z':
4e0c08
+
4e0c08
+  // From rule "DIGIT"
4e0c08
+  case '0':
4e0c08
+  case '1':
4e0c08
+  case '2':
4e0c08
+  case '3':
4e0c08
+  case '4':
4e0c08
+  case '5':
4e0c08
+  case '6':
4e0c08
+  case '7':
4e0c08
+  case '8':
4e0c08
+  case '9':
4e0c08
+
4e0c08
+  // From rule "pct-encoded"
4e0c08
+  case '%':
4e0c08
+
4e0c08
+  // From rule "unreserved"
4e0c08
+  case '-':
4e0c08
+  case '.':
4e0c08
+  case '_':
4e0c08
+  case '~':
4e0c08
+
4e0c08
+  // From rule "gen-delims"
4e0c08
+  case ':':
4e0c08
+  case '/':
4e0c08
+  case '?':
4e0c08
+  case '#':
4e0c08
+  case '[':
4e0c08
+  case ']':
4e0c08
+  case '@':
4e0c08
+
4e0c08
+  // From rule "sub-delims"
4e0c08
+  case '!':
4e0c08
+  case '$':
4e0c08
+  case '&':
4e0c08
+  case '\'':
4e0c08
+  case '(':
4e0c08
+  case ')':
4e0c08
+  case '*':
4e0c08
+  case '+':
4e0c08
+  case ',':
4e0c08
+  case ';':
4e0c08
+  case '=':
4e0c08
+    return XML_TRUE;
4e0c08
+
4e0c08
+  default:
4e0c08
+    return XML_FALSE;
4e0c08
+  }
4e0c08
+}
4e0c08
+
4e0c08
 /* addBinding() overwrites the value of prefix->binding without checking.
4e0c08
    Therefore one must keep track of the old value outside of addBinding().
4e0c08
 */
4e0c08
@@ -3581,6 +3690,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4e0c08
     if (! mustBeXML && isXMLNS
4e0c08
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4e0c08
       isXMLNS = XML_FALSE;
4e0c08
+
4e0c08
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
4e0c08
+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
4e0c08
+    //       namespaces specification) we have to at least make sure that
4e0c08
+    //       the application on top of Expat (that is likely splitting expanded
4e0c08
+    //       element names ("qualified names") of form
4e0c08
+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4e0c08
+    //       in its element handler code) cannot be confused by an attacker
4e0c08
+    //       putting additional namespace separator characters into namespace
4e0c08
+    //       declarations.  That would be ambiguous and not to be expected.
4e0c08
+    //
4e0c08
+    //       While the HTML API docs of function XML_ParserCreateNS have been
4e0c08
+    //       advising against use of a namespace separator character that can
4e0c08
+    //       appear in a URI for >20 years now, some widespread applications
4e0c08
+    //       are using URI characters (':' (colon) in particular) for a
4e0c08
+    //       namespace separator, in practice.  To keep these applications
4e0c08
+    //       functional, we only reject namespace URIs containing the
4e0c08
+    //       application-chosen namespace separator if the chosen separator
4e0c08
+    //       is a non-URI character with regard to RFC 3986.
4e0c08
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4e0c08
+        && ! is_rfc3986_uri_char(uri[len])) {
4e0c08
+      return XML_ERROR_SYNTAX;
4e0c08
+    }
4e0c08
   }
4e0c08
   isXML = isXML && len == xmlLen;
4e0c08
   isXMLNS = isXMLNS && len == xmlnsLen;
4e0c08
diff --git a/tests/runtests.c b/tests/runtests.c
4e0c08
index f03e008..40172d2 100644
4e0c08
--- a/tests/runtests.c
4e0c08
+++ b/tests/runtests.c
4e0c08
@@ -7233,6 +7233,37 @@ START_TEST(test_ns_double_colon_doctype) {
4e0c08
 }
4e0c08
 END_TEST
4e0c08
 
4e0c08
+START_TEST(test_ns_separator_in_uri) {
4e0c08
+  struct test_case {
4e0c08
+    enum XML_Status expectedStatus;
4e0c08
+    const char *doc;
4e0c08
+    XML_Char namesep;
4e0c08
+  };
4e0c08
+  struct test_case cases[] = {
4e0c08
+      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
4e0c08
+      {XML_STATUS_ERROR, "<doc xmlns='one\ntwo' />", XCS('\n')},
4e0c08
+      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
4e0c08
+  };
4e0c08
+
4e0c08
+  size_t i = 0;
4e0c08
+  size_t failCount = 0;
4e0c08
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4e0c08
+    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
4e0c08
+    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
4e0c08
+    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
4e0c08
+                  /*isFinal*/ XML_TRUE)
4e0c08
+        != cases[i].expectedStatus) {
4e0c08
+      failCount++;
4e0c08
+    }
4e0c08
+    XML_ParserFree(parser);
4e0c08
+  }
4e0c08
+
4e0c08
+  if (failCount) {
4e0c08
+    fail("Namespace separator handling is broken");
4e0c08
+  }
4e0c08
+}
4e0c08
+END_TEST
4e0c08
+
4e0c08
 /* Control variable; the number of times duff_allocator() will successfully
4e0c08
  * allocate */
4e0c08
 #define ALLOC_ALWAYS_SUCCEED (-1)
4e0c08
@@ -11527,6 +11558,7 @@ make_suite(void) {
4e0c08
   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
4e0c08
   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
4e0c08
   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
4e0c08
+  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
4e0c08
 
4e0c08
   suite_add_tcase(s, tc_misc);
4e0c08
   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);