Blame SOURCES/expat-2.1.0-Add-missing-validation-of-encoding.patch

4cc7ea
commit c8abeb6dbc14761866da2d3cf359d795f126b6d8
4cc7ea
Author: Tomas Korbar <tkorbar@redhat.com>
4cc7ea
Date:   Mon Mar 21 12:48:53 2022 +0100
4cc7ea
4cc7ea
    Add missing validation of encoding
4cc7ea
4cc7ea
diff --git a/lib/xmltok.c b/lib/xmltok.c
4cc7ea
index cb98ce1..a080f59 100644
4cc7ea
--- a/lib/xmltok.c
4cc7ea
+++ b/lib/xmltok.c
4cc7ea
@@ -71,13 +71,6 @@
4cc7ea
                       + ((((byte)[2]) >> 5) & 1)] \
4cc7ea
          & (1 << (((byte)[2]) & 0x1F)))
4cc7ea
 
4cc7ea
-#define UTF8_GET_NAMING(pages, p, n) \
4cc7ea
-  ((n) == 2 \
4cc7ea
-  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
4cc7ea
-  : ((n) == 3 \
4cc7ea
-     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
4cc7ea
-     : 0))
4cc7ea
-
4cc7ea
 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
4cc7ea
    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
4cc7ea
    with the additional restriction of not allowing the Unicode
4cc7ea
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
4cc7ea
index c93e2ac..a135514 100644
4cc7ea
--- a/lib/xmltok_impl.c
4cc7ea
+++ b/lib/xmltok_impl.c
4cc7ea
@@ -34,7 +34,7 @@
4cc7ea
    case BT_LEAD ## n: \
4cc7ea
      if (end - ptr < n) \
4cc7ea
        return XML_TOK_PARTIAL_CHAR; \
4cc7ea
-     if (!IS_NAME_CHAR(enc, ptr, n)) { \
4cc7ea
+     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
4cc7ea
        *nextTokPtr = ptr; \
4cc7ea
        return XML_TOK_INVALID; \
4cc7ea
      } \
4cc7ea
@@ -62,7 +62,7 @@
4cc7ea
    case BT_LEAD ## n: \
4cc7ea
      if (end - ptr < n) \
4cc7ea
        return XML_TOK_PARTIAL_CHAR; \
4cc7ea
-     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
4cc7ea
+     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
4cc7ea
        *nextTokPtr = ptr; \
4cc7ea
        return XML_TOK_INVALID; \
4cc7ea
      } \
4cc7ea
@@ -1097,6 +1097,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
4cc7ea
   case BT_LEAD ## n: \
4cc7ea
     if (end - ptr < n) \
4cc7ea
       return XML_TOK_PARTIAL_CHAR; \
4cc7ea
+    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
4cc7ea
+      *nextTokPtr = ptr;                                                       \
4cc7ea
+      return XML_TOK_INVALID;                                                  \
4cc7ea
+    }                                                                          \
4cc7ea
     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
4cc7ea
       ptr += n; \
4cc7ea
       tok = XML_TOK_NAME; \
4cc7ea
diff --git a/tests/runtests.c b/tests/runtests.c
4cc7ea
index 86f8b18..c01f096 100644
4cc7ea
--- a/tests/runtests.c
4cc7ea
+++ b/tests/runtests.c
4cc7ea
@@ -14,6 +14,7 @@
4cc7ea
 #include <string.h>
4cc7ea
 #include <stdint.h>
4cc7ea
 #include <limits.h>
4cc7ea
+#include <stdbool.h>
4cc7ea
 
4cc7ea
 #include "expat.h"
4cc7ea
 #include "chardata.h"
4cc7ea
@@ -82,7 +83,7 @@ _xml_failure(XML_Parser parser, const char *file, int line)
4cc7ea
 
4cc7ea
 static void
4cc7ea
 _expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
4cc7ea
-                char *file, int lineno)
4cc7ea
+                const char *file, int lineno)
4cc7ea
 {
4cc7ea
     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
4cc7ea
         /* Hackish use of _fail_unless() macro, but let's us report
4cc7ea
@@ -1541,6 +1542,13 @@ START_TEST(test_ns_separator_in_uri) {
4cc7ea
 }
4cc7ea
 END_TEST
4cc7ea
 
4cc7ea
+START_TEST(test_bad_doctype_utf8) {
4cc7ea
+  char *text = "<!DOCTYPE \xDB\x25"
4cc7ea
+                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
4cc7ea
+  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4cc7ea
+                 "Invalid UTF-8 in DOCTYPE not faulted");
4cc7ea
+}
4cc7ea
+END_TEST
4cc7ea
 
4cc7ea
 START_TEST(test_utf8_in_start_tags) {
4cc7ea
   struct test_case {
4cc7ea
@@ -1695,6 +1703,8 @@ make_suite(void)
4cc7ea
     tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
4cc7ea
     tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
4cc7ea
     tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
4cc7ea
+    tcase_add_test(tc_basic, test_utf8_in_start_tags);
4cc7ea
+    tcase_add_test(tc_basic, test_bad_doctype_utf8);
4cc7ea
 
4cc7ea
     suite_add_tcase(s, tc_namespace);
4cc7ea
     tcase_add_checked_fixture(tc_namespace,