Tree - rpms/expat - CentOS Git server

rpms / expat

Blame SOURCES/expat-2.2.5-Add-missing-validation-of-encoding.patch

Blob History Raw

		afa004	`commit e8f285b522a907603501329e5b4212755f525fdf`
		afa004	`Author: Tomas Korbar <tkorbar@redhat.com>`
		afa004	`Date: Thu Mar 3 12:04:09 2022 +0100`
		afa004
		afa004	`CVE-2022-25235`
		afa004
		afa004	`diff --git a/lib/xmltok.c b/lib/xmltok.c`
		afa004	`index 6b415d8..b55732a 100644`
		afa004	`--- a/lib/xmltok.c`
		afa004	`+++ b/lib/xmltok.c`
		afa004	`@@ -103,13 +103,6 @@`
		afa004	`+ ((((byte)[2]) >> 5) & 1)] \`
		afa004	`& (1u << (((byte)[2]) & 0x1F)))`
		afa004
		afa004	`-#define UTF8_GET_NAMING(pages, p, n) \`
		afa004	`- ((n) == 2 \`
		afa004	`- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \`
		afa004	`- : ((n) == 3 \`
		afa004	`- ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \`
		afa004	`- : 0))`
		afa004	`-`
		afa004	`/* Detection of invalid UTF-8 sequences is based on Table 3.1B`
		afa004	`of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/`
		afa004	`with the additional restriction of not allowing the Unicode`
		afa004	`diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c`
		afa004	`index 0403dd3..56d7a40 100644`
		afa004	`--- a/lib/xmltok_impl.c`
		afa004	`+++ b/lib/xmltok_impl.c`
		afa004	`@@ -61,7 +61,7 @@`
		afa004	`case BT_LEAD ## n: \`
		afa004	`if (end - ptr < n) \`
		afa004	`return XML_TOK_PARTIAL_CHAR; \`
		afa004	`- if (!IS_NAME_CHAR(enc, ptr, n)) { \`
		afa004	`+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \`
		afa004	`*nextTokPtr = ptr; \`
		afa004	`return XML_TOK_INVALID; \`
		afa004	`} \`
		afa004	`@@ -89,7 +89,7 @@`
		afa004	`case BT_LEAD ## n: \`
		afa004	`if (end - ptr < n) \`
		afa004	`return XML_TOK_PARTIAL_CHAR; \`
		afa004	`- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		afa004	`+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		afa004	`*nextTokPtr = ptr; \`
		afa004	`return XML_TOK_INVALID; \`
		afa004	`} \`
		afa004	`@@ -1117,6 +1117,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,`
		afa004	`case BT_LEAD ## n: \`
		afa004	`if (end - ptr < n) \`
		afa004	`return XML_TOK_PARTIAL_CHAR; \`
		afa004	`+ if (IS_INVALID_CHAR(enc, ptr, n)) { \`
		afa004	`+ *nextTokPtr = ptr; \`
		afa004	`+ return XML_TOK_INVALID; \`
		afa004	`+ } \`
		afa004	`if (IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		afa004	`ptr += n; \`
		afa004	`tok = XML_TOK_NAME; \`
		afa004	`diff --git a/tests/runtests.c b/tests/runtests.c`
		afa004	`index 278bfa1..0f3afde 100644`
		afa004	`--- a/tests/runtests.c`
		afa004	`+++ b/tests/runtests.c`
		afa004	`@@ -6540,6 +6540,106 @@ START_TEST(test_utf8_in_cdata_section_2)`
		afa004	`}`
		afa004	`END_TEST`
		afa004
		afa004	`+START_TEST(test_utf8_in_start_tags) {`
		afa004	`+ struct test_case {`
		afa004	`+ bool goodName;`
		afa004	`+ bool goodNameStart;`
		afa004	`+ const char *tagName;`
		afa004	`+ };`
		afa004	`+`
		afa004	`+ // The idea with the tests below is this:`
		afa004	`+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences`
		afa004	`+ // go to isNever and are hence not a concern.`
		afa004	`+ //`
		afa004	`+ // We start with a character that is a valid name character`
		afa004	`+ // (or even name-start character, see XML 1.0r4 spec) and then we flip`
		afa004	`+ // single bits at places where (1) the result leaves the UTF-8 encoding space`
		afa004	`+ // and (2) we stay in the same n-byte sequence family.`
		afa004	`+ //`
		afa004	`+ // The flipped bits are highlighted in angle brackets in comments,`
		afa004	`+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped`
		afa004	`+ // the most significant bit to 1 to leave UTF-8 encoding space.`
		afa004	`+ struct test_case cases[] = {`
		afa004	`+ // 1-byte UTF-8: [0xxx xxxx]`
		afa004	`+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'`
		afa004	`+ {false, false, "\xBA"}, // [<1>011 1010]`
		afa004	`+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'`
		afa004	`+ {false, false, "\xB9"}, // [<1>011 1001]`
		afa004	`+`
		afa004	`+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]`
		afa004	`+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =`
		afa004	`+ // Arabic small waw U+06E5`
		afa004	`+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]`
		afa004	`+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]`
		afa004	`+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]`
		afa004	`+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =`
		afa004	`+ // combining char U+0301`
		afa004	`+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]`
		afa004	`+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]`
		afa004	`+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]`
		afa004	`+`
		afa004	`+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]`
		afa004	`+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =`
		afa004	`+ // Devanagari Letter A U+0905`
		afa004	`+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]`
		afa004	`+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]`
		afa004	`+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]`
		afa004	`+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]`
		afa004	`+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]`
		afa004	`+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =`
		afa004	`+ // combining char U+0901`
		afa004	`+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]`
		afa004	`+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]`
		afa004	`+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]`
		afa004	`+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]`
		afa004	`+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]`
		afa004	`+ };`
		afa004	`+ const bool atNameStart[] = {true, false};`
		afa004	`+`
		afa004	`+ size_t i = 0;`
		afa004	`+ char doc[1024];`
		afa004	`+ size_t failCount = 0;`
		afa004	`+`
		afa004	`+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {`
		afa004	`+ size_t j = 0;`
		afa004	`+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {`
		afa004	`+ const bool expectedSuccess`
		afa004	`+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;`
		afa004	`+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);`
		afa004	`+ XML_Parser parser = XML_ParserCreate(NULL);`
		afa004	`+`
		afa004	`+ const enum XML_Status status`
		afa004	`+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);`
		afa004	`+`
		afa004	`+ bool success = true;`
		afa004	`+ if ((status == XML_STATUS_OK) != expectedSuccess) {`
		afa004	`+ success = false;`
		afa004	`+ }`
		afa004	`+ if ((status == XML_STATUS_ERROR)`
		afa004	`+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {`
		afa004	`+ success = false;`
		afa004	`+ }`
		afa004	`+`
		afa004	`+ if (! success) {`
		afa004	`+ fprintf(`
		afa004	`+ stderr,`
		afa004	`+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",`
		afa004	`+ (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",`
		afa004	`+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));`
		afa004	`+ failCount++;`
		afa004	`+ }`
		afa004	`+`
		afa004	`+ XML_ParserFree(parser);`
		afa004	`+ }`
		afa004	`+ }`
		afa004	`+`
		afa004	`+ if (failCount > 0) {`
		afa004	`+ fail("UTF-8 regression detected");`
		afa004	`+ }`
		afa004	`+}`
		afa004	`+END_TEST`
		afa004	`+`
		afa004	`+`
		afa004	`/* Test trailing spaces in elements are accepted */`
		afa004	`static void XMLCALL`
		afa004	`record_element_end_handler(void *userData,`
		afa004	`@@ -6734,6 +6834,15 @@ START_TEST(test_bad_doctype)`
		afa004	`}`
		afa004	`END_TEST`
		afa004
		afa004	`+START_TEST(test_bad_doctype_utf8) {`
		afa004	`+ const char *text = "<!DOCTYPE \xDB\x25"`
		afa004	`+ "doc><doc/>"; // [1101 1011] [<0>010 0101]`
		afa004	`+ expect_failure(text, XML_ERROR_INVALID_TOKEN,`
		afa004	`+ "Invalid UTF-8 in DOCTYPE not faulted");`
		afa004	`+}`
		afa004	`+END_TEST`
		afa004	`+`
		afa004	`+`
		afa004	`START_TEST(test_bad_doctype_utf16)`
		afa004	`{`
		afa004	`const char text[] =`
		afa004	`@@ -12256,6 +12365,7 @@ make_suite(void)`
		afa004	`tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);`
		afa004	`tcase_add_test(tc_basic, test_utf8_in_cdata_section);`
		afa004	`tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);`
		afa004	`+ tcase_add_test(tc_basic, test_utf8_in_start_tags);`
		afa004	`tcase_add_test(tc_basic, test_trailing_spaces_in_elements);`
		afa004	`tcase_add_test(tc_basic, test_utf16_attribute);`
		afa004	`tcase_add_test(tc_basic, test_utf16_second_attr);`
		afa004	`@@ -12264,6 +12374,7 @@ make_suite(void)`
		afa004	`tcase_add_test(tc_basic, test_bad_attr_desc_keyword);`
		afa004	`tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);`
		afa004	`tcase_add_test(tc_basic, test_bad_doctype);`
		afa004	`+ tcase_add_test(tc_basic, test_bad_doctype_utf8);`
		afa004	`tcase_add_test(tc_basic, test_bad_doctype_utf16);`
		afa004	`tcase_add_test(tc_basic, test_bad_doctype_plus);`
		afa004	`tcase_add_test(tc_basic, test_bad_doctype_star);`

rpms / expat

Source Code

Blame SOURCES/expat-2.2.5-Add-missing-validation-of-encoding.patch