Tree - rpms/expat - CentOS Git server

rpms / expat

Blame SOURCES/expat-2.2.5-Add-missing-validation-of-encoding.patch

Blob History Raw

		8ad338	`commit e8f285b522a907603501329e5b4212755f525fdf`
		8ad338	`Author: Tomas Korbar <tkorbar@redhat.com>`
		8ad338	`Date: Thu Mar 3 12:04:09 2022 +0100`
		8ad338
		8ad338	`CVE-2022-25235`
		8ad338
		8ad338	`diff --git a/lib/xmltok.c b/lib/xmltok.c`
		8ad338	`index 6b415d8..b55732a 100644`
		8ad338	`--- a/lib/xmltok.c`
		8ad338	`+++ b/lib/xmltok.c`
		8ad338	`@@ -103,13 +103,6 @@`
		8ad338	`+ ((((byte)[2]) >> 5) & 1)] \`
		8ad338	`& (1u << (((byte)[2]) & 0x1F)))`
		8ad338
		8ad338	`-#define UTF8_GET_NAMING(pages, p, n) \`
		8ad338	`- ((n) == 2 \`
		8ad338	`- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \`
		8ad338	`- : ((n) == 3 \`
		8ad338	`- ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \`
		8ad338	`- : 0))`
		8ad338	`-`
		8ad338	`/* Detection of invalid UTF-8 sequences is based on Table 3.1B`
		8ad338	`of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/`
		8ad338	`with the additional restriction of not allowing the Unicode`
		8ad338	`diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c`
		8ad338	`index 0403dd3..56d7a40 100644`
		8ad338	`--- a/lib/xmltok_impl.c`
		8ad338	`+++ b/lib/xmltok_impl.c`
		8ad338	`@@ -61,7 +61,7 @@`
		8ad338	`case BT_LEAD ## n: \`
		8ad338	`if (end - ptr < n) \`
		8ad338	`return XML_TOK_PARTIAL_CHAR; \`
		8ad338	`- if (!IS_NAME_CHAR(enc, ptr, n)) { \`
		8ad338	`+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \`
		8ad338	`*nextTokPtr = ptr; \`
		8ad338	`return XML_TOK_INVALID; \`
		8ad338	`} \`
		8ad338	`@@ -89,7 +89,7 @@`
		8ad338	`case BT_LEAD ## n: \`
		8ad338	`if (end - ptr < n) \`
		8ad338	`return XML_TOK_PARTIAL_CHAR; \`
		8ad338	`- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		8ad338	`+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		8ad338	`*nextTokPtr = ptr; \`
		8ad338	`return XML_TOK_INVALID; \`
		8ad338	`} \`
		8ad338	`@@ -1117,6 +1117,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,`
		8ad338	`case BT_LEAD ## n: \`
		8ad338	`if (end - ptr < n) \`
		8ad338	`return XML_TOK_PARTIAL_CHAR; \`
		8ad338	`+ if (IS_INVALID_CHAR(enc, ptr, n)) { \`
		8ad338	`+ *nextTokPtr = ptr; \`
		8ad338	`+ return XML_TOK_INVALID; \`
		8ad338	`+ } \`
		8ad338	`if (IS_NMSTRT_CHAR(enc, ptr, n)) { \`
		8ad338	`ptr += n; \`
		8ad338	`tok = XML_TOK_NAME; \`
		8ad338	`diff --git a/tests/runtests.c b/tests/runtests.c`
		8ad338	`index 278bfa1..0f3afde 100644`
		8ad338	`--- a/tests/runtests.c`
		8ad338	`+++ b/tests/runtests.c`
		8ad338	`@@ -6540,6 +6540,106 @@ START_TEST(test_utf8_in_cdata_section_2)`
		8ad338	`}`
		8ad338	`END_TEST`
		8ad338
		8ad338	`+START_TEST(test_utf8_in_start_tags) {`
		8ad338	`+ struct test_case {`
		8ad338	`+ bool goodName;`
		8ad338	`+ bool goodNameStart;`
		8ad338	`+ const char *tagName;`
		8ad338	`+ };`
		8ad338	`+`
		8ad338	`+ // The idea with the tests below is this:`
		8ad338	`+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences`
		8ad338	`+ // go to isNever and are hence not a concern.`
		8ad338	`+ //`
		8ad338	`+ // We start with a character that is a valid name character`
		8ad338	`+ // (or even name-start character, see XML 1.0r4 spec) and then we flip`
		8ad338	`+ // single bits at places where (1) the result leaves the UTF-8 encoding space`
		8ad338	`+ // and (2) we stay in the same n-byte sequence family.`
		8ad338	`+ //`
		8ad338	`+ // The flipped bits are highlighted in angle brackets in comments,`
		8ad338	`+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped`
		8ad338	`+ // the most significant bit to 1 to leave UTF-8 encoding space.`
		8ad338	`+ struct test_case cases[] = {`
		8ad338	`+ // 1-byte UTF-8: [0xxx xxxx]`
		8ad338	`+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'`
		8ad338	`+ {false, false, "\xBA"}, // [<1>011 1010]`
		8ad338	`+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'`
		8ad338	`+ {false, false, "\xB9"}, // [<1>011 1001]`
		8ad338	`+`
		8ad338	`+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]`
		8ad338	`+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =`
		8ad338	`+ // Arabic small waw U+06E5`
		8ad338	`+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]`
		8ad338	`+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]`
		8ad338	`+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]`
		8ad338	`+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =`
		8ad338	`+ // combining char U+0301`
		8ad338	`+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]`
		8ad338	`+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]`
		8ad338	`+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]`
		8ad338	`+`
		8ad338	`+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xx xxxx]`
		8ad338	`+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =`
		8ad338	`+ // Devanagari Letter A U+0905`
		8ad338	`+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]`
		8ad338	`+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]`
		8ad338	`+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]`
		8ad338	`+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]`
		8ad338	`+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]`
		8ad338	`+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =`
		8ad338	`+ // combining char U+0901`
		8ad338	`+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]`
		8ad338	`+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]`
		8ad338	`+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]`
		8ad338	`+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]`
		8ad338	`+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]`
		8ad338	`+ };`
		8ad338	`+ const bool atNameStart[] = {true, false};`
		8ad338	`+`
		8ad338	`+ size_t i = 0;`
		8ad338	`+ char doc[1024];`
		8ad338	`+ size_t failCount = 0;`
		8ad338	`+`
		8ad338	`+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {`
		8ad338	`+ size_t j = 0;`
		8ad338	`+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {`
		8ad338	`+ const bool expectedSuccess`
		8ad338	`+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;`
		8ad338	`+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);`
		8ad338	`+ XML_Parser parser = XML_ParserCreate(NULL);`
		8ad338	`+`
		8ad338	`+ const enum XML_Status status`
		8ad338	`+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);`
		8ad338	`+`
		8ad338	`+ bool success = true;`
		8ad338	`+ if ((status == XML_STATUS_OK) != expectedSuccess) {`
		8ad338	`+ success = false;`
		8ad338	`+ }`
		8ad338	`+ if ((status == XML_STATUS_ERROR)`
		8ad338	`+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {`
		8ad338	`+ success = false;`
		8ad338	`+ }`
		8ad338	`+`
		8ad338	`+ if (! success) {`
		8ad338	`+ fprintf(`
		8ad338	`+ stderr,`
		8ad338	`+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",`
		8ad338	`+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",`
		8ad338	`+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));`
		8ad338	`+ failCount++;`
		8ad338	`+ }`
		8ad338	`+`
		8ad338	`+ XML_ParserFree(parser);`
		8ad338	`+ }`
		8ad338	`+ }`
		8ad338	`+`
		8ad338	`+ if (failCount > 0) {`
		8ad338	`+ fail("UTF-8 regression detected");`
		8ad338	`+ }`
		8ad338	`+}`
		8ad338	`+END_TEST`
		8ad338	`+`
		8ad338	`+`
		8ad338	`/* Test trailing spaces in elements are accepted */`
		8ad338	`static void XMLCALL`
		8ad338	`record_element_end_handler(void *userData,`
		8ad338	`@@ -6734,6 +6834,15 @@ START_TEST(test_bad_doctype)`
		8ad338	`}`
		8ad338	`END_TEST`
		8ad338
		8ad338	`+START_TEST(test_bad_doctype_utf8) {`
		8ad338	`+ const char *text = "<!DOCTYPE \xDB\x25"`
		8ad338	`+ "doc><doc/>"; // [1101 1011] [<0>010 0101]`
		8ad338	`+ expect_failure(text, XML_ERROR_INVALID_TOKEN,`
		8ad338	`+ "Invalid UTF-8 in DOCTYPE not faulted");`
		8ad338	`+}`
		8ad338	`+END_TEST`
		8ad338	`+`
		8ad338	`+`
		8ad338	`START_TEST(test_bad_doctype_utf16)`
		8ad338	`{`
		8ad338	`const char text[] =`
		8ad338	`@@ -12256,6 +12365,7 @@ make_suite(void)`
		8ad338	`tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);`
		8ad338	`tcase_add_test(tc_basic, test_utf8_in_cdata_section);`
		8ad338	`tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);`
		8ad338	`+ tcase_add_test(tc_basic, test_utf8_in_start_tags);`
		8ad338	`tcase_add_test(tc_basic, test_trailing_spaces_in_elements);`
		8ad338	`tcase_add_test(tc_basic, test_utf16_attribute);`
		8ad338	`tcase_add_test(tc_basic, test_utf16_second_attr);`
		8ad338	`@@ -12264,6 +12374,7 @@ make_suite(void)`
		8ad338	`tcase_add_test(tc_basic, test_bad_attr_desc_keyword);`
		8ad338	`tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);`
		8ad338	`tcase_add_test(tc_basic, test_bad_doctype);`
		8ad338	`+ tcase_add_test(tc_basic, test_bad_doctype_utf8);`
		8ad338	`tcase_add_test(tc_basic, test_bad_doctype_utf16);`
		8ad338	`tcase_add_test(tc_basic, test_bad_doctype_plus);`
		8ad338	`tcase_add_test(tc_basic, test_bad_doctype_star);`

rpms / expat

Source Code

Blame SOURCES/expat-2.2.5-Add-missing-validation-of-encoding.patch