Tree - rpms/libxml2 - CentOS Git server

xzyang / rpms / libxml2

Forked from rpms/libxml2 3 years ago

Source
Stats

Blame SOURCES/libxml2-Heap-based-buffer-overread-in-htmlCurrentChar.patch

Blob History Raw

		1c8959	`commit c26d0004e779316830d93120dbfe98f6eee0783b`
		1c8959	`Author: Pranjal Jumde <pjumde@apple.com>`
		1c8959	`Date: Tue Mar 1 15:18:04 2016 -0800`
		1c8959
		1c8959	`Heap-based buffer overread in htmlCurrentChar`
		1c8959
		1c8959	`For https://bugzilla.gnome.org/show_bug.cgi?id=758606`
		1c8959
		1c8959	`* parserInternals.c:`
		1c8959	`(xmlNextChar): Add a test to catch other issues on ctxt->input`
		1c8959	`corruption proactively.`
		1c8959	`For non-UTF-8 charsets, xmlNextChar() failed to check for the end`
		1c8959	`of the input buffer and would continue reading. Fix this by`
		1c8959	`pulling out the check for the end of the input buffer into common`
		1c8959	`code, and returning if we reach the end of the input buffer`
		1c8959	`prematurely.`
		1c8959	`* result/HTML/758606.html: Added.`
		1c8959	`* result/HTML/758606.html.err: Added.`
		1c8959	`* result/HTML/758606.html.sax: Added.`
		1c8959	`* result/HTML/758606_2.html: Added.`
		1c8959	`* result/HTML/758606_2.html.err: Added.`
		1c8959	`* result/HTML/758606_2.html.sax: Added.`
		1c8959	`* test/HTML/758606.html: Added test case.`
		1c8959	`* test/HTML/758606_2.html: Added test case.`
		1c8959
		1c8959	`diff --git a/parserInternals.c b/parserInternals.c`
		1c8959	`index 1fe1f6a..341d6a1 100644`
		1c8959	`--- a/parserInternals.c`
		1c8959	`+++ b/parserInternals.c`
		1c8959	`@@ -55,6 +55,10 @@`
		1c8959	`#include <libxml/globals.h>`
		1c8959	`#include <libxml/chvalid.h>`
		1c8959
		1c8959	`+#define CUR(ctxt) ctxt->input->cur`
		1c8959	`+#define END(ctxt) ctxt->input->end`
		1c8959	`+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))`
		1c8959	`+`
		1c8959	`#include "buf.h"`
		1c8959	`#include "enc.h"`
		1c8959
		1c8959	`@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)`
		1c8959	`(ctxt->input == NULL))`
		1c8959	`return;`
		1c8959
		1c8959	`- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {`
		1c8959	`- if ((*ctxt->input->cur == 0) &&`
		1c8959	`- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&`
		1c8959	`- (ctxt->instate != XML_PARSER_COMMENT)) {`
		1c8959	`- /*`
		1c8959	`- * If we are at the end of the current entity and`
		1c8959	`- * the context allows it, we pop consumed entities`
		1c8959	`- * automatically.`
		1c8959	`- * the auto closing should be blocked in other cases`
		1c8959	`- */`
		1c8959	`+ if (!(VALID_CTXT(ctxt))) {`
		1c8959	`+ xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);`
		1c8959	`+ ctxt->errNo = XML_ERR_INTERNAL_ERROR;`
		1c8959	`+ xmlStopParser(ctxt);`
		1c8959	`+ return;`
		1c8959	`+ }`
		1c8959	`+`
		1c8959	`+ if ((*ctxt->input->cur == 0) &&`
		1c8959	`+ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {`
		1c8959	`+ if ((ctxt->instate != XML_PARSER_COMMENT))`
		1c8959	`xmlPopInput(ctxt);`
		1c8959	`- } else {`
		1c8959	`- const unsigned char *cur;`
		1c8959	`- unsigned char c;`
		1c8959	`+ return;`
		1c8959	`+ }`
		1c8959
		1c8959	`- /*`
		1c8959	`- * 2.11 End-of-Line Handling`
		1c8959	`- * the literal two-character sequence "#xD#xA" or a standalone`
		1c8959	`- * literal #xD, an XML processor must pass to the application`
		1c8959	`- * the single character #xA.`
		1c8959	`- */`
		1c8959	`- if (*(ctxt->input->cur) == '\n') {`
		1c8959	`- ctxt->input->line++; ctxt->input->col = 1;`
		1c8959	`- } else`
		1c8959	`- ctxt->input->col++;`
		1c8959	`+ if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {`
		1c8959	`+ const unsigned char *cur;`
		1c8959	`+ unsigned char c;`
		1c8959
		1c8959	`- /*`
		1c8959	`- * We are supposed to handle UTF8, check it's valid`
		1c8959	`- * From rfc2044: encoding of the Unicode values on UTF-8:`
		1c8959	`- *`
		1c8959	`- * UCS-4 range (hex.) UTF-8 octet sequence (binary)`
		1c8959	`- * 0000 0000-0000 007F 0xxxxxxx`
		1c8959	`- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx`
		1c8959	`- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx`
		1c8959	`- *`
		1c8959	`- * Check for the 0x110000 limit too`
		1c8959	`- */`
		1c8959	`- cur = ctxt->input->cur;`
		1c8959	`+ /*`
		1c8959	`+ * 2.11 End-of-Line Handling`
		1c8959	`+ * the literal two-character sequence "#xD#xA" or a standalone`
		1c8959	`+ * literal #xD, an XML processor must pass to the application`
		1c8959	`+ * the single character #xA.`
		1c8959	`+ */`
		1c8959	`+ if (*(ctxt->input->cur) == '\n') {`
		1c8959	`+ ctxt->input->line++; ctxt->input->col = 1;`
		1c8959	`+ } else`
		1c8959	`+ ctxt->input->col++;`
		1c8959
		1c8959	`- c = *cur;`
		1c8959	`- if (c & 0x80) {`
		1c8959	`- if (c == 0xC0)`
		1c8959	`- goto encoding_error;`
		1c8959	`- if (cur[1] == 0) {`
		1c8959	`+ /*`
		1c8959	`+ * We are supposed to handle UTF8, check it's valid`
		1c8959	`+ * From rfc2044: encoding of the Unicode values on UTF-8:`
		1c8959	`+ *`
		1c8959	`+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)`
		1c8959	`+ * 0000 0000-0000 007F 0xxxxxxx`
		1c8959	`+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx`
		1c8959	`+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx`
		1c8959	`+ *`
		1c8959	`+ * Check for the 0x110000 limit too`
		1c8959	`+ */`
		1c8959	`+ cur = ctxt->input->cur;`
		1c8959	`+`
		1c8959	`+ c = *cur;`
		1c8959	`+ if (c & 0x80) {`
		1c8959	`+ if (c == 0xC0)`
		1c8959	`+ goto encoding_error;`
		1c8959	`+ if (cur[1] == 0) {`
		1c8959	`+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`+ cur = ctxt->input->cur;`
		1c8959	`+ }`
		1c8959	`+ if ((cur[1] & 0xc0) != 0x80)`
		1c8959	`+ goto encoding_error;`
		1c8959	`+ if ((c & 0xe0) == 0xe0) {`
		1c8959	`+ unsigned int val;`
		1c8959	`+`
		1c8959	`+ if (cur[2] == 0) {`
		1c8959	`xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`cur = ctxt->input->cur;`
		1c8959	`}`
		1c8959	`- if ((cur[1] & 0xc0) != 0x80)`
		1c8959	`+ if ((cur[2] & 0xc0) != 0x80)`
		1c8959	`goto encoding_error;`
		1c8959	`- if ((c & 0xe0) == 0xe0) {`
		1c8959	`- unsigned int val;`
		1c8959	`-`
		1c8959	`- if (cur[2] == 0) {`
		1c8959	`+ if ((c & 0xf0) == 0xf0) {`
		1c8959	`+ if (cur[3] == 0) {`
		1c8959	`xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`cur = ctxt->input->cur;`
		1c8959	`}`
		1c8959	`- if ((cur[2] & 0xc0) != 0x80)`
		1c8959	`+ if (((c & 0xf8) != 0xf0) \|\|`
		1c8959	`+ ((cur[3] & 0xc0) != 0x80))`
		1c8959	`goto encoding_error;`
		1c8959	`- if ((c & 0xf0) == 0xf0) {`
		1c8959	`- if (cur[3] == 0) {`
		1c8959	`- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`- cur = ctxt->input->cur;`
		1c8959	`- }`
		1c8959	`- if (((c & 0xf8) != 0xf0) \|\|`
		1c8959	`- ((cur[3] & 0xc0) != 0x80))`
		1c8959	`- goto encoding_error;`
		1c8959	`- /* 4-byte code */`
		1c8959	`- ctxt->input->cur += 4;`
		1c8959	`- val = (cur[0] & 0x7) << 18;`
		1c8959	`- val \|= (cur[1] & 0x3f) << 12;`
		1c8959	`- val \|= (cur[2] & 0x3f) << 6;`
		1c8959	`- val \|= cur[3] & 0x3f;`
		1c8959	`- } else {`
		1c8959	`- /* 3-byte code */`
		1c8959	`- ctxt->input->cur += 3;`
		1c8959	`- val = (cur[0] & 0xf) << 12;`
		1c8959	`- val \|= (cur[1] & 0x3f) << 6;`
		1c8959	`- val \|= cur[2] & 0x3f;`
		1c8959	`- }`
		1c8959	`- if (((val > 0xd7ff) && (val < 0xe000)) \|\|`
		1c8959	`- ((val > 0xfffd) && (val < 0x10000)) \|\|`
		1c8959	`- (val >= 0x110000)) {`
		1c8959	`- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,`
		1c8959	`- "Char 0x%X out of allowed range\n",`
		1c8959	`- val);`
		1c8959	`- }`
		1c8959	`- } else`
		1c8959	`- /* 2-byte code */`
		1c8959	`- ctxt->input->cur += 2;`
		1c8959	`+ /* 4-byte code */`
		1c8959	`+ ctxt->input->cur += 4;`
		1c8959	`+ val = (cur[0] & 0x7) << 18;`
		1c8959	`+ val \|= (cur[1] & 0x3f) << 12;`
		1c8959	`+ val \|= (cur[2] & 0x3f) << 6;`
		1c8959	`+ val \|= cur[3] & 0x3f;`
		1c8959	`+ } else {`
		1c8959	`+ /* 3-byte code */`
		1c8959	`+ ctxt->input->cur += 3;`
		1c8959	`+ val = (cur[0] & 0xf) << 12;`
		1c8959	`+ val \|= (cur[1] & 0x3f) << 6;`
		1c8959	`+ val \|= cur[2] & 0x3f;`
		1c8959	`+ }`
		1c8959	`+ if (((val > 0xd7ff) && (val < 0xe000)) \|\|`
		1c8959	`+ ((val > 0xfffd) && (val < 0x10000)) \|\|`
		1c8959	`+ (val >= 0x110000)) {`
		1c8959	`+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,`
		1c8959	`+ "Char 0x%X out of allowed range\n",`
		1c8959	`+ val);`
		1c8959	`+ }`
		1c8959	`} else`
		1c8959	`- /* 1-byte code */`
		1c8959	`- ctxt->input->cur++;`
		1c8959	`+ /* 2-byte code */`
		1c8959	`+ ctxt->input->cur += 2;`
		1c8959	`+ } else`
		1c8959	`+ /* 1-byte code */`
		1c8959	`+ ctxt->input->cur++;`
		1c8959
		1c8959	`- ctxt->nbChars++;`
		1c8959	`- if (*ctxt->input->cur == 0)`
		1c8959	`- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`- }`
		1c8959	`+ ctxt->nbChars++;`
		1c8959	`+ if (*ctxt->input->cur == 0)`
		1c8959	`+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);`
		1c8959	`} else {`
		1c8959	`/*`
		1c8959	`* Assume it's a fixed length encoding (1) with`
		1c8959	`diff --git a/result/HTML/758606.html b/result/HTML/758606.html`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..4f21f62`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606.html`
		1c8959	`@@ -0,0 +1,2 @@`
		1c8959	`+`
		1c8959	`+`
		1c8959	`diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..060433a`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606.html.err`
		1c8959	`@@ -0,0 +1,16 @@`
		1c8959	`+./test/HTML/758606.html:1: HTML parser error : Comment not terminated`
		1c8959	`+`
		1c8959	`+`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC`
		1c8959	`+`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration`
		1c8959	`+`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !`
		1c8959	`+`
		1c8959	`+^`
		1c8959	`+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated`
		1c8959	`+`
		1c8959	`+^`
		1c8959	`diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..d44a5cf`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606.html.sax`
		1c8959	`@@ -0,0 +1,10 @@`
		1c8959	`+SAX.setDocumentLocator()`
		1c8959	`+SAX.startDocument()`
		1c8959	`+SAX.error: Comment not terminated`
		1c8959	`+`
		1c8959	`+SAX.error: Invalid char in CDATA 0xC`
		1c8959	`+SAX.error: Misplaced DOCTYPE declaration`
		1c8959	`+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !`
		1c8959	`+SAX.error: DOCTYPE improperly terminated`
		1c8959	`+SAX.internalSubset((null), , )`
		1c8959	`+SAX.endDocument()`
		1c8959	`diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..273816a`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606_2.html`
		1c8959	`@@ -0,0 +1,2 @@`
		1c8959	`+`
		1c8959	`+<html><body>‘ </body></html>`
		1c8959	`diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..4be039f`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606_2.html.err`
		1c8959	`@@ -0,0 +1,16 @@`
		1c8959	`+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated`
		1c8959	`+`
		1c8959	`+`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC`
		1c8959	`+`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration`
		1c8959	`+Â‘`
		1c8959	`+ ^`
		1c8959	`+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !`
		1c8959	`+`
		1c8959	`+^`
		1c8959	`+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated`
		1c8959	`+`
		1c8959	`+^`
		1c8959	`diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..80ff3d7`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/result/HTML/758606_2.html.sax`
		1c8959	`@@ -0,0 +1,17 @@`
		1c8959	`+SAX.setDocumentLocator()`
		1c8959	`+SAX.startDocument()`
		1c8959	`+SAX.error: Comment not terminated`
		1c8959	`+`
		1c8959	`+SAX.error: Invalid char in CDATA 0xC`
		1c8959	`+SAX.startElement(html)`
		1c8959	`+SAX.startElement(body)`
		1c8959	`+SAX.startElement(p)`
		1c8959	`+SAX.characters(‘, 2)`
		1c8959	`+SAX.error: Misplaced DOCTYPE declaration`
		1c8959	`+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !`
		1c8959	`+SAX.error: DOCTYPE improperly terminated`
		1c8959	`+SAX.internalSubset((null), , )`
		1c8959	`+SAX.endElement(p)`
		1c8959	`+SAX.endElement(body)`
		1c8959	`+SAX.endElement(html)`
		1c8959	`+SAX.endDocument()`
		1c8959	`diff --git a/test/HTML/758606.html b/test/HTML/758606.html`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..01a013c`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/test/HTML/758606.html`
		1c8959	`@@ -0,0 +1 @@`
		1c8959	`+`
		1c8959	`diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html`
		1c8959	`new file mode 100644`
		1c8959	`index 0000000..daa185b`
		1c8959	`--- /dev/null`
		1c8959	`+++ b/test/HTML/758606_2.html`
		1c8959	`@@ -0,0 +1 @@`
		1c8959	`+`

xzyang / rpms / libxml2

Source Code

Blame SOURCES/libxml2-Heap-based-buffer-overread-in-htmlCurrentChar.patch