|
|
6dedca |
commit c26d0004e779316830d93120dbfe98f6eee0783b
|
|
|
6dedca |
Author: Pranjal Jumde <pjumde@apple.com>
|
|
|
6dedca |
Date: Tue Mar 1 15:18:04 2016 -0800
|
|
|
6dedca |
|
|
|
6dedca |
Heap-based buffer overread in htmlCurrentChar
|
|
|
6dedca |
|
|
|
6dedca |
For https://bugzilla.gnome.org/show_bug.cgi?id=758606
|
|
|
6dedca |
|
|
|
6dedca |
* parserInternals.c:
|
|
|
6dedca |
(xmlNextChar): Add a test to catch other issues on ctxt->input
|
|
|
6dedca |
corruption proactively.
|
|
|
6dedca |
For non-UTF-8 charsets, xmlNextChar() failed to check for the end
|
|
|
6dedca |
of the input buffer and would continue reading. Fix this by
|
|
|
6dedca |
pulling out the check for the end of the input buffer into common
|
|
|
6dedca |
code, and return if we reach the end of the input buffer
|
|
|
6dedca |
prematurely.
|
|
|
6dedca |
* result/HTML/758606.html: Added.
|
|
|
6dedca |
* result/HTML/758606.html.err: Added.
|
|
|
6dedca |
* result/HTML/758606.html.sax: Added.
|
|
|
6dedca |
* result/HTML/758606_2.html: Added.
|
|
|
6dedca |
* result/HTML/758606_2.html.err: Added.
|
|
|
6dedca |
* result/HTML/758606_2.html.sax: Added.
|
|
|
6dedca |
* test/HTML/758606.html: Added test case.
|
|
|
6dedca |
* test/HTML/758606_2.html: Added test case.
|
|
|
6dedca |
|
|
|
6dedca |
diff --git a/parserInternals.c b/parserInternals.c
|
|
|
6dedca |
index 1fe1f6a..341d6a1 100644
|
|
|
6dedca |
--- a/parserInternals.c
|
|
|
6dedca |
+++ b/parserInternals.c
|
|
|
6dedca |
@@ -55,6 +55,10 @@
|
|
|
6dedca |
#include <libxml/globals.h>
|
|
|
6dedca |
#include <libxml/chvalid.h>
|
|
|
6dedca |
|
|
|
6dedca |
+#define CUR(ctxt) ctxt->input->cur
|
|
|
6dedca |
+#define END(ctxt) ctxt->input->end
|
|
|
6dedca |
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
|
|
|
6dedca |
+
|
|
|
6dedca |
#include "buf.h"
|
|
|
6dedca |
#include "enc.h"
|
|
|
6dedca |
|
|
|
6dedca |
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
|
|
|
6dedca |
(ctxt->input == NULL))
|
|
|
6dedca |
return;
|
|
|
6dedca |
|
|
|
6dedca |
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
|
|
|
6dedca |
- if ((*ctxt->input->cur == 0) &&
|
|
|
6dedca |
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
|
|
|
6dedca |
- (ctxt->instate != XML_PARSER_COMMENT)) {
|
|
|
6dedca |
- /*
|
|
|
6dedca |
- * If we are at the end of the current entity and
|
|
|
6dedca |
- * the context allows it, we pop consumed entities
|
|
|
6dedca |
- * automatically.
|
|
|
6dedca |
- * the auto closing should be blocked in other cases
|
|
|
6dedca |
- */
|
|
|
6dedca |
+ if (!(VALID_CTXT(ctxt))) {
|
|
|
6dedca |
+ xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
|
|
|
6dedca |
+ ctxt->errNo = XML_ERR_INTERNAL_ERROR;
|
|
|
6dedca |
+ xmlStopParser(ctxt);
|
|
|
6dedca |
+ return;
|
|
|
6dedca |
+ }
|
|
|
6dedca |
+
|
|
|
6dedca |
+ if ((*ctxt->input->cur == 0) &&
|
|
|
6dedca |
+ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
|
|
|
6dedca |
+ if ((ctxt->instate != XML_PARSER_COMMENT))
|
|
|
6dedca |
xmlPopInput(ctxt);
|
|
|
6dedca |
- } else {
|
|
|
6dedca |
- const unsigned char *cur;
|
|
|
6dedca |
- unsigned char c;
|
|
|
6dedca |
+ return;
|
|
|
6dedca |
+ }
|
|
|
6dedca |
|
|
|
6dedca |
- /*
|
|
|
6dedca |
- * 2.11 End-of-Line Handling
|
|
|
6dedca |
- * the literal two-character sequence "#xD#xA" or a standalone
|
|
|
6dedca |
- * literal #xD, an XML processor must pass to the application
|
|
|
6dedca |
- * the single character #xA.
|
|
|
6dedca |
- */
|
|
|
6dedca |
- if (*(ctxt->input->cur) == '\n') {
|
|
|
6dedca |
- ctxt->input->line++; ctxt->input->col = 1;
|
|
|
6dedca |
- } else
|
|
|
6dedca |
- ctxt->input->col++;
|
|
|
6dedca |
+ if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
|
|
|
6dedca |
+ const unsigned char *cur;
|
|
|
6dedca |
+ unsigned char c;
|
|
|
6dedca |
|
|
|
6dedca |
- /*
|
|
|
6dedca |
- * We are supposed to handle UTF8, check it's valid
|
|
|
6dedca |
- * From rfc2044: encoding of the Unicode values on UTF-8:
|
|
|
6dedca |
- *
|
|
|
6dedca |
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
|
6dedca |
- * 0000 0000-0000 007F 0xxxxxxx
|
|
|
6dedca |
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
|
|
6dedca |
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
6dedca |
- *
|
|
|
6dedca |
- * Check for the 0x110000 limit too
|
|
|
6dedca |
- */
|
|
|
6dedca |
- cur = ctxt->input->cur;
|
|
|
6dedca |
+ /*
|
|
|
6dedca |
+ * 2.11 End-of-Line Handling
|
|
|
6dedca |
+ * the literal two-character sequence "#xD#xA" or a standalone
|
|
|
6dedca |
+ * literal #xD, an XML processor must pass to the application
|
|
|
6dedca |
+ * the single character #xA.
|
|
|
6dedca |
+ */
|
|
|
6dedca |
+ if (*(ctxt->input->cur) == '\n') {
|
|
|
6dedca |
+ ctxt->input->line++; ctxt->input->col = 1;
|
|
|
6dedca |
+ } else
|
|
|
6dedca |
+ ctxt->input->col++;
|
|
|
6dedca |
|
|
|
6dedca |
- c = *cur;
|
|
|
6dedca |
- if (c & 0x80) {
|
|
|
6dedca |
- if (c == 0xC0)
|
|
|
6dedca |
- goto encoding_error;
|
|
|
6dedca |
- if (cur[1] == 0) {
|
|
|
6dedca |
+ /*
|
|
|
6dedca |
+ * We are supposed to handle UTF8, check it's valid
|
|
|
6dedca |
+ * From rfc2044: encoding of the Unicode values on UTF-8:
|
|
|
6dedca |
+ *
|
|
|
6dedca |
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
|
6dedca |
+ * 0000 0000-0000 007F 0xxxxxxx
|
|
|
6dedca |
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
|
|
6dedca |
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
6dedca |
+ *
|
|
|
6dedca |
+ * Check for the 0x110000 limit too
|
|
|
6dedca |
+ */
|
|
|
6dedca |
+ cur = ctxt->input->cur;
|
|
|
6dedca |
+
|
|
|
6dedca |
+ c = *cur;
|
|
|
6dedca |
+ if (c & 0x80) {
|
|
|
6dedca |
+ if (c == 0xC0)
|
|
|
6dedca |
+ goto encoding_error;
|
|
|
6dedca |
+ if (cur[1] == 0) {
|
|
|
6dedca |
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
+ cur = ctxt->input->cur;
|
|
|
6dedca |
+ }
|
|
|
6dedca |
+ if ((cur[1] & 0xc0) != 0x80)
|
|
|
6dedca |
+ goto encoding_error;
|
|
|
6dedca |
+ if ((c & 0xe0) == 0xe0) {
|
|
|
6dedca |
+ unsigned int val;
|
|
|
6dedca |
+
|
|
|
6dedca |
+ if (cur[2] == 0) {
|
|
|
6dedca |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
cur = ctxt->input->cur;
|
|
|
6dedca |
}
|
|
|
6dedca |
- if ((cur[1] & 0xc0) != 0x80)
|
|
|
6dedca |
+ if ((cur[2] & 0xc0) != 0x80)
|
|
|
6dedca |
goto encoding_error;
|
|
|
6dedca |
- if ((c & 0xe0) == 0xe0) {
|
|
|
6dedca |
- unsigned int val;
|
|
|
6dedca |
-
|
|
|
6dedca |
- if (cur[2] == 0) {
|
|
|
6dedca |
+ if ((c & 0xf0) == 0xf0) {
|
|
|
6dedca |
+ if (cur[3] == 0) {
|
|
|
6dedca |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
cur = ctxt->input->cur;
|
|
|
6dedca |
}
|
|
|
6dedca |
- if ((cur[2] & 0xc0) != 0x80)
|
|
|
6dedca |
+ if (((c & 0xf8) != 0xf0) ||
|
|
|
6dedca |
+ ((cur[3] & 0xc0) != 0x80))
|
|
|
6dedca |
goto encoding_error;
|
|
|
6dedca |
- if ((c & 0xf0) == 0xf0) {
|
|
|
6dedca |
- if (cur[3] == 0) {
|
|
|
6dedca |
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
- cur = ctxt->input->cur;
|
|
|
6dedca |
- }
|
|
|
6dedca |
- if (((c & 0xf8) != 0xf0) ||
|
|
|
6dedca |
- ((cur[3] & 0xc0) != 0x80))
|
|
|
6dedca |
- goto encoding_error;
|
|
|
6dedca |
- /* 4-byte code */
|
|
|
6dedca |
- ctxt->input->cur += 4;
|
|
|
6dedca |
- val = (cur[0] & 0x7) << 18;
|
|
|
6dedca |
- val |= (cur[1] & 0x3f) << 12;
|
|
|
6dedca |
- val |= (cur[2] & 0x3f) << 6;
|
|
|
6dedca |
- val |= cur[3] & 0x3f;
|
|
|
6dedca |
- } else {
|
|
|
6dedca |
- /* 3-byte code */
|
|
|
6dedca |
- ctxt->input->cur += 3;
|
|
|
6dedca |
- val = (cur[0] & 0xf) << 12;
|
|
|
6dedca |
- val |= (cur[1] & 0x3f) << 6;
|
|
|
6dedca |
- val |= cur[2] & 0x3f;
|
|
|
6dedca |
- }
|
|
|
6dedca |
- if (((val > 0xd7ff) && (val < 0xe000)) ||
|
|
|
6dedca |
- ((val > 0xfffd) && (val < 0x10000)) ||
|
|
|
6dedca |
- (val >= 0x110000)) {
|
|
|
6dedca |
- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
|
|
6dedca |
- "Char 0x%X out of allowed range\n",
|
|
|
6dedca |
- val);
|
|
|
6dedca |
- }
|
|
|
6dedca |
- } else
|
|
|
6dedca |
- /* 2-byte code */
|
|
|
6dedca |
- ctxt->input->cur += 2;
|
|
|
6dedca |
+ /* 4-byte code */
|
|
|
6dedca |
+ ctxt->input->cur += 4;
|
|
|
6dedca |
+ val = (cur[0] & 0x7) << 18;
|
|
|
6dedca |
+ val |= (cur[1] & 0x3f) << 12;
|
|
|
6dedca |
+ val |= (cur[2] & 0x3f) << 6;
|
|
|
6dedca |
+ val |= cur[3] & 0x3f;
|
|
|
6dedca |
+ } else {
|
|
|
6dedca |
+ /* 3-byte code */
|
|
|
6dedca |
+ ctxt->input->cur += 3;
|
|
|
6dedca |
+ val = (cur[0] & 0xf) << 12;
|
|
|
6dedca |
+ val |= (cur[1] & 0x3f) << 6;
|
|
|
6dedca |
+ val |= cur[2] & 0x3f;
|
|
|
6dedca |
+ }
|
|
|
6dedca |
+ if (((val > 0xd7ff) && (val < 0xe000)) ||
|
|
|
6dedca |
+ ((val > 0xfffd) && (val < 0x10000)) ||
|
|
|
6dedca |
+ (val >= 0x110000)) {
|
|
|
6dedca |
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
|
|
6dedca |
+ "Char 0x%X out of allowed range\n",
|
|
|
6dedca |
+ val);
|
|
|
6dedca |
+ }
|
|
|
6dedca |
} else
|
|
|
6dedca |
- /* 1-byte code */
|
|
|
6dedca |
- ctxt->input->cur++;
|
|
|
6dedca |
+ /* 2-byte code */
|
|
|
6dedca |
+ ctxt->input->cur += 2;
|
|
|
6dedca |
+ } else
|
|
|
6dedca |
+ /* 1-byte code */
|
|
|
6dedca |
+ ctxt->input->cur++;
|
|
|
6dedca |
|
|
|
6dedca |
- ctxt->nbChars++;
|
|
|
6dedca |
- if (*ctxt->input->cur == 0)
|
|
|
6dedca |
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
- }
|
|
|
6dedca |
+ ctxt->nbChars++;
|
|
|
6dedca |
+ if (*ctxt->input->cur == 0)
|
|
|
6dedca |
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
|
|
6dedca |
} else {
|
|
|
6dedca |
/*
|
|
|
6dedca |
* Assume it's a fixed length encoding (1) with
|
|
|
6dedca |
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..4f21f62
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606.html
|
|
|
6dedca |
@@ -0,0 +1,2 @@
|
|
|
6dedca |
+
|
|
|
6dedca |
+
|
|
|
6dedca |
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..060433a
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606.html.err
|
|
|
6dedca |
@@ -0,0 +1,16 @@
|
|
|
6dedca |
+./test/HTML/758606.html:1: HTML parser error : Comment not terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
|
|
|
6dedca |
+
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
|
|
|
6dedca |
+
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
|
|
|
6dedca |
+
|
|
|
6dedca |
+^
|
|
|
6dedca |
+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+^
|
|
|
6dedca |
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..d44a5cf
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606.html.sax
|
|
|
6dedca |
@@ -0,0 +1,10 @@
|
|
|
6dedca |
+SAX.setDocumentLocator()
|
|
|
6dedca |
+SAX.startDocument()
|
|
|
6dedca |
+SAX.error: Comment not terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+SAX.error: Invalid char in CDATA 0xC
|
|
|
6dedca |
+SAX.error: Misplaced DOCTYPE declaration
|
|
|
6dedca |
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
|
|
|
6dedca |
+SAX.error: DOCTYPE improperly terminated
|
|
|
6dedca |
+SAX.internalSubset((null), , )
|
|
|
6dedca |
+SAX.endDocument()
|
|
|
6dedca |
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..273816a
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606_2.html
|
|
|
6dedca |
@@ -0,0 +1,2 @@
|
|
|
6dedca |
+
|
|
|
6dedca |
+<html><body> </body></html>
|
|
|
6dedca |
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..4be039f
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606_2.html.err
|
|
|
6dedca |
@@ -0,0 +1,16 @@
|
|
|
6dedca |
+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
|
|
|
6dedca |
+
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
|
|
|
6dedca |
+‘
|
|
|
6dedca |
+ ^
|
|
|
6dedca |
+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
|
|
|
6dedca |
+
|
|
|
6dedca |
+^
|
|
|
6dedca |
+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+^
|
|
|
6dedca |
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..80ff3d7
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/result/HTML/758606_2.html.sax
|
|
|
6dedca |
@@ -0,0 +1,17 @@
|
|
|
6dedca |
+SAX.setDocumentLocator()
|
|
|
6dedca |
+SAX.startDocument()
|
|
|
6dedca |
+SAX.error: Comment not terminated
|
|
|
6dedca |
+
|
|
|
6dedca |
+SAX.error: Invalid char in CDATA 0xC
|
|
|
6dedca |
+SAX.startElement(html)
|
|
|
6dedca |
+SAX.startElement(body)
|
|
|
6dedca |
+SAX.startElement(p)
|
|
|
6dedca |
+SAX.characters(, 2)
|
|
|
6dedca |
+SAX.error: Misplaced DOCTYPE declaration
|
|
|
6dedca |
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
|
|
|
6dedca |
+SAX.error: DOCTYPE improperly terminated
|
|
|
6dedca |
+SAX.internalSubset((null), , )
|
|
|
6dedca |
+SAX.endElement(p)
|
|
|
6dedca |
+SAX.endElement(body)
|
|
|
6dedca |
+SAX.endElement(html)
|
|
|
6dedca |
+SAX.endDocument()
|
|
|
6dedca |
diff --git a/test/HTML/758606.html b/test/HTML/758606.html
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..01a013c
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/test/HTML/758606.html
|
|
|
6dedca |
@@ -0,0 +1 @@
|
|
|
6dedca |
+
|
|
|
6dedca |
diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html
|
|
|
6dedca |
new file mode 100644
|
|
|
6dedca |
index 0000000..daa185b
|
|
|
6dedca |
--- /dev/null
|
|
|
6dedca |
+++ b/test/HTML/758606_2.html
|
|
|
6dedca |
@@ -0,0 +1 @@
|
|
|
6dedca |
+
|