Blame SOURCES/libxml2-Heap-based-buffer-overread-in-htmlCurrentChar.patch

6dedca
commit c26d0004e779316830d93120dbfe98f6eee0783b
6dedca
Author: Pranjal Jumde <pjumde@apple.com>
6dedca
Date:   Tue Mar 1 15:18:04 2016 -0800
6dedca
6dedca
    Heap-based buffer overread in htmlCurrentChar
6dedca
    
6dedca
    For https://bugzilla.gnome.org/show_bug.cgi?id=758606
6dedca
    
6dedca
    * parserInternals.c:
6dedca
    (xmlNextChar): Add a test to catch other issues on ctxt->input
6dedca
    corruption proactively.
6dedca
    For non-UTF-8 charsets, xmlNextChar() failed to check for the end
6dedca
    of the input buffer and would continue reading.  Fix this by
6dedca
    pulling out the check for the end of the input buffer into common
6dedca
    code, and return if we reach the end of the input buffer
6dedca
    prematurely.
6dedca
    * result/HTML/758606.html: Added.
6dedca
    * result/HTML/758606.html.err: Added.
6dedca
    * result/HTML/758606.html.sax: Added.
6dedca
    * result/HTML/758606_2.html: Added.
6dedca
    * result/HTML/758606_2.html.err: Added.
6dedca
    * result/HTML/758606_2.html.sax: Added.
6dedca
    * test/HTML/758606.html: Added test case.
6dedca
    * test/HTML/758606_2.html: Added test case.
6dedca
6dedca
diff --git a/parserInternals.c b/parserInternals.c
6dedca
index 1fe1f6a..341d6a1 100644
6dedca
--- a/parserInternals.c
6dedca
+++ b/parserInternals.c
6dedca
@@ -55,6 +55,10 @@
6dedca
 #include <libxml/globals.h>
6dedca
 #include <libxml/chvalid.h>
6dedca
 
6dedca
+#define CUR(ctxt) ctxt->input->cur
6dedca
+#define END(ctxt) ctxt->input->end
6dedca
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
6dedca
+
6dedca
 #include "buf.h"
6dedca
 #include "enc.h"
6dedca
 
6dedca
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
6dedca
         (ctxt->input == NULL))
6dedca
         return;
6dedca
 
6dedca
-    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
6dedca
-        if ((*ctxt->input->cur == 0) &&
6dedca
-            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
6dedca
-            (ctxt->instate != XML_PARSER_COMMENT)) {
6dedca
-            /*
6dedca
-             * If we are at the end of the current entity and
6dedca
-             * the context allows it, we pop consumed entities
6dedca
-             * automatically.
6dedca
-             * the auto closing should be blocked in other cases
6dedca
-             */
6dedca
+    if (!(VALID_CTXT(ctxt))) {
6dedca
+        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
6dedca
+	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6dedca
+        xmlStopParser(ctxt);
6dedca
+	return;
6dedca
+    }
6dedca
+
6dedca
+    if ((*ctxt->input->cur == 0) &&
6dedca
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
6dedca
+        if ((ctxt->instate != XML_PARSER_COMMENT))
6dedca
             xmlPopInput(ctxt);
6dedca
-        } else {
6dedca
-            const unsigned char *cur;
6dedca
-            unsigned char c;
6dedca
+        return;
6dedca
+    }
6dedca
 
6dedca
-            /*
6dedca
-             *   2.11 End-of-Line Handling
6dedca
-             *   the literal two-character sequence "#xD#xA" or a standalone
6dedca
-             *   literal #xD, an XML processor must pass to the application
6dedca
-             *   the single character #xA.
6dedca
-             */
6dedca
-            if (*(ctxt->input->cur) == '\n') {
6dedca
-                ctxt->input->line++; ctxt->input->col = 1;
6dedca
-            } else
6dedca
-                ctxt->input->col++;
6dedca
+    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
6dedca
+        const unsigned char *cur;
6dedca
+        unsigned char c;
6dedca
 
6dedca
-            /*
6dedca
-             * We are supposed to handle UTF8, check it's valid
6dedca
-             * From rfc2044: encoding of the Unicode values on UTF-8:
6dedca
-             *
6dedca
-             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
6dedca
-             * 0000 0000-0000 007F   0xxxxxxx
6dedca
-             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
6dedca
-             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
6dedca
-             *
6dedca
-             * Check for the 0x110000 limit too
6dedca
-             */
6dedca
-            cur = ctxt->input->cur;
6dedca
+        /*
6dedca
+         *   2.11 End-of-Line Handling
6dedca
+         *   the literal two-character sequence "#xD#xA" or a standalone
6dedca
+         *   literal #xD, an XML processor must pass to the application
6dedca
+         *   the single character #xA.
6dedca
+         */
6dedca
+        if (*(ctxt->input->cur) == '\n') {
6dedca
+            ctxt->input->line++; ctxt->input->col = 1;
6dedca
+        } else
6dedca
+            ctxt->input->col++;
6dedca
 
6dedca
-            c = *cur;
6dedca
-            if (c & 0x80) {
6dedca
-	        if (c == 0xC0)
6dedca
-		    goto encoding_error;
6dedca
-                if (cur[1] == 0) {
6dedca
+        /*
6dedca
+         * We are supposed to handle UTF8, check it's valid
6dedca
+         * From rfc2044: encoding of the Unicode values on UTF-8:
6dedca
+         *
6dedca
+         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
6dedca
+         * 0000 0000-0000 007F   0xxxxxxx
6dedca
+         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
6dedca
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
6dedca
+         *
6dedca
+         * Check for the 0x110000 limit too
6dedca
+         */
6dedca
+        cur = ctxt->input->cur;
6dedca
+
6dedca
+        c = *cur;
6dedca
+        if (c & 0x80) {
6dedca
+        if (c == 0xC0)
6dedca
+	    goto encoding_error;
6dedca
+            if (cur[1] == 0) {
6dedca
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
+                cur = ctxt->input->cur;
6dedca
+            }
6dedca
+            if ((cur[1] & 0xc0) != 0x80)
6dedca
+                goto encoding_error;
6dedca
+            if ((c & 0xe0) == 0xe0) {
6dedca
+                unsigned int val;
6dedca
+
6dedca
+                if (cur[2] == 0) {
6dedca
                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
                     cur = ctxt->input->cur;
6dedca
                 }
6dedca
-                if ((cur[1] & 0xc0) != 0x80)
6dedca
+                if ((cur[2] & 0xc0) != 0x80)
6dedca
                     goto encoding_error;
6dedca
-                if ((c & 0xe0) == 0xe0) {
6dedca
-                    unsigned int val;
6dedca
-
6dedca
-                    if (cur[2] == 0) {
6dedca
+                if ((c & 0xf0) == 0xf0) {
6dedca
+                    if (cur[3] == 0) {
6dedca
                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
                         cur = ctxt->input->cur;
6dedca
                     }
6dedca
-                    if ((cur[2] & 0xc0) != 0x80)
6dedca
+                    if (((c & 0xf8) != 0xf0) ||
6dedca
+                        ((cur[3] & 0xc0) != 0x80))
6dedca
                         goto encoding_error;
6dedca
-                    if ((c & 0xf0) == 0xf0) {
6dedca
-                        if (cur[3] == 0) {
6dedca
-                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
-                            cur = ctxt->input->cur;
6dedca
-                        }
6dedca
-                        if (((c & 0xf8) != 0xf0) ||
6dedca
-                            ((cur[3] & 0xc0) != 0x80))
6dedca
-                            goto encoding_error;
6dedca
-                        /* 4-byte code */
6dedca
-                        ctxt->input->cur += 4;
6dedca
-                        val = (cur[0] & 0x7) << 18;
6dedca
-                        val |= (cur[1] & 0x3f) << 12;
6dedca
-                        val |= (cur[2] & 0x3f) << 6;
6dedca
-                        val |= cur[3] & 0x3f;
6dedca
-                    } else {
6dedca
-                        /* 3-byte code */
6dedca
-                        ctxt->input->cur += 3;
6dedca
-                        val = (cur[0] & 0xf) << 12;
6dedca
-                        val |= (cur[1] & 0x3f) << 6;
6dedca
-                        val |= cur[2] & 0x3f;
6dedca
-                    }
6dedca
-                    if (((val > 0xd7ff) && (val < 0xe000)) ||
6dedca
-                        ((val > 0xfffd) && (val < 0x10000)) ||
6dedca
-                        (val >= 0x110000)) {
6dedca
-			xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
6dedca
-					  "Char 0x%X out of allowed range\n",
6dedca
-					  val);
6dedca
-                    }
6dedca
-                } else
6dedca
-                    /* 2-byte code */
6dedca
-                    ctxt->input->cur += 2;
6dedca
+                    /* 4-byte code */
6dedca
+                    ctxt->input->cur += 4;
6dedca
+                    val = (cur[0] & 0x7) << 18;
6dedca
+                    val |= (cur[1] & 0x3f) << 12;
6dedca
+                    val |= (cur[2] & 0x3f) << 6;
6dedca
+                    val |= cur[3] & 0x3f;
6dedca
+                } else {
6dedca
+                    /* 3-byte code */
6dedca
+                    ctxt->input->cur += 3;
6dedca
+                    val = (cur[0] & 0xf) << 12;
6dedca
+                    val |= (cur[1] & 0x3f) << 6;
6dedca
+                    val |= cur[2] & 0x3f;
6dedca
+                }
6dedca
+                if (((val > 0xd7ff) && (val < 0xe000)) ||
6dedca
+                    ((val > 0xfffd) && (val < 0x10000)) ||
6dedca
+                    (val >= 0x110000)) {
6dedca
+		xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
6dedca
+				  "Char 0x%X out of allowed range\n",
6dedca
+				  val);
6dedca
+                }
6dedca
             } else
6dedca
-                /* 1-byte code */
6dedca
-                ctxt->input->cur++;
6dedca
+                /* 2-byte code */
6dedca
+                ctxt->input->cur += 2;
6dedca
+        } else
6dedca
+            /* 1-byte code */
6dedca
+            ctxt->input->cur++;
6dedca
 
6dedca
-            ctxt->nbChars++;
6dedca
-            if (*ctxt->input->cur == 0)
6dedca
-                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
-        }
6dedca
+        ctxt->nbChars++;
6dedca
+        if (*ctxt->input->cur == 0)
6dedca
+            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
6dedca
     } else {
6dedca
         /*
6dedca
          * Assume it's a fixed length encoding (1) with
6dedca
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
6dedca
new file mode 100644
6dedca
index 0000000..4f21f62
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606.html
6dedca
@@ -0,0 +1,2 @@
6dedca
+
6dedca
+
6dedca
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
6dedca
new file mode 100644
6dedca
index 0000000..060433a
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606.html.err
6dedca
@@ -0,0 +1,16 @@
6dedca
+./test/HTML/758606.html:1: HTML parser error : Comment not terminated 
6dedca
+
6dedca
+
6dedca
+    ^
6dedca
+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
6dedca
+
6dedca
+    ^
6dedca
+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
6dedca
+
6dedca
+     ^
6dedca
+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
6dedca
+
6dedca
+^
6dedca
+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
6dedca
+
6dedca
+^
6dedca
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax
6dedca
new file mode 100644
6dedca
index 0000000..d44a5cf
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606.html.sax
6dedca
@@ -0,0 +1,10 @@
6dedca
+SAX.setDocumentLocator()
6dedca
+SAX.startDocument()
6dedca
+SAX.error: Comment not terminated 
6dedca
+
6dedca
+SAX.error: Invalid char in CDATA 0xC
6dedca
+SAX.error: Misplaced DOCTYPE declaration
6dedca
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
6dedca
+SAX.error: DOCTYPE improperly terminated
6dedca
+SAX.internalSubset((null), , )
6dedca
+SAX.endDocument()
6dedca
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
6dedca
new file mode 100644
6dedca
index 0000000..273816a
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606_2.html
6dedca
@@ -0,0 +1,2 @@
6dedca
+
6dedca
+<html><body>

</body></html>
6dedca
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
6dedca
new file mode 100644
6dedca
index 0000000..4be039f
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606_2.html.err
6dedca
@@ -0,0 +1,16 @@
6dedca
+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated 
6dedca
+
6dedca
+
6dedca
+    ^
6dedca
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
6dedca
+
6dedca
+    ^
6dedca
+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
6dedca
+‘
6dedca
+  ^
6dedca
+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
6dedca
+
6dedca
+^
6dedca
+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
6dedca
+
6dedca
+^
6dedca
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax
6dedca
new file mode 100644
6dedca
index 0000000..80ff3d7
6dedca
--- /dev/null
6dedca
+++ b/result/HTML/758606_2.html.sax
6dedca
@@ -0,0 +1,17 @@
6dedca
+SAX.setDocumentLocator()
6dedca
+SAX.startDocument()
6dedca
+SAX.error: Comment not terminated 
6dedca
+
6dedca
+SAX.error: Invalid char in CDATA 0xC
6dedca
+SAX.startElement(html)
6dedca
+SAX.startElement(body)
6dedca
+SAX.startElement(p)
6dedca
+SAX.characters(‘, 2)
6dedca
+SAX.error: Misplaced DOCTYPE declaration
6dedca
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
6dedca
+SAX.error: DOCTYPE improperly terminated
6dedca
+SAX.internalSubset((null), , )
6dedca
+SAX.endElement(p)
6dedca
+SAX.endElement(body)
6dedca
+SAX.endElement(html)
6dedca
+SAX.endDocument()
6dedca
diff --git a/test/HTML/758606.html b/test/HTML/758606.html
6dedca
new file mode 100644
6dedca
index 0000000..01a013c
6dedca
--- /dev/null
6dedca
+++ b/test/HTML/758606.html
6dedca
@@ -0,0 +1 @@
6dedca
+
6dedca
diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html
6dedca
new file mode 100644
6dedca
index 0000000..daa185b
6dedca
--- /dev/null
6dedca
+++ b/test/HTML/758606_2.html
6dedca
@@ -0,0 +1 @@
6dedca
+