Blame SOURCES/libxml2-Heap-based-buffer-overread-in-htmlCurrentChar.patch

1c8959
commit c26d0004e779316830d93120dbfe98f6eee0783b
1c8959
Author: Pranjal Jumde <pjumde@apple.com>
1c8959
Date:   Tue Mar 1 15:18:04 2016 -0800
1c8959
1c8959
    Heap-based buffer overread in htmlCurrentChar
1c8959
    
1c8959
    For https://bugzilla.gnome.org/show_bug.cgi?id=758606
1c8959
    
1c8959
    * parserInternals.c:
1c8959
    (xmlNextChar): Add a test to catch other issues on ctxt->input
1c8959
    corruption proactively.
1c8959
    For non-UTF-8 charsets, xmlNextChar() failed to check for the end
1c8959
    of the input buffer and would continue reading.  Fix this by
1c8959
    pulling out the check for the end of the input buffer into common
1c8959
    code, and return if we reach the end of the input buffer
1c8959
    prematurely.
1c8959
    * result/HTML/758606.html: Added.
1c8959
    * result/HTML/758606.html.err: Added.
1c8959
    * result/HTML/758606.html.sax: Added.
1c8959
    * result/HTML/758606_2.html: Added.
1c8959
    * result/HTML/758606_2.html.err: Added.
1c8959
    * result/HTML/758606_2.html.sax: Added.
1c8959
    * test/HTML/758606.html: Added test case.
1c8959
    * test/HTML/758606_2.html: Added test case.
1c8959
1c8959
diff --git a/parserInternals.c b/parserInternals.c
1c8959
index 1fe1f6a..341d6a1 100644
1c8959
--- a/parserInternals.c
1c8959
+++ b/parserInternals.c
1c8959
@@ -55,6 +55,10 @@
1c8959
 #include <libxml/globals.h>
1c8959
 #include <libxml/chvalid.h>
1c8959
 
1c8959
+#define CUR(ctxt) ctxt->input->cur
1c8959
+#define END(ctxt) ctxt->input->end
1c8959
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
1c8959
+
1c8959
 #include "buf.h"
1c8959
 #include "enc.h"
1c8959
 
1c8959
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
1c8959
         (ctxt->input == NULL))
1c8959
         return;
1c8959
 
1c8959
-    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1c8959
-        if ((*ctxt->input->cur == 0) &&
1c8959
-            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1c8959
-            (ctxt->instate != XML_PARSER_COMMENT)) {
1c8959
-            /*
1c8959
-             * If we are at the end of the current entity and
1c8959
-             * the context allows it, we pop consumed entities
1c8959
-             * automatically.
1c8959
-             * the auto closing should be blocked in other cases
1c8959
-             */
1c8959
+    if (!(VALID_CTXT(ctxt))) {
1c8959
+        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
1c8959
+	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1c8959
+        xmlStopParser(ctxt);
1c8959
+	return;
1c8959
+    }
1c8959
+
1c8959
+    if ((*ctxt->input->cur == 0) &&
1c8959
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
1c8959
+        if ((ctxt->instate != XML_PARSER_COMMENT))
1c8959
             xmlPopInput(ctxt);
1c8959
-        } else {
1c8959
-            const unsigned char *cur;
1c8959
-            unsigned char c;
1c8959
+        return;
1c8959
+    }
1c8959
 
1c8959
-            /*
1c8959
-             *   2.11 End-of-Line Handling
1c8959
-             *   the literal two-character sequence "#xD#xA" or a standalone
1c8959
-             *   literal #xD, an XML processor must pass to the application
1c8959
-             *   the single character #xA.
1c8959
-             */
1c8959
-            if (*(ctxt->input->cur) == '\n') {
1c8959
-                ctxt->input->line++; ctxt->input->col = 1;
1c8959
-            } else
1c8959
-                ctxt->input->col++;
1c8959
+    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1c8959
+        const unsigned char *cur;
1c8959
+        unsigned char c;
1c8959
 
1c8959
-            /*
1c8959
-             * We are supposed to handle UTF8, check it's valid
1c8959
-             * From rfc2044: encoding of the Unicode values on UTF-8:
1c8959
-             *
1c8959
-             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1c8959
-             * 0000 0000-0000 007F   0xxxxxxx
1c8959
-             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1c8959
-             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1c8959
-             *
1c8959
-             * Check for the 0x110000 limit too
1c8959
-             */
1c8959
-            cur = ctxt->input->cur;
1c8959
+        /*
1c8959
+         *   2.11 End-of-Line Handling
1c8959
+         *   the literal two-character sequence "#xD#xA" or a standalone
1c8959
+         *   literal #xD, an XML processor must pass to the application
1c8959
+         *   the single character #xA.
1c8959
+         */
1c8959
+        if (*(ctxt->input->cur) == '\n') {
1c8959
+            ctxt->input->line++; ctxt->input->col = 1;
1c8959
+        } else
1c8959
+            ctxt->input->col++;
1c8959
 
1c8959
-            c = *cur;
1c8959
-            if (c & 0x80) {
1c8959
-	        if (c == 0xC0)
1c8959
-		    goto encoding_error;
1c8959
-                if (cur[1] == 0) {
1c8959
+        /*
1c8959
+         * We are supposed to handle UTF8, check it's valid
1c8959
+         * From rfc2044: encoding of the Unicode values on UTF-8:
1c8959
+         *
1c8959
+         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1c8959
+         * 0000 0000-0000 007F   0xxxxxxx
1c8959
+         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1c8959
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1c8959
+         *
1c8959
+         * Check for the 0x110000 limit too
1c8959
+         */
1c8959
+        cur = ctxt->input->cur;
1c8959
+
1c8959
+        c = *cur;
1c8959
+        if (c & 0x80) {
1c8959
+        if (c == 0xC0)
1c8959
+	    goto encoding_error;
1c8959
+            if (cur[1] == 0) {
1c8959
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
+                cur = ctxt->input->cur;
1c8959
+            }
1c8959
+            if ((cur[1] & 0xc0) != 0x80)
1c8959
+                goto encoding_error;
1c8959
+            if ((c & 0xe0) == 0xe0) {
1c8959
+                unsigned int val;
1c8959
+
1c8959
+                if (cur[2] == 0) {
1c8959
                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
                     cur = ctxt->input->cur;
1c8959
                 }
1c8959
-                if ((cur[1] & 0xc0) != 0x80)
1c8959
+                if ((cur[2] & 0xc0) != 0x80)
1c8959
                     goto encoding_error;
1c8959
-                if ((c & 0xe0) == 0xe0) {
1c8959
-                    unsigned int val;
1c8959
-
1c8959
-                    if (cur[2] == 0) {
1c8959
+                if ((c & 0xf0) == 0xf0) {
1c8959
+                    if (cur[3] == 0) {
1c8959
                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
                         cur = ctxt->input->cur;
1c8959
                     }
1c8959
-                    if ((cur[2] & 0xc0) != 0x80)
1c8959
+                    if (((c & 0xf8) != 0xf0) ||
1c8959
+                        ((cur[3] & 0xc0) != 0x80))
1c8959
                         goto encoding_error;
1c8959
-                    if ((c & 0xf0) == 0xf0) {
1c8959
-                        if (cur[3] == 0) {
1c8959
-                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
-                            cur = ctxt->input->cur;
1c8959
-                        }
1c8959
-                        if (((c & 0xf8) != 0xf0) ||
1c8959
-                            ((cur[3] & 0xc0) != 0x80))
1c8959
-                            goto encoding_error;
1c8959
-                        /* 4-byte code */
1c8959
-                        ctxt->input->cur += 4;
1c8959
-                        val = (cur[0] & 0x7) << 18;
1c8959
-                        val |= (cur[1] & 0x3f) << 12;
1c8959
-                        val |= (cur[2] & 0x3f) << 6;
1c8959
-                        val |= cur[3] & 0x3f;
1c8959
-                    } else {
1c8959
-                        /* 3-byte code */
1c8959
-                        ctxt->input->cur += 3;
1c8959
-                        val = (cur[0] & 0xf) << 12;
1c8959
-                        val |= (cur[1] & 0x3f) << 6;
1c8959
-                        val |= cur[2] & 0x3f;
1c8959
-                    }
1c8959
-                    if (((val > 0xd7ff) && (val < 0xe000)) ||
1c8959
-                        ((val > 0xfffd) && (val < 0x10000)) ||
1c8959
-                        (val >= 0x110000)) {
1c8959
-			xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
1c8959
-					  "Char 0x%X out of allowed range\n",
1c8959
-					  val);
1c8959
-                    }
1c8959
-                } else
1c8959
-                    /* 2-byte code */
1c8959
-                    ctxt->input->cur += 2;
1c8959
+                    /* 4-byte code */
1c8959
+                    ctxt->input->cur += 4;
1c8959
+                    val = (cur[0] & 0x7) << 18;
1c8959
+                    val |= (cur[1] & 0x3f) << 12;
1c8959
+                    val |= (cur[2] & 0x3f) << 6;
1c8959
+                    val |= cur[3] & 0x3f;
1c8959
+                } else {
1c8959
+                    /* 3-byte code */
1c8959
+                    ctxt->input->cur += 3;
1c8959
+                    val = (cur[0] & 0xf) << 12;
1c8959
+                    val |= (cur[1] & 0x3f) << 6;
1c8959
+                    val |= cur[2] & 0x3f;
1c8959
+                }
1c8959
+                if (((val > 0xd7ff) && (val < 0xe000)) ||
1c8959
+                    ((val > 0xfffd) && (val < 0x10000)) ||
1c8959
+                    (val >= 0x110000)) {
1c8959
+		xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
1c8959
+				  "Char 0x%X out of allowed range\n",
1c8959
+				  val);
1c8959
+                }
1c8959
             } else
1c8959
-                /* 1-byte code */
1c8959
-                ctxt->input->cur++;
1c8959
+                /* 2-byte code */
1c8959
+                ctxt->input->cur += 2;
1c8959
+        } else
1c8959
+            /* 1-byte code */
1c8959
+            ctxt->input->cur++;
1c8959
 
1c8959
-            ctxt->nbChars++;
1c8959
-            if (*ctxt->input->cur == 0)
1c8959
-                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
-        }
1c8959
+        ctxt->nbChars++;
1c8959
+        if (*ctxt->input->cur == 0)
1c8959
+            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1c8959
     } else {
1c8959
         /*
1c8959
          * Assume it's a fixed length encoding (1) with
1c8959
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
1c8959
new file mode 100644
1c8959
index 0000000..4f21f62
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606.html
1c8959
@@ -0,0 +1,2 @@
1c8959
+
1c8959
+
1c8959
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
1c8959
new file mode 100644
1c8959
index 0000000..060433a
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606.html.err
1c8959
@@ -0,0 +1,16 @@
1c8959
+./test/HTML/758606.html:1: HTML parser error : Comment not terminated 
1c8959
+
1c8959
+
1c8959
+    ^
1c8959
+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
1c8959
+
1c8959
+    ^
1c8959
+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
1c8959
+
1c8959
+     ^
1c8959
+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
1c8959
+
1c8959
+^
1c8959
+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
1c8959
+
1c8959
+^
1c8959
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax
1c8959
new file mode 100644
1c8959
index 0000000..d44a5cf
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606.html.sax
1c8959
@@ -0,0 +1,10 @@
1c8959
+SAX.setDocumentLocator()
1c8959
+SAX.startDocument()
1c8959
+SAX.error: Comment not terminated 
1c8959
+
1c8959
+SAX.error: Invalid char in CDATA 0xC
1c8959
+SAX.error: Misplaced DOCTYPE declaration
1c8959
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
1c8959
+SAX.error: DOCTYPE improperly terminated
1c8959
+SAX.internalSubset((null), , )
1c8959
+SAX.endDocument()
1c8959
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
1c8959
new file mode 100644
1c8959
index 0000000..273816a
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606_2.html
1c8959
@@ -0,0 +1,2 @@
1c8959
+
1c8959
+<html><body>

</body></html>
1c8959
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
1c8959
new file mode 100644
1c8959
index 0000000..4be039f
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606_2.html.err
1c8959
@@ -0,0 +1,16 @@
1c8959
+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated 
1c8959
+
1c8959
+
1c8959
+    ^
1c8959
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
1c8959
+
1c8959
+    ^
1c8959
+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
1c8959
+‘
1c8959
+  ^
1c8959
+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
1c8959
+
1c8959
+^
1c8959
+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
1c8959
+
1c8959
+^
1c8959
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax
1c8959
new file mode 100644
1c8959
index 0000000..80ff3d7
1c8959
--- /dev/null
1c8959
+++ b/result/HTML/758606_2.html.sax
1c8959
@@ -0,0 +1,17 @@
1c8959
+SAX.setDocumentLocator()
1c8959
+SAX.startDocument()
1c8959
+SAX.error: Comment not terminated 
1c8959
+
1c8959
+SAX.error: Invalid char in CDATA 0xC
1c8959
+SAX.startElement(html)
1c8959
+SAX.startElement(body)
1c8959
+SAX.startElement(p)
1c8959
+SAX.characters(‘, 2)
1c8959
+SAX.error: Misplaced DOCTYPE declaration
1c8959
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
1c8959
+SAX.error: DOCTYPE improperly terminated
1c8959
+SAX.internalSubset((null), , )
1c8959
+SAX.endElement(p)
1c8959
+SAX.endElement(body)
1c8959
+SAX.endElement(html)
1c8959
+SAX.endDocument()
1c8959
diff --git a/test/HTML/758606.html b/test/HTML/758606.html
1c8959
new file mode 100644
1c8959
index 0000000..01a013c
1c8959
--- /dev/null
1c8959
+++ b/test/HTML/758606.html
1c8959
@@ -0,0 +1 @@
1c8959
+
1c8959
diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html
1c8959
new file mode 100644
1c8959
index 0000000..daa185b
1c8959
--- /dev/null
1c8959
+++ b/test/HTML/758606_2.html
1c8959
@@ -0,0 +1 @@
1c8959
+