Blob Blame History Raw
Make the XML entity recursion check more precise.

libxml doesn't detect entity recursion specifically but has a variety
of related checks, such as entities not expanding too deeply or
producing exponential blow-ups in content.

Because entity declarations are parsed in a separate context with
their own element recursion budget, a recursive entity can overflow
the stack using a lot of open elements (but within the per-context
limit) as it slowly consumes (but does not exhaust) the entity depth
budget.

This adds a specific, precise check for recursive entities that
detects entity recursion specifically and fails immediately.

The existing entity expansion depth checks are still relevant for long
chains of different entities.

BUG=628581

Review-Url: https://codereview.chromium.org/2539003002
Cr-Commit-Position: refs/heads/master@{#436899}


Index: libxml2-2.9.4/entities.c
===================================================================
--- libxml2-2.9.4.orig/entities.c
+++ libxml2-2.9.4/entities.c
@@ -159,6 +159,7 @@ xmlCreateEntity(xmlDictPtr dict, const x
     memset(ret, 0, sizeof(xmlEntity));
     ret->type = XML_ENTITY_DECL;
     ret->checked = 0;
+    ret->guard = XML_ENTITY_NOT_BEING_CHECKED;
 
     /*
      * fill the structure.
@@ -931,6 +932,7 @@ xmlCopyEntity(xmlEntityPtr ent) {
 	cur->orig = xmlStrdup(ent->orig);
     if (ent->URI != NULL)
 	cur->URI = xmlStrdup(ent->URI);
+    cur->guard = 0;
     return(cur);
 }
 
Index: libxml2-2.9.4/include/libxml/entities.h
===================================================================
--- libxml2-2.9.4.orig/include/libxml/entities.h
+++ libxml2-2.9.4/include/libxml/entities.h
@@ -30,6 +30,11 @@ typedef enum {
     XML_INTERNAL_PREDEFINED_ENTITY = 6
 } xmlEntityType;
 
+typedef enum {
+  XML_ENTITY_NOT_BEING_CHECKED,
+  XML_ENTITY_BEING_CHECKED              /* entity check is in progress */
+} xmlEntityRecursionGuard;
+
 /*
  * An unit of storage for an entity, contains the string, the value
  * and the linkind data needed for the linking in the hash table.
@@ -60,6 +65,7 @@ struct _xmlEntity {
 					/* this is also used to count entities
 					 * references done from that entity
 					 * and if it contains '<' */
+     xmlEntityRecursionGuard guard;
 };
 
 /*
Index: libxml2-2.9.4/parser.c
===================================================================
--- libxml2-2.9.4.orig/parser.c
+++ libxml2-2.9.4/parser.c
@@ -133,6 +133,10 @@ xmlParserEntityCheck(xmlParserCtxtPtr ct
     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
         return (1);
 
+	if ((ent != NULL) && (ent->guard == XML_ENTITY_BEING_CHECKED)) {
+        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+        return (1);
+    }
     /*
      * This may look absurd but is needed to detect
      * entities problems
@@ -143,12 +147,14 @@ xmlParserEntityCheck(xmlParserCtxtPtr ct
 	unsigned long oldnbent = ctxt->nbentities;
 	xmlChar *rep;
 
+    ent->guard = XML_ENTITY_BEING_CHECKED;
 	ent->checked = 1;
 
         ++ctxt->depth;
 	rep = xmlStringDecodeEntities(ctxt, ent->content,
 				  XML_SUBSTITUTE_REF, 0, 0, 0);
         --ctxt->depth;
+	ent->guard = XML_ENTITY_NOT_BEING_CHECKED;
 	if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
 	    ent->content[0] = 0;
 	}
@@ -7337,23 +7343,28 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
 	 * if its replacement text matches the production labeled
 	 * content.
 	 */
-	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
-	    ctxt->depth++;
-	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
-	                                              user_data, &list);
-	    ctxt->depth--;
-
-	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
-	    ctxt->depth++;
-	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
-	                                   user_data, ctxt->depth, ent->URI,
-					   ent->ExternalID, &list);
-	    ctxt->depth--;
-	} else {
-	    ret = XML_ERR_ENTITY_PE_INTERNAL;
-	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
-			 "invalid entity type found\n", NULL);
-	}
+    if (ent->guard == XML_ENTITY_BEING_CHECKED) {
+        ret = XML_ERR_ENTITY_LOOP;
+    } else {
+        ent->guard = XML_ENTITY_BEING_CHECKED;
+        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
+            ctxt->depth++;
+            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
+                                                      user_data, &list);
+            ctxt->depth--;
+        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
+            ctxt->depth++;
+            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
+                                           user_data, ctxt->depth, ent->URI,
+                                           ent->ExternalID, &list);
+            ctxt->depth--;
+        } else {
+            ret = XML_ERR_ENTITY_PE_INTERNAL;
+            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
+                         "invalid entity type found\n", NULL);
+        }
+        ent->guard = XML_ENTITY_NOT_BEING_CHECKED;
+    }
 
 	/*
 	 * Store the number of entities needing parsing for this entity
@@ -7456,23 +7467,29 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
 	    else
 		user_data = ctxt->userData;
 
-	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
-		ctxt->depth++;
-		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
-				   ent->content, user_data, NULL);
-		ctxt->depth--;
-	    } else if (ent->etype ==
-		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
-		ctxt->depth++;
-		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
-			   ctxt->sax, user_data, ctxt->depth,
-			   ent->URI, ent->ExternalID, NULL);
-		ctxt->depth--;
-	    } else {
-		ret = XML_ERR_ENTITY_PE_INTERNAL;
-		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
-			     "invalid entity type found\n", NULL);
-	    }
+        if (ent->guard == XML_ENTITY_BEING_CHECKED) {
+            ret = XML_ERR_ENTITY_LOOP;
+        } else {
+            ent->guard = XML_ENTITY_BEING_CHECKED;
+            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
+                ctxt->depth++;
+                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
+                                   ent->content, user_data, NULL);
+                ctxt->depth--;
+            } else if (ent->etype ==
+                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
+                ctxt->depth++;
+                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
+                           ctxt->sax, user_data, ctxt->depth,
+                           ent->URI, ent->ExternalID, NULL);
+                ctxt->depth--;
+            } else {
+                ret = XML_ERR_ENTITY_PE_INTERNAL;
+                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
+                             "invalid entity type found\n", NULL);
+            }
+            ent->guard = XML_ENTITY_NOT_BEING_CHECKED;
+        }
 	    if (ret == XML_ERR_ENTITY_LOOP) {
 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
 		return;