Blame SOURCES/libxml2-2.9.7-CVE-2021-3517.patch

bc5a00
From bf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2 Mon Sep 17 00:00:00 2001
bc5a00
From: Joel Hockey <joel.hockey@gmail.com>
bc5a00
Date: Sun, 16 Aug 2020 17:19:35 -0700
bc5a00
Subject: [PATCH] Validate UTF8 in xmlEncodeEntities
bc5a00
bc5a00
The code currently assumes UTF-8 input without validating it. Truncated UTF-8
bc5a00
input can cause out-of-bounds array access.
bc5a00
bc5a00
Adds further checks to partial fix in 50f06b3e.
bc5a00
bc5a00
Fixes #178
bc5a00
---
bc5a00
 entities.c | 16 +++++++++++++++-
bc5a00
 1 file changed, 15 insertions(+), 1 deletion(-)
bc5a00
bc5a00
diff --git a/entities.c b/entities.c
bc5a00
index 37b99a56..1a8f86f0 100644
bc5a00
--- a/entities.c
bc5a00
+++ b/entities.c
bc5a00
@@ -704,11 +704,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
bc5a00
 	    } else {
bc5a00
 		/*
bc5a00
 		 * We assume we have UTF-8 input.
bc5a00
+		 * It must match either:
bc5a00
+		 *   110xxxxx 10xxxxxx
bc5a00
+		 *   1110xxxx 10xxxxxx 10xxxxxx
bc5a00
+		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
bc5a00
+		 * That is:
bc5a00
+		 *   cur[0] is 11xxxxxx
bc5a00
+		 *   cur[1] is 10xxxxxx
bc5a00
+		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
bc5a00
+		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
bc5a00
+		 *   cur[0] is not 11111xxx
bc5a00
 		 */
bc5a00
 		char buf[11], *ptr;
bc5a00
 		int val = 0, l = 1;
bc5a00
 
bc5a00
-		if (*cur < 0xC0) {
bc5a00
+		if (((cur[0] & 0xC0) != 0xC0) ||
bc5a00
+		    ((cur[1] & 0xC0) != 0x80) ||
bc5a00
+		    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
bc5a00
+		    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
bc5a00
+		    (((cur[0] & 0xF8) == 0xF8))) {
bc5a00
 		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
bc5a00
 			    "xmlEncodeEntities: input not UTF-8");
bc5a00
 		    if (doc != NULL)
bc5a00
-- 
bc5a00
GitLab
bc5a00