|
|
bc5a00 |
From bf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2 Mon Sep 17 00:00:00 2001
|
|
|
bc5a00 |
From: Joel Hockey <joel.hockey@gmail.com>
|
|
|
bc5a00 |
Date: Sun, 16 Aug 2020 17:19:35 -0700
|
|
|
bc5a00 |
Subject: [PATCH] Validate UTF8 in xmlEncodeEntities
|
|
|
bc5a00 |
|
|
|
bc5a00 |
The code currently assumes UTF-8 input without validating it. Truncated UTF-8
|
|
|
bc5a00 |
input can cause out-of-bounds array access.
|
|
|
bc5a00 |
|
|
|
bc5a00 |
Adds further checks to the partial fix in 50f06b3e.
|
|
|
bc5a00 |
|
|
|
bc5a00 |
Fixes #178
|
|
|
bc5a00 |
---
|
|
|
bc5a00 |
entities.c | 16 +++++++++++++++-
|
|
|
bc5a00 |
1 file changed, 15 insertions(+), 1 deletion(-)
|
|
|
bc5a00 |
|
|
|
bc5a00 |
diff --git a/entities.c b/entities.c
|
|
|
bc5a00 |
index 37b99a56..1a8f86f0 100644
|
|
|
bc5a00 |
--- a/entities.c
|
|
|
bc5a00 |
+++ b/entities.c
|
|
|
bc5a00 |
@@ -704,11 +704,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
|
|
|
bc5a00 |
} else {
|
|
|
bc5a00 |
/*
|
|
|
bc5a00 |
* We assume we have UTF-8 input.
|
|
|
bc5a00 |
+ * It must match either:
|
|
|
bc5a00 |
+ * 110xxxxx 10xxxxxx
|
|
|
bc5a00 |
+ * 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
bc5a00 |
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
bc5a00 |
+ * That is:
|
|
|
bc5a00 |
+ * cur[0] is 11xxxxxx
|
|
|
bc5a00 |
+ * cur[1] is 10xxxxxx
|
|
|
bc5a00 |
+ * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
|
|
|
bc5a00 |
+ * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
|
|
|
bc5a00 |
+ * cur[0] is not 11111xxx
|
|
|
bc5a00 |
*/
|
|
|
bc5a00 |
char buf[11], *ptr;
|
|
|
bc5a00 |
int val = 0, l = 1;
|
|
|
bc5a00 |
|
|
|
bc5a00 |
- if (*cur < 0xC0) {
|
|
|
bc5a00 |
+ if (((cur[0] & 0xC0) != 0xC0) ||
|
|
|
bc5a00 |
+ ((cur[1] & 0xC0) != 0x80) ||
|
|
|
bc5a00 |
+ (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
|
|
|
bc5a00 |
+ (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
|
|
|
bc5a00 |
+ (((cur[0] & 0xF8) == 0xF8))) {
|
|
|
bc5a00 |
xmlEntitiesErr(XML_CHECK_NOT_UTF8,
|
|
|
bc5a00 |
"xmlEncodeEntities: input not UTF-8");
|
|
|
bc5a00 |
if (doc != NULL)
|
|
|
bc5a00 |
--
|
|
|
bc5a00 |
GitLab
|
|
|
bc5a00 |
|