From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 7 Aug 2020 21:54:27 +0200
Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'
Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
array access.
Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
the report.
Fixes #178.
---
xmllint.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/xmllint.c b/xmllint.c
index f6a8e4636..c647486f3 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -528,6 +528,12 @@ static void
xmlHTMLEncodeSend(void) {
char *result;
+ /*
+ * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+ * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+ * an out-of-bounds read.
+ */
+ memset(&buffer[sizeof(buffer)-4], 0, 4);
result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
if (result) {
xmlGenericError(xmlGenericErrorContext, "%s", result);
--
GitLab