From 6aee99f381cc5bdfb6e514ac1e82f5e7b0fa7e2d Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Fri, 25 Feb 2022 16:42:35 -0500
Subject: [PATCH 5/6] Add missing validation of encoding (CVE-2022-25235)
Backported from upstream https://github.com/libexpat/libexpat/pull/562
Resolves: #2058114
---
lib/expat/xmltok/xmltok.c | 21 +++++++++++++++------
lib/expat/xmltok/xmltok_impl.c | 8 ++++++--
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/lib/expat/xmltok/xmltok.c b/lib/expat/xmltok/xmltok.c
index 7b31fbb..3b0c950 100644
--- a/lib/expat/xmltok/xmltok.c
+++ b/lib/expat/xmltok/xmltok.c
@@ -61,12 +61,17 @@ We need 8 bits to index into pages, 3 bits to add to that index and
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0))
+#define UTF8_INVALID2(p) \
+ ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
+
#define UTF8_INVALID3(p) \
- ((*p) == 0xED \
- ? (((p)[1] & 0x20) != 0) \
- : ((*p) == 0xEF \
- ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
- : 0))
+ (((p)[2] & 0x80) == 0 \
+ || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
+ : ((p)[2] & 0xC0) == 0xC0) \
+ || ((*p) == 0xE0 \
+ ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
+ : ((p)[1] & 0x80) == 0 \
+ || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
@@ -104,7 +109,11 @@ int utf8_isNmstrt3(const ENCODING *enc ATTR_UNUSED, const char *p)
#define utf8_isNmstrt4 isNever
-#define utf8_isInvalid2 isNever
+static
+int utf8_isInvalid2(const ENCODING *enc ATTR_UNUSED, const char *p)
+{
+ return UTF8_INVALID2((const unsigned char *)p);
+}
static
int utf8_isInvalid3(const ENCODING *enc ATTR_UNUSED, const char *p)
diff --git a/lib/expat/xmltok/xmltok_impl.c b/lib/expat/xmltok/xmltok_impl.c
index d035527..bae79b9 100644
--- a/lib/expat/xmltok/xmltok_impl.c
+++ b/lib/expat/xmltok/xmltok_impl.c
@@ -43,7 +43,7 @@ See the file copying.txt for copying permission.
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (!IS_NAME_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -71,7 +71,7 @@ See the file copying.txt for copying permission.
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -1168,6 +1168,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
+ *nextTokPtr = ptr; \
+ return XML_TOK_INVALID; \
+ } \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \
--
2.31.1