From 6aee99f381cc5bdfb6e514ac1e82f5e7b0fa7e2d Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Fri, 25 Feb 2022 16:42:35 -0500
Subject: [PATCH 5/6] Add missing validation of encoding (CVE-2022-25235)

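Backported from upstream https://github.com/libexpat/libexpat/pull/562

Summary of changes:
- xmltok.c: add UTF8_INVALID2, rewrite UTF8_INVALID3 so that it also
  checks the second and third bytes of the sequence, and replace the
  "isNever" stub for utf8_isInvalid2 with a function that evaluates
  UTF8_INVALID2.
- xmltok_impl.c: test IS_INVALID_CHAR before IS_NAME_CHAR and
  IS_NMSTRT_CHAR in the BT_LEAD cases, and return XML_TOK_INVALID for
  an invalid sequence.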

Resolves: #2058114
---
 lib/expat/xmltok/xmltok.c      | 21 +++++++++++++++------
 lib/expat/xmltok/xmltok_impl.c |  8 ++++++--
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/expat/xmltok/xmltok.c b/lib/expat/xmltok/xmltok.c
index 7b31fbb..3b0c950 100644
--- a/lib/expat/xmltok/xmltok.c
+++ b/lib/expat/xmltok/xmltok.c
@@ -61,12 +61,17 @@ We need 8 bits to index into pages, 3 bits to add to that index and
      ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
      : 0))
 
+#define UTF8_INVALID2(p) \
+  ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
+
 #define UTF8_INVALID3(p) \
-  ((*p) == 0xED \
-  ? (((p)[1] & 0x20) != 0) \
-  : ((*p) == 0xEF \
-     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
-     : 0))
+  (((p)[2] & 0x80) == 0 \
+   || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
+                                      : ((p)[2] & 0xC0) == 0xC0) \
+   || ((*p) == 0xE0 \
+           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
+           : ((p)[1] & 0x80) == 0 \
+                 || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
 
 #define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
 
@@ -104,7 +109,11 @@ int utf8_isNmstrt3(const ENCODING *enc ATTR_UNUSED, const char *p)
 
 #define utf8_isNmstrt4 isNever
 
-#define utf8_isInvalid2 isNever
+static
+int utf8_isInvalid2(const ENCODING *enc ATTR_UNUSED, const char *p)
+{ 
+  return UTF8_INVALID2((const unsigned char *)p);
+}
 
 static
 int utf8_isInvalid3(const ENCODING *enc ATTR_UNUSED, const char *p)
diff --git a/lib/expat/xmltok/xmltok_impl.c b/lib/expat/xmltok/xmltok_impl.c
index d035527..bae79b9 100644
--- a/lib/expat/xmltok/xmltok_impl.c
+++ b/lib/expat/xmltok/xmltok_impl.c
@@ -43,7 +43,7 @@ See the file copying.txt for copying permission.
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
-     if (!IS_NAME_CHAR(enc, ptr, n)) { \
+     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
@@ -71,7 +71,7 @@ See the file copying.txt for copying permission.
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
-     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
+     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
@@ -1168,6 +1168,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
+    if (IS_INVALID_CHAR(enc, ptr, n)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
       ptr += n; \
       tok = XML_TOK_NAME; \
-- 
2.31.1