From abdf3942a848b3de8c4fcdbccf15139b1ed0d9c2 Mon Sep 17 00:00:00 2001
From: Lubos Uhliarik <luhliari@redhat.com>
Date: Mon, 3 Aug 2020 16:48:15 +0200
Subject: [PATCH] Fix for CVE-2020-15049

---
 src/HttpHeader.cc                    |  85 ++++++------
 src/HttpHeaderTools.cc               |  27 ++++
 src/HttpHeaderTools.h                |   8 +-
 src/http/ContentLengthInterpreter.cc | 190 +++++++++++++++++++++++++++
 src/http/ContentLengthInterpreter.h  |  66 ++++++++++
 src/http/Makefile.am                 |   2 +
 src/http/Makefile.in                 |   4 +-
 7 files changed, 337 insertions(+), 45 deletions(-)
 create mode 100644 src/http/ContentLengthInterpreter.cc
 create mode 100644 src/http/ContentLengthInterpreter.h
diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc
index 7e8c77e..ef60c02 100644
--- a/src/HttpHeader.cc
+++ b/src/HttpHeader.cc
@@ -11,6 +11,7 @@
 #include "squid.h"
 #include "base64.h"
 #include "globals.h"
+#include "http/ContentLengthInterpreter.h"
 #include "HttpHdrCc.h"
 #include "HttpHdrContRange.h"
 #include "HttpHdrSc.h"
@@ -588,7 +589,6 @@ int
 HttpHeader::parse(const char *header_start, const char *header_end)
 {
     const char *field_ptr = header_start;
-    HttpHeaderEntry *e, *e2;
     int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
 
     PROF_start(HttpHeaderParse);
@@ -605,6 +605,7 @@ HttpHeader::parse(const char *header_start, const char *header_end)
         return reset();
     }
 
+    Http::ContentLengthInterpreter clen(warnOnError);
     /* common format headers are "<name>:[ws]<value>" lines delimited by <CRLF>.
      * continuation lines start with a (single) space or tab */
     while (field_ptr < header_end) {
@@ -681,6 +682,7 @@ HttpHeader::parse(const char *header_start, const char *header_end)
             break;      /* terminating blank line */
         }
 
+        HttpHeaderEntry *e;
         if ((e = HttpHeaderEntry::parse(field_start, field_end)) == NULL) {
             debugs(55, warnOnError, "WARNING: unparseable HTTP header field {" <<
                    getStringPrefix(field_start, field_end) << "}");
@@ -693,45 +695,19 @@ HttpHeader::parse(const char *header_start, const char *header_end)
             return reset();
         }
 
-        // XXX: RFC 7230 Section 3.3.3 item #4 requires sending a 502 error in
-        // several cases that we do not yet cover. TODO: Rewrite to cover more.
-        if (e->id == HDR_CONTENT_LENGTH && (e2 = findEntry(e->id)) != NULL) {
-            if (e->value != e2->value) {
-                int64_t l1, l2;
-                debugs(55, warnOnError, "WARNING: found two conflicting content-length headers in {" <<
-                       getStringPrefix(header_start, header_end) << "}");
-
-                if (!Config.onoff.relaxed_header_parser) {
-                    delete e;
-                    PROF_stop(HttpHeaderParse);
-                    return reset();
-                }
 
-                if (!httpHeaderParseOffset(e->value.termedBuf(), &l1)) {
-                    debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e->value << "'");
-                    delete e;
-                    continue;
-                } else if (!httpHeaderParseOffset(e2->value.termedBuf(), &l2)) {
-                    debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e2->value << "'");
-                    delById(e2->id);
-                } else {
-                    if (l1 != l2)
-                        conflictingContentLength_ = true;
-                    delete e;
-                    continue;
-                }
-            } else {
-                debugs(55, warnOnError, "NOTICE: found double content-length header");
-                delete e;
+        if (e->id == HDR_CONTENT_LENGTH && !clen.checkField(e->value)) {
+            delete e;
 
-                if (Config.onoff.relaxed_header_parser)
-                    continue;
+            if (Config.onoff.relaxed_header_parser)
+                continue; // clen has printed any necessary warnings
 
-                PROF_stop(HttpHeaderParse);
-                return reset();
-            }
+            PROF_stop(HttpHeaderParse);
+            clean();
+            return 0;
         }
 
+
         if (e->id == HDR_OTHER && stringHasWhitespace(e->name.termedBuf())) {
             debugs(55, warnOnError, "WARNING: found whitespace in HTTP header name {" <<
                    getStringPrefix(field_start, field_end) << "}");
@@ -746,6 +722,32 @@ HttpHeader::parse(const char *header_start, const char *header_end)
         addEntry(e);
     }
 
+    if (clen.headerWideProblem) {
+        debugs(55, warnOnError, "WARNING: " << clen.headerWideProblem <<
+               " Content-Length field values in" <<
+               Raw("header", header_start, (size_t)(header_end - header_start)));
+    }
+
+    if (chunked()) {
+        // RFC 2616 section 4.4: ignore Content-Length with Transfer-Encoding
+        // RFC 7230 section 3.3.3 #3: Transfer-Encoding overwrites Content-Length
+        delById(HDR_CONTENT_LENGTH);
+
+        // and clen state becomes irrelevant
+    } else if (clen.sawBad) {
+        // ensure our callers do not accidentally see bad Content-Length values
+        delById(HDR_CONTENT_LENGTH);
+        conflictingContentLength_ = true; // TODO: Rename to badContentLength_.
+    } else if (clen.needsSanitizing) {
+        // RFC 7230 section 3.3.2: MUST either reject or ... [sanitize];
+        // ensure our callers see a clean Content-Length value or none at all
+        delById(HDR_CONTENT_LENGTH);
+        if (clen.sawGood) {
+            putInt64(HDR_CONTENT_LENGTH, clen.value);
+            debugs(55, 5, "sanitized Content-Length to be " << clen.value);
+        }
+    }
+
     if (chunked()) {
         // RFC 2616 section 4.4: ignore Content-Length with Transfer-Encoding
         delById(HDR_CONTENT_LENGTH);
@@ -1722,6 +1724,7 @@ HttpHeaderEntry::getInt() const
     assert_eid (id);
     assert (Headers[id].type == ftInt);
     int val = -1;
+
     int ok = httpHeaderParseInt(value.termedBuf(), &val);
     httpHeaderNoteParsedEntry(id, value, !ok);
     /* XXX: Should we check ok - ie
@@ -1733,15 +1736,11 @@ HttpHeaderEntry::getInt() const
 int64_t
 HttpHeaderEntry::getInt64() const
 {
-    assert_eid (id);
-    assert (Headers[id].type == ftInt64);
     int64_t val = -1;
-    int ok = httpHeaderParseOffset(value.termedBuf(), &val);
-    httpHeaderNoteParsedEntry(id, value, !ok);
-    /* XXX: Should we check ok - ie
-     * return ok ? -1 : value;
-     */
-    return val;
+
+    const bool ok = httpHeaderParseOffset(value.termedBuf(), &val);
+    httpHeaderNoteParsedEntry(id, value, ok);
+    return val; // remains -1 if !ok (XXX: bad method API)
 }
 
 static void
diff --git a/src/HttpHeaderTools.cc b/src/HttpHeaderTools.cc
index d8c29d8..02087cd 100644
--- a/src/HttpHeaderTools.cc
+++ b/src/HttpHeaderTools.cc
@@ -188,6 +188,33 @@ httpHeaderParseInt(const char *start, int *value)
     return 1;
 }
 
+bool
+httpHeaderParseOffset(const char *start, int64_t *value, char **endPtr)
+{
+    char *end = nullptr;
+    errno = 0;
+
+    const int64_t res = strtoll(start, &end, 10);
+    if (errno && !res) {
+        debugs(66, 7, "failed to parse malformed offset in " << start);
+        return false;
+    }
+    if (errno == ERANGE && (res == LLONG_MIN || res == LLONG_MAX)) { // no overflow
+        debugs(66, 7, "failed to parse huge offset in " << start);
+        return false;
+    }
+    if (start == end) {
+        debugs(66, 7, "failed to parse empty offset");
+        return false;
+    }
+    *value = res;
+    if (endPtr)
+        *endPtr = end;
+    debugs(66, 7, "offset " << start << " parsed as " << res);
+    return true;
+}
+
+
 int
 httpHeaderParseOffset(const char *start, int64_t * value)
 {
diff --git a/src/HttpHeaderTools.h b/src/HttpHeaderTools.h
index 509d940..2d97ad4 100644
--- a/src/HttpHeaderTools.h
+++ b/src/HttpHeaderTools.h
@@ -113,7 +113,13 @@ public:
     bool quoted;
 };
 
-int httpHeaderParseOffset(const char *start, int64_t * off);
+/// A strtoll(10) wrapper that checks for strtoll() failures and other problems.
+/// XXX: This function is not fully compatible with some HTTP syntax rules.
+/// Just like strtoll(), allows whitespace prefix, a sign, and _any_ suffix.
+/// Requires at least one digit to be present.
+/// Sets "off" and "end" arguments if and only if no problems were found.
+/// \return true if and only if no problems were found.
+bool httpHeaderParseOffset(const char *start, int64_t *offPtr, char **endPtr = nullptr);
 
 HttpHeaderFieldInfo *httpHeaderBuildFieldsInfo(const HttpHeaderFieldAttrs * attrs, int count);
 void httpHeaderDestroyFieldsInfo(HttpHeaderFieldInfo * info, int count);
diff --git a/src/http/ContentLengthInterpreter.cc b/src/http/ContentLengthInterpreter.cc
new file mode 100644
index 0000000..1d40f4a
--- /dev/null
+++ b/src/http/ContentLengthInterpreter.cc
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+/* DEBUG: section 55    HTTP Header */
+
+#include "squid.h"
+#include "base/CharacterSet.h"
+#include "Debug.h"
+#include "http/ContentLengthInterpreter.h"
+#include "HttpHeaderTools.h"
+#include "SquidConfig.h"
+#include "SquidString.h"
+#include "StrList.h"
+
+Http::ContentLengthInterpreter::ContentLengthInterpreter(const int aDebugLevel):
+    value(-1),
+    headerWideProblem(nullptr),
+    debugLevel(aDebugLevel),
+    sawBad(false),
+    needsSanitizing(false),
+    sawGood(false)
+{
+}
+
+/// characters HTTP permits tolerant parsers to accept as delimiters
+static const CharacterSet &
+RelaxedDelimiterCharacters()
+{
+    // RFC 7230 section 3.5
+    // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
+    // or bare CR as whitespace between request-line fields
+    static const CharacterSet RelaxedDels =
+        (CharacterSet::SP +
+         CharacterSet::HTAB +
+         CharacterSet("VT,FF","\x0B\x0C") +
+         CharacterSet::CR).rename("relaxed-WSP");
+
+    return RelaxedDels;
+}
+
+const CharacterSet &
+Http::ContentLengthInterpreter::WhitespaceCharacters()
+{
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::WSP;
+}
+
+const CharacterSet &
+Http::ContentLengthInterpreter::DelimiterCharacters()
+{
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::SP;
+}
+
+/// checks whether all characters before the Content-Length number are allowed
+/// \returns the start of the digit sequence (or nil on errors)
+const char *
+Http::ContentLengthInterpreter::findDigits(const char *prefix, const char * const valueEnd) const
+{
+    // skip leading OWS in RFC 7230's `OWS field-value OWS`
+    const CharacterSet &whitespace = WhitespaceCharacters();
+    while (prefix < valueEnd) {
+        const auto ch = *prefix;
+        if (CharacterSet::DIGIT[ch])
+            return prefix; // common case: a pre-trimmed field value
+        if (!whitespace[ch])
+            return nullptr; // (trimmed) length does not start with a digit
+        ++prefix;
+    }
+    return nullptr; // empty or whitespace-only value
+}
+
+/// checks whether all characters after the Content-Length are allowed
+bool
+Http::ContentLengthInterpreter::goodSuffix(const char *suffix, const char * const end) const
+{
+    // optimize for the common case that does not need delimiters
+    if (suffix == end)
+        return true;
+
+    for (const CharacterSet &delimiters = DelimiterCharacters();
+            suffix < end; ++suffix) {
+        if (!delimiters[*suffix])
+            return false;
+    }
+    // needsSanitizing = true; // TODO: Always remove trailing whitespace?
+    return true; // including empty suffix
+}
+
+/// handles a single-token Content-Length value
+/// rawValue null-termination requirements are those of httpHeaderParseOffset()
+bool
+Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int valueSize)
+{
+    Must(!sawBad);
+
+    const auto valueEnd = rawValue + valueSize;
+
+    const auto digits = findDigits(rawValue, valueEnd);
+    if (!digits) {
+        debugs(55, debugLevel, "WARNING: Leading garbage or empty value in" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    int64_t latestValue = -1;
+    char *suffix = nullptr;
+
+    if (!httpHeaderParseOffset(digits, &latestValue, &suffix)) {
+        debugs(55, DBG_IMPORTANT, "WARNING: Malformed" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    if (latestValue < 0) {
+        debugs(55, debugLevel, "WARNING: Negative" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    // check for garbage after the number
+    if (!goodSuffix(suffix, valueEnd)) {
+        debugs(55, debugLevel, "WARNING: Trailing garbage in" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    if (sawGood) {
+        /* we have found at least two, possibly identical values */
+
+        needsSanitizing = true; // replace identical values with a single value
+
+        const bool conflicting = value != latestValue;
+        if (conflicting)
+            headerWideProblem = "Conflicting"; // overwrite any lesser problem
+        else if (!headerWideProblem) // preserve a possibly worse problem
+            headerWideProblem = "Duplicate";
+
+        // with relaxed_header_parser, identical values are permitted
+        sawBad = !Config.onoff.relaxed_header_parser || conflicting;
+        return false; // conflicting or duplicate
+    }
+
+    sawGood = true;
+    value = latestValue;
+    return true;
+}
+
+/// handles Content-Length: a, b, c
+bool
+Http::ContentLengthInterpreter::checkList(const String &list)
+{
+    Must(!sawBad);
+
+    if (!Config.onoff.relaxed_header_parser) {
+        debugs(55, debugLevel, "WARNING: List-like" << Raw("Content-Length", list.rawBuf(), list.size()));
+        sawBad = true;
+        return false;
+    }
+
+    needsSanitizing = true; // remove extra commas (at least)
+
+    const char *pos = nullptr;
+    const char *item = nullptr;;
+    int ilen = -1;
+    while (strListGetItem(&list, ',', &item, &ilen, &pos)) {
+        if (!checkValue(item, ilen) && sawBad)
+            break;
+        // keep going after a duplicate value to find conflicting ones
+    }
+    return false; // no need to keep this list field; it will be sanitized away
+}
+
+bool
+Http::ContentLengthInterpreter::checkField(const String &rawValue)
+{
+    if (sawBad)
+        return false; // one rotten apple is enough to spoil all of them
+
+    // TODO: Optimize by always parsing the first integer first.
+    return rawValue.pos(',') ?
+           checkList(rawValue) :
+           checkValue(rawValue.rawBuf(), rawValue.size());
+}
+
diff --git a/src/http/ContentLengthInterpreter.h b/src/http/ContentLengthInterpreter.h
new file mode 100644
index 0000000..ba7080c
--- /dev/null
+++ b/src/http/ContentLengthInterpreter.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+#define SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+
+class String;
+
+namespace Http
+{
+
+/// Finds the intended Content-Length value while parsing message-header fields.
+/// Deals with complications such as value lists and/or repeated fields.
+class ContentLengthInterpreter
+{
+public:
+    explicit ContentLengthInterpreter(const int aDebugLevel);
+
+    /// updates history based on the given message-header field
+    /// \return true iff the field should be added/remembered for future use
+    bool checkField(const String &field);
+
+    /// intended Content-Length value if sawGood is set and sawBad is not set
+    /// meaningless otherwise
+    int64_t value;
+
+    /* for debugging (declared here to minimize padding) */
+    const char *headerWideProblem; ///< worst header-wide problem found (or nil)
+    const int debugLevel; ///< debugging level for certain warnings
+
+    /// whether a malformed Content-Length value was present
+    bool sawBad;
+
+    /// whether all remembered fields should be removed
+    /// removed fields ought to be replaced with the intended value (if known)
+    /// irrelevant if sawBad is set
+    bool needsSanitizing;
+
+    /// whether a valid field value was present, possibly among problematic ones
+    /// irrelevant if sawBad is set
+    bool sawGood;
+
+    /// Whitespace between protocol elements in restricted contexts like
+    /// request line, status line, asctime-date, and credentials
+    /// Seen in RFCs as SP but may be "relaxed" by us.
+    /// See also: WhitespaceCharacters().
+    /// XXX: Misnamed and overused.
+    static const CharacterSet &DelimiterCharacters();
+
+    static const CharacterSet &WhitespaceCharacters();
+protected:
+    const char *findDigits(const char *prefix, const char *valueEnd) const;
+    bool goodSuffix(const char *suffix, const char * const end) const;
+    bool checkValue(const char *start, const int size);
+    bool checkList(const String &list);
+};
+
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H */
+
diff --git a/src/http/Makefile.am b/src/http/Makefile.am
index 7887ef0..78b503e 100644
--- a/src/http/Makefile.am
+++ b/src/http/Makefile.am
@@ -11,6 +11,8 @@ include $(top_srcdir)/src/TestHeaders.am
 noinst_LTLIBRARIES = libsquid-http.la
 
 libsquid_http_la_SOURCES = \
+	ContentLengthInterpreter.cc \
+	ContentLengthInterpreter.h \
 	MethodType.cc \
 	MethodType.h \
 	ProtocolVersion.h \
diff --git a/src/http/Makefile.in b/src/http/Makefile.in
index f5b62fb..c7891ae 100644
--- a/src/http/Makefile.in
+++ b/src/http/Makefile.in
@@ -160,7 +160,7 @@ CONFIG_CLEAN_VPATH_FILES =
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 libsquid_http_la_LIBADD =
 am_libsquid_http_la_OBJECTS = MethodType.lo StatusCode.lo \
-	StatusLine.lo
+	StatusLine.lo ContentLengthInterpreter.lo
 libsquid_http_la_OBJECTS = $(am_libsquid_http_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -694,6 +694,8 @@ COMPAT_LIB = $(top_builddir)/compat/libcompat-squid.la $(LIBPROFILER)
 subst_perlshell = sed -e 's,[@]PERL[@],$(PERL),g' <$(srcdir)/$@.pl.in >$@ || ($(RM) -f $@ ; exit 1)
 noinst_LTLIBRARIES = libsquid-http.la
 libsquid_http_la_SOURCES = \
+	ContentLengthInterpreter.cc \
+	ContentLengthInterpreter.h \
 	MethodType.cc \
 	MethodType.h \
 	ProtocolVersion.h \
-- 
2.21.0