Blob Blame History Raw
Index: bogofilter/src/lexer.c
===================================================================
--- bogofilter/src/lexer.c	(revision 7029)
+++ bogofilter/src/lexer.c	(revision 7030)
@@ -329,7 +329,7 @@
 	count += cnt;
 
 	/* Note: some malformed messages can cause xfgetsl() to report
-	** "Invalid buffer size, exiting."  ** and then abort.  This
+	** "Invalid buffer size, exiting."  and then abort.  This
 	** can happen when the parser is in html mode and there's a
 	** leading '<' but no closing '>'.
 	**
@@ -343,9 +343,12 @@
 
 	if (count >= MAX_TOKEN_LEN * 2 && 
 	    long_token(buff.t.u.text, (uint) count)) {
-	    uint start = buff.t.leng - count;
-	    uint length = count - max_token_len;
-	    buff_shift(&buff, start, length);
+	    /* Make sure not to shift bytes outside the buffer */
+	    if (buff.t.leng >= (uint) count) {
+		    uint start = buff.t.leng - count;
+		    uint length = count - max_token_len;
+		    buff_shift(&buff, start, length);
+	    }
 	    count = buff.t.leng;
 	}
 	else
Index: bogofilter/NEWS
===================================================================
--- bogofilter/NEWS	(revision 7029)
+++ bogofilter/NEWS	(revision 7030)
@@ -15,6 +15,14 @@
 
 -------------------------------------------------------------------------------
 
+	2015-02-28
+
+	* Fix the lexer so that it does not try to delete parts of HTML tokens
+	  when reading garbage (for instance, binary files misdeclared as HTML).
+	  This was exposed on Fedora 20 and 21 but not on Ubuntu 14.04 (x86_64),
+	  and is possibly related to Fedora's newer flex 2.5.37, which may have
+	  changed the way it uses yyinput() a bit.  Reported by Matt Garretson.
+
 	2015-02-25
 
 	* Fix the lexer to handle MIME multipart messages properly when the