Blame SOURCES/mailx-12.5-encsplit.patch

c6c8a7
diff --git a/mime.c b/mime.c
c6c8a7
index 45de80a..f9fbadf 100644
c6c8a7
--- a/mime.c
c6c8a7
+++ b/mime.c
c6c8a7
@@ -1109,16 +1109,34 @@ fromhdr_end:
c6c8a7
 }
c6c8a7
 
c6c8a7
 /*
c6c8a7
+ * return the length in bytes of the UTF-8 sequence whose lead byte is tc (the count of tc's leading 1 bits; 1 for a 7-bit byte)
c6c8a7
+ */
c6c8a7
+static size_t
c6c8a7
+codepointsize(char tc)
c6c8a7
+{
c6c8a7
+	int rv = 0;
c6c8a7
+	if ( ! ( tc & 0x80 ) )	/* high bit clear: single-byte character */
c6c8a7
+		return 1;
c6c8a7
+	while ( tc & 0x80 )	/* count leading 1 bits by shifting them out of the top */
c6c8a7
+	{
c6c8a7
+		rv++;
c6c8a7
+		tc = tc<<1;
c6c8a7
+	}
c6c8a7
+	return rv;
c6c8a7
+}
c6c8a7
+
c6c8a7
+/*
c6c8a7
  * Convert header fields to RFC 1522 format and write to the file fo.
c6c8a7
  */
c6c8a7
 static size_t
c6c8a7
 mime_write_tohdr(struct str *in, FILE *fo)
c6c8a7
 {
c6c8a7
 	char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b,
c6c8a7
-		*charset7;
c6c8a7
+		*charset7, *cp;
c6c8a7
 	struct str cin, cout;
c6c8a7
-	size_t sz = 0, col = 0, wr, charsetlen, charset7len;
c6c8a7
+	size_t sz = 0, col = 0, wr, charsetlen, charset7len, cpsz;
c6c8a7
 	int quoteany, mustquote, broken,
c6c8a7
+		maxin, maxout, curin, cps,
c6c8a7
 		maxcol = 65 /* there is the header field's name, too */;
c6c8a7
 
c6c8a7
 	upper = in->s + in->l;
c6c8a7
@@ -1134,41 +1152,75 @@ mime_write_tohdr(struct str *in, FILE *fo)
c6c8a7
 		if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1]))
c6c8a7
 			quoteany++;
c6c8a7
 	}
c6c8a7
+
c6c8a7
+	/*
c6c8a7
+	 * RFC 2047 forbids splitting a multi-byte character across
c6c8a7
+	 * encoded words, so we need to know whether the source is a
c6c8a7
+	 * multi-byte stream (UTF-8 specifically) or just an 8-bit
c6c8a7
+	 * stream such as ISO-8859-15.
c6c8a7
+	 * Only the beginning of the charset name is tested, because a
c6c8a7
+	 * language suffix (e.g. "UTF-8*DE") is valid per RFC 2184/2231.
c6c8a7
+	 */
c6c8a7
+	char *thisset = b&0200 ? charset : charset7;
c6c8a7
+	int is_utf8 = ( strncasecmp( thisset, "utf-8", 5 ) == 0 );
c6c8a7
+
c6c8a7
 	if (2 * quoteany > in->l) {
c6c8a7
 		/*
c6c8a7
 		 * Print the entire field in base64.
c6c8a7
 		 */
c6c8a7
-		for (wbeg = in->s; wbeg < upper; wbeg = wend) {
c6c8a7
+		for (wbeg = in->s; wbeg < upper; ) {
c6c8a7
 			wend = upper;
c6c8a7
 			cin.s = wbeg;
c6c8a7
-			for (;;) {
c6c8a7
-				cin.l = wend - wbeg;
c6c8a7
-				if (cin.l * 4/3 + 7 + charsetlen
c6c8a7
-						< maxcol - col) {
c6c8a7
-					fprintf(fo, "=?%s?B?",
c6c8a7
-						b&0200 ? charset : charset7);
c6c8a7
-					wr = mime_write_tob64(&cin, fo, 1);
c6c8a7
-					fwrite("?=", sizeof (char), 2, fo);
c6c8a7
-					wr += 7 + charsetlen;
c6c8a7
-					sz += wr, col += wr;
c6c8a7
-					if (wend < upper) {
c6c8a7
-						fwrite("\n ", sizeof (char),
c6c8a7
-								2, fo);
c6c8a7
-						sz += 2;
c6c8a7
-						col = 0;
c6c8a7
-						maxcol = 76;
c6c8a7
+			/*
c6c8a7
+			 * calculate the maximum number of output bytes
c6c8a7
+			 * available on this line, and then how many source
c6c8a7
+			 * bytes that corresponds to before base64 encoding
c6c8a7
+			 */
c6c8a7
+			maxout = maxcol - col - 7 - charsetlen;
c6c8a7
+			maxin = (maxout - (maxout & 0x03)) * 3/4;
c6c8a7
+
c6c8a7
+			/* short enough to finish ? */
c6c8a7
+			if (maxin > upper - wbeg )
c6c8a7
+			{
c6c8a7
+				curin = upper - wbeg;
c6c8a7
+				wbeg += curin;
c6c8a7
+			}else
c6c8a7
+			{
c6c8a7
+				if (is_utf8)
c6c8a7
+				{
c6c8a7
+					/*
c6c8a7
+					 * now scan the input from the beginning
c6c8a7
+					 * to see how many codepoints will fit
c6c8a7
+					 */
c6c8a7
+					curin = 0;
c6c8a7
+					while (curin < maxin
c6c8a7
+						&& (cpsz = codepointsize(*wbeg)) <= (maxin - curin))
c6c8a7
+					{
c6c8a7
+						curin += cpsz;
c6c8a7
+						wbeg += cpsz;
c6c8a7
 					}
c6c8a7
-					break;
c6c8a7
-				} else {
c6c8a7
-					if (col) {
c6c8a7
-						fprintf(fo, "\n ");
c6c8a7
-						sz += 2;
c6c8a7
-						col = 0;
c6c8a7
-						maxcol = 76;
c6c8a7
-					} else
c6c8a7
-						wend -= 4;
c6c8a7
+				}else
c6c8a7
+				{
c6c8a7
+					curin = maxin;
c6c8a7
+					wbeg += maxin;
c6c8a7
 				}
c6c8a7
 			}
c6c8a7
+			cin.l = curin;
c6c8a7
+			fprintf(fo, "%s=?%s?B?", (cin.s != in->s) ? " " : "", thisset );
c6c8a7
+			wr = mime_write_tob64(&cin, fo, 1);
c6c8a7
+
c6c8a7
+			if (wbeg < upper)
c6c8a7
+			{
c6c8a7
+				wr += fwrite("?=\n ", sizeof (char), 4, fo) * sizeof (char);
c6c8a7
+			}else
c6c8a7
+			{
c6c8a7
+				wr += fwrite("?=", sizeof (char), 2, fo) * sizeof (char);
c6c8a7
+			}
c6c8a7
+
c6c8a7
+			/* and shuffle pointers and counts */
c6c8a7
+			col = 1;
c6c8a7
+			maxcol = 76;
c6c8a7
+			sz += wr + 7 + charsetlen + ((cin.s != in->s) ? 1 : 0 );
c6c8a7
 		}
c6c8a7
 	} else {
c6c8a7
 		/*
c6c8a7
@@ -1243,7 +1295,29 @@ mime_write_tohdr(struct str *in, FILE *fo)
c6c8a7
 								maxcol -= wbeg -
c6c8a7
 									lastspc;
c6c8a7
 						} else {
c6c8a7
-							wend -= 4;
c6c8a7
+							if (is_utf8)
c6c8a7
+							{
c6c8a7
+								/*
c6c8a7
+								 * make sure wend is not pointing to
c6c8a7
+								 * the middle of a codepoint
c6c8a7
+								 */
c6c8a7
+								cp = wend;
c6c8a7
+								while (--cp > wbeg)
c6c8a7
+								{
c6c8a7
+									cps = codepointsize(*cp);
c6c8a7
+									if (cps > 1)
c6c8a7
+									{
c6c8a7
+										if (wend - cp - cps > 4)
c6c8a7
+											wend -= 4;
c6c8a7
+										else
c6c8a7
+											wend = cp;
c6c8a7
+										break;
c6c8a7
+									}
c6c8a7
+								}
c6c8a7
+								if (cp == wbeg)
c6c8a7
+									wend -= 4;
c6c8a7
+							} else
c6c8a7
+								wend -= 4;
c6c8a7
 						}
c6c8a7
 						free(cout.s);
c6c8a7
 					}