|
|
c88ac0 |
diff --git a/mime.c b/mime.c
|
|
|
c88ac0 |
index 45de80a..f9fbadf 100644
|
|
|
c88ac0 |
--- a/mime.c
|
|
|
c88ac0 |
+++ b/mime.c
|
|
|
c88ac0 |
@@ -1109,16 +1109,34 @@ fromhdr_end:
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
|
|
|
c88ac0 |
/*
 * Return the length in bytes of the UTF-8 sequence introduced by the
 * byte tc.
 *
 * A byte with the top bit clear (7-bit ASCII, including NUL) has length 1.
 * For any other byte the result is the number of consecutive 1 bits
 * counted from the most significant bit downwards:
 *   110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * A continuation byte (10xxxxxx) has a single leading 1 bit and so
 * yields 1.  Bytes with five or more leading 1 bits are not valid UTF-8
 * lead bytes; they are not rejected here and yield their leading-bit
 * count (up to 8 for 0xFF).
 */
static size_t
codepointsize(char tc)
{
	/*
	 * Work on an unsigned copy: plain char may be signed, and
	 * left-shifting a negative value is undefined behaviour.
	 */
	unsigned char lead = (unsigned char)tc;
	size_t len = 0;

	if ((lead & 0x80) == 0)
		return 1;

	/* Count the run of leading 1 bits by shifting them out the top. */
	while (lead & 0x80) {
		len++;
		lead = (unsigned char)(lead << 1);
	}
	return len;
}
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
+/*
|
|
|
c88ac0 |
* Convert header fields to RFC 1522 format and write to the file fo.
|
|
|
c88ac0 |
*/
|
|
|
c88ac0 |
static size_t
|
|
|
c88ac0 |
mime_write_tohdr(struct str *in, FILE *fo)
|
|
|
c88ac0 |
{
|
|
|
c88ac0 |
char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b,
|
|
|
c88ac0 |
- *charset7;
|
|
|
c88ac0 |
+ *charset7, *cp;
|
|
|
c88ac0 |
struct str cin, cout;
|
|
|
c88ac0 |
- size_t sz = 0, col = 0, wr, charsetlen, charset7len;
|
|
|
c88ac0 |
+ size_t sz = 0, col = 0, wr, charsetlen, charset7len, cpsz;
|
|
|
c88ac0 |
int quoteany, mustquote, broken,
|
|
|
c88ac0 |
+ maxin, maxout, curin, cps,
|
|
|
c88ac0 |
maxcol = 65 /* there is the header field's name, too */;
|
|
|
c88ac0 |
|
|
|
c88ac0 |
upper = in->s + in->l;
|
|
|
c88ac0 |
@@ -1134,41 +1152,75 @@ mime_write_tohdr(struct str *in, FILE *fo)
|
|
|
c88ac0 |
if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1]))
|
|
|
c88ac0 |
quoteany++;
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
+ /*
|
|
|
c88ac0 |
+ * rfc2047 says we cannot split multi-byte characters over
|
|
|
c88ac0 |
+ * encoded words, so we need to know if we're a multi-byte
|
|
|
c88ac0 |
+ * source stream (UTF-8 specifically) or just an 8 bit
|
|
|
c88ac0 |
+ * stream like ISO-8859-15
|
|
|
c88ac0 |
+ * so test beginning of charset since it is valid to include
|
|
|
c88ac0 |
+ * language in charset "UTF-8*DE" etc as per rfc 2184/2231
|
|
|
c88ac0 |
+ */
|
|
|
c88ac0 |
+ char *thisset = b&0200 ? charset : charset7;
|
|
|
c88ac0 |
+ int is_utf8 = ( strncasecmp( thisset, "utf-8", 5 ) == 0 );
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
if (2 * quoteany > in->l) {
|
|
|
c88ac0 |
/*
|
|
|
c88ac0 |
* Print the entire field in base64.
|
|
|
c88ac0 |
*/
|
|
|
c88ac0 |
- for (wbeg = in->s; wbeg < upper; wbeg = wend) {
|
|
|
c88ac0 |
+ for (wbeg = in->s; wbeg < upper; ) {
|
|
|
c88ac0 |
wend = upper;
|
|
|
c88ac0 |
cin.s = wbeg;
|
|
|
c88ac0 |
- for (;;) {
|
|
|
c88ac0 |
- cin.l = wend - wbeg;
|
|
|
c88ac0 |
- if (cin.l * 4/3 + 7 + charsetlen
|
|
|
c88ac0 |
- < maxcol - col) {
|
|
|
c88ac0 |
- fprintf(fo, "=?%s?B?",
|
|
|
c88ac0 |
- b&0200 ? charset : charset7);
|
|
|
c88ac0 |
- wr = mime_write_tob64(&cin, fo, 1);
|
|
|
c88ac0 |
- fwrite("?=", sizeof (char), 2, fo);
|
|
|
c88ac0 |
- wr += 7 + charsetlen;
|
|
|
c88ac0 |
- sz += wr, col += wr;
|
|
|
c88ac0 |
- if (wend < upper) {
|
|
|
c88ac0 |
- fwrite("\n ", sizeof (char),
|
|
|
c88ac0 |
- 2, fo);
|
|
|
c88ac0 |
- sz += 2;
|
|
|
c88ac0 |
- col = 0;
|
|
|
c88ac0 |
- maxcol = 76;
|
|
|
c88ac0 |
+ /*
|
|
|
c88ac0 |
+ * we calculate the maximum number of bytes
|
|
|
c88ac0 |
+ * we can use on this output line, and then what
|
|
|
c88ac0 |
+ * this equates to as base64 encoded source bytes
|
|
|
c88ac0 |
+ */
|
|
|
c88ac0 |
+ maxout = maxcol - col - 7 - charsetlen;
|
|
|
c88ac0 |
+ maxin = (maxout - (maxout & 0x03)) * 3/4;
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
+ /* short enough to finish ? */
|
|
|
c88ac0 |
+ if (maxin > upper - wbeg )
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ curin = upper - wbeg;
|
|
|
c88ac0 |
+ wbeg += curin;
|
|
|
c88ac0 |
+ }else
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ if (is_utf8)
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ /*
|
|
|
c88ac0 |
+ * now scan the input from the beginning
|
|
|
c88ac0 |
+ * to see how many codepoints will fit
|
|
|
c88ac0 |
+ */
|
|
|
c88ac0 |
+ curin = 0;
|
|
|
c88ac0 |
+ while (curin < maxin
|
|
|
c88ac0 |
+ && (cpsz = codepointsize(*wbeg)) <= (maxin - curin))
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ curin += cpsz;
|
|
|
c88ac0 |
+ wbeg += cpsz;
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
- break;
|
|
|
c88ac0 |
- } else {
|
|
|
c88ac0 |
- if (col) {
|
|
|
c88ac0 |
- fprintf(fo, "\n ");
|
|
|
c88ac0 |
- sz += 2;
|
|
|
c88ac0 |
- col = 0;
|
|
|
c88ac0 |
- maxcol = 76;
|
|
|
c88ac0 |
- } else
|
|
|
c88ac0 |
- wend -= 4;
|
|
|
c88ac0 |
+ }else
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ curin = maxin;
|
|
|
c88ac0 |
+ wbeg += maxin;
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
+ cin.l = curin;
|
|
|
c88ac0 |
+ fprintf(fo, "%s=?%s?B?", (cin.s != in->s) ? " " : "", thisset );
|
|
|
c88ac0 |
+ wr = mime_write_tob64(&cin, fo, 1);
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
+ if (wbeg < upper)
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ wr += fwrite("?=\n ", sizeof (char), 4, fo) * sizeof (char);
|
|
|
c88ac0 |
+ }else
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ wr += fwrite("?=", sizeof (char), 2, fo) * sizeof (char);
|
|
|
c88ac0 |
+ }
|
|
|
c88ac0 |
+
|
|
|
c88ac0 |
+ /* and shuffle pointers and counts */
|
|
|
c88ac0 |
+ col = 1;
|
|
|
c88ac0 |
+ maxcol = 76;
|
|
|
c88ac0 |
+ sz += wr + 7 + charsetlen + ((cin.s != in->s) ? 1 : 0 );
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
} else {
|
|
|
c88ac0 |
/*
|
|
|
c88ac0 |
@@ -1243,7 +1295,29 @@ mime_write_tohdr(struct str *in, FILE *fo)
|
|
|
c88ac0 |
maxcol -= wbeg -
|
|
|
c88ac0 |
lastspc;
|
|
|
c88ac0 |
} else {
|
|
|
c88ac0 |
- wend -= 4;
|
|
|
c88ac0 |
+ if (is_utf8)
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ /*
|
|
|
c88ac0 |
+ * make sure wend is not pointing to
|
|
|
c88ac0 |
+ * the middle of a codepoint
|
|
|
c88ac0 |
+ */
|
|
|
c88ac0 |
+ cp = wend;
|
|
|
c88ac0 |
+ while (--cp > wbeg)
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ cps = codepointsize(*cp);
|
|
|
c88ac0 |
+ if (cps > 1)
|
|
|
c88ac0 |
+ {
|
|
|
c88ac0 |
+ if (wend - cp - cps > 4)
|
|
|
c88ac0 |
+ wend -= 4;
|
|
|
c88ac0 |
+ else
|
|
|
c88ac0 |
+ wend = cp;
|
|
|
c88ac0 |
+ break;
|
|
|
c88ac0 |
+ }
|
|
|
c88ac0 |
+ }
|
|
|
c88ac0 |
+ if (cp == wbeg)
|
|
|
c88ac0 |
+ wend -= 4;
|
|
|
c88ac0 |
+ } else
|
|
|
c88ac0 |
+ wend -= 4;
|
|
|
c88ac0 |
}
|
|
|
c88ac0 |
free(cout.s);
|
|
|
c88ac0 |
}
|