diff -ur a/git-compat-util.h b/git-compat-util.h --- a/git-compat-util.h 2023-02-01 16:15:48.875070563 +0100 +++ b/git-compat-util.h 2023-02-21 11:27:58.038145942 +0100 @@ -543,6 +543,13 @@ return a - b; } +static inline int cast_size_t_to_int(size_t a) +{ + if (a > INT_MAX) + die("number too large to represent as int on this platform: %"PRIuMAX, + (uintmax_t)a); + return (int)a; +} extern char *xstrdup(const char *str); extern void *xmalloc(size_t size); diff -ur a/pretty.c b/pretty.c --- a/pretty.c 2013-06-10 22:01:55.000000000 +0200 +++ b/pretty.c 2023-02-21 11:18:11.584775725 +0100 @@ -11,6 +11,13 @@ #include "reflog-walk.h" #include "gpg-interface.h" +/* + * The limit for formatting directives, which enable the caller to append + * arbitrarily many bytes to the formatted buffer. This includes padding + * and wrapping formatters. + */ +#define FORMATTING_LIMIT (16 * 1024) + static char *user_format; static struct cmt_fmt_map { const char *name; @@ -917,7 +924,9 @@ if (pos) strbuf_add(&tmp, sb->buf, pos); strbuf_add_wrapped_text(&tmp, sb->buf + pos, - (int) indent1, (int) indent2, (int) width); + cast_size_t_to_int(indent1), + cast_size_t_to_int(indent2), + cast_size_t_to_int(width)); strbuf_swap(&tmp, sb); strbuf_release(&tmp); } @@ -1030,9 +1039,18 @@ const char *end = start + strcspn(start, ",)"); char *next; int width; - if (!end || end == start) + if (!*end || end == start) return 0; width = strtoul(start, &next, 10); + + /* + * We need to limit the amount of padding, or otherwise this + * would allow the user to pad the buffer by arbitrarily many + * bytes and thus cause resource exhaustion. + */ + if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT) + return 0; + if (next == start || width == 0) return 0; c->padding = to_column ? -width : width; @@ -1119,6 +1137,16 @@ if (*next != ')') return 0; } + + /* + * We need to limit the format here as it allows the + * user to prepend arbitrarily many bytes to the buffer + * when rewrapping. + */ + if (width > FORMATTING_LIMIT || + indent1 > FORMATTING_LIMIT || + indent2 > FORMATTING_LIMIT) + return 0; rewrap_message_tail(sb, c, width, indent1, indent2); return end - placeholder + 1; } else @@ -1296,18 +1324,20 @@ struct format_commit_context *c) { struct strbuf local_sb = STRBUF_INIT; - int total_consumed = 0, len, padding = c->padding; + size_t total_consumed = 0; + int len, padding = c->padding; + if (padding < 0) { const char *start = strrchr(sb->buf, '\n'); int occupied; if (!start) start = sb->buf; - occupied = utf8_strnwidth(start, -1, 1); + occupied = utf8_strnwidth(start, strlen(start), 1); padding = (-padding) - occupied; } while (1) { int modifier = *placeholder == 'C'; - int consumed = format_commit_one(&local_sb, placeholder, c); + size_t consumed = format_commit_one(&local_sb, placeholder, c); total_consumed += consumed; if (!modifier) @@ -1319,7 +1349,7 @@ placeholder++; total_consumed++; } - len = utf8_strnwidth(local_sb.buf, -1, 1); + len = utf8_strnwidth(local_sb.buf, local_sb.len, 1); if (c->flush_type == flush_left_and_steal) { const char *ch = sb->buf + sb->len - 1; @@ -1334,7 +1364,7 @@ if (*ch != 'm') break; p = ch - 1; - while (ch - p < 10 && *p != '\033') + while (p > sb->buf && ch - p < 10 && *p != '\033') p--; if (*p != '\033' || ch + 1 - p != display_mode_esc_sequence_len(p)) @@ -1373,7 +1403,7 @@ } strbuf_addstr(sb, local_sb.buf); } else { - int sb_len = sb->len, offset = 0; + size_t sb_len = sb->len, offset = 0; if (c->flush_type == flush_left) offset = padding - len; else if (c->flush_type == flush_both) @@ -1398,8 +1428,7 @@ const char *placeholder, void *context) { - int consumed; - size_t orig_len; + size_t consumed, orig_len; enum { NO_MAGIC, ADD_LF_BEFORE_NON_EMPTY, @@ -1420,9 +1449,21 @@ default: break; } - if (magic != NO_MAGIC) + if (magic != NO_MAGIC) { placeholder++; + switch (placeholder[0]) { + case 'w': + /* + * `%+w()` cannot ever expand to a non-empty string, + * and it potentially changes the layout of preceding + * contents. We're thus not able to handle the magic in + * this combination and refuse the pattern. + */ + return 0; + }; + } + orig_len = sb->len; if (((struct format_commit_context *)context)->flush_type != no_flush) consumed = format_and_pad_commit(sb, placeholder, context); diff -ur a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh --- a/t/t4205-log-pretty-formats.sh 2013-06-10 22:01:55.000000000 +0200 +++ b/t/t4205-log-pretty-formats.sh 2023-02-21 11:19:31.345309067 +0100 @@ -274,4 +274,80 @@ test_cmp expected actual ' +test_expect_success 'log --pretty with space stealing' ' + printf mm0 >expect && + git log -1 --pretty="format:mm%>>|(1)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with invalid padding format' ' + printf "%s%%<(20" "$(git rev-parse HEAD)" >expect && + git log -1 --pretty="format:%H%<(20" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with magical wrapping directives' ' + commit_id=$(git commit-tree HEAD^{tree} -m "describe me") && + git tag describe-me $commit_id && + printf "\n(tag:\ndescribe-me)%%+w(2)" >expect && + git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' ' + printf "%%w(2147483649,1,1)0" >expect && + git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,2147483649,1)0" >expect && + git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,1,2147483649)0" >expect && + git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' ' + printf "%%<(2147483649)0" >expect && + git log -1 --pretty="format:%<(2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with padding and preceding control chars' ' + printf "\20\20 0" >expect && + git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty truncation with control chars' ' + test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars && + printf "\20\20\20\20x.." >expect && + git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + # We only assert that this command does not crash. This needs to be + # executed with the address sanitizer to demonstrate failure. + git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' ' + test-tool genzeros 2147483649 | tr "\000" "1" >expect && + huge_commit=$(git commit-tree -F expect HEAD^{tree}) +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + git log -1 --format="%B%<(1)%x30" $huge_commit >actual && + echo 0 >>expect && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' ' + test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error && + cat >expect <<-EOF && + fatal: number too large to represent as int on this platform: 2147483649 + EOF + test_cmp expect error +' + test_done diff -ur a/t/test-lib.sh b/t/test-lib.sh --- a/t/test-lib.sh 2013-06-10 22:01:55.000000000 +0200 +++ b/t/test-lib.sh 2023-02-21 11:52:24.739202530 +0100 @@ -770,6 +770,10 @@ git var GIT_AUTHOR_IDENT ' +test_lazy_prereq SIZE_T_IS_64BIT ' + test 8 -eq "$(build_option sizeof-size_t)" +' + # When the tests are run as root, permission tests will report that # things are writable when they shouldn't be. test -w / || test_set_prereq SANITY diff -ur a/utf8.c b/utf8.c --- a/utf8.c 2013-06-10 22:01:55.000000000 +0200 +++ b/utf8.c 2023-02-21 12:00:28.555925285 +0100 @@ -266,26 +266,32 @@ * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. */ -int utf8_strnwidth(const char *string, int len, int skip_ansi) +int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { - int width = 0; const char *orig = string; + size_t width = 0; - if (len == -1) - len = strlen(string); while (string && string < orig + len) { - int skip; + int glyph_width; + size_t skip; while (skip_ansi && (skip = display_mode_esc_sequence_len(string)) != 0) string += skip; - width += utf8_width(&string, NULL); + glyph_width = utf8_width(&string, NULL); + if (glyph_width > 0) + width += glyph_width; } - return string ? width : len; + + /* + * TODO: fix the interface of this function and `utf8_strwidth()` to + * return `size_t` instead of `int`. + */ + return cast_size_t_to_int(string ? width : len); } int utf8_strwidth(const char *string) { - return utf8_strnwidth(string, -1, 0); + return utf8_strnwidth(string, strlen(string), 0); } int is_utf8(const char *text) @@ -424,47 +430,52 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, const char *subst) { - struct strbuf sb_dst = STRBUF_INIT; - char *src = sb_src->buf; - char *end = src + sb_src->len; - char *dst; - int w = 0, subst_len = 0; - - if (subst) - subst_len = strlen(subst); - strbuf_grow(&sb_dst, sb_src->len + subst_len); - dst = sb_dst.buf; + const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len; + struct strbuf dst; + int w = 0; + + strbuf_init(&dst, sb_src->len); while (src < end) { - char *old; + const char *old; + int glyph_width; size_t n; while ((n = display_mode_esc_sequence_len(src))) { - memcpy(dst, src, n); + strbuf_add(&dst, src, n); src += n; - dst += n; } + + if (src >= end) + break; old = src; - n = utf8_width((const char**)&src, NULL); - if (!src) /* broken utf-8, do nothing */ - return; - if (n && w >= pos && w < pos + width) { + glyph_width = utf8_width((const char**)&src, NULL); + if (!src) /* broken utf-8, do nothing */ + goto out; + + /* + * In case we see a control character we copy it into the + * buffer, but don't add it to the width. + */ + if (glyph_width < 0) + glyph_width = 0; + + if (glyph_width && w >= pos && w < pos + width) { if (subst) { - memcpy(dst, subst, subst_len); - dst += subst_len; + strbuf_addstr(&dst, subst); subst = NULL; } - w += n; - continue; + } else { + strbuf_add(&dst, old, src - old); } - memcpy(dst, old, src - old); - dst += src - old; - w += n; - } - strbuf_setlen(&sb_dst, dst - sb_dst.buf); - strbuf_swap(sb_src, &sb_dst); - strbuf_release(&sb_dst); + + w += glyph_width; + } + + strbuf_swap(sb_src, &dst); +out: + strbuf_release(&dst); } int is_encoding_utf8(const char *name) diff -ur a/utf8.h b/utf8.h --- a/utf8.h 2013-06-10 22:01:55.000000000 +0200 +++ b/utf8.h 2023-02-21 12:00:40.186991497 +0100 @@ -5,7 +5,7 @@ size_t display_mode_esc_sequence_len(const char *s); int utf8_width(const char **start, size_t *remainder_p); -int utf8_strnwidth(const char *string, int len, int skip_ansi); +int utf8_strnwidth(const char *string, size_t len, int skip_ansi); int utf8_strwidth(const char *string); int is_utf8(const char *text); int is_encoding_utf8(const char *name);