Blob Blame History Raw
diff -ur a/git-compat-util.h b/git-compat-util.h
--- a/git-compat-util.h	2023-02-01 16:15:48.875070563 +0100
+++ b/git-compat-util.h	2023-02-21 11:27:58.038145942 +0100
@@ -543,6 +543,13 @@
 	return a - b;
 }
 
+static inline int cast_size_t_to_int(size_t a)
+{
+	if (a > INT_MAX)
+		die("number too large to represent as int on this platform: %"PRIuMAX,
+		    (uintmax_t)a);
+	return (int)a;
+}
 
 extern char *xstrdup(const char *str);
 extern void *xmalloc(size_t size);
diff -ur a/pretty.c b/pretty.c
--- a/pretty.c	2013-06-10 22:01:55.000000000 +0200
+++ b/pretty.c	2023-02-21 11:18:11.584775725 +0100
@@ -11,6 +11,13 @@
 #include "reflog-walk.h"
 #include "gpg-interface.h"
 
+/*
+ * The limit for formatting directives, which enable the caller to append
+ * arbitrarily many bytes to the formatted buffer. This includes padding
+ * and wrapping formatters.
+ */
+#define FORMATTING_LIMIT (16 * 1024)
+
 static char *user_format;
 static struct cmt_fmt_map {
 	const char *name;
@@ -917,7 +924,9 @@
 	if (pos)
 		strbuf_add(&tmp, sb->buf, pos);
 	strbuf_add_wrapped_text(&tmp, sb->buf + pos,
-				(int) indent1, (int) indent2, (int) width);
+				cast_size_t_to_int(indent1),
+				cast_size_t_to_int(indent2),
+				cast_size_t_to_int(width));
 	strbuf_swap(&tmp, sb);
 	strbuf_release(&tmp);
 }
@@ -1030,9 +1039,18 @@
 		const char *end = start + strcspn(start, ",)");
 		char *next;
 		int width;
-		if (!end || end == start)
+		if (!*end || end == start)
 			return 0;
 		width = strtoul(start, &next, 10);
+
+		/*
+		 * We need to limit the amount of padding, or otherwise this
+		 * would allow the user to pad the buffer by arbitrarily many
+		 * bytes and thus cause resource exhaustion.
+		 */
+		if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT)
+			return 0;
+
 		if (next == start || width == 0)
 			return 0;
 		c->padding = to_column ? -width : width;
@@ -1119,6 +1137,16 @@
 				if (*next != ')')
 					return 0;
 			}
+
+			/*
+			 * We need to limit the format here as it allows the
+			 * user to prepend arbitrarily many bytes to the buffer
+			 * when rewrapping.
+			 */
+			if (width > FORMATTING_LIMIT ||
+				indent1 > FORMATTING_LIMIT ||
+			    indent2 > FORMATTING_LIMIT)
+				return 0;
 			rewrap_message_tail(sb, c, width, indent1, indent2);
 			return end - placeholder + 1;
 		} else
@@ -1296,18 +1324,20 @@
 				    struct format_commit_context *c)
 {
 	struct strbuf local_sb = STRBUF_INIT;
-	int total_consumed = 0, len, padding = c->padding;
+	size_t total_consumed = 0;
+	int len, padding = c->padding;
+
 	if (padding < 0) {
 		const char *start = strrchr(sb->buf, '\n');
 		int occupied;
 		if (!start)
 			start = sb->buf;
-		occupied = utf8_strnwidth(start, -1, 1);
+		occupied = utf8_strnwidth(start, strlen(start), 1);
 		padding = (-padding) - occupied;
 	}
 	while (1) {
 		int modifier = *placeholder == 'C';
-		int consumed = format_commit_one(&local_sb, placeholder, c);
+		size_t consumed = format_commit_one(&local_sb, placeholder, c);
 		total_consumed += consumed;
 
 		if (!modifier)
@@ -1319,7 +1349,7 @@
 		placeholder++;
 		total_consumed++;
 	}
-	len = utf8_strnwidth(local_sb.buf, -1, 1);
+	len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);
 
 	if (c->flush_type == flush_left_and_steal) {
 		const char *ch = sb->buf + sb->len - 1;
@@ -1334,7 +1364,7 @@
 			if (*ch != 'm')
 				break;
 			p = ch - 1;
-			while (ch - p < 10 && *p != '\033')
+			while (p > sb->buf && ch - p < 10 && *p != '\033')
 				p--;
 			if (*p != '\033' ||
 			    ch + 1 - p != display_mode_esc_sequence_len(p))
@@ -1373,7 +1403,7 @@
 		}
 		strbuf_addstr(sb, local_sb.buf);
 	} else {
-		int sb_len = sb->len, offset = 0;
+		size_t sb_len = sb->len, offset = 0;
 		if (c->flush_type == flush_left)
 			offset = padding - len;
 		else if (c->flush_type == flush_both)
@@ -1398,8 +1428,7 @@
 				 const char *placeholder,
 				 void *context)
 {
-	int consumed;
-	size_t orig_len;
+	size_t consumed, orig_len;
 	enum {
 		NO_MAGIC,
 		ADD_LF_BEFORE_NON_EMPTY,
@@ -1420,9 +1449,21 @@
 	default:
 		break;
 	}
-	if (magic != NO_MAGIC)
+	if (magic != NO_MAGIC) {
 		placeholder++;
 
+		switch (placeholder[0]) {
+		case 'w':
+			/*
+			 * `%+w()` cannot ever expand to a non-empty string,
+			 * and it potentially changes the layout of preceding
+			 * contents. We're thus not able to handle the magic in
+			 * this combination and refuse the pattern.
+			 */
+			return 0;
+		};
+	}
+
 	orig_len = sb->len;
 	if (((struct format_commit_context *)context)->flush_type != no_flush)
 		consumed = format_and_pad_commit(sb, placeholder, context);
diff -ur a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh
--- a/t/t4205-log-pretty-formats.sh	2013-06-10 22:01:55.000000000 +0200
+++ b/t/t4205-log-pretty-formats.sh	2023-02-21 11:19:31.345309067 +0100
@@ -274,4 +274,80 @@
 	test_cmp expected actual
 '
 
+test_expect_success 'log --pretty with space stealing' '
+	printf mm0 >expect &&
+	git log -1 --pretty="format:mm%>>|(1)%x30" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'log --pretty with invalid padding format' '
+	printf "%s%%<(20" "$(git rev-parse HEAD)" >expect &&
+	git log -1 --pretty="format:%H%<(20" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'log --pretty with magical wrapping directives' '
+	commit_id=$(git commit-tree HEAD^{tree} -m "describe me") &&
+	git tag describe-me $commit_id &&
+	printf "\n(tag:\ndescribe-me)%%+w(2)" >expect &&
+	git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' '
+	printf "%%w(2147483649,1,1)0" >expect &&
+	git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual &&
+	test_cmp expect actual &&
+	printf "%%w(1,2147483649,1)0" >expect &&
+	git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual &&
+	test_cmp expect actual &&
+	printf "%%w(1,1,2147483649)0" >expect &&
+	git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' '
+	printf "%%<(2147483649)0" >expect &&
+	git log -1 --pretty="format:%<(2147483649)%x30" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'log --pretty with padding and preceding control chars' '
+	printf "\20\20   0" >expect &&
+	git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'log --pretty truncation with control chars' '
+	test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars &&
+	printf "\20\20\20\20x.." >expect &&
+	git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
+	# We only assert that this command does not crash. This needs to be
+	# executed with the address sanitizer to demonstrate failure.
+	git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null
+'
+
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' '
+	test-tool genzeros 2147483649 | tr "\000" "1" >expect &&
+	huge_commit=$(git commit-tree -F expect HEAD^{tree})
+'
+
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
+	git log -1 --format="%B%<(1)%x30" $huge_commit >actual &&
+	echo 0 >>expect &&
+	test_cmp expect actual
+'
+
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' '
+	test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error &&
+	cat >expect <<-EOF &&
+	fatal: number too large to represent as int on this platform: 2147483649
+	EOF
+	test_cmp expect error
+'
+
 test_done
diff -ur a/t/test-lib.sh b/t/test-lib.sh
--- a/t/test-lib.sh	2013-06-10 22:01:55.000000000 +0200
+++ b/t/test-lib.sh	2023-02-21 11:52:24.739202530 +0100
@@ -770,6 +770,10 @@
 	git var GIT_AUTHOR_IDENT
 '
 
+test_lazy_prereq SIZE_T_IS_64BIT '
+	test 8 -eq "$(build_option sizeof-size_t)"
+'
+
 # When the tests are run as root, permission tests will report that
 # things are writable when they shouldn't be.
 test -w / || test_set_prereq SANITY
diff -ur a/utf8.c b/utf8.c
--- a/utf8.c	2013-06-10 22:01:55.000000000 +0200
+++ b/utf8.c	2023-02-21 12:00:28.555925285 +0100
@@ -266,26 +266,32 @@
  * string, assuming that the string is utf8.  Returns strlen() instead
  * if the string does not look like a valid utf8 string.
  */
-int utf8_strnwidth(const char *string, int len, int skip_ansi)
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi)
 {
-	int width = 0;
 	const char *orig = string;
+	size_t width = 0;
 
-	if (len == -1)
-		len = strlen(string);
 	while (string && string < orig + len) {
-		int skip;
+		int glyph_width;
+		size_t skip;
 		while (skip_ansi &&
 		       (skip = display_mode_esc_sequence_len(string)) != 0)
 			string += skip;
-		width += utf8_width(&string, NULL);
+		glyph_width = utf8_width(&string, NULL);
+		if (glyph_width > 0)
+			width += glyph_width;
 	}
-	return string ? width : len;
+
+	/*
+	 * TODO: fix the interface of this function and `utf8_strwidth()` to
+	 * return `size_t` instead of `int`.
+	 */
+	return cast_size_t_to_int(string ? width : len);
 }
 
 int utf8_strwidth(const char *string)
 {
-	return utf8_strnwidth(string, -1, 0);
+	return utf8_strnwidth(string, strlen(string), 0);
 }
 
 int is_utf8(const char *text)
@@ -424,47 +430,52 @@
 void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
 			 const char *subst)
 {
-	struct strbuf sb_dst = STRBUF_INIT;
-	char *src = sb_src->buf;
-	char *end = src + sb_src->len;
-	char *dst;
-	int w = 0, subst_len = 0;
-
-	if (subst)
-		subst_len = strlen(subst);
-	strbuf_grow(&sb_dst, sb_src->len + subst_len);
-	dst = sb_dst.buf;
+	const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len;
+	struct strbuf dst;
+	int w = 0;
+
+	strbuf_init(&dst, sb_src->len);
 
 	while (src < end) {
-		char *old;
+		const char *old;
+		int glyph_width;
 		size_t n;
 
 		while ((n = display_mode_esc_sequence_len(src))) {
-			memcpy(dst, src, n);
+			strbuf_add(&dst, src, n);
 			src += n;
-			dst += n;
 		}
+		
+		if (src >= end)
+			break;
 
 		old = src;
-		n = utf8_width((const char**)&src, NULL);
-		if (!src) 	/* broken utf-8, do nothing */
-			return;
-		if (n && w >= pos && w < pos + width) {
+		glyph_width = utf8_width((const char**)&src, NULL);
+		if (!src) /* broken utf-8, do nothing */
+			goto out;
+
+		/*
+		 * In case we see a control character we copy it into the
+		 * buffer, but don't add it to the width.
+		 */
+		if (glyph_width < 0)
+			glyph_width = 0;
+
+		if (glyph_width && w >= pos && w < pos + width) {
 			if (subst) {
-				memcpy(dst, subst, subst_len);
-				dst += subst_len;
+				strbuf_addstr(&dst, subst);
 				subst = NULL;
 			}
-			w += n;
-			continue;
+		} else {
+			strbuf_add(&dst, old, src - old);
 		}
-		memcpy(dst, old, src - old);
-		dst += src - old;
-		w += n;
-	}
-	strbuf_setlen(&sb_dst, dst - sb_dst.buf);
-	strbuf_swap(sb_src, &sb_dst);
-	strbuf_release(&sb_dst);
+
+		w += glyph_width;
+	}
+
+	strbuf_swap(sb_src, &dst);
+out:
+	strbuf_release(&dst);
 }
 
 int is_encoding_utf8(const char *name)
diff -ur a/utf8.h b/utf8.h
--- a/utf8.h	2013-06-10 22:01:55.000000000 +0200
+++ b/utf8.h	2023-02-21 12:00:40.186991497 +0100
@@ -5,7 +5,7 @@
 
 size_t display_mode_esc_sequence_len(const char *s);
 int utf8_width(const char **start, size_t *remainder_p);
-int utf8_strnwidth(const char *string, int len, int skip_ansi);
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi);
 int utf8_strwidth(const char *string);
 int is_utf8(const char *text);
 int is_encoding_utf8(const char *name);