Blame SOURCES/git-2.27.0-cve-2022-23521-cve-2022-41903.patch

a6e337
diff -ur a/attr.c b/attr.c
a6e337
--- a/attr.c	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/attr.c	2023-01-30 12:39:49.556799373 +0100
a6e337
@@ -28,7 +28,7 @@
a6e337
 #endif
a6e337
 
a6e337
 struct git_attr {
a6e337
-	int attr_nr; /* unique attribute number */
a6e337
+	unsigned int attr_nr; /* unique attribute number */
a6e337
 	char name[FLEX_ARRAY]; /* attribute name */
a6e337
 };
a6e337
 
a6e337
@@ -220,7 +220,7 @@
a6e337
  * dictionary.  If no entry is found, create a new attribute and store it in
a6e337
  * the dictionary.
a6e337
  */
a6e337
-static const struct git_attr *git_attr_internal(const char *name, int namelen)
a6e337
+static const struct git_attr *git_attr_internal(const char *name, size_t namelen)
a6e337
 {
a6e337
 	struct git_attr *a;
a6e337
 
a6e337
@@ -236,8 +236,8 @@
a6e337
 		a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);
a6e337
 
a6e337
 		attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
a6e337
-		assert(a->attr_nr ==
a6e337
-		       (hashmap_get_size(&g_attr_hashmap.map) - 1));
a6e337
+		if (a->attr_nr != hashmap_get_size(&g_attr_hashmap.map) - 1)
a6e337
+			die(_("unable to add additional attribute"));
a6e337
 	}
a6e337
 
a6e337
 	hashmap_unlock(&g_attr_hashmap);
a6e337
@@ -282,7 +282,7 @@
a6e337
 		const struct git_attr *attr;
a6e337
 	} u;
a6e337
 	char is_macro;
a6e337
-	unsigned num_attr;
a6e337
+	size_t num_attr;
a6e337
 	struct attr_state state[FLEX_ARRAY];
a6e337
 };
a6e337
 
a6e337
@@ -299,7 +299,7 @@
a6e337
 			      struct attr_state *e)
a6e337
 {
a6e337
 	const char *ep, *equals;
a6e337
-	int len;
a6e337
+	size_t len;
a6e337
 
a6e337
 	ep = cp + strcspn(cp, blank);
a6e337
 	equals = strchr(cp, '=');
a6e337
@@ -343,8 +343,7 @@
a6e337
 static struct match_attr *parse_attr_line(const char *line, const char *src,
a6e337
 					  int lineno, int macro_ok)
a6e337
 {
a6e337
-	int namelen;
a6e337
-	int num_attr, i;
a6e337
+	size_t namelen, num_attr, i;
a6e337
 	const char *cp, *name, *states;
a6e337
 	struct match_attr *res = NULL;
a6e337
 	int is_macro;
a6e337
@@ -355,6 +354,11 @@
a6e337
 		return NULL;
a6e337
 	name = cp;
a6e337
 
a6e337
+	if (strlen(line) >= ATTR_MAX_LINE_LENGTH) {
a6e337
+		warning(_("ignoring overly long attributes line %d"), lineno);
a6e337
+		return NULL;
a6e337
+	}
a6e337
+
a6e337
 	if (*cp == '"' && !unquote_c_style(&pattern, name, &states)) {
a6e337
 		name = pattern.buf;
a6e337
 		namelen = pattern.len;
a6e337
@@ -391,10 +395,9 @@
a6e337
 			goto fail_return;
a6e337
 	}
a6e337
 
a6e337
-	res = xcalloc(1,
a6e337
-		      sizeof(*res) +
a6e337
-		      sizeof(struct attr_state) * num_attr +
a6e337
-		      (is_macro ? 0 : namelen + 1));
a6e337
+	res = xcalloc(1, st_add3(sizeof(*res),
a6e337
+				 st_mult(sizeof(struct attr_state), num_attr),
a6e337
+				 is_macro ? 0 : namelen + 1));
a6e337
 	if (is_macro) {
a6e337
 		res->u.attr = git_attr_internal(name, namelen);
a6e337
 	} else {
a6e337
@@ -457,11 +460,12 @@
a6e337
 
a6e337
 static void attr_stack_free(struct attr_stack *e)
a6e337
 {
a6e337
-	int i;
a6e337
+	unsigned i;
a6e337
 	free(e->origin);
a6e337
 	for (i = 0; i < e->num_matches; i++) {
a6e337
 		struct match_attr *a = e->attrs[i];
a6e337
-		int j;
a6e337
+		size_t j;
a6e337
+
a6e337
 		for (j = 0; j < a->num_attr; j++) {
a6e337
 			const char *setto = a->state[j].setto;
a6e337
 			if (setto == ATTR__TRUE ||
a6e337
@@ -670,8 +674,8 @@
a6e337
 	a = parse_attr_line(line, src, lineno, macro_ok);
a6e337
 	if (!a)
a6e337
 		return;
a6e337
-	ALLOC_GROW(res->attrs, res->num_matches + 1, res->alloc);
a6e337
-	res->attrs[res->num_matches++] = a;
a6e337
+	ALLOC_GROW_BY(res->attrs, res->num_matches, 1, res->alloc);
a6e337
+	res->attrs[res->num_matches - 1] = a;
a6e337
 }
a6e337
 
a6e337
 static struct attr_stack *read_attr_from_array(const char **list)
a6e337
@@ -710,21 +714,35 @@
a6e337
 
a6e337
 static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
a6e337
 {
a6e337
+	struct strbuf buf = STRBUF_INIT;
a6e337
 	FILE *fp = fopen_or_warn(path, "r");
a6e337
 	struct attr_stack *res;
a6e337
-	char buf[2048];
a6e337
 	int lineno = 0;
a6e337
+	int fd;
a6e337
+	struct stat st;
a6e337
 
a6e337
 	if (!fp)
a6e337
 		return NULL;
a6e337
-	res = xcalloc(1, sizeof(*res));
a6e337
-	while (fgets(buf, sizeof(buf), fp)) {
a6e337
-		char *bufp = buf;
a6e337
-		if (!lineno)
a6e337
-			skip_utf8_bom(&bufp, strlen(bufp));
a6e337
-		handle_attr_line(res, bufp, path, ++lineno, macro_ok);
a6e337
+	fd = fileno(fp);
a6e337
+	if (fstat(fd, &st)) {
a6e337
+		warning_errno(_("cannot fstat gitattributes file '%s'"), path);
a6e337
+		fclose(fp);
a6e337
+		return NULL;
a6e337
+	}
a6e337
+	if (st.st_size >= ATTR_MAX_FILE_SIZE) {
a6e337
+		warning(_("ignoring overly large gitattributes file '%s'"), path);
a6e337
+		fclose(fp);
a6e337
+		return NULL;
a6e337
+	}
a6e337
+
a6e337
+	CALLOC_ARRAY(res, 1);
a6e337
+	while (strbuf_getline(&buf, fp) != EOF) {
a6e337
+		if (!lineno && starts_with(buf.buf, utf8_bom))
a6e337
+			strbuf_remove(&buf, 0, strlen(utf8_bom));
a6e337
+		handle_attr_line(res, buf.buf, path, ++lineno, macro_ok);
a6e337
 	}
a6e337
 	fclose(fp);
a6e337
+	strbuf_release(&buf);
a6e337
 	return res;
a6e337
 }
a6e337
 
a6e337
@@ -735,13 +753,18 @@
a6e337
 	struct attr_stack *res;
a6e337
 	char *buf, *sp;
a6e337
 	int lineno = 0;
a6e337
+	size_t size;
a6e337
 
a6e337
 	if (!istate)
a6e337
 		return NULL;
a6e337
 
a6e337
-	buf = read_blob_data_from_index(istate, path, NULL);
a6e337
+	buf = read_blob_data_from_index(istate, path, &size);
a6e337
 	if (!buf)
a6e337
 		return NULL;
a6e337
+	if (size >= ATTR_MAX_FILE_SIZE) {
a6e337
+		warning(_("ignoring overly large gitattributes blob '%s'"), path);
a6e337
+		return NULL;
a6e337
+	}
a6e337
 
a6e337
 	res = xcalloc(1, sizeof(*res));
a6e337
 	for (sp = buf; *sp; ) {
a6e337
@@ -1011,12 +1034,12 @@
a6e337
 static int fill_one(const char *what, struct all_attrs_item *all_attrs,
a6e337
 		    const struct match_attr *a, int rem)
a6e337
 {
a6e337
-	int i;
a6e337
+	size_t i;
a6e337
 
a6e337
-	for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) {
a6e337
-		const struct git_attr *attr = a->state[i].attr;
a6e337
+	for (i = a->num_attr; rem > 0 && i > 0; i--) {
a6e337
+		const struct git_attr *attr = a->state[i - 1].attr;
a6e337
 		const char **n = &(all_attrs[attr->attr_nr].value);
a6e337
-		const char *v = a->state[i].setto;
a6e337
+		const char *v = a->state[i - 1].setto;
a6e337
 
a6e337
 		if (*n == ATTR__UNKNOWN) {
a6e337
 			debug_set(what,
a6e337
@@ -1035,11 +1058,11 @@
a6e337
 		struct all_attrs_item *all_attrs, int rem)
a6e337
 {
a6e337
 	for (; rem > 0 && stack; stack = stack->prev) {
a6e337
-		int i;
a6e337
+		unsigned i;
a6e337
 		const char *base = stack->origin ? stack->origin : "";
a6e337
 
a6e337
-		for (i = stack->num_matches - 1; 0 < rem && 0 <= i; i--) {
a6e337
-			const struct match_attr *a = stack->attrs[i];
a6e337
+		for (i = stack->num_matches; 0 < rem && 0 < i; i--) {
a6e337
+			const struct match_attr *a = stack->attrs[i - 1];
a6e337
 			if (a->is_macro)
a6e337
 				continue;
a6e337
 			if (path_matches(path, pathlen, basename_offset,
a6e337
@@ -1070,9 +1093,9 @@
a6e337
 			     const struct attr_stack *stack)
a6e337
 {
a6e337
 	for (; stack; stack = stack->prev) {
a6e337
-		int i;
a6e337
-		for (i = stack->num_matches - 1; i >= 0; i--) {
a6e337
-			const struct match_attr *ma = stack->attrs[i];
a6e337
+		unsigned i;
a6e337
+		for (i = stack->num_matches; i > 0; i--) {
a6e337
+			const struct match_attr *ma = stack->attrs[i - 1];
a6e337
 			if (ma->is_macro) {
a6e337
 				int n = ma->u.attr->attr_nr;
a6e337
 				if (!all_attrs[n].macro) {
a6e337
@@ -1126,7 +1149,7 @@
a6e337
 	collect_some_attrs(istate, path, check);
a6e337
 
a6e337
 	for (i = 0; i < check->nr; i++) {
a6e337
-		size_t n = check->items[i].attr->attr_nr;
a6e337
+		unsigned int n = check->items[i].attr->attr_nr;
a6e337
 		const char *value = check->all_attrs[n].value;
a6e337
 		if (value == ATTR__UNKNOWN)
a6e337
 			value = ATTR__UNSET;
a6e337
diff -ur a/attr.h b/attr.h
a6e337
--- a/attr.h	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/attr.h	2023-01-27 13:43:27.957577861 +0100
a6e337
@@ -107,6 +107,18 @@
a6e337
  * - Free the `attr_check` struct by calling `attr_check_free()`.
a6e337
  */
a6e337
 
a6e337
+/**
a6e337
+ * The maximum line length for a gitattributes file. If the line exceeds this
a6e337
+ * length we will ignore it.
a6e337
+ */
a6e337
+#define ATTR_MAX_LINE_LENGTH 2048
a6e337
+
a6e337
+ /**
a6e337
+  * The maximum size of the gitattributes file. If the file exceeds this size we
a6e337
+  * will ignore it.
a6e337
+  */
a6e337
+#define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024)
a6e337
+
a6e337
 struct index_state;
a6e337
 
a6e337
 /**
a6e337
diff -ur a/column.c b/column.c
a6e337
--- a/column.c	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/column.c	2023-01-27 13:41:32.875854649 +0100
a6e337
@@ -23,7 +23,7 @@
a6e337
 /* return length of 's' in letters, ANSI escapes stripped */
a6e337
 static int item_length(const char *s)
a6e337
 {
a6e337
-	return utf8_strnwidth(s, -1, 1);
a6e337
+	return utf8_strnwidth(s, strlen(s), 1);
a6e337
 }
a6e337
 
a6e337
 /*
a6e337
diff -ur a/git-compat-util.h b/git-compat-util.h
a6e337
--- a/git-compat-util.h	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/git-compat-util.h	2023-01-27 13:21:31.543397249 +0100
a6e337
@@ -837,6 +837,14 @@
a6e337
 	return a - b;
a6e337
 }
a6e337
 
a6e337
+static inline int cast_size_t_to_int(size_t a)
a6e337
+{
a6e337
+	if (a > INT_MAX)
a6e337
+		die("number too large to represent as int on this platform: %"PRIuMAX,
a6e337
+		    (uintmax_t)a);
a6e337
+	return (int)a;
a6e337
+}
a6e337
+
a6e337
 #ifdef HAVE_ALLOCA_H
a6e337
 # include <alloca.h>
a6e337
 # define xalloca(size)      (alloca(size))
a6e337
diff -ur a/pretty.c b/pretty.c
a6e337
--- a/pretty.c	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/pretty.c	2023-01-30 13:06:39.520835531 +0100
a6e337
@@ -13,6 +13,13 @@
a6e337
 #include "gpg-interface.h"
a6e337
 #include "trailer.h"
a6e337
 
a6e337
+/*
a6e337
+ * The limit for formatting directives, which enable the caller to append
a6e337
+ * arbitrarily many bytes to the formatted buffer. This includes padding
a6e337
+ * and wrapping formatters.
a6e337
+ */
a6e337
+#define FORMATTING_LIMIT (16 * 1024)
a6e337
+
a6e337
 static char *user_format;
a6e337
 static struct cmt_fmt_map {
a6e337
 	const char *name;
a6e337
@@ -914,7 +921,9 @@
a6e337
 	if (pos)
a6e337
 		strbuf_add(&tmp, sb->buf, pos);
a6e337
 	strbuf_add_wrapped_text(&tmp, sb->buf + pos,
a6e337
-				(int) indent1, (int) indent2, (int) width);
a6e337
+				cast_size_t_to_int(indent1),
a6e337
+				cast_size_t_to_int(indent2),
a6e337
+				cast_size_t_to_int(width));
a6e337
 	strbuf_swap(&tmp, sb);
a6e337
 	strbuf_release(&tmp);
a6e337
 }
a6e337
@@ -1040,9 +1049,18 @@
a6e337
 		const char *end = start + strcspn(start, ",)");
a6e337
 		char *next;
a6e337
 		int width;
a6e337
-		if (!end || end == start)
a6e337
+		if (!*end || end == start)
a6e337
 			return 0;
a6e337
 		width = strtol(start, &next, 10);
a6e337
+
a6e337
+		/*
a6e337
+		 * We need to limit the amount of padding, or otherwise this
a6e337
+		 * would allow the user to pad the buffer by arbitrarily many
a6e337
+		 * bytes and thus cause resource exhaustion.
a6e337
+		 */
a6e337
+		if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT)
a6e337
+			return 0;
a6e337
+
a6e337
 		if (next == start || width == 0)
a6e337
 			return 0;
a6e337
 		if (width < 0) {
a6e337
@@ -1202,6 +1220,16 @@
a6e337
 				if (*next != ')')
a6e337
 					return 0;
a6e337
 			}
a6e337
+
a6e337
+			/*
a6e337
+			 * We need to limit the format here as it allows the
a6e337
+			 * user to prepend arbitrarily many bytes to the buffer
a6e337
+			 * when rewrapping.
a6e337
+			 */
a6e337
+			if (width > FORMATTING_LIMIT ||
a6e337
+			    indent1 > FORMATTING_LIMIT ||
a6e337
+			    indent2 > FORMATTING_LIMIT)
a6e337
+				return 0;
a6e337
 			rewrap_message_tail(sb, c, width, indent1, indent2);
a6e337
 			return end - placeholder + 1;
a6e337
 		} else
a6e337
@@ -1471,19 +1499,21 @@
a6e337
 				    struct format_commit_context *c)
a6e337
 {
a6e337
 	struct strbuf local_sb = STRBUF_INIT;
a6e337
-	int total_consumed = 0, len, padding = c->padding;
a6e337
+	size_t total_consumed = 0;
a6e337
+	int len, padding = c->padding;
a6e337
+
a6e337
 	if (padding < 0) {
a6e337
 		const char *start = strrchr(sb->buf, '\n');
a6e337
 		int occupied;
a6e337
 		if (!start)
a6e337
 			start = sb->buf;
a6e337
-		occupied = utf8_strnwidth(start, -1, 1);
a6e337
+		occupied = utf8_strnwidth(start, strlen(start), 1);
a6e337
 		occupied += c->pretty_ctx->graph_width;
a6e337
 		padding = (-padding) - occupied;
a6e337
 	}
a6e337
 	while (1) {
a6e337
 		int modifier = *placeholder == 'C';
a6e337
-		int consumed = format_commit_one(&local_sb, placeholder, c);
a6e337
+		size_t consumed = format_commit_one(&local_sb, placeholder, c);
a6e337
 		total_consumed += consumed;
a6e337
 
a6e337
 		if (!modifier)
a6e337
@@ -1495,7 +1525,7 @@
a6e337
 		placeholder++;
a6e337
 		total_consumed++;
a6e337
 	}
a6e337
-	len = utf8_strnwidth(local_sb.buf, -1, 1);
a6e337
+	len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);
a6e337
 
a6e337
 	if (c->flush_type == flush_left_and_steal) {
a6e337
 		const char *ch = sb->buf + sb->len - 1;
a6e337
@@ -1510,7 +1540,7 @@
a6e337
 			if (*ch != 'm')
a6e337
 				break;
a6e337
 			p = ch - 1;
a6e337
-			while (ch - p < 10 && *p != '\033')
a6e337
+			while (p > sb->buf && ch - p < 10 && *p != '\033')
a6e337
 				p--;
a6e337
 			if (*p != '\033' ||
a6e337
 			    ch + 1 - p != display_mode_esc_sequence_len(p))
a6e337
@@ -1549,7 +1579,7 @@
a6e337
 		}
a6e337
 		strbuf_addbuf(sb, &local_sb);
a6e337
 	} else {
a6e337
-		int sb_len = sb->len, offset = 0;
a6e337
+		size_t sb_len = sb->len, offset = 0;
a6e337
 		if (c->flush_type == flush_left)
a6e337
 			offset = padding - len;
a6e337
 		else if (c->flush_type == flush_both)
a6e337
@@ -1572,8 +1602,7 @@
a6e337
 				 const char *placeholder,
a6e337
 				 void *context)
a6e337
 {
a6e337
-	int consumed;
a6e337
-	size_t orig_len;
a6e337
+	size_t consumed, orig_len;
a6e337
 	enum {
a6e337
 		NO_MAGIC,
a6e337
 		ADD_LF_BEFORE_NON_EMPTY,
a6e337
@@ -1594,9 +1623,22 @@
a6e337
 	default:
a6e337
 		break;
a6e337
 	}
a6e337
-	if (magic != NO_MAGIC)
a6e337
+	if (magic != NO_MAGIC) {
a6e337
 		placeholder++;
a6e337
 
a6e337
+		switch (placeholder[0]) {
a6e337
+		case 'w':
a6e337
+			/*
a6e337
+			 * `%+w()` cannot ever expand to a non-empty string,
a6e337
+			 * and it potentially changes the layout of preceding
a6e337
+			 * contents. We're thus not able to handle the magic in
a6e337
+			 * this combination and refuse the pattern.
a6e337
+			 */
a6e337
+			return 0;
a6e337
+		};
a6e337
+
a6e337
+	}
a6e337
+
a6e337
 	orig_len = sb->len;
a6e337
 	if (((struct format_commit_context *)context)->flush_type != no_flush)
a6e337
 		consumed = format_and_pad_commit(sb, placeholder, context);
a6e337
diff -ur a/t/t0003-attributes.sh b/t/t0003-attributes.sh
a6e337
--- a/t/t0003-attributes.sh	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/t/t0003-attributes.sh	2023-01-27 13:42:42.966294716 +0100
a6e337
@@ -339,4 +339,63 @@
a6e337
 	test_cmp expect actual
a6e337
 '
a6e337
 
a6e337
+test_expect_success 'large attributes line ignored in tree' '
a6e337
+	test_when_finished "rm .gitattributes" &&
a6e337
+	printf "path %02043d" 1 >.gitattributes &&
a6e337
+	git check-attr --all path >actual 2>err &&
a6e337
+	echo "warning: ignoring overly long attributes line 1" >expect &&
a6e337
+	test_cmp expect err &&
a6e337
+	test_must_be_empty actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'large attributes line ignores trailing content in tree' '
a6e337
+	test_when_finished "rm .gitattributes" &&
a6e337
+	# older versions of Git broke lines at 2048 bytes; the 2045 bytes
a6e337
+	# of 0-padding here is accounting for the three bytes of "a 1", which
a6e337
+	# would knock "trailing" to the "next" line, where it would be
a6e337
+	# erroneously parsed.
a6e337
+	printf "a %02045dtrailing attribute\n" 1 >.gitattributes &&
a6e337
+	git check-attr --all trailing >actual 2>err &&
a6e337
+	echo "warning: ignoring overly long attributes line 1" >expect &&
a6e337
+	test_cmp expect err &&
a6e337
+	test_must_be_empty actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE 'large attributes file ignored in tree' '
a6e337
+	test_when_finished "rm .gitattributes" &&
a6e337
+	dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null &&
a6e337
+	git check-attr --all path >/dev/null 2>err &&
a6e337
+	echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect &&
a6e337
+	test_cmp expect err
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'large attributes line ignored in index' '
a6e337
+	test_when_finished "git update-index --remove .gitattributes" &&
a6e337
+	blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) &&
a6e337
+	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
a6e337
+	git check-attr --cached --all path >actual 2>err &&
a6e337
+	echo "warning: ignoring overly long attributes line 1" >expect &&
a6e337
+	test_cmp expect err &&
a6e337
+	test_must_be_empty actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'large attributes line ignores trailing content in index' '
a6e337
+	test_when_finished "git update-index --remove .gitattributes" &&
a6e337
+	blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) &&
a6e337
+	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
a6e337
+	git check-attr --cached --all trailing >actual 2>err &&
a6e337
+	echo "warning: ignoring overly long attributes line 1" >expect &&
a6e337
+	test_cmp expect err &&
a6e337
+	test_must_be_empty actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE 'large attributes file ignored in index' '
a6e337
+	test_when_finished "git update-index --remove .gitattributes" &&
a6e337
+	blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) &&
a6e337
+	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
a6e337
+	git check-attr --cached --all path >/dev/null 2>err &&
a6e337
+	echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect &&
a6e337
+	test_cmp expect err
a6e337
+'
a6e337
+
a6e337
 test_done
a6e337
diff -ur a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh
a6e337
--- a/t/t4205-log-pretty-formats.sh	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/t/t4205-log-pretty-formats.sh	2023-01-27 13:31:02.772847108 +0100
a6e337
@@ -867,4 +867,80 @@
a6e337
 	test_cmp expect actual
a6e337
 '
a6e337
 
a6e337
+test_expect_success 'log --pretty with space stealing' '
a6e337
+	printf mm0 >expect &&
a6e337
+	git log -1 --pretty="format:mm%>>|(1)%x30" >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'log --pretty with invalid padding format' '
a6e337
+	printf "%s%%<(20" "$(git rev-parse HEAD)" >expect &&
a6e337
+	git log -1 --pretty="format:%H%<(20" >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'log --pretty with magical wrapping directives' '
a6e337
+	commit_id=$(git commit-tree HEAD^{tree} -m "describe me") &&
a6e337
+	git tag describe-me $commit_id &&
a6e337
+	printf "\n(tag:\ndescribe-me)%%+w(2)" >expect &&
a6e337
+	git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' '
a6e337
+	printf "%%w(2147483649,1,1)0" >expect &&
a6e337
+	git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual &&
a6e337
+	test_cmp expect actual &&
a6e337
+	printf "%%w(1,2147483649,1)0" >expect &&
a6e337
+	git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual &&
a6e337
+	test_cmp expect actual &&
a6e337
+	printf "%%w(1,1,2147483649)0" >expect &&
a6e337
+	git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' '
a6e337
+	printf "%%<(2147483649)0" >expect &&
a6e337
+	git log -1 --pretty="format:%<(2147483649)%x30" >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'log --pretty with padding and preceding control chars' '
a6e337
+	printf "\20\20   0" >expect &&
a6e337
+	git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success 'log --pretty truncation with control chars' '
a6e337
+	test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars &&
a6e337
+	printf "\20\20\20\20x.." >expect &&
a6e337
+	git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
a6e337
+	# We only assert that this command does not crash. This needs to be
a6e337
+	# executed with the address sanitizer to demonstrate failure.
a6e337
+	git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' '
a6e337
+	test-tool genzeros 2147483649 | tr "\000" "1" >expect &&
a6e337
+	huge_commit=$(git commit-tree -F expect HEAD^{tree})
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
a6e337
+	git log -1 --format="%B%<(1)%x30" $huge_commit >actual &&
a6e337
+	echo 0 >>expect &&
a6e337
+	test_cmp expect actual
a6e337
+'
a6e337
+
a6e337
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' '
a6e337
+	test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error &&
a6e337
+	cat >expect <<-EOF &&
a6e337
+	fatal: number too large to represent as int on this platform: 2147483649
a6e337
+	EOF
a6e337
+	test_cmp expect error
a6e337
+'
a6e337
+
a6e337
 test_done
a6e337
diff -ur a/t/test-lib.sh b/t/test-lib.sh
a6e337
--- a/t/test-lib.sh	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/t/test-lib.sh	2023-01-27 13:31:39.388068883 +0100
a6e337
@@ -1676,6 +1676,10 @@
a6e337
 	sed -ne "s/^$1: //p"
a6e337
 }
a6e337
 
a6e337
+test_lazy_prereq SIZE_T_IS_64BIT '
a6e337
+	test 8 -eq "$(build_option sizeof-size_t)"
a6e337
+'
a6e337
+
a6e337
 test_lazy_prereq LONG_IS_64BIT '
a6e337
 	test 8 -le "$(build_option sizeof-long)"
a6e337
 '
a6e337
diff -ur a/utf8.c b/utf8.c
a6e337
--- a/utf8.c	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/utf8.c	2023-01-27 13:39:03.666917950 +0100
a6e337
@@ -206,26 +206,34 @@
a6e337
  * string, assuming that the string is utf8.  Returns strlen() instead
a6e337
  * if the string does not look like a valid utf8 string.
a6e337
  */
a6e337
-int utf8_strnwidth(const char *string, int len, int skip_ansi)
a6e337
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi)
a6e337
 {
a6e337
-	int width = 0;
a6e337
 	const char *orig = string;
a6e337
+	size_t width = 0;
a6e337
 
a6e337
-	if (len == -1)
a6e337
-		len = strlen(string);
a6e337
 	while (string && string < orig + len) {
a6e337
-		int skip;
a6e337
+		int glyph_width;
a6e337
+		size_t skip;
a6e337
+
a6e337
 		while (skip_ansi &&
a6e337
 		       (skip = display_mode_esc_sequence_len(string)) != 0)
a6e337
 			string += skip;
a6e337
-		width += utf8_width(&string, NULL);
a6e337
+		
a6e337
+		glyph_width = utf8_width(&string, NULL);
a6e337
+		if (glyph_width > 0)
a6e337
+			width += glyph_width;
a6e337
 	}
a6e337
-	return string ? width : len;
a6e337
+
a6e337
+	/*
a6e337
+	 * TODO: fix the interface of this function and `utf8_strwidth()` to
a6e337
+	 * return `size_t` instead of `int`.
a6e337
+	 */
a6e337
+	return cast_size_t_to_int(string ? width : len);
a6e337
 }
a6e337
 
a6e337
 int utf8_strwidth(const char *string)
a6e337
 {
a6e337
-	return utf8_strnwidth(string, -1, 0);
a6e337
+	return utf8_strnwidth(string, strlen(string), 0);
a6e337
 }
a6e337
 
a6e337
 int is_utf8(const char *text)
a6e337
@@ -357,51 +365,51 @@
a6e337
 void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
a6e337
 			 const char *subst)
a6e337
 {
a6e337
-	struct strbuf sb_dst = STRBUF_INIT;
a6e337
-	char *src = sb_src->buf;
a6e337
-	char *end = src + sb_src->len;
a6e337
-	char *dst;
a6e337
-	int w = 0, subst_len = 0;
a6e337
-
a6e337
-	if (subst)
a6e337
-		subst_len = strlen(subst);
a6e337
-	strbuf_grow(&sb_dst, sb_src->len + subst_len);
a6e337
-	dst = sb_dst.buf;
a6e337
+	const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len;
a6e337
+	struct strbuf dst;
a6e337
+	int w = 0;
a6e337
+
a6e337
+	strbuf_init(&dst, sb_src->len);
a6e337
 
a6e337
 	while (src < end) {
a6e337
-		char *old;
a6e337
+		const char *old;
a6e337
+		int glyph_width;
a6e337
 		size_t n;
a6e337
 
a6e337
 		while ((n = display_mode_esc_sequence_len(src))) {
a6e337
-			memcpy(dst, src, n);
a6e337
+			strbuf_add(&dst, src, n);
a6e337
 			src += n;
a6e337
-			dst += n;
a6e337
 		}
a6e337
 
a6e337
 		if (src >= end)
a6e337
 			break;
a6e337
 
a6e337
 		old = src;
a6e337
-		n = utf8_width((const char**)&src, NULL);
a6e337
-		if (!src) 	/* broken utf-8, do nothing */
a6e337
+		glyph_width = utf8_width((const char**)&src, NULL);
a6e337
+		if (!src) /* broken utf-8, do nothing */
a6e337
 			goto out;
a6e337
-		if (n && w >= pos && w < pos + width) {
a6e337
+
a6e337
+		/*
a6e337
+		 * In case we see a control character we copy it into the
a6e337
+		 * buffer, but don't add it to the width.
a6e337
+		 */
a6e337
+		if (glyph_width < 0)
a6e337
+			glyph_width = 0;
a6e337
+
a6e337
+		if (glyph_width && w >= pos && w < pos + width) {
a6e337
 			if (subst) {
a6e337
-				memcpy(dst, subst, subst_len);
a6e337
-				dst += subst_len;
a6e337
+				strbuf_addstr(&dst, subst);
a6e337
 				subst = NULL;
a6e337
 			}
a6e337
-			w += n;
a6e337
-			continue;
a6e337
+		} else {
a6e337
+			strbuf_add(&dst, old, src - old);
a6e337
 		}
a6e337
-		memcpy(dst, old, src - old);
a6e337
-		dst += src - old;
a6e337
-		w += n;
a6e337
+
a6e337
+		w += glyph_width;
a6e337
 	}
a6e337
-	strbuf_setlen(&sb_dst, dst - sb_dst.buf);
a6e337
-	strbuf_swap(sb_src, &sb_dst);
a6e337
+	strbuf_swap(sb_src, &dst);
a6e337
 out:
a6e337
-	strbuf_release(&sb_dst);
a6e337
+	strbuf_release(&dst);
a6e337
 }
a6e337
 
a6e337
 /*
a6e337
@@ -791,7 +799,7 @@
a6e337
 void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width,
a6e337
 		       const char *s)
a6e337
 {
a6e337
-	int slen = strlen(s);
a6e337
+	size_t slen = strlen(s);
a6e337
 	int display_len = utf8_strnwidth(s, slen, 0);
a6e337
 	int utf8_compensation = slen - display_len;
a6e337
 
a6e337
diff -ur a/utf8.h b/utf8.h
a6e337
--- a/utf8.h	2020-06-01 17:49:27.000000000 +0200
a6e337
+++ b/utf8.h	2023-01-27 13:32:14.857283715 +0100
a6e337
@@ -7,7 +7,7 @@
a6e337
 
a6e337
 size_t display_mode_esc_sequence_len(const char *s);
a6e337
 int utf8_width(const char **start, size_t *remainder_p);
a6e337
-int utf8_strnwidth(const char *string, int len, int skip_ansi);
a6e337
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi);
a6e337
 int utf8_strwidth(const char *string);
a6e337
 int is_utf8(const char *text);
a6e337
 int is_encoding_utf8(const char *name);