Blob Blame History Raw
diff -ur b/attr.c c/attr.c
--- b/attr.c	2013-06-10 22:01:55.000000000 +0200
+++ c/attr.c	2023-02-21 12:25:20.735892607 +0100
@@ -12,6 +12,7 @@
 #include "exec_cmd.h"
 #include "attr.h"
 #include "dir.h"
+#include "utf8.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -55,26 +56,36 @@
 	return val;
 }
 
-static int invalid_attr_name(const char *name, int namelen)
+static int attr_name_valid(const char *name, size_t namelen)
 {
 	/*
 	 * Attribute name cannot begin with '-' and must consist of
 	 * characters from [-A-Za-z0-9_.].
 	 */
 	if (namelen <= 0 || *name == '-')
-		return -1;
+		return 0;
 	while (namelen--) {
 		char ch = *name++;
 		if (! (ch == '-' || ch == '.' || ch == '_' ||
 		       ('0' <= ch && ch <= '9') ||
 		       ('a' <= ch && ch <= 'z') ||
 		       ('A' <= ch && ch <= 'Z')) )
-			return -1;
+			return 0;
 	}
-	return 0;
+	return 1;
 }
 
-static struct git_attr *git_attr_internal(const char *name, int len)
+static void report_invalid_attr(const char *name, size_t len,
+				const char *src, int lineno)
+{
+	struct strbuf msg = STRBUF_INIT;
+	const int shown = (int) len; /* "%.*s" takes an int precision */
+	strbuf_addf(&msg, _("%.*s is not a valid attribute name"), shown, name);
+	fprintf(stderr, "%s: %s:%d\n", msg.buf, src, lineno);
+	strbuf_release(&msg);
+}
+
+static struct git_attr *git_attr_internal(const char *name, size_t len)
 {
 	unsigned hval = hash_name(name, len);
 	unsigned pos = hval % HASHSIZE;
@@ -86,7 +97,7 @@
 			return a;
 	}
 
-	if (invalid_attr_name(name, len))
+	if (!attr_name_valid(name, len))
 		return NULL;
 
 	a = xmalloc(sizeof(*a) + len + 1);
@@ -142,7 +153,7 @@
 		struct git_attr *attr;
 	} u;
 	char is_macro;
-	unsigned num_attr;
+	size_t num_attr;
 	struct attr_state state[FLEX_ARRAY];
 };
 
@@ -159,7 +170,7 @@
 			      struct attr_state *e)
 {
 	const char *ep, *equals;
-	int len;
+	size_t len;
 
 	ep = cp + strcspn(cp, blank);
 	equals = strchr(cp, '=');
@@ -174,10 +185,8 @@
 			cp++;
 			len--;
 		}
-		if (invalid_attr_name(cp, len)) {
-			fprintf(stderr,
-				"%.*s is not a valid attribute name: %s:%d\n",
-				len, cp, src, lineno);
+		if (!attr_name_valid(cp, len)) {
+			report_invalid_attr(cp, len, src, lineno);
 			return NULL;
 		}
 	} else {
@@ -199,8 +208,7 @@
 static struct match_attr *parse_attr_line(const char *line, const char *src,
 					  int lineno, int macro_ok)
 {
-	int namelen;
-	int num_attr, i;
+	size_t namelen, num_attr, i;
 	const char *cp, *name, *states;
 	struct match_attr *res = NULL;
 	int is_macro;
@@ -209,6 +217,12 @@
 	if (!*cp || *cp == '#')
 		return NULL;
 	name = cp;
+
+	if (strlen(line) >= ATTR_MAX_LINE_LENGTH) {
+		warning(_("ignoring overly long attributes line %d"), lineno);
+		return NULL;
+	}
+
 	namelen = strcspn(name, blank);
 	if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen &&
 	    !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) {
@@ -221,10 +235,8 @@
 		name += strlen(ATTRIBUTE_MACRO_PREFIX);
 		name += strspn(name, blank);
 		namelen = strcspn(name, blank);
-		if (invalid_attr_name(name, namelen)) {
-			fprintf(stderr,
-				"%.*s is not a valid attribute name: %s:%d\n",
-				namelen, name, src, lineno);
+		if (!attr_name_valid(name, namelen)) {
+			report_invalid_attr(name, namelen, src, lineno);
 			return NULL;
 		}
 	}
@@ -241,10 +253,9 @@
 			return NULL;
 	}
 
-	res = xcalloc(1,
-		      sizeof(*res) +
-		      sizeof(struct attr_state) * num_attr +
-		      (is_macro ? 0 : namelen + 1));
+	res = xcalloc(1, st_add3(sizeof(*res),
+				 st_mult(sizeof(struct attr_state), num_attr),
+				 is_macro ? 0 : namelen + 1));
 	if (is_macro)
 		res->u.attr = git_attr_internal(name, namelen);
 	else {
@@ -301,11 +312,11 @@
 
 static void free_attr_elem(struct attr_stack *e)
 {
-	int i;
+	unsigned i;
 	free(e->origin);
 	for (i = 0; i < e->num_matches; i++) {
 		struct match_attr *a = e->attrs[i];
-		int j;
+		size_t j;
 		for (j = 0; j < a->num_attr; j++) {
 			const char *setto = a->state[j].setto;
 			if (setto == ATTR__TRUE ||
@@ -364,20 +375,39 @@
 
 static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
 {
+	struct strbuf buf = STRBUF_INIT;
 	FILE *fp = fopen(path, "r");
 	struct attr_stack *res;
-	char buf[2048];
 	int lineno = 0;
+	int fd;
+	struct stat st;
 
 	if (!fp) {
 		if (errno != ENOENT && errno != ENOTDIR)
 			warn_on_inaccessible(path);
 		return NULL;
 	}
+
+	fd = fileno(fp);
+	if (fstat(fd, &st)) {
+		warning_errno(_("cannot fstat gitattributes file '%s'"), path);
+		fclose(fp);
+		return NULL;
+	}
+	if (st.st_size >= ATTR_MAX_FILE_SIZE) {
+		warning(_("ignoring overly large gitattributes file '%s'"), path);
+		fclose(fp);
+		return NULL;
+	}
+
 	res = xcalloc(1, sizeof(*res));
-	while (fgets(buf, sizeof(buf), fp))
-		handle_attr_line(res, buf, path, ++lineno, macro_ok);
+	while (strbuf_getline(&buf, fp, '\n') != EOF) {
+		if (!lineno && starts_with(buf.buf, utf8_bom))
+			strbuf_remove(&buf, 0, strlen(utf8_bom));
+		handle_attr_line(res, buf.buf, path, ++lineno, macro_ok);
+	}
 	fclose(fp);
+	strbuf_release(&buf);
 	return res;
 }
 
@@ -386,11 +416,18 @@
 	struct attr_stack *res;
 	char *buf, *sp;
 	int lineno = 0;
+	unsigned long size;
 
-	buf = read_blob_data_from_index(use_index ? use_index : &the_index, path, NULL);
+	buf = read_blob_data_from_index(use_index ? use_index : &the_index, path, &size);
 	if (!buf)
 		return NULL;
 
+	if (size >= ATTR_MAX_FILE_SIZE) {
+		warning(_("ignoring overly large gitattributes blob '%s'"), path);
+		free(buf);
+		return NULL;
+	}
+
 	res = xcalloc(1, sizeof(*res));
 	for (sp = buf; *sp; ) {
 		char *ep;
@@ -648,15 +685,15 @@
 
 static int macroexpand_one(int attr_nr, int rem);
 
-static int fill_one(const char *what, struct match_attr *a, int rem)
+static int fill_one(const char *what, const struct match_attr *a, int rem)
 {
 	struct git_attr_check *check = check_all_attr;
-	int i;
+	size_t i;
 
-	for (i = a->num_attr - 1; 0 < rem && 0 <= i; i--) {
-		struct git_attr *attr = a->state[i].attr;
+	for (i = a->num_attr; rem > 0 && i > 0; i--) {
+		const struct git_attr *attr = a->state[i - 1].attr;
 		const char **n = &(check[attr->attr_nr].value);
-		const char *v = a->state[i].setto;
+		const char *v = a->state[i - 1].setto;
 
 		if (*n == ATTR__UNKNOWN) {
 			debug_set(what,
@@ -673,11 +710,11 @@
 static int fill(const char *path, int pathlen, int basename_offset,
 		struct attr_stack *stk, int rem)
 {
-	int i;
+	unsigned i;
 	const char *base = stk->origin ? stk->origin : "";
 
-	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
-		struct match_attr *a = stk->attrs[i];
+	for (i = stk->num_matches; 0 < rem && 0 < i; i--) {
+		const struct match_attr *a = stk->attrs[i - 1];
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename_offset,
@@ -691,14 +728,14 @@
 {
 	struct attr_stack *stk;
 	struct match_attr *a = NULL;
-	int i;
+	unsigned i;
 
 	if (check_all_attr[attr_nr].value != ATTR__TRUE)
 		return rem;
 
 	for (stk = attr_stack; !a && stk; stk = stk->prev)
-		for (i = stk->num_matches - 1; !a && 0 <= i; i--) {
-			struct match_attr *ma = stk->attrs[i];
+		for (i = stk->num_matches; !a && i > 0; i--) {
+			struct match_attr *ma = stk->attrs[i - 1];
 			if (!ma->is_macro)
 				continue;
 			if (ma->u.attr->attr_nr == attr_nr)
diff -ur b/attr.h c/attr.h
--- b/attr.h	2013-06-10 22:01:55.000000000 +0200
+++ c/attr.h	2023-02-21 12:25:42.455029765 +0100
@@ -1,6 +1,18 @@
 #ifndef ATTR_H
 #define ATTR_H
 
+/**
+ * The maximum line length for a gitattributes file. If the line exceeds this
+ * length we will ignore it.
+ */
+#define ATTR_MAX_LINE_LENGTH 2048
+
+/**
+ * The maximum size of the gitattributes file. If the file exceeds this size we
+ * will ignore it.
+ */
+#define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024)
+
 /* An attribute is a pointer to this opaque structure */
 struct git_attr;
 
diff -ur b/git-compat-util.h c/git-compat-util.h
--- b/git-compat-util.h	2023-02-21 11:27:58.038145942 +0100
+++ c/git-compat-util.h	2023-02-21 12:27:18.836638388 +0100
@@ -324,7 +324,9 @@
 extern NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2)));
 extern NORETURN void die_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
 extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern int error_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
 extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern void warning_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
 
 /*
  * Let callers be aware of the constant return value; this can help
@@ -524,8 +526,8 @@
 		    (uintmax_t)a, (uintmax_t)b);
 	return a + b;
 }
-#define st_add3(a,b,c)   st_add((a),st_add((b),(c)))
-#define st_add4(a,b,c,d) st_add((a),st_add3((b),(c),(d)))
+#define st_add3(a,b,c)   st_add(st_add((a),(b)),(c))
+#define st_add4(a,b,c,d) st_add(st_add3((a),(b),(c)),(d))
 
 static inline size_t st_mult(size_t a, size_t b)
 {
diff -ur b/t/t0003-attributes.sh c/t/t0003-attributes.sh
--- b/t/t0003-attributes.sh	2013-06-10 22:01:55.000000000 +0200
+++ c/t/t0003-attributes.sh	2023-02-21 12:30:22.614804084 +0100
@@ -245,39 +245,106 @@
 '
 
 test_expect_success 'setup bare' '
-	git clone --bare . bare.git &&
-	cd bare.git
+	git clone --bare . bare.git
 '
 
 test_expect_success 'bare repository: check that .gitattribute is ignored' '
 	(
-		echo "f	test=f"
-		echo "a/i test=a/i"
-	) >.gitattributes &&
-	attr_check f unspecified &&
-	attr_check a/f unspecified &&
-	attr_check a/c/f unspecified &&
-	attr_check a/i unspecified &&
-	attr_check subdir/a/i unspecified
+		cd bare.git &&
+		(
+			echo "f	test=f"
+			echo "a/i test=a/i"
+		) >.gitattributes &&
+		attr_check f unspecified &&
+		attr_check a/f unspecified &&
+		attr_check a/c/f unspecified &&
+		attr_check a/i unspecified &&
+		attr_check subdir/a/i unspecified
+	)
 '
 
 test_expect_success 'bare repository: check that --cached honors index' '
-	GIT_INDEX_FILE=../.git/index \
-	git check-attr --cached --stdin --all <../stdin-all |
-	sort >actual &&
-	test_cmp ../specified-all actual
+	(
+		cd bare.git &&
+		GIT_INDEX_FILE=../.git/index \
+		git check-attr --cached --stdin --all <../stdin-all |
+		sort >actual &&
+		test_cmp ../specified-all actual
+	)
 '
 
 test_expect_success 'bare repository: test info/attributes' '
 	(
-		echo "f	test=f"
-		echo "a/i test=a/i"
-	) >info/attributes &&
-	attr_check f f &&
-	attr_check a/f f &&
-	attr_check a/c/f f &&
-	attr_check a/i a/i &&
-	attr_check subdir/a/i unspecified
+		cd bare.git &&
+		(
+			echo "f	test=f"
+			echo "a/i test=a/i"
+		) >info/attributes &&
+		attr_check f f &&
+		attr_check a/f f &&
+		attr_check a/c/f f &&
+		attr_check a/i a/i &&
+		attr_check subdir/a/i unspecified
+	)
+'
+
+test_expect_success 'large attributes line ignored in tree' '
+	test_when_finished "rm .gitattributes" &&
+	printf "path %02043d" 1 >.gitattributes &&
+	git check-attr --all path >actual 2>err &&
+	echo "warning: ignoring overly long attributes line 1" >expect &&
+	test_cmp expect err &&
+	test_must_be_empty actual
+'
+
+test_expect_success 'large attributes line ignores trailing content in tree' '
+	test_when_finished "rm .gitattributes" &&
+	# older versions of Git broke lines at 2048 bytes; the 2045 bytes
+	# of 0-padding here is accounting for the three bytes of "a 1", which
+	# would knock "trailing" to the "next" line, where it would be
+	# erroneously parsed.
+	printf "a %02045dtrailing attribute\n" 1 >.gitattributes &&
+	git check-attr --all trailing >actual 2>err &&
+	echo "warning: ignoring overly long attributes line 1" >expect &&
+	test_cmp expect err &&
+	test_must_be_empty actual
+'
+
+test_expect_success EXPENSIVE 'large attributes file ignored in tree' '
+	test_when_finished "rm .gitattributes" &&
+	dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null &&
+	git check-attr --all path >/dev/null 2>err &&
+	echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect &&
+	test_cmp expect err
+'
+
+test_expect_success 'large attributes line ignored in index' '
+	test_when_finished "git update-index --remove .gitattributes" &&
+	blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) &&
+	git update-index --add --cacheinfo 100644 $blob .gitattributes &&
+	git check-attr --cached --all path >actual 2>err &&
+	echo "warning: ignoring overly long attributes line 1" >expect &&
+	test_cmp expect err &&
+	test_must_be_empty actual
+'
+
+test_expect_success 'large attributes line ignores trailing content in index' '
+	test_when_finished "git update-index --remove .gitattributes" &&
+	blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) &&
+	git update-index --add --cacheinfo 100644 $blob .gitattributes &&
+	git check-attr --cached --all trailing >actual 2>err &&
+	echo "warning: ignoring overly long attributes line 1" >expect &&
+	test_cmp expect err &&
+	test_must_be_empty actual
+'
+
+test_expect_success EXPENSIVE 'large attributes file ignored in index' '
+	test_when_finished "git update-index --remove .gitattributes" &&
+	blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) &&
+	git update-index --add --cacheinfo 100644 $blob .gitattributes &&
+	git check-attr --cached --all path >/dev/null 2>err &&
+	echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect &&
+	test_cmp expect err
 '
 
 test_done
diff -ur b/t/test-lib-functions.sh c/t/test-lib-functions.sh
--- b/t/test-lib-functions.sh	2013-06-10 22:01:55.000000000 +0200
+++ c/t/test-lib-functions.sh	2023-02-21 12:31:24.357204323 +0100
@@ -609,6 +609,20 @@
 	$GIT_TEST_CMP "$@"
 }
 
+# Check if the file expected to be empty is indeed empty, and barfs
+# otherwise.
+
+test_must_be_empty () {
+	test "$#" -eq 1 || BUG "1 param"
+	test_path_is_file "$1" || return
+	if test -s "$1"
+	then
+		echo "'$1' is not empty, it contains:"
+		cat "$1"
+		return 1
+	fi
+}
+
 # Tests that its two parameters refer to the same revision
 test_cmp_rev () {
 	git rev-parse --verify "$1" >expect.rev &&
diff -ur b/t/test-lib.sh c/t/test-lib.sh
--- b/t/test-lib.sh	2023-02-21 11:52:24.739202530 +0100
+++ c/t/test-lib.sh	2023-02-21 12:31:52.866389106 +0100
@@ -153,6 +153,9 @@
 LF='
 '
 
+# Single quote
+SQ=\'
+
 export _x05 _x40 _z40 LF
 
 # Each test should start with something like this, after copyright notices:
diff -ur b/usage.c c/usage.c
--- b/usage.c	2013-06-10 22:01:55.000000000 +0200
+++ c/usage.c	2023-02-21 12:32:56.807803579 +0100
@@ -104,6 +104,30 @@
 	va_end(params);
 }
 
+static const char *fmt_with_err(char *buf, int n, const char *fmt)
+{
+	char str_error[256], *err;
+	size_t i, j;
+
+	err = strerror(errno);
+	for (i = j = 0; err[i] && j < sizeof(str_error) - 1; ) {
+		if ((str_error[j++] = err[i++]) != '%')
+			continue;
+		if (j < sizeof(str_error) - 1) {
+			str_error[j++] = '%';
+		} else {
+			/* No room to double the '%', so we overwrite it with
+			 * '\0' below */
+			j--;
+			break;
+		}
+	}
+	str_error[j] = 0;
+	/* Truncation is acceptable here */
+	snprintf(buf, n, "%s: %s", fmt, str_error);
+	return buf;
+}
+
 void NORETURN die_errno(const char *fmt, ...)
 {
 	va_list params;
@@ -149,6 +173,16 @@
 	return -1;
 }
 
+void warning_errno(const char *warn, ...)
+{
+	va_list ap;
+	char msg[1024];
+
+	va_start(ap, warn);
+	warn_routine(fmt_with_err(msg, sizeof(msg), warn), ap);
+	va_end(ap);
+}
+
 void warning(const char *warn, ...)
 {
 	va_list params;
diff -ur b/utf8.c c/utf8.c
--- b/utf8.c	2023-02-21 12:00:28.555925285 +0100
+++ c/utf8.c	2023-02-21 12:33:48.863141018 +0100
@@ -639,3 +639,14 @@
 
 	return chrlen;
 }
+
+const char utf8_bom[] = "\357\273\277";
+
+int skip_utf8_bom(char **text, size_t len)
+{
+	const size_t bom_len = strlen(utf8_bom);
+	if (len < bom_len || memcmp(*text, utf8_bom, bom_len))
+		return 0;
+	*text += bom_len;
+	return 1;
+}
diff -ur b/utf8.h c/utf8.h
--- b/utf8.h	2023-02-21 12:00:40.186991497 +0100
+++ c/utf8.h	2023-02-21 12:34:19.536339834 +0100
@@ -12,6 +12,9 @@
 int same_encoding(const char *, const char *);
 int utf8_fprintf(FILE *, const char *, ...);
 
+extern const char utf8_bom[];
+int skip_utf8_bom(char **, size_t);
+
 void strbuf_add_wrapped_text(struct strbuf *buf,
 		const char *text, int indent, int indent2, int width);
 void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,