diff -ur b/attr.c c/attr.c --- b/attr.c 2013-06-10 22:01:55.000000000 +0200 +++ c/attr.c 2023-02-21 12:25:20.735892607 +0100 @@ -12,6 +12,7 @@ #include "exec_cmd.h" #include "attr.h" #include "dir.h" +#include "utf8.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@ -55,26 +56,36 @@ return val; } -static int invalid_attr_name(const char *name, int namelen) +static int attr_name_valid(const char *name, size_t namelen) { /* * Attribute name cannot begin with '-' and must consist of * characters from [-A-Za-z0-9_.]. */ if (namelen <= 0 || *name == '-') - return -1; + return 0; while (namelen--) { char ch = *name++; if (! (ch == '-' || ch == '.' || ch == '_' || ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) ) - return -1; + return 0; } - return 0; + return 1; } -static struct git_attr *git_attr_internal(const char *name, int len) +static void report_invalid_attr(const char *name, size_t len, + const char *src, int lineno) +{ + struct strbuf err = STRBUF_INIT; + strbuf_addf(&err, _("%.*s is not a valid attribute name"), + (int) len, name); + fprintf(stderr, "%s: %s:%d\n", err.buf, src, lineno); + strbuf_release(&err); +} + +static struct git_attr *git_attr_internal(const char *name, size_t len) { unsigned hval = hash_name(name, len); unsigned pos = hval % HASHSIZE; @@ -86,7 +97,7 @@ return a; } - if (invalid_attr_name(name, len)) + if (!attr_name_valid(name, len)) return NULL; a = xmalloc(sizeof(*a) + len + 1); @@ -142,7 +153,7 @@ struct git_attr *attr; } u; char is_macro; - unsigned num_attr; + size_t num_attr; struct attr_state state[FLEX_ARRAY]; }; @@ -159,7 +170,7 @@ struct attr_state *e) { const char *ep, *equals; - int len; + size_t len; ep = cp + strcspn(cp, blank); equals = strchr(cp, '='); @@ -174,10 +185,8 @@ cp++; len--; } - if (invalid_attr_name(cp, len)) { - fprintf(stderr, - "%.*s is not a valid attribute name: %s:%d\n", - len, cp, src, lineno); + if 
(!attr_name_valid(cp, len)) { + report_invalid_attr(cp, len, src, lineno); return NULL; } } else { @@ -199,8 +208,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - int namelen; - int num_attr, i; + size_t namelen, num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; int is_macro; @@ -209,6 +217,12 @@ if (!*cp || *cp == '#') return NULL; name = cp; + + if (strlen(line) >= ATTR_MAX_LINE_LENGTH) { + warning(_("ignoring overly long attributes line %d"), lineno); + return NULL; + } + namelen = strcspn(name, blank); if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen && !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) { @@ -221,10 +235,8 @@ name += strlen(ATTRIBUTE_MACRO_PREFIX); name += strspn(name, blank); namelen = strcspn(name, blank); - if (invalid_attr_name(name, namelen)) { - fprintf(stderr, - "%.*s is not a valid attribute name: %s:%d\n", - namelen, name, src, lineno); + if (!attr_name_valid(name, namelen)) { + report_invalid_attr(name, namelen, src, lineno); return NULL; } } @@ -241,10 +253,9 @@ return NULL; } - res = xcalloc(1, - sizeof(*res) + - sizeof(struct attr_state) * num_attr + - (is_macro ? 0 : namelen + 1)); + res = xcalloc(1, st_add3(sizeof(*res), + st_mult(sizeof(struct attr_state), num_attr), + is_macro ? 
0 : namelen + 1)); if (is_macro) res->u.attr = git_attr_internal(name, namelen); else { @@ -301,11 +312,11 @@ static void free_attr_elem(struct attr_stack *e) { - int i; + unsigned i; free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; - int j; + size_t j; for (j = 0; j < a->num_attr; j++) { const char *setto = a->state[j].setto; if (setto == ATTR__TRUE || @@ -364,20 +375,39 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) { + struct strbuf buf = STRBUF_INIT; FILE *fp = fopen(path, "r"); struct attr_stack *res; - char buf[2048]; int lineno = 0; + int fd; + struct stat st; if (!fp) { if (errno != ENOENT && errno != ENOTDIR) warn_on_inaccessible(path); return NULL; } + + fd = fileno(fp); + if (fstat(fd, &st)) { + warning_errno(_("cannot fstat gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + if (st.st_size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) - handle_attr_line(res, buf, path, ++lineno, macro_ok); + while (strbuf_getline(&buf, fp, '\n') != EOF) { + if (!lineno && starts_with(buf.buf, utf8_bom)) + strbuf_remove(&buf, 0, strlen(utf8_bom)); + handle_attr_line(res, buf.buf, path, ++lineno, macro_ok); + } fclose(fp); + strbuf_release(&buf); return res; } @@ -386,11 +416,18 @@ struct attr_stack *res; char *buf, *sp; int lineno = 0; + unsigned long size; - buf = read_blob_data_from_index(use_index ? use_index : &the_index, path, NULL); + buf = read_blob_data_from_index(use_index ? 
use_index : &the_index, path, &size); if (!buf) return NULL; + if (size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes blob '%s'"), path); + free(buf); + return NULL; + } + res = xcalloc(1, sizeof(*res)); for (sp = buf; *sp; ) { char *ep; @@ -648,15 +685,15 @@ static int macroexpand_one(int attr_nr, int rem); -static int fill_one(const char *what, struct match_attr *a, int rem) +static int fill_one(const char *what, const struct match_attr *a, int rem) { struct git_attr_check *check = check_all_attr; - int i; + size_t i; - for (i = a->num_attr - 1; 0 < rem && 0 <= i; i--) { - struct git_attr *attr = a->state[i].attr; + for (i = a->num_attr; rem > 0 && i > 0; i--) { + const struct git_attr *attr = a->state[i - 1].attr; const char **n = &(check[attr->attr_nr].value); - const char *v = a->state[i].setto; + const char *v = a->state[i - 1].setto; if (*n == ATTR__UNKNOWN) { debug_set(what, @@ -673,11 +710,11 @@ static int fill(const char *path, int pathlen, int basename_offset, struct attr_stack *stk, int rem) { - int i; + unsigned i; const char *base = stk->origin ? 
stk->origin : ""; - for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) { - struct match_attr *a = stk->attrs[i]; + for (i = stk->num_matches; 0 < rem && 0 < i; i--) { + const struct match_attr *a = stk->attrs[i - 1]; if (a->is_macro) continue; if (path_matches(path, pathlen, basename_offset, @@ -691,14 +728,14 @@ { struct attr_stack *stk; struct match_attr *a = NULL; - int i; + unsigned i; if (check_all_attr[attr_nr].value != ATTR__TRUE) return rem; for (stk = attr_stack; !a && stk; stk = stk->prev) - for (i = stk->num_matches - 1; !a && 0 <= i; i--) { - struct match_attr *ma = stk->attrs[i]; + for (i = stk->num_matches; !a && i > 0; i--) { + struct match_attr *ma = stk->attrs[i - 1]; if (!ma->is_macro) continue; if (ma->u.attr->attr_nr == attr_nr) diff -ur b/attr.h c/attr.h --- b/attr.h 2013-06-10 22:01:55.000000000 +0200 +++ c/attr.h 2023-02-21 12:25:42.455029765 +0100 @@ -1,6 +1,18 @@ #ifndef ATTR_H #define ATTR_H +/** + * The maximum line length for a gitattributes file. If the line exceeds this + * length we will ignore it. + */ +#define ATTR_MAX_LINE_LENGTH 2048 + + /** + * The maximum size of the gitattributes file. If the file exceeds this size we + * will ignore it. + */ +#define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024) + /* An attribute is a pointer to this opaque structure */ struct git_attr; diff -ur b/git-compat-util.h c/git-compat-util.h --- b/git-compat-util.h 2023-02-21 11:27:58.038145942 +0100 +++ c/git-compat-util.h 2023-02-21 12:27:18.836638388 +0100 @@ -324,7 +324,9 @@ extern NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2))); extern NORETURN void die_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern int error_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern void warning_errno(const char *err, ...) 
__attribute__((format (printf, 1, 2))); /* * Let callers be aware of the constant return value; this can help @@ -524,8 +526,8 @@ (uintmax_t)a, (uintmax_t)b); return a + b; } -#define st_add3(a,b,c) st_add((a),st_add((b),(c))) -#define st_add4(a,b,c,d) st_add((a),st_add3((b),(c),(d))) +#define st_add3(a,b,c) st_add(st_add((a),(b)),(c)) +#define st_add4(a,b,c,d) st_add(st_add3((a),(b),(c)),(d)) static inline size_t st_mult(size_t a, size_t b) { diff -ur b/t/t0003-attributes.sh c/t/t0003-attributes.sh --- b/t/t0003-attributes.sh 2013-06-10 22:01:55.000000000 +0200 +++ c/t/t0003-attributes.sh 2023-02-21 12:30:22.614804084 +0100 @@ -245,39 +245,106 @@ ' test_expect_success 'setup bare' ' - git clone --bare . bare.git && - cd bare.git + git clone --bare . bare.git ' test_expect_success 'bare repository: check that .gitattribute is ignored' ' ( - echo "f test=f" - echo "a/i test=a/i" - ) >.gitattributes && - attr_check f unspecified && - attr_check a/f unspecified && - attr_check a/c/f unspecified && - attr_check a/i unspecified && - attr_check subdir/a/i unspecified + cd bare.git && + ( + echo "f test=f" + echo "a/i test=a/i" + ) >.gitattributes && + attr_check f unspecified && + attr_check a/f unspecified && + attr_check a/c/f unspecified && + attr_check a/i unspecified && + attr_check subdir/a/i unspecified + ) ' test_expect_success 'bare repository: check that --cached honors index' ' - GIT_INDEX_FILE=../.git/index \ - git check-attr --cached --stdin --all <../stdin-all | - sort >actual && - test_cmp ../specified-all actual + ( + cd bare.git && + GIT_INDEX_FILE=../.git/index \ + git check-attr --cached --stdin --all <../stdin-all | + sort >actual && + test_cmp ../specified-all actual + ) ' test_expect_success 'bare repository: test info/attributes' ' ( - echo "f test=f" - echo "a/i test=a/i" - ) >info/attributes && - attr_check f f && - attr_check a/f f && - attr_check a/c/f f && - attr_check a/i a/i && - attr_check subdir/a/i unspecified + cd bare.git && + ( + echo 
"f test=f" + echo "a/i test=a/i" + ) >info/attributes && + attr_check f f && + attr_check a/f f && + attr_check a/c/f f && + attr_check a/i a/i && + attr_check subdir/a/i unspecified + ) +' + +test_expect_success 'large attributes line ignored in tree' ' + test_when_finished "rm .gitattributes" && + printf "path %02043d" 1 >.gitattributes && + git check-attr --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignores trailing content in tree' ' + test_when_finished "rm .gitattributes" && + # older versions of Git broke lines at 2048 bytes; the 2045 bytes + # of 0-padding here is accounting for the three bytes of "a 1", which + # would knock "trailing" to the "next" line, where it would be + # erroneously parsed. + printf "a %02045dtrailing attribute\n" 1 >.gitattributes && + git check-attr --all trailing >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success EXPENSIVE 'large attributes file ignored in tree' ' + test_when_finished "rm .gitattributes" && + dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null && + git check-attr --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err +' + +test_expect_success 'large attributes line ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644 $blob .gitattributes && + git check-attr --cached --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignores trailing content in index' ' + 
test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644 $blob .gitattributes && + git check-attr --cached --all trailing >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success EXPENSIVE 'large attributes file ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644 $blob .gitattributes && + git check-attr --cached --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err ' test_done diff -ur b/t/test-lib-functions.sh c/t/test-lib-functions.sh --- b/t/test-lib-functions.sh 2013-06-10 22:01:55.000000000 +0200 +++ c/t/test-lib-functions.sh 2023-02-21 12:31:24.357204323 +0100 @@ -609,6 +609,20 @@ $GIT_TEST_CMP "$@" } +# Check if the file expected to be empty is indeed empty, and barfs +# otherwise. 
+ +test_must_be_empty () { + test "$#" -ne 1 && BUG "1 param" + test_path_is_file "$1" && + if test -s "$1" + then + echo "'$1' is not empty, it contains:" + cat "$1" + return 1 + fi +} + # Tests that its two parameters refer to the same revision test_cmp_rev () { git rev-parse --verify "$1" >expect.rev && diff -ur b/t/test-lib.sh c/t/test-lib.sh --- b/t/test-lib.sh 2023-02-21 11:52:24.739202530 +0100 +++ c/t/test-lib.sh 2023-02-21 12:31:52.866389106 +0100 @@ -153,6 +153,9 @@ LF=' ' +# Single quote +SQ=\' + export _x05 _x40 _z40 LF # Each test should start with something like this, after copyright notices: diff -ur b/usage.c c/usage.c --- b/usage.c 2013-06-10 22:01:55.000000000 +0200 +++ c/usage.c 2023-02-21 12:32:56.807803579 +0100 @@ -104,6 +104,30 @@ va_end(params); } +static const char *fmt_with_err(char *buf, int n, const char *fmt) +{ + char str_error[256], *err; + int i, j; + + err = strerror(errno); + for (i = j = 0; err[i] && j < sizeof(str_error) - 1; ) { + if ((str_error[j++] = err[i++]) != '%') + continue; + if (j < sizeof(str_error) - 1) { + str_error[j++] = '%'; + } else { + /* No room to double the '%', so we overwrite it with + * '\0' below */ + j--; + break; + } + } + str_error[j] = 0; + /* Truncation is acceptable here */ + snprintf(buf, n, "%s: %s", fmt, str_error); + return buf; +} + void NORETURN die_errno(const char *fmt, ...) { va_list params; @@ -149,6 +173,16 @@ return -1; } +void warning_errno(const char *warn, ...) +{ + char buf[1024]; + va_list params; + + va_start(params, warn); + warn_routine(fmt_with_err(buf, sizeof(buf), warn), params); + va_end(params); +} + void warning(const char *warn, ...) 
{ va_list params; diff -ur b/utf8.c c/utf8.c --- b/utf8.c 2023-02-21 12:00:28.555925285 +0100 +++ c/utf8.c 2023-02-21 12:33:48.863141018 +0100 @@ -639,3 +639,14 @@ return chrlen; } + +const char utf8_bom[] = "\357\273\277"; + +int skip_utf8_bom(char **text, size_t len) +{ + if (len < strlen(utf8_bom) || + memcmp(*text, utf8_bom, strlen(utf8_bom))) + return 0; + *text += strlen(utf8_bom); + return 1; +} diff -ur b/utf8.h c/utf8.h --- b/utf8.h 2023-02-21 12:00:40.186991497 +0100 +++ c/utf8.h 2023-02-21 12:34:19.536339834 +0100 @@ -12,6 +12,9 @@ int same_encoding(const char *, const char *); int utf8_fprintf(FILE *, const char *, ...); +extern const char utf8_bom[]; +int skip_utf8_bom(char **, size_t); + void strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent, int indent2, int width); void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,