From 1353ca67060c2c2631552eb04a70d40584c1b519 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jul 10 2018 08:09:40 +0000 Subject: import rh-git29-git-2.9.3-4.el7 --- diff --git a/SOURCES/git-cve-2018-11235-fsck.patch b/SOURCES/git-cve-2018-11235-fsck.patch new file mode 100644 index 0000000..99b523c --- /dev/null +++ b/SOURCES/git-cve-2018-11235-fsck.patch @@ -0,0 +1,1374 @@ +From ff9e25d40ff1f2a2942eac7b3dd108ca739f362a Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Fri, 13 Jan 2017 12:58:16 -0500 +Subject: [PATCH 01/11] sha1_file: add read_loose_object() function + +commit f6371f9210418f1beabc85b097e2a3470aeeb54d upstream. + +It's surprisingly hard to ask the sha1_file code to open a +_specific_ incarnation of a loose object. Most of the +functions take a sha1, and loop over the various object +types (packed versus loose) and locations (local versus +alternates) at a low level. + +However, some tools like fsck need to look at a specific +file. This patch gives them a function they can use to open +the loose object at a given path. + +The implementation unfortunately ends up repeating bits of +related functions, but there's not a good way around it +without some major refactoring of the whole sha1_file stack. +We need to mmap the specific file, then partially read the +zlib stream to know whether we're streaming or not, and then +finally either stream it or copy the data to a buffer. + +We can do that by assembling some of the more arcane +internal sha1_file functions, but we end up having to +essentially reimplement unpack_sha1_file(), along with the +streaming bits of check_sha1_signature(). + +Still, most of the ugliness is contained in the new +function, and the interface is clean enough that it may be +reusable (though it seems unlikely anything but git-fsck +would care about opening a specific file). + +Signed-off-by: Jeff King +Signed-off-by: Junio C Hamano +Signed-off-by: Jonathan Nieder +--- + cache.h | 13 ++++++ + sha1_file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 143 insertions(+), 2 deletions(-) + +diff --git a/cache.h b/cache.h +index 61d43c6..fa53623 100644 +--- a/cache.h ++++ b/cache.h +@@ -1113,6 +1113,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename); + + extern int has_sha1_pack(const unsigned char *sha1); + ++/* ++ * Open the loose object at path, check its sha1, and return the contents, ++ * type, and size. If the object is a blob, then "contents" may return NULL, ++ * to allow streaming of large blobs. ++ * ++ * Returns 0 on success, negative on error (details may be written to stderr). ++ */ ++int read_loose_object(const char *path, ++ const unsigned char *expected_sha1, ++ enum object_type *type, ++ unsigned long *size, ++ void **contents); ++ + /* + * Return true iff we have an object named sha1, whether local or in + * an alternate object database, and whether packed or loose. This +diff --git a/sha1_file.c b/sha1_file.c +index cb571ac..6d29840 100644 +--- a/sha1_file.c ++++ b/sha1_file.c +@@ -1547,12 +1547,21 @@ static int open_sha1_file(const unsigned char *sha1) + return -1; + } + +-void *map_sha1_file(const unsigned char *sha1, unsigned long *size) ++/* ++ * Map the loose object at "path" if it is not NULL, or the path found by ++ * searching for a loose object named "sha1". ++ */ ++static void *map_sha1_file_1(const char *path, ++ const unsigned char *sha1, ++ unsigned long *size) + { + void *map; + int fd; + +- fd = open_sha1_file(sha1); ++ if (path) ++ fd = git_open_noatime(path); ++ else ++ fd = open_sha1_file(sha1); + map = NULL; + if (fd >= 0) { + struct stat st; +@@ -1571,6 +1580,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) + return map; + } + ++void *map_sha1_file(const unsigned char *sha1, unsigned long *size) ++{ ++ return map_sha1_file_1(NULL, sha1, size); ++} ++ + unsigned long unpack_object_header_buffer(const unsigned char *buf, + unsigned long len, enum object_type *type, unsigned long *sizep) + { +@@ -3644,3 +3658,117 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) + } + return r ? r : pack_errors; + } ++ ++static int check_stream_sha1(git_zstream *stream, ++ const char *hdr, ++ unsigned long size, ++ const char *path, ++ const unsigned char *expected_sha1) ++{ ++ git_SHA_CTX c; ++ unsigned char real_sha1[GIT_SHA1_RAWSZ]; ++ unsigned char buf[4096]; ++ unsigned long total_read; ++ int status = Z_OK; ++ ++ git_SHA1_Init(&c); ++ git_SHA1_Update(&c, hdr, stream->total_out); ++ ++ /* ++ * We already read some bytes into hdr, but the ones up to the NUL ++ * do not count against the object's content size. ++ */ ++ total_read = stream->total_out - strlen(hdr) - 1; ++ ++ /* ++ * This size comparison must be "<=" to read the final zlib packets; ++ * see the comment in unpack_sha1_rest for details. ++ */ ++ while (total_read <= size && ++ (status == Z_OK || status == Z_BUF_ERROR)) { ++ stream->next_out = buf; ++ stream->avail_out = sizeof(buf); ++ if (size - total_read < stream->avail_out) ++ stream->avail_out = size - total_read; ++ status = git_inflate(stream, Z_FINISH); ++ git_SHA1_Update(&c, buf, stream->next_out - buf); ++ total_read += stream->next_out - buf; ++ } ++ git_inflate_end(stream); ++ ++ if (status != Z_STREAM_END) { ++ error("corrupt loose object '%s'", sha1_to_hex(expected_sha1)); ++ return -1; ++ } ++ ++ git_SHA1_Final(real_sha1, &c); ++ if (hashcmp(expected_sha1, real_sha1)) { ++ error("sha1 mismatch for %s (expected %s)", path, ++ sha1_to_hex(expected_sha1)); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++int read_loose_object(const char *path, ++ const unsigned char *expected_sha1, ++ enum object_type *type, ++ unsigned long *size, ++ void **contents) ++{ ++ int ret = -1; ++ int fd = -1; ++ void *map = NULL; ++ unsigned long mapsize; ++ git_zstream stream; ++ char hdr[32]; ++ ++ *contents = NULL; ++ ++ map = map_sha1_file_1(path, NULL, &mapsize); ++ if (!map) { ++ error_errno("unable to mmap %s", path); ++ goto out; ++ } ++ ++ if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { ++ error("unable to unpack header of %s", path); ++ goto out; ++ } ++ ++ *type = parse_sha1_header(hdr, size); ++ if (*type < 0) { ++ error("unable to parse header of %s", path); ++ git_inflate_end(&stream); ++ goto out; ++ } ++ ++ if (*type == OBJ_BLOB) { ++ if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) ++ goto out; ++ } else { ++ *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1); ++ if (!*contents) { ++ error("unable to unpack contents of %s", path); ++ git_inflate_end(&stream); ++ goto out; ++ } ++ if (check_sha1_signature(expected_sha1, *contents, ++ *size, typename(*type))) { ++ error("sha1 mismatch for %s (expected %s)", path, ++ sha1_to_hex(expected_sha1)); ++ free(*contents); ++ goto out; ++ } ++ } ++ ++ ret = 0; /* everything checks out */ ++ ++out: ++ if (map) ++ munmap(map, mapsize); ++ if (fd >= 0) ++ close(fd); ++ return ret; ++} +-- +2.14.4 + + +From 65074f3eda3338445c7674b7d5b176844970b5de Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Fri, 13 Jan 2017 12:59:44 -0500 +Subject: [PATCH 02/11] fsck: parse loose object paths directly + +commit c68b489e56431cf27f7719913ab09ddc62f95912 upstream. + +When we iterate over the list of loose objects to check, we +get the actual path of each object. But we then throw it +away and pass just the sha1 to fsck_sha1(), which will do a +fresh lookup. Usually it would find the same object, but it +may not if an object exists both as a loose and a packed +object. We may end up checking the packed object twice, and +never look at the loose one. + +In practice this isn't too terrible, because if fsck doesn't +complain, it means you have at least one good copy. But +since the point of fsck is to look for corruption, we should +be thorough. + +The new read_loose_object() interface can help us get the +data from disk, and then we replace parse_object() with +parse_object_buffer(). As a bonus, our error messages now +mention the path to a corrupted object, which should make it +easier to track down errors when they do happen. + +Signed-off-by: Jeff King +Signed-off-by: Junio C Hamano +Signed-off-by: Jonathan Nieder +--- + builtin/fsck.c | 46 +++++++++++++++++++++++++++++++++------------- + t/t1450-fsck.sh | 16 ++++++++++++++++ + 2 files changed, 49 insertions(+), 13 deletions(-) + +diff --git a/builtin/fsck.c b/builtin/fsck.c +index 9923b10..150597c 100644 +--- a/builtin/fsck.c ++++ b/builtin/fsck.c +@@ -341,18 +341,6 @@ static int fsck_obj(struct object *obj) + return 0; + } + +-static int fsck_sha1(const unsigned char *sha1) +-{ +- struct object *obj = parse_object(sha1); +- if (!obj) { +- errors_found |= ERROR_OBJECT; +- return error("%s: object corrupt or missing", +- sha1_to_hex(sha1)); +- } +- obj->flags |= HAS_OBJ; +- return fsck_obj(obj); +-} +- + static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, + unsigned long size, void *buffer, int *eaten) + { +@@ -459,9 +447,41 @@ static void get_default_heads(void) + } + } + ++static struct object *parse_loose_object(const unsigned char *sha1, ++ const char *path) ++{ ++ struct object *obj; ++ void *contents; ++ enum object_type type; ++ unsigned long size; ++ int eaten; ++ ++ if (read_loose_object(path, sha1, &type, &size, &contents) < 0) ++ return NULL; ++ ++ if (!contents && type != OBJ_BLOB) ++ die("BUG: read_loose_object streamed a non-blob"); ++ ++ obj = parse_object_buffer(sha1, type, size, contents, &eaten); ++ ++ if (!eaten) ++ free(contents); ++ return obj; ++} ++ + static int fsck_loose(const unsigned char *sha1, const char *path, void *data) + { +- if (fsck_sha1(sha1)) ++ struct object *obj = parse_loose_object(sha1, path); ++ ++ if (!obj) { ++ errors_found |= ERROR_OBJECT; ++ error("%s: object corrupt or missing: %s", ++ sha1_to_hex(sha1), path); ++ return 0; /* keep checking other objects */ ++ } ++ ++ obj->flags = HAS_OBJ; ++ if (fsck_obj(obj)) + errors_found |= ERROR_OBJECT; + return 0; + } +diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh +index 7ee8ea0..17c4c77 100755 +--- a/t/t1450-fsck.sh ++++ b/t/t1450-fsck.sh +@@ -523,4 +523,20 @@ test_expect_success 'fsck --connectivity-only' ' + ) + ' + ++test_expect_success 'fsck finds problems in duplicate loose objects' ' ++ rm -rf broken-duplicate && ++ git init broken-duplicate && ++ ( ++ cd broken-duplicate && ++ test_commit duplicate && ++ # no "-d" here, so we end up with duplicates ++ git repack && ++ # now corrupt the loose copy ++ file=$(sha1_file "$(git rev-parse HEAD)") && ++ rm "$file" && ++ echo broken >"$file" && ++ test_must_fail git fsck ++ ) ++' ++ + test_done +-- +2.14.4 + + +From 6a26b1fcf92a6c825eb72c1a55399f38af5b7b79 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 16:37:09 -0400 +Subject: [PATCH 03/11] index-pack: make fsck error message more specific + +commit db5a58c1bda5b20169b9958af1e8b05ddd178b01 upstream. + +If fsck reports an error, we say only "Error in object". +This isn't quite as bad as it might seem, since the fsck +code would have dumped some errors to stderr already. But it +might help to give a little more context. The earlier output +would not have even mentioned "fsck", and that may be a clue +that the "fsck.*" or "*.fsckObjects" config may be relevant. + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + builtin/index-pack.c | 2 +- + builtin/unpack-objects.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/builtin/index-pack.c b/builtin/index-pack.c +index 1008d7f..81f3bc7 100644 +--- a/builtin/index-pack.c ++++ b/builtin/index-pack.c +@@ -841,7 +841,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, + die(_("invalid %s"), typename(type)); + if (do_fsck_object && + fsck_object(obj, buf, size, &fsck_options)) +- die(_("Error in object")); ++ die(_("fsck error in packed object")); + if (fsck_walk(obj, NULL, &fsck_options)) + die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); + +diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c +index 875e7ed..7018829 100644 +--- a/builtin/unpack-objects.c ++++ b/builtin/unpack-objects.c +@@ -205,7 +205,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op + if (!obj_buf) + die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); + if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) +- die("Error in object"); ++ die("fsck error in packed object"); + fsck_options.walk = check_object; + if (fsck_walk(obj, NULL, &fsck_options)) + die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); +-- +2.14.4 + + +From 8ac12a6cd60b74748494beadd7750de39f727ff3 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Sun, 13 May 2018 12:35:37 -0400 +Subject: [PATCH 04/11] fsck: simplify ".git" check + +commit ed9c3220621d634d543bc4dd998d12167dfc57d4 upstream. + +There's no need for us to manually check for ".git"; it's a +subset of the other filesystem-specific tests. Dropping it +makes our code slightly shorter. More importantly, the +existing code may make a reader wonder why ".GIT" is not +covered here, and whether that is a bug (it isn't, as it's +also covered in the filesystem-specific tests). + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + fsck.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fsck.c b/fsck.c +index 0531545..c807b7f 100644 +--- a/fsck.c ++++ b/fsck.c +@@ -460,9 +460,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) + has_empty_name |= !*name; + has_dot |= !strcmp(name, "."); + has_dotdot |= !strcmp(name, ".."); +- has_dotgit |= (!strcmp(name, ".git") || +- is_hfs_dotgit(name) || +- is_ntfs_dotgit(name)); ++ has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); + has_zero_pad |= *(char *)desc.buffer == '0'; + update_tree_entry(&desc); + +-- +2.14.4 + + +From 27d0f7bf0b6ba57e849353a65ff73887088b6d7b Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 15:44:51 -0400 +Subject: [PATCH 05/11] fsck: actually fsck blob data + +commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream. + +Because fscking a blob has always been a noop, we didn't +bother passing around the blob data. In preparation for +content-level checks, let's fix up a few things: + + 1. The fsck_object() function just returns success for any + blob. Let's a noop fsck_blob(), which we can fill in + with actual logic later. + + 2. The fsck_loose() function in builtin/fsck.c + just threw away blob content after loading it. Let's + hold onto it until after we've called fsck_object(). + + The easiest way to do this is to just drop the + parse_loose_object() helper entirely. Incidentally, + this also fixes a memory leak: if we successfully + loaded the object data but did not parse it, we would + have left the function without freeing it. + + 3. When fsck_loose() loads the object data, it + does so with a custom read_loose_object() helper. This + function streams any blobs, regardless of size, under + the assumption that we're only checking the sha1. + + Instead, let's actually load blobs smaller than + big_file_threshold, as the normal object-reading + code-paths would do. This lets us fsck small files, and + a NULL return is an indication that the blob was so big + that it needed to be streamed, and we can pass that + information along to fsck_blob(). + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + builtin/fsck.c | 39 +++++++++++++++++++-------------------- + fsck.c | 8 +++++++- + sha1_file.c | 2 +- + 3 files changed, 27 insertions(+), 22 deletions(-) + +diff --git a/builtin/fsck.c b/builtin/fsck.c +index 150597c..61ab1cd 100644 +--- a/builtin/fsck.c ++++ b/builtin/fsck.c +@@ -299,7 +299,7 @@ static void check_connectivity(void) + } + } + +-static int fsck_obj(struct object *obj) ++static int fsck_obj(struct object *obj, void *buffer, unsigned long size) + { + if (obj->flags & SEEN) + return 0; +@@ -311,7 +311,7 @@ static int fsck_obj(struct object *obj) + + if (fsck_walk(obj, NULL, &fsck_obj_options)) + objerror(obj, "broken links"); +- if (fsck_object(obj, NULL, 0, &fsck_obj_options)) ++ if (fsck_object(obj, buffer, size, &fsck_obj_options)) + return -1; + + if (obj->type == OBJ_TREE) { +@@ -355,7 +355,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, + return error("%s: object corrupt or missing", sha1_to_hex(sha1)); + } + obj->flags = HAS_OBJ; +- return fsck_obj(obj); ++ return fsck_obj(obj, buffer, size); + } + + static int default_refs; +@@ -447,43 +447,42 @@ static void get_default_heads(void) + } + } + +-static struct object *parse_loose_object(const unsigned char *sha1, +- const char *path) ++static int fsck_loose(const unsigned char *sha1, const char *path, void *data) + { + struct object *obj; +- void *contents; + enum object_type type; + unsigned long size; ++ void *contents; + int eaten; + +- if (read_loose_object(path, sha1, &type, &size, &contents) < 0) +- return NULL; ++ if (read_loose_object(path, sha1, &type, &size, &contents) < 0) { ++ errors_found |= ERROR_OBJECT; ++ error("%s: object corrupt or missing: %s", ++ sha1_to_hex(sha1), path); ++ return 0; /* keep checking other objects */ ++ } + + if (!contents && type != OBJ_BLOB) + die("BUG: read_loose_object streamed a non-blob"); + + obj = parse_object_buffer(sha1, type, size, contents, &eaten); + +- if (!eaten) +- free(contents); +- return obj; +-} +- +-static int fsck_loose(const unsigned char *sha1, const char *path, void *data) +-{ +- struct object *obj = parse_loose_object(sha1, path); +- + if (!obj) { + errors_found |= ERROR_OBJECT; +- error("%s: object corrupt or missing: %s", ++ error("%s: object could not be parsed: %s", + sha1_to_hex(sha1), path); ++ if (!eaten) ++ free(contents); + return 0; /* keep checking other objects */ + } + + obj->flags = HAS_OBJ; +- if (fsck_obj(obj)) ++ if (fsck_obj(obj, contents, size)) + errors_found |= ERROR_OBJECT; +- return 0; ++ ++ if (!eaten) ++ free(contents); ++ return 0; /* keep checking other objects, even if we saw an error */ + } + + static int fsck_cruft(const char *basename, const char *path, void *data) +diff --git a/fsck.c b/fsck.c +index c807b7f..a520290 100644 +--- a/fsck.c ++++ b/fsck.c +@@ -795,6 +795,12 @@ static int fsck_tag(struct tag *tag, const char *data, + return fsck_tag_buffer(tag, data, size, options); + } + ++static int fsck_blob(struct blob *blob, const char *buf, ++ unsigned long size, struct fsck_options *options) ++{ ++ return 0; ++} ++ + int fsck_object(struct object *obj, void *data, unsigned long size, + struct fsck_options *options) + { +@@ -802,7 +808,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, + return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); + + if (obj->type == OBJ_BLOB) +- return 0; ++ return fsck_blob((struct blob *)obj, data, size, options); + if (obj->type == OBJ_TREE) + return fsck_tree((struct tree *) obj, options); + if (obj->type == OBJ_COMMIT) +diff --git a/sha1_file.c b/sha1_file.c +index 6d29840..a091644 100644 +--- a/sha1_file.c ++++ b/sha1_file.c +@@ -3744,7 +3744,7 @@ int read_loose_object(const char *path, + goto out; + } + +- if (*type == OBJ_BLOB) { ++ if (*type == OBJ_BLOB && *size > big_file_threshold) { + if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) + goto out; + } else { +-- +2.14.4 + + +From d393612c63e6bdfd7ab54139c229be3db89c550d Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 17:20:08 -0400 +Subject: [PATCH 06/11] fsck: detect gitmodules files + +commit 159e7b080bfa5d34559467cacaa79df89a01afc0 upstream. + +In preparation for performing fsck checks on .gitmodules +files, this commit plumbs in the actual detection of the +files. Note that unlike most other fsck checks, this cannot +be a property of a single object: we must know that the +object is found at a ".gitmodules" path at the root tree of +a commit. + +Since the fsck code only sees one object at a time, we have +to mark the related objects to fit the puzzle together. When +we see a commit we mark its tree as a root tree, and when +we see a root tree with a .gitmodules file, we mark the +corresponding blob to be checked. + +In an ideal world, we'd check the objects in topological +order: commits followed by trees followed by blobs. In that +case we can avoid ever loading an object twice, since all +markings would be complete by the time we get to the marked +objects. And indeed, if we are checking a single packfile, +this is the order in which Git will generally write the +objects. But we can't count on that: + + 1. git-fsck may show us the objects in arbitrary order + (loose objects are fed in sha1 order, but we may also + have multiple packs, and we process each pack fully in + sequence). + + 2. The type ordering is just what git-pack-objects happens + to write now. The pack format does not require a + specific order, and it's possible that future versions + of Git (or a custom version trying to fool official + Git's fsck checks!) may order it differently. + + 3. We may not even be fscking all of the relevant objects + at once. Consider pushing with transfer.fsckObjects, + where one push adds a blob at path "foo", and then a + second push adds the same blob at path ".gitmodules". + The blob is not part of the second push at all, but we + need to mark and check it. + +So in the general case, we need to make up to three passes +over the objects: once to make sure we've seen all commits, +then once to cover any trees we might have missed, and then +a final pass to cover any .gitmodules blobs we found in the +second pass. + +We can simplify things a bit by loosening the requirement +that we find .gitmodules only at root trees. Technically +a file like "subdir/.gitmodules" is not parsed by Git, but +it's not unreasonable for us to declare that Git is aware of +all ".gitmodules" files and make them eligible for checking. +That lets us drop the root-tree requirement, which +eliminates one pass entirely. And it makes our worst case +much better: instead of potentially queueing every root tree +to be re-examined, the worst case is that we queue each +unique .gitmodules blob for a second look. + +This patch just adds the boilerplate to find .gitmodules +files. The actual content checks will come in a subsequent +commit. + +[jn: backported to 2.11.y: + - passing oid->hash instead of oid to lookup_blob + - using "struct hashmap" directly since "struct oidset" isn't + available] + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + fsck.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + fsck.h | 7 ++++++ + 2 files changed, 98 insertions(+) + +diff --git a/fsck.c b/fsck.c +index a520290..f0a566a 100644 +--- a/fsck.c ++++ b/fsck.c +@@ -9,6 +9,42 @@ + #include "refs.h" + #include "utf8.h" + #include "sha1-array.h" ++#include "hashmap.h" ++ ++struct oidhash_entry { ++ struct hashmap_entry ent; ++ struct object_id oid; ++}; ++ ++static int oidhash_hashcmp(const void *va, const void *vb, ++ const void *vkey) ++{ ++ const struct oidhash_entry *a = va, *b = vb; ++ const struct object_id *key = vkey; ++ return oidcmp(&a->oid, key ? key : &b->oid); ++} ++ ++static struct hashmap gitmodules_found; ++static struct hashmap gitmodules_done; ++ ++static void oidhash_insert(struct hashmap *h, const struct object_id *oid) ++{ ++ struct oidhash_entry *e; ++ ++ if (!h->tablesize) ++ hashmap_init(h, oidhash_hashcmp, 0); ++ e = xmalloc(sizeof(*e)); ++ hashmap_entry_init(&e->ent, sha1hash(oid->hash)); ++ oidcpy(&e->oid, oid); ++ hashmap_add(h, e); ++} ++ ++static int oidhash_contains(struct hashmap *h, const struct object_id *oid) ++{ ++ return h->tablesize && ++ !!hashmap_get_from_hash(h, sha1hash(oid->hash), oid); ++} ++ + + #define FSCK_FATAL -1 + #define FSCK_INFO -2 +@@ -43,6 +79,7 @@ + FUNC(MISSING_TAG_ENTRY, ERROR) \ + FUNC(MISSING_TAG_OBJECT, ERROR) \ + FUNC(MISSING_TREE, ERROR) \ ++ FUNC(MISSING_TREE_OBJECT, ERROR) \ + FUNC(MISSING_TYPE, ERROR) \ + FUNC(MISSING_TYPE_ENTRY, ERROR) \ + FUNC(MULTIPLE_AUTHORS, ERROR) \ +@@ -50,6 +87,8 @@ + FUNC(TREE_NOT_SORTED, ERROR) \ + FUNC(UNKNOWN_TYPE, ERROR) \ + FUNC(ZERO_PADDED_DATE, ERROR) \ ++ FUNC(GITMODULES_MISSING, ERROR) \ ++ FUNC(GITMODULES_BLOB, ERROR) \ + /* warnings */ \ + FUNC(BAD_FILEMODE, WARN) \ + FUNC(EMPTY_NAME, WARN) \ +@@ -462,6 +501,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) + has_dotdot |= !strcmp(name, ".."); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); + has_zero_pad |= *(char *)desc.buffer == '0'; ++ ++ if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) ++ oidhash_insert(&gitmodules_found, oid); ++ + update_tree_entry(&desc); + + switch (mode) { +@@ -831,3 +874,51 @@ int fsck_error_function(struct object *obj, int msg_type, const char *message) + error("object %s: %s", oid_to_hex(&obj->oid), message); + return 1; + } ++ ++int fsck_finish(struct fsck_options *options) ++{ ++ int ret = 0; ++ struct hashmap_iter iter; ++ const struct oidhash_entry *e; ++ ++ hashmap_iter_init(&gitmodules_found, &iter); ++ while ((e = hashmap_iter_next(&iter))) { ++ const struct object_id *oid = &e->oid; ++ struct blob *blob; ++ enum object_type type; ++ unsigned long size; ++ char *buf; ++ ++ if (oidhash_contains(&gitmodules_done, oid)) ++ continue; ++ ++ blob = lookup_blob(oid->hash); ++ if (!blob) { ++ ret |= report(options, &blob->object, ++ FSCK_MSG_GITMODULES_BLOB, ++ "non-blob found at .gitmodules"); ++ continue; ++ } ++ ++ buf = read_sha1_file(oid->hash, &type, &size); ++ if (!buf) { ++ ret |= report(options, &blob->object, ++ FSCK_MSG_GITMODULES_MISSING, ++ "unable to read .gitmodules blob"); ++ continue; ++ } ++ ++ if (type == OBJ_BLOB) ++ ret |= fsck_blob(blob, buf, size, options); ++ else ++ ret |= report(options, &blob->object, ++ FSCK_MSG_GITMODULES_BLOB, ++ "non-blob found at .gitmodules"); ++ free(buf); ++ } ++ ++ ++ hashmap_free(&gitmodules_found, 1); ++ hashmap_free(&gitmodules_done, 1); ++ return ret; ++} +diff --git a/fsck.h b/fsck.h +index dded84b..3eebe56 100644 +--- a/fsck.h ++++ b/fsck.h +@@ -50,4 +50,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); + int fsck_object(struct object *obj, void *data, unsigned long size, + struct fsck_options *options); + ++/* ++ * Some fsck checks are context-dependent, and may end up queued; run this ++ * after completing all fsck_object() calls in order to resolve any remaining ++ * checks. ++ */ ++int fsck_finish(struct fsck_options *options); ++ + #endif +-- +2.14.4 + + +From ff398bff4000efa4275058c6b4f8c479831494f0 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 17:25:27 -0400 +Subject: [PATCH 07/11] fsck: check .gitmodules content + +commit ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e upstream. + +This patch detects and blocks submodule names which do not +match the policy set forth in submodule-config. These should +already be caught by the submodule code itself, but putting +the check here means that newer versions of Git can protect +older ones from malicious entries (e.g., a server with +receive.fsckObjects will block the objects, protecting +clients which fetch from it). + +As a side effect, this means fsck will also complain about +.gitmodules files that cannot be parsed (or were larger than +core.bigFileThreshold). + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + fsck.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 60 insertions(+), 1 deletion(-) + +diff --git a/fsck.c b/fsck.c +index f0a566a..d366ad8 100644 +--- a/fsck.c ++++ b/fsck.c +@@ -10,6 +10,9 @@ + #include "utf8.h" + #include "sha1-array.h" + #include "hashmap.h" ++#include "submodule-config.h" ++ ++#define CONFIG_ORIGIN_BLOB 0 + + struct oidhash_entry { + struct hashmap_entry ent; +@@ -89,6 +92,8 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid) + FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ ++ FUNC(GITMODULES_PARSE, ERROR) \ ++ FUNC(GITMODULES_NAME, ERROR) \ + /* warnings */ \ + FUNC(BAD_FILEMODE, WARN) \ + FUNC(EMPTY_NAME, WARN) \ +@@ -838,10 +843,64 @@ static int fsck_tag(struct tag *tag, const char *data, + return fsck_tag_buffer(tag, data, size, options); + } + ++struct fsck_gitmodules_data { ++ struct object *obj; ++ struct fsck_options *options; ++ int ret; ++}; ++ ++static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) ++{ ++ struct fsck_gitmodules_data *data = vdata; ++ const char *subsection, *key; ++ int subsection_len; ++ char *name; ++ ++ if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || ++ !subsection) ++ return 0; ++ ++ name = xmemdupz(subsection, subsection_len); ++ if (check_submodule_name(name) < 0) ++ data->ret |= report(data->options, data->obj, ++ FSCK_MSG_GITMODULES_NAME, ++ "disallowed submodule name: %s", ++ name); ++ free(name); ++ ++ return 0; ++} ++ + static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) + { +- return 0; ++ struct fsck_gitmodules_data data; ++ ++ if (!oidhash_contains(&gitmodules_found, &blob->object.oid)) ++ return 0; ++ oidhash_insert(&gitmodules_done, &blob->object.oid); ++ ++ if (!buf) { ++ /* ++ * A missing buffer here is a sign that the caller found the ++ * blob too gigantic to load into memory. Let's just consider ++ * that an error. ++ */ ++ return report(options, &blob->object, ++ FSCK_MSG_GITMODULES_PARSE, ++ ".gitmodules too large to parse"); ++ } ++ ++ data.obj = &blob->object; ++ data.options = options; ++ data.ret = 0; ++ if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, ++ ".gitmodules", buf, size, &data)) ++ data.ret |= report(options, &blob->object, ++ FSCK_MSG_GITMODULES_PARSE, ++ "could not parse gitmodules blob"); ++ ++ return data.ret; + } + + int fsck_object(struct object *obj, void *data, unsigned long size, +-- +2.14.4 + + +From 3f7875be4a5b68058e243a0525d75165620738cb Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 17:20:35 -0400 +Subject: [PATCH 08/11] fsck: call fsck_finish() after fscking objects + +commit 1995b5e03e1cc97116be58cdc0502d4a23547856 upstream. + +Now that the internal fsck code is capable of checking +.gitmodules files, we just need to teach its callers to use +the "finish" function to check any queued objects. + +With this, we can now catch the malicious case in t7415 with +git-fsck. + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + builtin/fsck.c | 3 +++ + t/t7415-submodule-names.sh | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/builtin/fsck.c b/builtin/fsck.c +index 61ab1cd..70f620b 100644 +--- a/builtin/fsck.c ++++ b/builtin/fsck.c +@@ -660,6 +660,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) + count += p->num_objects; + } + stop_progress(&progress); ++ ++ if (fsck_finish(&fsck_obj_options)) ++ errors_found |= ERROR_OBJECT; + } + + heads = 0; +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +index b30fa33..5d61627 100755 +--- a/t/t7415-submodule-names.sh ++++ b/t/t7415-submodule-names.sh +@@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' ' + ! grep "RUNNING POST CHECKOUT" output + ' + ++test_expect_success 'fsck detects evil superproject' ' ++ test_must_fail git fsck ++' ++ + test_done +-- +2.14.4 + + +From 563989871514b892c2c9a4f4fd2ef515dd6adaee Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Fri, 4 May 2018 19:40:08 -0400 +Subject: [PATCH 09/11] unpack-objects: call fsck_finish() after fscking + objects + +commit 6e328d6caef218db320978e3e251009135d87d0e upstream. + +As with the previous commit, we must call fsck's "finish" +function in order to catch any queued objects for +.gitmodules checks. + +This second pass will be able to access any incoming +objects, because we will have exploded them to loose objects +by now. + +This isn't quite ideal, because it means that bad objects +may have been written to the object database (and a +subsequent operation could then reference them, even if the +other side doesn't send the objects again). However, this is +sufficient when used with receive.fsckObjects, since those +loose objects will all be placed in a temporary quarantine +area that will get wiped if we find any problems. + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + builtin/unpack-objects.c | 5 ++++- + t/t7415-submodule-names.sh | 7 +++++++ + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c +index 7018829..27ba86e 100644 +--- a/builtin/unpack-objects.c ++++ b/builtin/unpack-objects.c +@@ -560,8 +560,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) + unpack_all(); + git_SHA1_Update(&ctx, buffer, offset); + git_SHA1_Final(sha1, &ctx); +- if (strict) ++ if (strict) { + write_rest(); ++ if (fsck_finish(&fsck_options)) ++ die(_("fsck error in pack objects")); ++ } + if (hashcmp(fill(20), sha1)) + die("final sha1 did not match"); + use(20); +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +index 5d61627..a09087c 100755 +--- a/t/t7415-submodule-names.sh ++++ b/t/t7415-submodule-names.sh +@@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' ' + test_must_fail git fsck + ' + ++test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' ++ rm -rf dst.git && ++ git init --bare dst.git && ++ git -C dst.git config transfer.fsckObjects true && ++ test_must_fail git push dst.git HEAD ++' ++ + test_done +-- +2.14.4 + + +From 0e3a3c30f0ba16f3e3fd4581cea36b2d2d4d204a Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Fri, 4 May 2018 19:45:01 -0400 +Subject: [PATCH 10/11] index-pack: check .gitmodules files with --strict + +commit 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 upstream. + +Now that the internal fsck code has all of the plumbing we +need, we can start checking incoming .gitmodules files. +Naively, it seems like we would just need to add a call to +fsck_finish() after we've processed all of the objects. And +that would be enough to cover the initial test included +here. But there are two extra bits: + + 1. We currently don't bother calling fsck_object() at all + for blobs, since it has traditionally been a noop. We'd + actually catch these blobs in fsck_finish() at the end, + but it's more efficient to check them when we already + have the object loaded in memory. + + 2. The second pass done by fsck_finish() needs to access + the objects, but we're actually indexing the pack in + this process. In theory we could give the fsck code a + special callback for accessing the in-pack data, but + it's actually quite tricky: + + a. We don't have an internal efficient index mapping + oids to packfile offsets. We only generate it on + the fly as part of writing out the .idx file. + + b. We'd still have to reconstruct deltas, which means + we'd basically have to replicate all of the + reading logic in packfile.c. + + Instead, let's avoid running fsck_finish() until after + we've written out the .idx file, and then just add it + to our internal packed_git list. + + This does mean that the objects are "in the repository" + before we finish our fsck checks. But unpack-objects + already exhibits this same behavior, and it's an + acceptable tradeoff here for the same reason: the + quarantine mechanism means that pushes will be + fully protected. + +In addition to a basic push test in t7415, we add a sneaky +pack that reverses the usual object order in the pack, +requiring that index-pack access the tree and blob during +the "finish" step. + +This already works for unpack-objects (since it will have +written out loose objects), but we'll check it with this +sneaky pack for good measure. + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + builtin/index-pack.c | 10 ++++++++++ + t/lib-pack.sh | 12 ++++++++++++ + t/t7415-submodule-names.sh | 38 ++++++++++++++++++++++++++++++++++++++ + 3 files changed, 60 insertions(+) + +diff --git a/builtin/index-pack.c b/builtin/index-pack.c +index 81f3bc7..f53bf22 100644 +--- a/builtin/index-pack.c ++++ b/builtin/index-pack.c +@@ -825,6 +825,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, + blob->object.flags |= FLAG_CHECKED; + else + die(_("invalid blob object %s"), sha1_to_hex(sha1)); ++ if (do_fsck_object && ++ fsck_object(&blob->object, (void *)data, size, &fsck_options)) ++ die(_("fsck error in packed object")); + } else { + struct object *obj; + int eaten; +@@ -1440,6 +1443,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, + } else + chmod(final_index_name, 0444); + ++ if (do_fsck_object) ++ add_packed_git(final_index_name, strlen(final_index_name), 0); ++ + if (!from_stdin) { + printf("%s\n", sha1_to_hex(sha1)); + } else { +@@ -1775,6 +1781,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) + pack_sha1); + else + close(input_fd); ++ ++ if (do_fsck_object && fsck_finish(&fsck_options)) ++ die(_("fsck error in pack objects")); ++ + free(objects); + strbuf_release(&index_name_buf); + strbuf_release(&keep_name_buf); +diff --git a/t/lib-pack.sh b/t/lib-pack.sh +index 7509846..4674899 100644 +--- a/t/lib-pack.sh ++++ b/t/lib-pack.sh +@@ -79,6 +79,18 @@ pack_obj () { + ;; + esac + ++ # If it's not a delta, we can convince pack-objects to generate a pack ++ # with just our entry, and then strip off the header (12 bytes) and ++ # trailer (20 bytes). ++ if test -z "$2" ++ then ++ echo "$1" | git pack-objects --stdout >pack_obj.tmp && ++ size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" + return 1 + } +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +index a09087c..fca7091 100755 +--- a/t/t7415-submodule-names.sh ++++ b/t/t7415-submodule-names.sh +@@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a + real-world setup that confirms we catch this in practice. + ' + . ./test-lib.sh ++. "$TEST_DIRECTORY"/lib-pack.sh + + test_expect_success 'check names' ' + cat >expect <<-\EOF && +@@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' + test_must_fail git push dst.git HEAD + ' + ++test_expect_success 'transfer.fsckObjects detects evil superproject (index)' ' ++ rm -rf dst.git && ++ git init --bare dst.git && ++ git -C dst.git config transfer.fsckObjects true && ++ git -C dst.git config transfer.unpackLimit 1 && ++ test_must_fail git push dst.git HEAD ++' ++ ++# Normally our packs contain commits followed by trees followed by blobs. This ++# reverses the order, which requires backtracking to find the context of a ++# blob. We'll start with a fresh gitmodules-only tree to make it simpler. ++test_expect_success 'create oddly ordered pack' ' ++ git checkout --orphan odd && ++ git rm -rf --cached . && ++ git add .gitmodules && ++ git commit -m odd && ++ { ++ pack_header 3 && ++ pack_obj $(git rev-parse HEAD:.gitmodules) && ++ pack_obj $(git rev-parse HEAD^{tree}) && ++ pack_obj $(git rev-parse HEAD) ++ } >odd.pack && ++ pack_trailer odd.pack ++' ++ ++test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' ' ++ rm -rf dst.git && ++ git init --bare dst.git && ++ test_must_fail git -C dst.git unpack-objects --strict +Date: Fri, 4 May 2018 20:03:35 -0400 +Subject: [PATCH 11/11] fsck: complain when .gitmodules is a symlink + +commit b7b1fca175f1ed7933f361028c631b9ac86d868d upstream. + +We've recently forbidden .gitmodules to be a symlink in +verify_path(). And it's an easy way to circumvent our fsck +checks for .gitmodules content. So let's complain when we +see it. + +Signed-off-by: Jeff King +Signed-off-by: Jonathan Nieder +--- + fsck.c | 14 ++++++++++---- + t/t7415-submodule-names.sh | 29 +++++++++++++++++++++++++++++ + 2 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/fsck.c b/fsck.c +index d366ad8..79694b0 100644 +--- a/fsck.c ++++ b/fsck.c +@@ -94,6 +94,7 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid) + FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ ++ FUNC(GITMODULES_SYMLINK, ERROR) \ + /* warnings */ \ + FUNC(BAD_FILEMODE, WARN) \ + FUNC(EMPTY_NAME, WARN) \ +@@ -472,7 +473,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con + + static int fsck_tree(struct tree *item, struct fsck_options *options) + { +- int retval; ++ int retval = 0; + int has_null_sha1 = 0; + int has_full_path = 0; + int has_empty_name = 0; +@@ -507,8 +508,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); + has_zero_pad |= *(char *)desc.buffer == '0'; + +- if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) +- oidhash_insert(&gitmodules_found, oid); ++ if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { ++ if (!S_ISLNK(mode)) ++ oidhash_insert(&gitmodules_found, oid); ++ else ++ retval += report(options, &item->object, ++ FSCK_MSG_GITMODULES_SYMLINK, ++ ".gitmodules is a symbolic link"); ++ } + + update_tree_entry(&desc); + +@@ -551,7 +558,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) + o_name = name; + } + +- retval = 0; + if (has_null_sha1) + retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1"); + if (has_full_path) +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +index fca7091..6ee7115 100755 +--- a/t/t7415-submodule-names.sh ++++ b/t/t7415-submodule-names.sh +@@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' ' + test_must_fail git -C dst.git index-pack --strict --stdin output && ++ grep gitmodulesSymlink output ++ ) ++' ++ + test_done +-- +2.14.4 + diff --git a/SOURCES/git-cve-2018-11235.patch b/SOURCES/git-cve-2018-11235.patch new file mode 100644 index 0000000..521abb6 --- /dev/null +++ b/SOURCES/git-cve-2018-11235.patch @@ -0,0 +1,1257 @@ +From 17f3a3cfacbc2a8c212062f3bbb1b03d668189c0 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Mon, 30 Apr 2018 03:25:25 -0400 +Subject: [PATCH 01/11] submodule-config: verify submodule names as paths + +Submodule "names" come from the untrusted .gitmodules file, +but we blindly append them to $GIT_DIR/modules to create our +on-disk repo paths. This means you can do bad things by +putting "../" into the name (among other things). + +Let's sanity-check these names to avoid building a path that +can be exploited. There are two main decisions: + + 1. What should the allowed syntax be? + + It's tempting to reuse verify_path(), since submodule + names typically come from in-repo paths. But there are + two reasons not to: + + a. It's technically more strict than what we need, as + we really care only about breaking out of the + $GIT_DIR/modules/ hierarchy. E.g., having a + submodule named "foo/.git" isn't actually + dangerous, and it's possible that somebody has + manually given such a funny name. + + b. Since we'll eventually use this checking logic in + fsck to prevent downstream repositories, it should + be consistent across platforms. Because + verify_path() relies on is_dir_sep(), it wouldn't + block "foo\..\bar" on a non-Windows machine. + + 2. Where should we enforce it? These days most of the + .gitmodules reads go through submodule-config.c, so + I've put it there in the reading step. That should + cover all of the C code. + + We also construct the name for "git submodule add" + inside the git-submodule.sh script. This is probably + not a big deal for security since the name is coming + from the user anyway, but it would be polite to remind + them if the name they pick is invalid (and we need to + expose the name-checker to the shell anyway for our + test scripts). + + This patch issues a warning when reading .gitmodules + and just ignores the related config entry completely. + This will generally end up producing a sensible error, + as it works the same as a .gitmodules file which is + missing a submodule entry (so "submodule update" will + barf, but "git clone --recurse-submodules" will print + an error but not abort the clone. + + There is one minor oddity, which is that we print the + warning once per malformed config key (since that's how + the config subsystem gives us the entries). So in the + new test, for example, the user would see three + warnings. That's OK, since the intent is that this case + should never come up outside of malicious repositories + (and then it might even benefit the user to see the + message multiple times). + +Credit for finding this vulnerability and the proof of +concept from which the test script was adapted goes to +Etienne Stalmans. + +Signed-off-by: Jeff King +--- + builtin/submodule--helper.c | 27 +++++++++++++++- + git-submodule.sh | 5 +++ + submodule-config.c | 31 ++++++++++++++++++ + submodule-config.h | 8 +++++ + t/t7415-submodule-names.sh | 76 +++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 146 insertions(+), 1 deletion(-) + create mode 100755 t/t7415-submodule-names.sh + +diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c +index 926d205..5883e6c 100644 +--- a/builtin/submodule--helper.c ++++ b/builtin/submodule--helper.c +@@ -842,6 +842,30 @@ static int resolve_relative_path(int argc, const char **argv, const char *prefix + return 0; + } + ++/* ++ * Exit non-zero if any of the submodule names given on the command line is ++ * invalid. If no names are given, filter stdin to print only valid names ++ * (which is primarily intended for testing). ++ */ ++static int check_name(int argc, const char **argv, const char *prefix) ++{ ++ if (argc > 1) { ++ while (*++argv) { ++ if (check_submodule_name(*argv) < 0) ++ return 1; ++ } ++ } else { ++ struct strbuf buf = STRBUF_INIT; ++ while (strbuf_getline(&buf, stdin) != EOF) { ++ if (!check_submodule_name(buf.buf)) ++ printf("%s\n", buf.buf); ++ } ++ strbuf_release(&buf); ++ } ++ return 0; ++} ++ ++ + struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); +@@ -855,7 +879,8 @@ static struct cmd_struct commands[] = { + {"relative-path", resolve_relative_path}, + {"resolve-relative-url", resolve_relative_url}, + {"resolve-relative-url-test", resolve_relative_url_test}, +- {"init", module_init} ++ {"init", module_init}, ++ {"check-name", check_name} + }; + + int cmd_submodule__helper(int argc, const char **argv, const char *prefix) +diff --git a/git-submodule.sh b/git-submodule.sh +index 78fdac9..d82da42 100755 +--- a/git-submodule.sh ++++ b/git-submodule.sh +@@ -225,6 +225,11 @@ Use -f if you really want to add it." >&2 + sm_name="$sm_path" + fi + ++ if ! git submodule--helper check-name "$sm_name" ++ then ++ die "$(eval_gettext "'$sm_name' is not a valid submodule name")" ++ fi ++ + # perhaps the path exists and is already a git repo, else clone it + if test -e "$sm_path" + then +diff --git a/submodule-config.c b/submodule-config.c +index 93dd364..dbba74a 100644 +--- a/submodule-config.c ++++ b/submodule-config.c +@@ -162,6 +162,31 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache, + return NULL; + } + ++int check_submodule_name(const char *name) ++{ ++ /* Disallow empty names */ ++ if (!*name) ++ return -1; ++ ++ /* ++ * Look for '..' as a path component. Check both '/' and '\\' as ++ * separators rather than is_dir_sep(), because we want the name rules ++ * to be consistent across platforms. ++ */ ++ goto in_component; /* always start inside component */ ++ while (*name) { ++ char c = *name++; ++ if (c == '/' || c == '\\') { ++in_component: ++ if (name[0] == '.' && name[1] == '.' && ++ (!name[2] || name[2] == '/' || name[2] == '\\')) ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ + static int name_and_item_from_var(const char *var, struct strbuf *name, + struct strbuf *item) + { +@@ -173,6 +198,12 @@ static int name_and_item_from_var(const char *var, struct strbuf *name, + return 0; + + strbuf_add(name, subsection, subsection_len); ++ if (check_submodule_name(name->buf) < 0) { ++ warning(_("ignoring suspicious submodule name: %s"), name->buf); ++ strbuf_release(name); ++ return 0; ++ } ++ + strbuf_addstr(item, key); + + return 1; +diff --git a/submodule-config.h b/submodule-config.h +index e4857f5..6ed7a3c 100644 +--- a/submodule-config.h ++++ b/submodule-config.h +@@ -29,4 +29,12 @@ const struct submodule *submodule_from_path(const unsigned char *commit_sha1, + const char *path); + void submodule_free(void); + ++/* ++ * Returns 0 if the name is syntactically acceptable as a submodule "name" ++ * (e.g., that may be found in the subsection of a .gitmodules file) and -1 ++ * otherwise. ++ */ ++int check_submodule_name(const char *name); ++ ++ + #endif /* SUBMODULE_CONFIG_H */ +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +new file mode 100755 +index 0000000..75fa071 +--- /dev/null ++++ b/t/t7415-submodule-names.sh +@@ -0,0 +1,76 @@ ++#!/bin/sh ++ ++test_description='check handling of .. in submodule names ++ ++Exercise the name-checking function on a variety of names, and then give a ++real-world setup that confirms we catch this in practice. ++' ++. ./test-lib.sh ++ ++test_expect_success 'check names' ' ++ cat >expect <<-\EOF && ++ valid ++ valid/with/paths ++ EOF ++ ++ git submodule--helper check-name >actual <<-\EOF && ++ valid ++ valid/with/paths ++ ++ ../foo ++ /../foo ++ ..\foo ++ \..\foo ++ foo/.. ++ foo/../ ++ foo\.. ++ foo\..\ ++ foo/../bar ++ EOF ++ ++ test_cmp expect actual ++' ++ ++test_expect_success 'create innocent subrepo' ' ++ git init innocent && ++ git -C innocent commit --allow-empty -m foo ++' ++ ++test_expect_success 'submodule add refuses invalid names' ' ++ test_must_fail \ ++ git submodule add --name ../../modules/evil "$PWD/innocent" evil ++' ++ ++test_expect_success 'add evil submodule' ' ++ git submodule add "$PWD/innocent" evil && ++ ++ mkdir modules && ++ cp -r .git/modules/evil modules && ++ write_script modules/evil/hooks/post-checkout <<-\EOF && ++ echo >&2 "RUNNING POST CHECKOUT" ++ EOF ++ ++ git config -f .gitmodules submodule.evil.update checkout && ++ git config -f .gitmodules --rename-section \ ++ submodule.evil submodule.../../modules/evil && ++ git add modules && ++ git commit -am evil ++' ++ ++# This step seems like it shouldn't be necessary, since the payload is ++# contained entirely in the evil submodule. But due to the vagaries of the ++# submodule code, checking out the evil module will fail unless ".git/modules" ++# exists. Adding another submodule (with a name that sorts before "evil") is an ++# easy way to make sure this is the case in the victim clone. ++test_expect_success 'add other submodule' ' ++ git submodule add "$PWD/innocent" another-module && ++ git add another-module && ++ git commit -am another ++' ++ ++test_expect_success 'clone evil superproject' ' ++ git clone --recurse-submodules . victim >output 2>&1 && ++ ! grep "RUNNING POST CHECKOUT" output ++' ++ ++test_done +-- +2.14.4 + + +From 42410051db39f2009980ba293ff0f4f1755df06a Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Sun, 13 May 2018 12:09:42 -0400 +Subject: [PATCH 02/11] is_ntfs_dotgit: use a size_t for traversing string + +We walk through the "name" string using an int, which can +wrap to a negative value and cause us to read random memory +before our array (e.g., by creating a tree with a name >2GB, +since "int" is still 32 bits even on most 64-bit platforms). +Worse, this is easy to trigger during the fsck_tree() check, +which is supposed to be protecting us from malicious +garbage. + +Note one bit of trickiness in the existing code: we +sometimes assign -1 to "len" at the end of the loop, and +then rely on the "len++" in the for-loop's increment to take +it back to 0. This is still legal with a size_t, since +assigning -1 will turn into SIZE_MAX, which then wraps +around to 0 on increment. + +Signed-off-by: Jeff King +--- + path.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/path.c b/path.c +index 0778ff0..3f41416 100644 +--- a/path.c ++++ b/path.c +@@ -1205,7 +1205,7 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) + + int is_ntfs_dotgit(const char *name) + { +- int len; ++ size_t len; + + for (len = 0; ; len++) + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { +-- +2.14.4 + + +From e63ce01d9a387c60754349508352f3bca086bbec Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Wed, 2 May 2018 15:23:45 -0400 +Subject: [PATCH 03/11] is_hfs_dotgit: match other .git files + +Both verify_path() and fsck match ".git", ".GIT", and other +variants specific to HFS+. Let's allow matching other +special files like ".gitmodules", which we'll later use to +enforce extra restrictions via verify_path() and fsck. + +Signed-off-by: Jeff King +--- + utf8.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++------------ + utf8.h | 5 +++++ + 2 files changed, 51 insertions(+), 12 deletions(-) + +diff --git a/utf8.c b/utf8.c +index 00e10c8..0d406b1 100644 +--- a/utf8.c ++++ b/utf8.c +@@ -605,28 +605,33 @@ static ucs_char_t next_hfs_char(const char **in) + } + } + +-int is_hfs_dotgit(const char *path) ++static int is_hfs_dot_generic(const char *path, ++ const char *needle, size_t needle_len) + { + ucs_char_t c; + + c = next_hfs_char(&path); + if (c != '.') + return 0; +- c = next_hfs_char(&path); + + /* + * there's a great deal of other case-folding that occurs +- * in HFS+, but this is enough to catch anything that will +- * convert to ".git" ++ * in HFS+, but this is enough to catch our fairly vanilla ++ * hard-coded needles. + */ +- if (c != 'g' && c != 'G') +- return 0; +- c = next_hfs_char(&path); +- if (c != 'i' && c != 'I') +- return 0; +- c = next_hfs_char(&path); +- if (c != 't' && c != 'T') +- return 0; ++ for (; needle_len > 0; needle++, needle_len--) { ++ c = next_hfs_char(&path); ++ ++ /* ++ * We know our needles contain only ASCII, so we clamp here to ++ * make the results of tolower() sane. ++ */ ++ if (c > 127) ++ return 0; ++ if (tolower(c) != *needle) ++ return 0; ++ } ++ + c = next_hfs_char(&path); + if (c && !is_dir_sep(c)) + return 0; +@@ -634,6 +639,35 @@ int is_hfs_dotgit(const char *path) + return 1; + } + ++/* ++ * Inline wrapper to make sure the compiler resolves strlen() on literals at ++ * compile time. ++ */ ++static inline int is_hfs_dot_str(const char *path, const char *needle) ++{ ++ return is_hfs_dot_generic(path, needle, strlen(needle)); ++} ++ ++int is_hfs_dotgit(const char *path) ++{ ++ return is_hfs_dot_str(path, "git"); ++} ++ ++int is_hfs_dotgitmodules(const char *path) ++{ ++ return is_hfs_dot_str(path, "gitmodules"); ++} ++ ++int is_hfs_dotgitignore(const char *path) ++{ ++ return is_hfs_dot_str(path, "gitignore"); ++} ++ ++int is_hfs_dotgitattributes(const char *path) ++{ ++ return is_hfs_dot_str(path, "gitattributes"); ++} ++ + const char utf8_bom[] = "\357\273\277"; + + int skip_utf8_bom(char **text, size_t len) +diff --git a/utf8.h b/utf8.h +index 6bbcf31..da19b43 100644 +--- a/utf8.h ++++ b/utf8.h +@@ -52,8 +52,13 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); + * The path should be NUL-terminated, but we will match variants of both ".git\0" + * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck + * and verify_path(). ++ * ++ * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo". + */ + int is_hfs_dotgit(const char *path); ++int is_hfs_dotgitmodules(const char *path); ++int is_hfs_dotgitignore(const char *path); ++int is_hfs_dotgitattributes(const char *path); + + typedef enum { + ALIGN_LEFT, +-- +2.14.4 + + +From c914f82f17ce42ea03ae1754f263c600e4faa7f3 Mon Sep 17 00:00:00 2001 +From: Johannes Schindelin +Date: Fri, 11 May 2018 16:03:54 +0200 +Subject: [PATCH 04/11] is_ntfs_dotgit: match other .git files + +When we started to catch NTFS short names that clash with .git, we only +looked for GIT~1. This is sufficient because we only ever clone into an +empty directory, so .git is guaranteed to be the first subdirectory or +file in that directory. + +However, even with a fresh clone, .gitmodules is *not* necessarily the +first file to be written that would want the NTFS short name GITMOD~1: a +malicious repository can add .gitmodul0000 and friends, which sorts +before `.gitmodules` and is therefore checked out *first*. For that +reason, we have to test not only for ~1 short names, but for others, +too. + +It's hard to just adapt the existing checks in is_ntfs_dotgit(): since +Windows 2000 (i.e., in all Windows versions still supported by Git), +NTFS short names are only generated in the ~ form up to +number 4. After that, a *different* prefix is used, calculated from the +long file name using an undocumented, but stable algorithm. + +For example, the short name of .gitmodules would be GITMOD~1, but if it +is taken, and all of ~2, ~3 and ~4 are taken, too, the short name +GI7EBA~1 will be used. From there, collisions are handled by +incrementing the number, shortening the prefix as needed (until ~9999999 +is reached, in which case NTFS will not allow the file to be created). + +We'd also want to handle .gitignore and .gitattributes, which suffer +from a similar problem, using the fall-back short names GI250A~1 and +GI7D29~1, respectively. + +To accommodate for that, we could reimplement the hashing algorithm, but +it is just safer and simpler to provide the known prefixes. This +algorithm has been reverse-engineered and described at +https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but +still available via https://web.archive.org/. + +These can be recomputed by running the following Perl script: + +-- snip -- +use warnings; +use strict; + +sub compute_short_name_hash ($) { + my $checksum = 0; + foreach (split('', $_[0])) { + $checksum = ($checksum * 0x25 + ord($_)) & 0xffff; + } + + $checksum = ($checksum * 314159269) & 0xffffffff; + $checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000); + $checksum -= (($checksum * 1152921497) >> 60) * 1000000007; + + return scalar reverse sprintf("%x", $checksum & 0xffff); +} + +print compute_short_name_hash($ARGV[0]); +-- snap -- + +E.g., running that with the argument ".gitignore" will +result in "250a" (which then becomes "gi250a" in the code). + +Signed-off-by: Johannes Schindelin +Signed-off-by: Jeff King +--- + cache.h | 10 +++++++- + path.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 93 insertions(+), 1 deletion(-) + +diff --git a/cache.h b/cache.h +index 17afd25..fbbe933 100644 +--- a/cache.h ++++ b/cache.h +@@ -1032,7 +1032,15 @@ int normalize_path_copy(char *dst, const char *src); + int longest_ancestor_length(const char *path, struct string_list *prefixes); + char *strip_path_suffix(const char *path, const char *suffix); + int daemon_avoid_alias(const char *path); +-extern int is_ntfs_dotgit(const char *name); ++ ++/* ++ * These functions match their is_hfs_dotgit() counterparts; see utf8.h for ++ * details. ++ */ ++int is_ntfs_dotgit(const char *name); ++int is_ntfs_dotgitmodules(const char *name); ++int is_ntfs_dotgitignore(const char *name); ++int is_ntfs_dotgitattributes(const char *name); + + /* + * Returns true iff "str" could be confused as a command-line option when +diff --git a/path.c b/path.c +index 3f41416..34d95a4 100644 +--- a/path.c ++++ b/path.c +@@ -1222,6 +1222,90 @@ int is_ntfs_dotgit(const char *name) + } + } + ++static int is_ntfs_dot_generic(const char *name, ++ const char *dotgit_name, ++ size_t len, ++ const char *dotgit_ntfs_shortname_prefix) ++{ ++ int saw_tilde; ++ size_t i; ++ ++ if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) { ++ i = len + 1; ++only_spaces_and_periods: ++ for (;;) { ++ char c = name[i++]; ++ if (!c) ++ return 1; ++ if (c != ' ' && c != '.') ++ return 0; ++ } ++ } ++ ++ /* ++ * Is it a regular NTFS short name, i.e. shortened to 6 characters, ++ * followed by ~1, ... ~4? ++ */ ++ if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' && ++ name[7] >= '1' && name[7] <= '4') { ++ i = 8; ++ goto only_spaces_and_periods; ++ } ++ ++ /* ++ * Is it a fall-back NTFS short name (for details, see ++ * https://en.wikipedia.org/wiki/8.3_filename? ++ */ ++ for (i = 0, saw_tilde = 0; i < 8; i++) ++ if (name[i] == '\0') ++ return 0; ++ else if (saw_tilde) { ++ if (name[i] < '0' || name[i] > '9') ++ return 0; ++ } else if (name[i] == '~') { ++ if (name[++i] < '1' || name[i] > '9') ++ return 0; ++ saw_tilde = 1; ++ } else if (i >= 6) ++ return 0; ++ else if (name[i] < 0) { ++ /* ++ * We know our needles contain only ASCII, so we clamp ++ * here to make the results of tolower() sane. ++ */ ++ return 0; ++ } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i]) ++ return 0; ++ ++ goto only_spaces_and_periods; ++} ++ ++/* ++ * Inline helper to make sure compiler resolves strlen() on literals at ++ * compile time. ++ */ ++static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name, ++ const char *dotgit_ntfs_shortname_prefix) ++{ ++ return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name), ++ dotgit_ntfs_shortname_prefix); ++} ++ ++int is_ntfs_dotgitmodules(const char *name) ++{ ++ return is_ntfs_dot_str(name, "gitmodules", "gi7eba"); ++} ++ ++int is_ntfs_dotgitignore(const char *name) ++{ ++ return is_ntfs_dot_str(name, "gitignore", "gi250a"); ++} ++ ++int is_ntfs_dotgitattributes(const char *name) ++{ ++ return is_ntfs_dot_str(name, "gitattributes", "gi7d29"); ++} ++ + int looks_like_command_line_option(const char *str) + { + return str && str[0] == '-'; +-- +2.14.4 + + +From 4c8e72db9864e150f163d1b9da7b0e4e15f49585 Mon Sep 17 00:00:00 2001 +From: Johannes Schindelin +Date: Sat, 12 May 2018 22:16:51 +0200 +Subject: [PATCH 05/11] is_{hfs,ntfs}_dotgitmodules: add tests + +This tests primarily for NTFS issues, but also adds one example of an +HFS+ issue. + +Thanks go to Congyi Wu for coming up with the list of examples where +NTFS would possibly equate the filename with `.gitmodules`. + +Signed-off-by: Johannes Schindelin +Signed-off-by: Jeff King +--- + t/helper/test-path-utils.c | 20 +++++++++++ + t/t0060-path-utils.sh | 86 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 106 insertions(+) + +diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c +index ba805b3..e75b146 100644 +--- a/t/helper/test-path-utils.c ++++ b/t/helper/test-path-utils.c +@@ -1,5 +1,6 @@ + #include "cache.h" + #include "string-list.h" ++#include "utf8.h" + + /* + * A "string_list_each_func_t" function that normalizes an entry from +@@ -156,6 +157,11 @@ static struct test_data dirname_data[] = { + { NULL, NULL } + }; + ++static int is_dotgitmodules(const char *path) ++{ ++ return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); ++} ++ + int main(int argc, char **argv) + { + if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { +@@ -256,6 +262,20 @@ int main(int argc, char **argv) + if (argc == 2 && !strcmp(argv[1], "dirname")) + return test_function(dirname_data, dirname, argv[1]); + ++ if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) { ++ int res = 0, expect = 1, i; ++ for (i = 2; i < argc; i++) ++ if (!strcmp("--not", argv[i])) ++ expect = !expect; ++ else if (expect != is_dotgitmodules(argv[i])) ++ res = error("'%s' is %s.gitmodules", argv[i], ++ expect ? "not " : ""); ++ else ++ fprintf(stderr, "ok: '%s' is %s.gitmodules\n", ++ argv[i], expect ? "" : "not "); ++ return !!res; ++ } ++ + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], + argv[1] ? argv[1] : "(there was none)"); + return 1; +diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh +index bf2deee..a1c9bbd 100755 +--- a/t/t0060-path-utils.sh ++++ b/t/t0060-path-utils.sh +@@ -344,4 +344,90 @@ test_submodule_relative_url "(null)" "ssh://hostname:22/repo" "../subrepo" "ssh: + test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo" + test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo" + ++test_expect_success 'match .gitmodules' ' ++ test-path-utils is_dotgitmodules \ ++ .gitmodules \ ++ \ ++ .git${u200c}modules \ ++ \ ++ .Gitmodules \ ++ .gitmoduleS \ ++ \ ++ ".gitmodules " \ ++ ".gitmodules." \ ++ ".gitmodules " \ ++ ".gitmodules. " \ ++ ".gitmodules ." \ ++ ".gitmodules.." \ ++ ".gitmodules " \ ++ ".gitmodules. " \ ++ ".gitmodules . " \ ++ ".gitmodules ." \ ++ \ ++ ".Gitmodules " \ ++ ".Gitmodules." \ ++ ".Gitmodules " \ ++ ".Gitmodules. " \ ++ ".Gitmodules ." \ ++ ".Gitmodules.." \ ++ ".Gitmodules " \ ++ ".Gitmodules. " \ ++ ".Gitmodules . " \ ++ ".Gitmodules ." \ ++ \ ++ GITMOD~1 \ ++ gitmod~1 \ ++ GITMOD~2 \ ++ gitmod~3 \ ++ GITMOD~4 \ ++ \ ++ "GITMOD~1 " \ ++ "gitmod~2." \ ++ "GITMOD~3 " \ ++ "gitmod~4. " \ ++ "GITMOD~1 ." \ ++ "gitmod~2 " \ ++ "GITMOD~3. " \ ++ "gitmod~4 . " \ ++ \ ++ GI7EBA~1 \ ++ gi7eba~9 \ ++ \ ++ GI7EB~10 \ ++ GI7EB~11 \ ++ GI7EB~99 \ ++ GI7EB~10 \ ++ GI7E~100 \ ++ GI7E~101 \ ++ GI7E~999 \ ++ ~1000000 \ ++ ~9999999 \ ++ \ ++ --not \ ++ ".gitmodules x" \ ++ ".gitmodules .x" \ ++ \ ++ " .gitmodules" \ ++ \ ++ ..gitmodules \ ++ \ ++ gitmodules \ ++ \ ++ .gitmodule \ ++ \ ++ ".gitmodules x " \ ++ ".gitmodules .x" \ ++ \ ++ GI7EBA~ \ ++ GI7EBA~0 \ ++ GI7EBA~~1 \ ++ GI7EBA~X \ ++ Gx7EBA~1 \ ++ GI7EBX~1 \ ++ \ ++ GI7EB~1 \ ++ GI7EB~01 \ ++ GI7EB~1X ++' ++ + test_done +-- +2.14.4 + + +From f57dd60d921e31b78e42ad9f91c333ab19138deb Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Sun, 13 May 2018 12:57:14 -0400 +Subject: [PATCH 06/11] skip_prefix: add case-insensitive variant + +We have the convenient skip_prefix() helper, but if you want +to do case-insensitive matching, you're stuck doing it by +hand. We could add an extra parameter to the function to +let callers ask for this, but the function is small and +somewhat performance-critical. Let's just re-implement it +for the case-insensitive version. + +Signed-off-by: Jeff King +--- + git-compat-util.h | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/git-compat-util.h b/git-compat-util.h +index 49d4029..7ad6e59 100644 +--- a/git-compat-util.h ++++ b/git-compat-util.h +@@ -909,6 +909,23 @@ static inline int sane_iscase(int x, int is_lower) + return (x & 0x20) == 0; + } + ++/* ++ * Like skip_prefix, but compare case-insensitively. Note that the comparison ++ * is done via tolower(), so it is strictly ASCII (no multi-byte characters or ++ * locale-specific conversions). ++ */ ++static inline int skip_iprefix(const char *str, const char *prefix, ++ const char **out) ++{ ++ do { ++ if (!*prefix) { ++ *out = str; ++ return 1; ++ } ++ } while (tolower(*str++) == tolower(*prefix++)); ++ return 0; ++} ++ + static inline int strtoul_ui(char const *s, int base, unsigned int *result) + { + unsigned long ul; +-- +2.14.4 + + +From da0dec01dc5fb1d43732ad997d30bed853025a6c Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Sun, 13 May 2018 13:00:23 -0400 +Subject: [PATCH 07/11] verify_path: drop clever fallthrough + +We check ".git" and ".." in the same switch statement, and +fall through the cases to share the end-of-component check. +While this saves us a line or two, it makes modifying the +function much harder. Let's just write it out. + +Signed-off-by: Jeff King +--- + read-cache.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/read-cache.c b/read-cache.c +index db27766..b8a2a06 100644 +--- a/read-cache.c ++++ b/read-cache.c +@@ -787,8 +787,7 @@ static int verify_dotfile(const char *rest) + + switch (*rest) { + /* +- * ".git" followed by NUL or slash is bad. This +- * shares the path end test with the ".." case. ++ * ".git" followed by NUL or slash is bad. + */ + case 'g': + case 'G': +@@ -796,8 +795,9 @@ static int verify_dotfile(const char *rest) + break; + if (rest[2] != 't' && rest[2] != 'T') + break; +- rest += 2; +- /* fallthrough */ ++ if (rest[3] == '\0' || is_dir_sep(rest[3])) ++ return 0; ++ break; + case '.': + if (rest[1] == '\0' || is_dir_sep(rest[1])) + return 0; +-- +2.14.4 + + +From 392a029f659dc735a445ca16349b98c0c63bcee6 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Tue, 15 May 2018 09:56:50 -0400 +Subject: [PATCH 08/11] verify_dotfile: mention case-insensitivity in comment + +We're more restrictive than we need to be in matching ".GIT" +on case-sensitive filesystems; let's make a note that this +is intentional. + +Signed-off-by: Jeff King +--- + read-cache.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/read-cache.c b/read-cache.c +index b8a2a06..88ed49c 100644 +--- a/read-cache.c ++++ b/read-cache.c +@@ -787,7 +787,10 @@ static int verify_dotfile(const char *rest) + + switch (*rest) { + /* +- * ".git" followed by NUL or slash is bad. ++ * ".git" followed by NUL or slash is bad. Note that we match ++ * case-insensitively here, even if ignore_case is not set. ++ * This outlaws ".GIT" everywhere out of an abundance of caution, ++ * since there's really no good reason to allow it. + */ + case 'g': + case 'G': +-- +2.14.4 + + +From 95a1ce8bd49ad78d9266af969fa6a9b5476ad118 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Mon, 14 May 2018 11:00:56 -0400 +Subject: [PATCH 09/11] update-index: stat updated files earlier + +In the update_one(), we check verify_path() on the proposed +path before doing anything else. In preparation for having +verify_path() look at the file mode, let's stat the file +earlier, so we can check the mode accurately. + +This is made a bit trickier by the fact that this function +only does an lstat in a few code paths (the ones that flow +down through process_path()). So we can speculatively do the +lstat() here and pass the results down, and just use a dummy +mode for cases where we won't actually be updating the index +from the filesystem. + +Signed-off-by: Jeff King +--- + builtin/update-index.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/builtin/update-index.c b/builtin/update-index.c +index b8b8522..0c5be84 100644 +--- a/builtin/update-index.c ++++ b/builtin/update-index.c +@@ -354,10 +354,9 @@ static int process_directory(const char *path, int len, struct stat *st) + return error("%s: is a directory - add files inside instead", path); + } + +-static int process_path(const char *path) ++static int process_path(const char *path, struct stat *st, int stat_errno) + { + int pos, len; +- struct stat st; + const struct cache_entry *ce; + + len = strlen(path); +@@ -381,13 +380,13 @@ static int process_path(const char *path) + * First things first: get the stat information, to decide + * what to do about the pathname! + */ +- if (lstat(path, &st) < 0) +- return process_lstat_error(path, errno); ++ if (stat_errno) ++ return process_lstat_error(path, stat_errno); + +- if (S_ISDIR(st.st_mode)) +- return process_directory(path, len, &st); ++ if (S_ISDIR(st->st_mode)) ++ return process_directory(path, len, st); + +- return add_one_path(ce, path, len, &st); ++ return add_one_path(ce, path, len, st); + } + + static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, +@@ -451,6 +450,16 @@ static void chmod_path(int flip, const char *path) + + static void update_one(const char *path) + { ++ int stat_errno = 0; ++ struct stat st; ++ ++ if (mark_valid_only || mark_skip_worktree_only || force_remove) ++ st.st_mode = 0; ++ else if (lstat(path, &st) < 0) { ++ st.st_mode = 0; ++ stat_errno = errno; ++ } /* else stat is valid */ ++ + if (!verify_path(path)) { + fprintf(stderr, "Ignoring path %s\n", path); + return; +@@ -472,7 +481,7 @@ static void update_one(const char *path) + report("remove '%s'", path); + return; + } +- if (process_path(path)) ++ if (process_path(path, &st, stat_errno)) + die("Unable to process path %s", path); + report("add '%s'", path); + } +-- +2.14.4 + + +From 1d5af177a0853d286069176b11f661ed0e069faa Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Fri, 4 May 2018 20:03:35 -0400 +Subject: [PATCH 10/11] verify_path: disallow symlinks in .gitmodules + +There are a few reasons it's not a good idea to make +.gitmodules a symlink, including: + + 1. It won't be portable to systems without symlinks. + + 2. It may behave inconsistently, since Git may look at + this file in the index or a tree without bothering to + resolve any symbolic links. We don't do this _yet_, but + the config infrastructure is there and it's planned for + the future. + +With some clever code, we could make (2) work. And some +people may not care about (1) if they only work on one +platform. But there are a few security reasons to simply +disallow it: + + a. A symlinked .gitmodules file may circumvent any fsck + checks of the content. + + b. Git may read and write from the on-disk file without + sanity checking the symlink target. So for example, if + you link ".gitmodules" to "../oops" and run "git + submodule add", we'll write to the file "oops" outside + the repository. + +Again, both of those are problems that _could_ be solved +with sufficient code, but given the complications in (1) and +(2), we're better off just outlawing it explicitly. + +Note the slightly tricky call to verify_path() in +update-index's update_one(). There we may not have a mode if +we're not updating from the filesystem (e.g., we might just +be removing the file). Passing "0" as the mode there works +fine; since it's not a symlink, we'll just skip the extra +checks. + +Signed-off-by: Jeff King +--- + builtin/apply.c | 4 ++-- + builtin/update-index.c | 6 +++--- + cache.h | 2 +- + read-cache.c | 40 +++++++++++++++++++++++++++++++--------- + 4 files changed, 37 insertions(+), 15 deletions(-) + +diff --git a/builtin/apply.c b/builtin/apply.c +index c770d7d..8602b98 100644 +--- a/builtin/apply.c ++++ b/builtin/apply.c +@@ -3683,9 +3683,9 @@ static void die_on_unsafe_path(struct patch *patch) + if (!patch->is_delete) + new_name = patch->new_name; + +- if (old_name && !verify_path(old_name)) ++ if (old_name && !verify_path(old_name, patch->old_mode)) + die(_("invalid path '%s'"), old_name); +- if (new_name && !verify_path(new_name)) ++ if (new_name && !verify_path(new_name, patch->new_mode)) + die(_("invalid path '%s'"), new_name); + } + +diff --git a/builtin/update-index.c b/builtin/update-index.c +index 0c5be84..c54bada 100644 +--- a/builtin/update-index.c ++++ b/builtin/update-index.c +@@ -395,7 +395,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, + int size, len, option; + struct cache_entry *ce; + +- if (!verify_path(path)) ++ if (!verify_path(path, mode)) + return error("Invalid path '%s'", path); + + len = strlen(path); +@@ -460,7 +460,7 @@ static void update_one(const char *path) + stat_errno = errno; + } /* else stat is valid */ + +- if (!verify_path(path)) { ++ if (!verify_path(path, st.st_mode)) { + fprintf(stderr, "Ignoring path %s\n", path); + return; + } +@@ -550,7 +550,7 @@ static void read_index_info(int nul_term_line) + path_name = uq.buf; + } + +- if (!verify_path(path_name)) { ++ if (!verify_path(path_name, mode)) { + fprintf(stderr, "Ignoring path %s\n", path_name); + continue; + } +diff --git a/cache.h b/cache.h +index fbbe933..61d43c6 100644 +--- a/cache.h ++++ b/cache.h +@@ -560,7 +560,7 @@ extern int read_index_unmerged(struct index_state *); + extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); + extern int discard_index(struct index_state *); + extern int unmerged_index(const struct index_state *); +-extern int verify_path(const char *path); ++extern int verify_path(const char *path, unsigned mode); + extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); + extern void adjust_dirname_case(struct index_state *istate, char *name); + extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); +diff --git a/read-cache.c b/read-cache.c +index 88ed49c..d6dedd5 100644 +--- a/read-cache.c ++++ b/read-cache.c +@@ -738,7 +738,7 @@ struct cache_entry *make_cache_entry(unsigned int mode, + int size, len; + struct cache_entry *ce, *ret; + +- if (!verify_path(path)) { ++ if (!verify_path(path, mode)) { + error("Invalid path '%s'", path); + return NULL; + } +@@ -773,7 +773,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b) + * Also, we don't want double slashes or slashes at the + * end that can make pathnames ambiguous. + */ +-static int verify_dotfile(const char *rest) ++static int verify_dotfile(const char *rest, unsigned mode) + { + /* + * The first character was '.', but that +@@ -791,6 +791,9 @@ static int verify_dotfile(const char *rest) + * case-insensitively here, even if ignore_case is not set. + * This outlaws ".GIT" everywhere out of an abundance of caution, + * since there's really no good reason to allow it. ++ * ++ * Once we've seen ".git", we can also find ".gitmodules", etc (also ++ * case-insensitively). + */ + case 'g': + case 'G': +@@ -800,6 +803,12 @@ static int verify_dotfile(const char *rest) + break; + if (rest[3] == '\0' || is_dir_sep(rest[3])) + return 0; ++ if (S_ISLNK(mode)) { ++ rest += 3; ++ if (skip_iprefix(rest, "modules", &rest) && ++ (*rest == '\0' || is_dir_sep(*rest))) ++ return 0; ++ } + break; + case '.': + if (rest[1] == '\0' || is_dir_sep(rest[1])) +@@ -808,7 +817,7 @@ static int verify_dotfile(const char *rest) + return 1; + } + +-int verify_path(const char *path) ++int verify_path(const char *path, unsigned mode) + { + char c; + +@@ -821,12 +830,25 @@ int verify_path(const char *path) + return 1; + if (is_dir_sep(c)) { + inside: +- if (protect_hfs && is_hfs_dotgit(path)) +- return 0; +- if (protect_ntfs && is_ntfs_dotgit(path)) +- return 0; ++ if (protect_hfs) { ++ if (is_hfs_dotgit(path)) ++ return 0; ++ if (S_ISLNK(mode)) { ++ if (is_hfs_dotgitmodules(path)) ++ return 0; ++ } ++ } ++ if (protect_ntfs) { ++ if (is_ntfs_dotgit(path)) ++ return 0; ++ if (S_ISLNK(mode)) { ++ if (is_ntfs_dotgitmodules(path)) ++ return 0; ++ } ++ } ++ + c = *path++; +- if ((c == '.' && !verify_dotfile(path)) || ++ if ((c == '.' && !verify_dotfile(path, mode)) || + is_dir_sep(c) || c == '\0') + return 0; + } +@@ -1008,7 +1030,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e + + if (!ok_to_add) + return -1; +- if (!verify_path(ce->name)) ++ if (!verify_path(ce->name, ce->ce_mode)) + return error("Invalid path '%s'", ce->name); + + if (!skip_df_check && +-- +2.14.4 + + +From ab1977d8fdcf7b259e1a4ede2f99423dca97ea41 Mon Sep 17 00:00:00 2001 +From: Sebastian Kisela +Date: Wed, 20 Jun 2018 15:03:18 +0200 +Subject: [PATCH 11/11] submodules: make t7415 test accept fail exit code + +The command that performs the cloning fails with non zero exit +code, but the exit code itself is not important. +The command may fail but the general output must be as expected. +--- + t/t7415-submodule-names.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh +index 75fa071..b30fa33 100755 +--- a/t/t7415-submodule-names.sh ++++ b/t/t7415-submodule-names.sh +@@ -69,7 +69,7 @@ test_expect_success 'add other submodule' ' + ' + + test_expect_success 'clone evil superproject' ' +- git clone --recurse-submodules . victim >output 2>&1 && ++ test_might_fail git clone --recurse-submodules . victim >output 2>&1 && + ! grep "RUNNING POST CHECKOUT" output + ' + +-- +2.14.4 + diff --git a/SPECS/git.spec b/SPECS/git.spec index ea2da9a..6a244fb 100644 --- a/SPECS/git.spec +++ b/SPECS/git.spec @@ -71,7 +71,7 @@ Name: %{?scl_prefix}git Version: 2.9.3 -Release: 3%{?dist} +Release: 4%{?dist} Summary: Fast Version Control System License: GPLv2 Group: Development/Tools @@ -119,6 +119,11 @@ Patch6: 0002-resolve_ref_unsafe-limit-the-number-of-stat_ref-retr.patch Patch7: 0003-Fix-CVE-2017-8386.patch Patch8: git-cve-2017-1000117.patch +# Also contains fix for CVE-2018-11233 in upstream commit 42410051db39f2009980ba293ff0f4f1755df06a +# which is part of the patch series git-cve-2018-11235.patch +Patch9: git-cve-2018-11235.patch +Patch10: git-cve-2018-11235-fsck.patch + BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) %if ! %{use_prebuilt_docs} && ! 0%{?_without_docs} @@ -460,6 +465,10 @@ rm -rf "$gpghome" # Cleanup tmp gpg home dir %patch6 -p1 %patch7 -p1 %patch8 -p1 +%patch9 -p1 +%patch10 -p1 + +chmod a+x t/t0011-hashmap.sh t/t1307-config-blob.sh t/t4139-apply-escape.sh t/t7415-submodule-names.sh %if %{use_prebuilt_docs} mkdir -p prebuilt_docs/{html,man} @@ -722,6 +731,12 @@ grep -E "$not_core_re" bin-man-doc-files > bin-man-doc-git-files # check Name/GenericName that contains rh-GitXX (with current XX, e.g. 29) grep -qi "^Name=%{scl}" %{buildroot}%{appdesktopdir}/*git-gui.desktop grep -qi "^GenericName=%{scl}" %{buildroot}%{appdesktopdir}/*git-gui.desktop + + # Run tests with enabled scl, so that httpd24-libcurl dependencies + # are searched for at the right place (/opt/rh/httpd24) + %{?scl:scl enable %{scl_httpd} - << "EOF"} + make test + %{?scl:EOF} %endif %clean @@ -870,6 +885,11 @@ rm -rf %{buildroot} # No files for you! %changelog +* Thu Jun 21 2018 Sebastian Kisela - 2.9.3-4 +- Backport the fix for CVE-2018-11235 and CVE-2018-11233. +- Thanks to Jonathan Nieder for backporting to 2.11.x + and to Steve Beattie for backporting to 2.7.x. + * Fri Aug 11 2017 Petr Stodulka - 2.9.3-3 - prevent command injection via malicious ssh URLs - dissalow repo names beginning with dash