Blob Blame History Raw
From ff9e25d40ff1f2a2942eac7b3dd108ca739f362a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 13 Jan 2017 12:58:16 -0500
Subject: [PATCH 01/11] sha1_file: add read_loose_object() function

commit f6371f9210418f1beabc85b097e2a3470aeeb54d upstream.

It's surprisingly hard to ask the sha1_file code to open a
_specific_ incarnation of a loose object. Most of the
functions take a sha1, and loop over the various object
types (packed versus loose) and locations (local versus
alternates) at a low level.

However, some tools like fsck need to look at a specific
file. This patch gives them a function they can use to open
the loose object at a given path.

The implementation unfortunately ends up repeating bits of
related functions, but there's not a good way around it
without some major refactoring of the whole sha1_file stack.
We need to mmap the specific file, then partially read the
zlib stream to know whether we're streaming or not, and then
finally either stream it or copy the data to a buffer.

We can do that by assembling some of the more arcane
internal sha1_file functions, but we end up having to
essentially reimplement unpack_sha1_file(), along with the
streaming bits of check_sha1_signature().

Still, most of the ugliness is contained in the new
function, and the interface is clean enough that it may be
reusable (though it seems unlikely anything but git-fsck
would care about opening a specific file).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 cache.h     |  13 ++++++
 sha1_file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 61d43c6..fa53623 100644
--- a/cache.h
+++ b/cache.h
@@ -1113,6 +1113,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
 
 extern int has_sha1_pack(const unsigned char *sha1);
 
+/*
+ * Open the loose object at path, check its sha1, and return the contents,
+ * type, and size. If the object is a blob, then "contents" may return NULL,
+ * to allow streaming of large blobs.
+ *
+ * Returns 0 on success, negative on error (details may be written to stderr).
+ */
+int read_loose_object(const char *path,
+		      const unsigned char *expected_sha1,
+		      enum object_type *type,
+		      unsigned long *size,
+		      void **contents);
+
 /*
  * Return true iff we have an object named sha1, whether local or in
  * an alternate object database, and whether packed or loose.  This
diff --git a/sha1_file.c b/sha1_file.c
index cb571ac..6d29840 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1547,12 +1547,21 @@ static int open_sha1_file(const unsigned char *sha1)
 	return -1;
 }
 
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+/*
+ * Map the loose object at "path" if it is not NULL, or the path found by
+ * searching for a loose object named "sha1".
+ */
+static void *map_sha1_file_1(const char *path,
+			     const unsigned char *sha1,
+			     unsigned long *size)
 {
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1);
+	if (path)
+		fd = git_open_noatime(path);
+	else
+		fd = open_sha1_file(sha1);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1571,6 +1580,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 	return map;
 }
 
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+	return map_sha1_file_1(NULL, sha1, size);
+}
+
 unsigned long unpack_object_header_buffer(const unsigned char *buf,
 		unsigned long len, enum object_type *type, unsigned long *sizep)
 {
@@ -3644,3 +3658,117 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
 	}
 	return r ? r : pack_errors;
 }
+
+static int check_stream_sha1(git_zstream *stream,
+			     const char *hdr,
+			     unsigned long size,
+			     const char *path,
+			     const unsigned char *expected_sha1)
+{
+	git_SHA_CTX c;
+	unsigned char real_sha1[GIT_SHA1_RAWSZ];
+	unsigned char buf[4096];
+	unsigned long total_read;
+	int status = Z_OK;
+
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, hdr, stream->total_out);
+
+	/*
+	 * We already read some bytes into hdr, but the ones up to the NUL
+	 * do not count against the object's content size.
+	 */
+	total_read = stream->total_out - strlen(hdr) - 1;
+
+	/*
+	 * This size comparison must be "<=" to read the final zlib packets;
+	 * see the comment in unpack_sha1_rest for details.
+	 */
+	while (total_read <= size &&
+	       (status == Z_OK || status == Z_BUF_ERROR)) {
+		stream->next_out = buf;
+		stream->avail_out = sizeof(buf);
+		if (size - total_read < stream->avail_out)
+			stream->avail_out = size - total_read;
+		status = git_inflate(stream, Z_FINISH);
+		git_SHA1_Update(&c, buf, stream->next_out - buf);
+		total_read += stream->next_out - buf;
+	}
+	git_inflate_end(stream);
+
+	if (status != Z_STREAM_END) {
+		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
+		return -1;
+	}
+
+	git_SHA1_Final(real_sha1, &c);
+	if (hashcmp(expected_sha1, real_sha1)) {
+		error("sha1 mismatch for %s (expected %s)", path,
+		      sha1_to_hex(expected_sha1));
+		return -1;
+	}
+
+	return 0;
+}
+
+int read_loose_object(const char *path,
+		      const unsigned char *expected_sha1,
+		      enum object_type *type,
+		      unsigned long *size,
+		      void **contents)
+{
+	int ret = -1;
+	int fd = -1;
+	void *map = NULL;
+	unsigned long mapsize;
+	git_zstream stream;
+	char hdr[32];
+
+	*contents = NULL;
+
+	map = map_sha1_file_1(path, NULL, &mapsize);
+	if (!map) {
+		error_errno("unable to mmap %s", path);
+		goto out;
+	}
+
+	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
+		error("unable to unpack header of %s", path);
+		goto out;
+	}
+
+	*type = parse_sha1_header(hdr, size);
+	if (*type < 0) {
+		error("unable to parse header of %s", path);
+		git_inflate_end(&stream);
+		goto out;
+	}
+
+	if (*type == OBJ_BLOB) {
+		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
+			goto out;
+	} else {
+		*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
+		if (!*contents) {
+			error("unable to unpack contents of %s", path);
+			git_inflate_end(&stream);
+			goto out;
+		}
+		if (check_sha1_signature(expected_sha1, *contents,
+					 *size, typename(*type))) {
+			error("sha1 mismatch for %s (expected %s)", path,
+			      sha1_to_hex(expected_sha1));
+			free(*contents);
+			goto out;
+		}
+	}
+
+	ret = 0; /* everything checks out */
+
+out:
+	if (map)
+		munmap(map, mapsize);
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
-- 
2.14.4


From 65074f3eda3338445c7674b7d5b176844970b5de Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 13 Jan 2017 12:59:44 -0500
Subject: [PATCH 02/11] fsck: parse loose object paths directly

commit c68b489e56431cf27f7719913ab09ddc62f95912 upstream.

When we iterate over the list of loose objects to check, we
get the actual path of each object. But we then throw it
away and pass just the sha1 to fsck_sha1(), which will do a
fresh lookup. Usually it would find the same object, but it
may not if an object exists both as a loose and a packed
object. We may end up checking the packed object twice, and
never look at the loose one.

In practice this isn't too terrible, because if fsck doesn't
complain, it means you have at least one good copy. But
since the point of fsck is to look for corruption, we should
be thorough.

The new read_loose_object() interface can help us get the
data from disk, and then we replace parse_object() with
parse_object_buffer(). As a bonus, our error messages now
mention the path to a corrupted object, which should make it
easier to track down errors when they do happen.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/fsck.c  | 46 +++++++++++++++++++++++++++++++++-------------
 t/t1450-fsck.sh | 16 ++++++++++++++++
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 9923b10..150597c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -341,18 +341,6 @@ static int fsck_obj(struct object *obj)
 	return 0;
 }
 
-static int fsck_sha1(const unsigned char *sha1)
-{
-	struct object *obj = parse_object(sha1);
-	if (!obj) {
-		errors_found |= ERROR_OBJECT;
-		return error("%s: object corrupt or missing",
-			     sha1_to_hex(sha1));
-	}
-	obj->flags |= HAS_OBJ;
-	return fsck_obj(obj);
-}
-
 static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
 			   unsigned long size, void *buffer, int *eaten)
 {
@@ -459,9 +447,41 @@ static void get_default_heads(void)
 	}
 }
 
+static struct object *parse_loose_object(const unsigned char *sha1,
+					 const char *path)
+{
+	struct object *obj;
+	void *contents;
+	enum object_type type;
+	unsigned long size;
+	int eaten;
+
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
+		return NULL;
+
+	if (!contents && type != OBJ_BLOB)
+		die("BUG: read_loose_object streamed a non-blob");
+
+	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
+
+	if (!eaten)
+		free(contents);
+	return obj;
+}
+
 static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
 {
-	if (fsck_sha1(sha1))
+	struct object *obj = parse_loose_object(sha1, path);
+
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object corrupt or missing: %s",
+		      sha1_to_hex(sha1), path);
+		return 0; /* keep checking other objects */
+	}
+
+	obj->flags = HAS_OBJ;
+	if (fsck_obj(obj))
 		errors_found |= ERROR_OBJECT;
 	return 0;
 }
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 7ee8ea0..17c4c77 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -523,4 +523,20 @@ test_expect_success 'fsck --connectivity-only' '
 	)
 '
 
+test_expect_success 'fsck finds problems in duplicate loose objects' '
+	rm -rf broken-duplicate &&
+	git init broken-duplicate &&
+	(
+		cd broken-duplicate &&
+		test_commit duplicate &&
+		# no "-d" here, so we end up with duplicates
+		git repack &&
+		# now corrupt the loose copy
+		file=$(sha1_file "$(git rev-parse HEAD)") &&
+		rm "$file" &&
+		echo broken >"$file" &&
+		test_must_fail git fsck
+	)
+'
+
 test_done
-- 
2.14.4


From 6a26b1fcf92a6c825eb72c1a55399f38af5b7b79 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 16:37:09 -0400
Subject: [PATCH 03/11] index-pack: make fsck error message more specific

commit db5a58c1bda5b20169b9958af1e8b05ddd178b01 upstream.

If fsck reports an error, we say only "Error in object".
This isn't quite as bad as it might seem, since the fsck
code would have dumped some errors to stderr already. But it
might help to give a little more context. The earlier output
would not have even mentioned "fsck", and that may be a clue
that the "fsck.*" or "*.fsckObjects" config may be relevant.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/index-pack.c     | 2 +-
 builtin/unpack-objects.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 1008d7f..81f3bc7 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -841,7 +841,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
 				die(_("invalid %s"), typename(type));
 			if (do_fsck_object &&
 			    fsck_object(obj, buf, size, &fsck_options))
-				die(_("Error in object"));
+				die(_("fsck error in packed object"));
 			if (fsck_walk(obj, NULL, &fsck_options))
 				die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
 
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 875e7ed..7018829 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -205,7 +205,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
 	if (!obj_buf)
 		die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
 	if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
-		die("Error in object");
+		die("fsck error in packed object");
 	fsck_options.walk = check_object;
 	if (fsck_walk(obj, NULL, &fsck_options))
 		die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
-- 
2.14.4


From 8ac12a6cd60b74748494beadd7750de39f727ff3 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 13 May 2018 12:35:37 -0400
Subject: [PATCH 04/11] fsck: simplify ".git" check

commit ed9c3220621d634d543bc4dd998d12167dfc57d4 upstream.

There's no need for us to manually check for ".git"; it's a
subset of the other filesystem-specific tests. Dropping it
makes our code slightly shorter. More importantly, the
existing code may make a reader wonder why ".GIT" is not
covered here, and whether that is a bug (it isn't, as it's
also covered in the filesystem-specific tests).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fsck.c b/fsck.c
index 0531545..c807b7f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -460,9 +460,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		has_empty_name |= !*name;
 		has_dot |= !strcmp(name, ".");
 		has_dotdot |= !strcmp(name, "..");
-		has_dotgit |= (!strcmp(name, ".git") ||
-			       is_hfs_dotgit(name) ||
-			       is_ntfs_dotgit(name));
+		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 		has_zero_pad |= *(char *)desc.buffer == '0';
 		update_tree_entry(&desc);
 
-- 
2.14.4


From 27d0f7bf0b6ba57e849353a65ff73887088b6d7b Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 15:44:51 -0400
Subject: [PATCH 05/11] fsck: actually fsck blob data

commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream.

Because fscking a blob has always been a noop, we didn't
bother passing around the blob data. In preparation for
content-level checks, let's fix up a few things:

  1. The fsck_object() function just returns success for any
     blob. Let's a noop fsck_blob(), which we can fill in
     with actual logic later.

  2. The fsck_loose() function in builtin/fsck.c
     just threw away blob content after loading it. Let's
     hold onto it until after we've called fsck_object().

     The easiest way to do this is to just drop the
     parse_loose_object() helper entirely. Incidentally,
     this also fixes a memory leak: if we successfully
     loaded the object data but did not parse it, we would
     have left the function without freeing it.

  3. When fsck_loose() loads the object data, it
     does so with a custom read_loose_object() helper. This
     function streams any blobs, regardless of size, under
     the assumption that we're only checking the sha1.

     Instead, let's actually load blobs smaller than
     big_file_threshold, as the normal object-reading
     code-paths would do. This lets us fsck small files, and
     a NULL return is an indication that the blob was so big
     that it needed to be streamed, and we can pass that
     information along to fsck_blob().

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/fsck.c | 39 +++++++++++++++++++--------------------
 fsck.c         |  8 +++++++-
 sha1_file.c    |  2 +-
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 150597c..61ab1cd 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -299,7 +299,7 @@ static void check_connectivity(void)
 	}
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
 	if (obj->flags & SEEN)
 		return 0;
@@ -311,7 +311,7 @@ static int fsck_obj(struct object *obj)
 
 	if (fsck_walk(obj, NULL, &fsck_obj_options))
 		objerror(obj, "broken links");
-	if (fsck_object(obj, NULL, 0, &fsck_obj_options))
+	if (fsck_object(obj, buffer, size, &fsck_obj_options))
 		return -1;
 
 	if (obj->type == OBJ_TREE) {
@@ -355,7 +355,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
 		return error("%s: object corrupt or missing", sha1_to_hex(sha1));
 	}
 	obj->flags = HAS_OBJ;
-	return fsck_obj(obj);
+	return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -447,43 +447,42 @@ static void get_default_heads(void)
 	}
 }
 
-static struct object *parse_loose_object(const unsigned char *sha1,
-					 const char *path)
+static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
 {
 	struct object *obj;
-	void *contents;
 	enum object_type type;
 	unsigned long size;
+	void *contents;
 	int eaten;
 
-	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
-		return NULL;
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object corrupt or missing: %s",
+		      sha1_to_hex(sha1), path);
+		return 0; /* keep checking other objects */
+	}
 
 	if (!contents && type != OBJ_BLOB)
 		die("BUG: read_loose_object streamed a non-blob");
 
 	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
 
-	if (!eaten)
-		free(contents);
-	return obj;
-}
-
-static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
-{
-	struct object *obj = parse_loose_object(sha1, path);
-
 	if (!obj) {
 		errors_found |= ERROR_OBJECT;
-		error("%s: object corrupt or missing: %s",
+		error("%s: object could not be parsed: %s",
 		      sha1_to_hex(sha1), path);
+		if (!eaten)
+			free(contents);
 		return 0; /* keep checking other objects */
 	}
 
 	obj->flags = HAS_OBJ;
-	if (fsck_obj(obj))
+	if (fsck_obj(obj, contents, size))
 		errors_found |= ERROR_OBJECT;
-	return 0;
+
+	if (!eaten)
+		free(contents);
+	return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
diff --git a/fsck.c b/fsck.c
index c807b7f..a520290 100644
--- a/fsck.c
+++ b/fsck.c
@@ -795,6 +795,12 @@ static int fsck_tag(struct tag *tag, const char *data,
 	return fsck_tag_buffer(tag, data, size, options);
 }
 
+static int fsck_blob(struct blob *blob, const char *buf,
+		     unsigned long size, struct fsck_options *options)
+{
+	return 0;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options)
 {
@@ -802,7 +808,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
 		return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
 	if (obj->type == OBJ_BLOB)
-		return 0;
+		return fsck_blob((struct blob *)obj, data, size, options);
 	if (obj->type == OBJ_TREE)
 		return fsck_tree((struct tree *) obj, options);
 	if (obj->type == OBJ_COMMIT)
diff --git a/sha1_file.c b/sha1_file.c
index 6d29840..a091644 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3744,7 +3744,7 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	if (*type == OBJ_BLOB) {
+	if (*type == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
 			goto out;
 	} else {
-- 
2.14.4


From d393612c63e6bdfd7ab54139c229be3db89c550d Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 17:20:08 -0400
Subject: [PATCH 06/11] fsck: detect gitmodules files

commit 159e7b080bfa5d34559467cacaa79df89a01afc0 upstream.

In preparation for performing fsck checks on .gitmodules
files, this commit plumbs in the actual detection of the
files. Note that unlike most other fsck checks, this cannot
be a property of a single object: we must know that the
object is found at a ".gitmodules" path at the root tree of
a commit.

Since the fsck code only sees one object at a time, we have
to mark the related objects to fit the puzzle together. When
we see a commit we mark its tree as a root tree, and when
we see a root tree with a .gitmodules file, we mark the
corresponding blob to be checked.

In an ideal world, we'd check the objects in topological
order: commits followed by trees followed by blobs. In that
case we can avoid ever loading an object twice, since all
markings would be complete by the time we get to the marked
objects. And indeed, if we are checking a single packfile,
this is the order in which Git will generally write the
objects. But we can't count on that:

  1. git-fsck may show us the objects in arbitrary order
     (loose objects are fed in sha1 order, but we may also
     have multiple packs, and we process each pack fully in
     sequence).

  2. The type ordering is just what git-pack-objects happens
     to write now. The pack format does not require a
     specific order, and it's possible that future versions
     of Git (or a custom version trying to fool official
     Git's fsck checks!) may order it differently.

  3. We may not even be fscking all of the relevant objects
     at once. Consider pushing with transfer.fsckObjects,
     where one push adds a blob at path "foo", and then a
     second push adds the same blob at path ".gitmodules".
     The blob is not part of the second push at all, but we
     need to mark and check it.

So in the general case, we need to make up to three passes
over the objects: once to make sure we've seen all commits,
then once to cover any trees we might have missed, and then
a final pass to cover any .gitmodules blobs we found in the
second pass.

We can simplify things a bit by loosening the requirement
that we find .gitmodules only at root trees. Technically
a file like "subdir/.gitmodules" is not parsed by Git, but
it's not unreasonable for us to declare that Git is aware of
all ".gitmodules" files and make them eligible for checking.
That lets us drop the root-tree requirement, which
eliminates one pass entirely. And it makes our worst case
much better: instead of potentially queueing every root tree
to be re-examined, the worst case is that we queue each
unique .gitmodules blob for a second look.

This patch just adds the boilerplate to find .gitmodules
files. The actual content checks will come in a subsequent
commit.

[jn: backported to 2.11.y:
 - passing oid->hash instead of oid to lookup_blob
 - using "struct hashmap" directly since "struct oidset" isn't
   available]

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fsck.h |  7 ++++++
 2 files changed, 98 insertions(+)

diff --git a/fsck.c b/fsck.c
index a520290..f0a566a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -9,6 +9,42 @@
 #include "refs.h"
 #include "utf8.h"
 #include "sha1-array.h"
+#include "hashmap.h"
+
+struct oidhash_entry {
+	struct hashmap_entry ent;
+	struct object_id oid;
+};
+
+static int oidhash_hashcmp(const void *va, const void *vb,
+			   const void *vkey)
+{
+	const struct oidhash_entry *a = va, *b = vb;
+	const struct object_id *key = vkey;
+	return oidcmp(&a->oid, key ? key : &b->oid);
+}
+
+static struct hashmap gitmodules_found;
+static struct hashmap gitmodules_done;
+
+static void oidhash_insert(struct hashmap *h, const struct object_id *oid)
+{
+	struct oidhash_entry *e;
+
+	if (!h->tablesize)
+		hashmap_init(h, oidhash_hashcmp, 0);
+	e = xmalloc(sizeof(*e));
+	hashmap_entry_init(&e->ent, sha1hash(oid->hash));
+	oidcpy(&e->oid, oid);
+	hashmap_add(h, e);
+}
+
+static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
+{
+	return h->tablesize &&
+		!!hashmap_get_from_hash(h, sha1hash(oid->hash), oid);
+}
+
 
 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -43,6 +79,7 @@
 	FUNC(MISSING_TAG_ENTRY, ERROR) \
 	FUNC(MISSING_TAG_OBJECT, ERROR) \
 	FUNC(MISSING_TREE, ERROR) \
+	FUNC(MISSING_TREE_OBJECT, ERROR) \
 	FUNC(MISSING_TYPE, ERROR) \
 	FUNC(MISSING_TYPE_ENTRY, ERROR) \
 	FUNC(MULTIPLE_AUTHORS, ERROR) \
@@ -50,6 +87,8 @@
 	FUNC(TREE_NOT_SORTED, ERROR) \
 	FUNC(UNKNOWN_TYPE, ERROR) \
 	FUNC(ZERO_PADDED_DATE, ERROR) \
+	FUNC(GITMODULES_MISSING, ERROR) \
+	FUNC(GITMODULES_BLOB, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -462,6 +501,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		has_dotdot |= !strcmp(name, "..");
 		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 		has_zero_pad |= *(char *)desc.buffer == '0';
+
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
+			oidhash_insert(&gitmodules_found, oid);
+
 		update_tree_entry(&desc);
 
 		switch (mode) {
@@ -831,3 +874,51 @@ int fsck_error_function(struct object *obj, int msg_type, const char *message)
 	error("object %s: %s", oid_to_hex(&obj->oid), message);
 	return 1;
 }
+
+int fsck_finish(struct fsck_options *options)
+{
+	int ret = 0;
+	struct hashmap_iter iter;
+	const struct oidhash_entry *e;
+
+	hashmap_iter_init(&gitmodules_found, &iter);
+	while ((e = hashmap_iter_next(&iter))) {
+		const struct object_id *oid = &e->oid;
+		struct blob *blob;
+		enum object_type type;
+		unsigned long size;
+		char *buf;
+
+		if (oidhash_contains(&gitmodules_done, oid))
+			continue;
+
+		blob = lookup_blob(oid->hash);
+		if (!blob) {
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+			continue;
+		}
+
+		buf = read_sha1_file(oid->hash, &type, &size);
+		if (!buf) {
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_MISSING,
+				      "unable to read .gitmodules blob");
+			continue;
+		}
+
+		if (type == OBJ_BLOB)
+			ret |= fsck_blob(blob, buf, size, options);
+		else
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+		free(buf);
+	}
+
+
+	hashmap_free(&gitmodules_found, 1);
+	hashmap_free(&gitmodules_done, 1);
+	return ret;
+}
diff --git a/fsck.h b/fsck.h
index dded84b..3eebe56 100644
--- a/fsck.h
+++ b/fsck.h
@@ -50,4 +50,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options);
 
+/*
+ * Some fsck checks are context-dependent, and may end up queued; run this
+ * after completing all fsck_object() calls in order to resolve any remaining
+ * checks.
+ */
+int fsck_finish(struct fsck_options *options);
+
 #endif
-- 
2.14.4


From ff398bff4000efa4275058c6b4f8c479831494f0 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 17:25:27 -0400
Subject: [PATCH 07/11] fsck: check .gitmodules content

commit ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e upstream.

This patch detects and blocks submodule names which do not
match the policy set forth in submodule-config. These should
already be caught by the submodule code itself, but putting
the check here means that newer versions of Git can protect
older ones from malicious entries (e.g., a server with
receive.fsckObjects will block the objects, protecting
clients which fetch from it).

As a side effect, this means fsck will also complain about
.gitmodules files that cannot be parsed (or were larger than
core.bigFileThreshold).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/fsck.c b/fsck.c
index f0a566a..d366ad8 100644
--- a/fsck.c
+++ b/fsck.c
@@ -10,6 +10,9 @@
 #include "utf8.h"
 #include "sha1-array.h"
 #include "hashmap.h"
+#include "submodule-config.h"
+
+#define CONFIG_ORIGIN_BLOB 0
 
 struct oidhash_entry {
 	struct hashmap_entry ent;
@@ -89,6 +92,8 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
 	FUNC(ZERO_PADDED_DATE, ERROR) \
 	FUNC(GITMODULES_MISSING, ERROR) \
 	FUNC(GITMODULES_BLOB, ERROR) \
+	FUNC(GITMODULES_PARSE, ERROR) \
+	FUNC(GITMODULES_NAME, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -838,10 +843,64 @@ static int fsck_tag(struct tag *tag, const char *data,
 	return fsck_tag_buffer(tag, data, size, options);
 }
 
+struct fsck_gitmodules_data {
+	struct object *obj;
+	struct fsck_options *options;
+	int ret;
+};
+
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
+{
+	struct fsck_gitmodules_data *data = vdata;
+	const char *subsection, *key;
+	int subsection_len;
+	char *name;
+
+	if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
+	    !subsection)
+		return 0;
+
+	name = xmemdupz(subsection, subsection_len);
+	if (check_submodule_name(name) < 0)
+		data->ret |= report(data->options, data->obj,
+				    FSCK_MSG_GITMODULES_NAME,
+				    "disallowed submodule name: %s",
+				    name);
+	free(name);
+
+	return 0;
+}
+
 static int fsck_blob(struct blob *blob, const char *buf,
 		     unsigned long size, struct fsck_options *options)
 {
-	return 0;
+	struct fsck_gitmodules_data data;
+
+	if (!oidhash_contains(&gitmodules_found, &blob->object.oid))
+		return 0;
+	oidhash_insert(&gitmodules_done, &blob->object.oid);
+
+	if (!buf) {
+		/*
+		 * A missing buffer here is a sign that the caller found the
+		 * blob too gigantic to load into memory. Let's just consider
+		 * that an error.
+		 */
+		return report(options, &blob->object,
+			      FSCK_MSG_GITMODULES_PARSE,
+			      ".gitmodules too large to parse");
+	}
+
+	data.obj = &blob->object;
+	data.options = options;
+	data.ret = 0;
+	if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
+				".gitmodules", buf, size, &data))
+		data.ret |= report(options, &blob->object,
+				   FSCK_MSG_GITMODULES_PARSE,
+				   "could not parse gitmodules blob");
+
+	return data.ret;
 }
 
 int fsck_object(struct object *obj, void *data, unsigned long size,
-- 
2.14.4


From 3f7875be4a5b68058e243a0525d75165620738cb Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 17:20:35 -0400
Subject: [PATCH 08/11] fsck: call fsck_finish() after fscking objects

commit 1995b5e03e1cc97116be58cdc0502d4a23547856 upstream.

Now that the internal fsck code is capable of checking
.gitmodules files, we just need to teach its callers to use
the "finish" function to check any queued objects.

With this, we can now catch the malicious case in t7415 with
git-fsck.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/fsck.c             | 3 +++
 t/t7415-submodule-names.sh | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 61ab1cd..70f620b 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -660,6 +660,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
 			count += p->num_objects;
 		}
 		stop_progress(&progress);
+
+		if (fsck_finish(&fsck_obj_options))
+			errors_found |= ERROR_OBJECT;
 	}
 
 	heads = 0;
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index b30fa33..5d61627 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' '
 	! grep "RUNNING POST CHECKOUT" output
 '
 
+test_expect_success 'fsck detects evil superproject' '
+	test_must_fail git fsck
+'
+
 test_done
-- 
2.14.4


From 563989871514b892c2c9a4f4fd2ef515dd6adaee Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 4 May 2018 19:40:08 -0400
Subject: [PATCH 09/11] unpack-objects: call fsck_finish() after fscking
 objects

commit 6e328d6caef218db320978e3e251009135d87d0e upstream.

As with the previous commit, we must call fsck's "finish"
function in order to catch any queued objects for
.gitmodules checks.

This second pass will be able to access any incoming
objects, because we will have exploded them to loose objects
by now.

This isn't quite ideal, because it means that bad objects
may have been written to the object database (and a
subsequent operation could then reference them, even if the
other side doesn't send the objects again). However, this is
sufficient when used with receive.fsckObjects, since those
loose objects will all be placed in a temporary quarantine
area that will get wiped if we find any problems.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/unpack-objects.c   | 5 ++++-
 t/t7415-submodule-names.sh | 7 +++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7018829..27ba86e 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -560,8 +560,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 	unpack_all();
 	git_SHA1_Update(&ctx, buffer, offset);
 	git_SHA1_Final(sha1, &ctx);
-	if (strict)
+	if (strict) {
 		write_rest();
+		if (fsck_finish(&fsck_options))
+			die(_("fsck error in pack objects"));
+	}
 	if (hashcmp(fill(20), sha1))
 		die("final sha1 did not match");
 	use(20);
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index 5d61627..a09087c 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' '
 	test_must_fail git fsck
 '
 
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	git -C dst.git config transfer.fsckObjects true &&
+	test_must_fail git push dst.git HEAD
+'
+
 test_done
-- 
2.14.4


From 0e3a3c30f0ba16f3e3fd4581cea36b2d2d4d204a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 4 May 2018 19:45:01 -0400
Subject: [PATCH 10/11] index-pack: check .gitmodules files with --strict

commit 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 upstream.

Now that the internal fsck code has all of the plumbing we
need, we can start checking incoming .gitmodules files.
Naively, it seems like we would just need to add a call to
fsck_finish() after we've processed all of the objects. And
that would be enough to cover the initial test included
here. But there are two extra bits:

  1. We currently don't bother calling fsck_object() at all
     for blobs, since it has traditionally been a noop. We'd
     actually catch these blobs in fsck_finish() at the end,
     but it's more efficient to check them when we already
     have the object loaded in memory.

  2. The second pass done by fsck_finish() needs to access
     the objects, but we're actually indexing the pack in
     this process. In theory we could give the fsck code a
     special callback for accessing the in-pack data, but
     it's actually quite tricky:

       a. We don't have an internal efficient index mapping
	  oids to packfile offsets. We only generate it on
	  the fly as part of writing out the .idx file.

       b. We'd still have to reconstruct deltas, which means
          we'd basically have to replicate all of the
	  reading logic in packfile.c.

     Instead, let's avoid running fsck_finish() until after
     we've written out the .idx file, and then just add it
     to our internal packed_git list.

     This does mean that the objects are "in the repository"
     before we finish our fsck checks. But unpack-objects
     already exhibits this same behavior, and it's an
     acceptable tradeoff here for the same reason: the
     quarantine mechanism means that pushes will be
     fully protected.

In addition to a basic push test in t7415, we add a sneaky
pack that reverses the usual object order in the pack,
requiring that index-pack access the tree and blob during
the "finish" step.

This already works for unpack-objects (since it will have
written out loose objects), but we'll check it with this
sneaky pack for good measure.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/index-pack.c       | 10 ++++++++++
 t/lib-pack.sh              | 12 ++++++++++++
 t/t7415-submodule-names.sh | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 81f3bc7..f53bf22 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -825,6 +825,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
 				blob->object.flags |= FLAG_CHECKED;
 			else
 				die(_("invalid blob object %s"), sha1_to_hex(sha1));
+			if (do_fsck_object &&
+			    fsck_object(&blob->object, (void *)data, size, &fsck_options))
+				die(_("fsck error in packed object"));
 		} else {
 			struct object *obj;
 			int eaten;
@@ -1440,6 +1443,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
 	} else
 		chmod(final_index_name, 0444);
 
+	if (do_fsck_object)
+		add_packed_git(final_index_name, strlen(final_index_name), 0);
+
 	if (!from_stdin) {
 		printf("%s\n", sha1_to_hex(sha1));
 	} else {
@@ -1775,6 +1781,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 		      pack_sha1);
 	else
 		close(input_fd);
+
+	if (do_fsck_object && fsck_finish(&fsck_options))
+		die(_("fsck error in pack objects"));
+
 	free(objects);
 	strbuf_release(&index_name_buf);
 	strbuf_release(&keep_name_buf);
diff --git a/t/lib-pack.sh b/t/lib-pack.sh
index 7509846..4674899 100644
--- a/t/lib-pack.sh
+++ b/t/lib-pack.sh
@@ -79,6 +79,18 @@ pack_obj () {
 		;;
 	esac
 
+	# If it's not a delta, we can convince pack-objects to generate a pack
+	# with just our entry, and then strip off the header (12 bytes) and
+	# trailer (20 bytes).
+	if test -z "$2"
+	then
+		echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
+		size=$(wc -c <pack_obj.tmp) &&
+		dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
+		rm -f pack_obj.tmp
+		return
+	fi
+
 	echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
 	return 1
 }
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index a09087c..fca7091 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a
 real-world setup that confirms we catch this in practice.
 '
 . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
 
 test_expect_success 'check names' '
 	cat >expect <<-\EOF &&
@@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
 	test_must_fail git push dst.git HEAD
 '
 
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	git -C dst.git config transfer.fsckObjects true &&
+	git -C dst.git config transfer.unpackLimit 1 &&
+	test_must_fail git push dst.git HEAD
+'
+
+# Normally our packs contain commits followed by trees followed by blobs. This
+# reverses the order, which requires backtracking to find the context of a
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
+test_expect_success 'create oddly ordered pack' '
+	git checkout --orphan odd &&
+	git rm -rf --cached . &&
+	git add .gitmodules &&
+	git commit -m odd &&
+	{
+		pack_header 3 &&
+		pack_obj $(git rev-parse HEAD:.gitmodules) &&
+		pack_obj $(git rev-parse HEAD^{tree}) &&
+		pack_obj $(git rev-parse HEAD)
+	} >odd.pack &&
+	pack_trailer odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	test_must_fail git -C dst.git unpack-objects --strict <odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
+'
+
 test_done
-- 
2.14.4


From b3790cb51efbba6c393471feb21e9f9d9ec86377 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 4 May 2018 20:03:35 -0400
Subject: [PATCH 11/11] fsck: complain when .gitmodules is a symlink

commit b7b1fca175f1ed7933f361028c631b9ac86d868d upstream.

We've recently forbidden .gitmodules to be a symlink in
verify_path(). And it's an easy way to circumvent our fsck
checks for .gitmodules content. So let's complain when we
see it.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c                     | 14 ++++++++++----
 t/t7415-submodule-names.sh | 29 +++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/fsck.c b/fsck.c
index d366ad8..79694b0 100644
--- a/fsck.c
+++ b/fsck.c
@@ -94,6 +94,7 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
 	FUNC(GITMODULES_BLOB, ERROR) \
 	FUNC(GITMODULES_PARSE, ERROR) \
 	FUNC(GITMODULES_NAME, ERROR) \
+	FUNC(GITMODULES_SYMLINK, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -472,7 +473,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
 
 static int fsck_tree(struct tree *item, struct fsck_options *options)
 {
-	int retval;
+	int retval = 0;
 	int has_null_sha1 = 0;
 	int has_full_path = 0;
 	int has_empty_name = 0;
@@ -507,8 +508,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 		has_zero_pad |= *(char *)desc.buffer == '0';
 
-		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
-			oidhash_insert(&gitmodules_found, oid);
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
+			if (!S_ISLNK(mode))
+				oidhash_insert(&gitmodules_found, oid);
+			else
+				retval += report(options, &item->object,
+						 FSCK_MSG_GITMODULES_SYMLINK,
+						 ".gitmodules is a symbolic link");
+		}
 
 		update_tree_entry(&desc);
 
@@ -551,7 +558,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		o_name = name;
 	}
 
-	retval = 0;
 	if (has_null_sha1)
 		retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
 	if (has_full_path)
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index fca7091..6ee7115 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
 	test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
 '
 
+test_expect_success 'fsck detects symlinked .gitmodules file' '
+	git init symlink &&
+	(
+		cd symlink &&
+
+		# Make the tree directly to avoid index restrictions.
+		#
+		# Because symlinks store the target as a blob, choose
+		# a pathname that could be parsed as a .gitmodules file
+		# to trick naive non-symlink-aware checking.
+		tricky="[foo]bar=true" &&
+		content=$(git hash-object -w ../.gitmodules) &&
+		target=$(printf "$tricky" | git hash-object -w --stdin) &&
+		tree=$(
+			{
+				printf "100644 blob $content\t$tricky\n" &&
+				printf "120000 blob $target\t.gitmodules\n"
+			} | git mktree
+		) &&
+		commit=$(git commit-tree $tree) &&
+
+		# Check not only that we fail, but that it is due to the
+		# symlink detector; this grep string comes from the config
+		# variable name and will not be translated.
+		test_must_fail git fsck 2>output &&
+		grep gitmodulesSymlink output
+	)
+'
+
 test_done
-- 
2.14.4