Blame SOURCES/git-cve-2018-11235-fsck.patch

1353ca
From ff9e25d40ff1f2a2942eac7b3dd108ca739f362a Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Fri, 13 Jan 2017 12:58:16 -0500
1353ca
Subject: [PATCH 01/11] sha1_file: add read_loose_object() function
1353ca
1353ca
commit f6371f9210418f1beabc85b097e2a3470aeeb54d upstream.
1353ca
1353ca
It's surprisingly hard to ask the sha1_file code to open a
1353ca
_specific_ incarnation of a loose object. Most of the
1353ca
functions take a sha1, and loop over the various object
1353ca
types (packed versus loose) and locations (local versus
1353ca
alternates) at a low level.
1353ca
1353ca
However, some tools like fsck need to look at a specific
1353ca
file. This patch gives them a function they can use to open
1353ca
the loose object at a given path.
1353ca
1353ca
The implementation unfortunately ends up repeating bits of
1353ca
related functions, but there's not a good way around it
1353ca
without some major refactoring of the whole sha1_file stack.
1353ca
We need to mmap the specific file, then partially read the
1353ca
zlib stream to know whether we're streaming or not, and then
1353ca
finally either stream it or copy the data to a buffer.
1353ca
1353ca
We can do that by assembling some of the more arcane
1353ca
internal sha1_file functions, but we end up having to
1353ca
essentially reimplement unpack_sha1_file(), along with the
1353ca
streaming bits of check_sha1_signature().
1353ca
1353ca
Still, most of the ugliness is contained in the new
1353ca
function, and the interface is clean enough that it may be
1353ca
reusable (though it seems unlikely anything but git-fsck
1353ca
would care about opening a specific file).
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 cache.h     |  13 ++++++
1353ca
 sha1_file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1353ca
 2 files changed, 143 insertions(+), 2 deletions(-)
1353ca
1353ca
diff --git a/cache.h b/cache.h
1353ca
index 61d43c6..fa53623 100644
1353ca
--- a/cache.h
1353ca
+++ b/cache.h
1353ca
@@ -1113,6 +1113,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
1353ca
 
1353ca
 extern int has_sha1_pack(const unsigned char *sha1);
1353ca
 
1353ca
+/*
1353ca
+ * Open the loose object at path, check its sha1, and return the contents,
1353ca
+ * type, and size. If the object is a blob, then "contents" may return NULL,
1353ca
+ * to allow streaming of large blobs.
1353ca
+ *
1353ca
+ * Returns 0 on success, negative on error (details may be written to stderr).
1353ca
+ */
1353ca
+int read_loose_object(const char *path,
1353ca
+		      const unsigned char *expected_sha1,
1353ca
+		      enum object_type *type,
1353ca
+		      unsigned long *size,
1353ca
+		      void **contents);
1353ca
+
1353ca
 /*
1353ca
  * Return true iff we have an object named sha1, whether local or in
1353ca
  * an alternate object database, and whether packed or loose.  This
1353ca
diff --git a/sha1_file.c b/sha1_file.c
1353ca
index cb571ac..6d29840 100644
1353ca
--- a/sha1_file.c
1353ca
+++ b/sha1_file.c
1353ca
@@ -1547,12 +1547,21 @@ static int open_sha1_file(const unsigned char *sha1)
1353ca
 	return -1;
1353ca
 }
1353ca
 
1353ca
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
1353ca
+/*
1353ca
+ * Map the loose object at "path" if it is not NULL, or the path found by
1353ca
+ * searching for a loose object named "sha1".
1353ca
+ */
1353ca
+static void *map_sha1_file_1(const char *path,
1353ca
+			     const unsigned char *sha1,
1353ca
+			     unsigned long *size)
1353ca
 {
1353ca
 	void *map;
1353ca
 	int fd;
1353ca
 
1353ca
-	fd = open_sha1_file(sha1);
1353ca
+	if (path)
1353ca
+		fd = git_open_noatime(path);
1353ca
+	else
1353ca
+		fd = open_sha1_file(sha1);
1353ca
 	map = NULL;
1353ca
 	if (fd >= 0) {
1353ca
 		struct stat st;
1353ca
@@ -1571,6 +1580,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
1353ca
 	return map;
1353ca
 }
1353ca
 
1353ca
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
1353ca
+{
1353ca
+	return map_sha1_file_1(NULL, sha1, size);
1353ca
+}
1353ca
+
1353ca
 unsigned long unpack_object_header_buffer(const unsigned char *buf,
1353ca
 		unsigned long len, enum object_type *type, unsigned long *sizep)
1353ca
 {
1353ca
@@ -3644,3 +3658,117 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1353ca
 	}
1353ca
 	return r ? r : pack_errors;
1353ca
 }
1353ca
+
1353ca
+static int check_stream_sha1(git_zstream *stream,
1353ca
+			     const char *hdr,
1353ca
+			     unsigned long size,
1353ca
+			     const char *path,
1353ca
+			     const unsigned char *expected_sha1)
1353ca
+{
1353ca
+	git_SHA_CTX c;
1353ca
+	unsigned char real_sha1[GIT_SHA1_RAWSZ];
1353ca
+	unsigned char buf[4096];
1353ca
+	unsigned long total_read;
1353ca
+	int status = Z_OK;
1353ca
+
1353ca
+	git_SHA1_Init(&c);
1353ca
+	git_SHA1_Update(&c, hdr, stream->total_out);
1353ca
+
1353ca
+	/*
1353ca
+	 * We already read some bytes into hdr, but the ones up to the NUL
1353ca
+	 * do not count against the object's content size.
1353ca
+	 */
1353ca
+	total_read = stream->total_out - strlen(hdr) - 1;
1353ca
+
1353ca
+	/*
1353ca
+	 * This size comparison must be "<=" to read the final zlib packets;
1353ca
+	 * see the comment in unpack_sha1_rest for details.
1353ca
+	 */
1353ca
+	while (total_read <= size &&
1353ca
+	       (status == Z_OK || status == Z_BUF_ERROR)) {
1353ca
+		stream->next_out = buf;
1353ca
+		stream->avail_out = sizeof(buf);
1353ca
+		if (size - total_read < stream->avail_out)
1353ca
+			stream->avail_out = size - total_read;
1353ca
+		status = git_inflate(stream, Z_FINISH);
1353ca
+		git_SHA1_Update(&c, buf, stream->next_out - buf);
1353ca
+		total_read += stream->next_out - buf;
1353ca
+	}
1353ca
+	git_inflate_end(stream);
1353ca
+
1353ca
+	if (status != Z_STREAM_END) {
1353ca
+		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
1353ca
+		return -1;
1353ca
+	}
1353ca
+
1353ca
+	git_SHA1_Final(real_sha1, &c);
1353ca
+	if (hashcmp(expected_sha1, real_sha1)) {
1353ca
+		error("sha1 mismatch for %s (expected %s)", path,
1353ca
+		      sha1_to_hex(expected_sha1));
1353ca
+		return -1;
1353ca
+	}
1353ca
+
1353ca
+	return 0;
1353ca
+}
1353ca
+
1353ca
+int read_loose_object(const char *path,
1353ca
+		      const unsigned char *expected_sha1,
1353ca
+		      enum object_type *type,
1353ca
+		      unsigned long *size,
1353ca
+		      void **contents)
1353ca
+{
1353ca
+	int ret = -1;
1353ca
+	int fd = -1;
1353ca
+	void *map = NULL;
1353ca
+	unsigned long mapsize;
1353ca
+	git_zstream stream;
1353ca
+	char hdr[32];
1353ca
+
1353ca
+	*contents = NULL;
1353ca
+
1353ca
+	map = map_sha1_file_1(path, NULL, &mapsize);
1353ca
+	if (!map) {
1353ca
+		error_errno("unable to mmap %s", path);
1353ca
+		goto out;
1353ca
+	}
1353ca
+
1353ca
+	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
1353ca
+		error("unable to unpack header of %s", path);
1353ca
+		goto out;
1353ca
+	}
1353ca
+
1353ca
+	*type = parse_sha1_header(hdr, size);
1353ca
+	if (*type < 0) {
1353ca
+		error("unable to parse header of %s", path);
1353ca
+		git_inflate_end(&stream);
1353ca
+		goto out;
1353ca
+	}
1353ca
+
1353ca
+	if (*type == OBJ_BLOB) {
1353ca
+		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
1353ca
+			goto out;
1353ca
+	} else {
1353ca
+		*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
1353ca
+		if (!*contents) {
1353ca
+			error("unable to unpack contents of %s", path);
1353ca
+			git_inflate_end(&stream);
1353ca
+			goto out;
1353ca
+		}
1353ca
+		if (check_sha1_signature(expected_sha1, *contents,
1353ca
+					 *size, typename(*type))) {
1353ca
+			error("sha1 mismatch for %s (expected %s)", path,
1353ca
+			      sha1_to_hex(expected_sha1));
1353ca
+			free(*contents);
1353ca
+			goto out;
1353ca
+		}
1353ca
+	}
1353ca
+
1353ca
+	ret = 0; /* everything checks out */
1353ca
+
1353ca
+out:
1353ca
+	if (map)
1353ca
+		munmap(map, mapsize);
1353ca
+	if (fd >= 0)
1353ca
+		close(fd);
1353ca
+	return ret;
1353ca
+}
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 65074f3eda3338445c7674b7d5b176844970b5de Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Fri, 13 Jan 2017 12:59:44 -0500
1353ca
Subject: [PATCH 02/11] fsck: parse loose object paths directly
1353ca
1353ca
commit c68b489e56431cf27f7719913ab09ddc62f95912 upstream.
1353ca
1353ca
When we iterate over the list of loose objects to check, we
1353ca
get the actual path of each object. But we then throw it
1353ca
away and pass just the sha1 to fsck_sha1(), which will do a
1353ca
fresh lookup. Usually it would find the same object, but it
1353ca
may not if an object exists both as a loose and a packed
1353ca
object. We may end up checking the packed object twice, and
1353ca
never look at the loose one.
1353ca
1353ca
In practice this isn't too terrible, because if fsck doesn't
1353ca
complain, it means you have at least one good copy. But
1353ca
since the point of fsck is to look for corruption, we should
1353ca
be thorough.
1353ca
1353ca
The new read_loose_object() interface can help us get the
1353ca
data from disk, and then we replace parse_object() with
1353ca
parse_object_buffer(). As a bonus, our error messages now
1353ca
mention the path to a corrupted object, which should make it
1353ca
easier to track down errors when they do happen.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/fsck.c  | 46 +++++++++++++++++++++++++++++++++-------------
1353ca
 t/t1450-fsck.sh | 16 ++++++++++++++++
1353ca
 2 files changed, 49 insertions(+), 13 deletions(-)
1353ca
1353ca
diff --git a/builtin/fsck.c b/builtin/fsck.c
1353ca
index 9923b10..150597c 100644
1353ca
--- a/builtin/fsck.c
1353ca
+++ b/builtin/fsck.c
1353ca
@@ -341,18 +341,6 @@ static int fsck_obj(struct object *obj)
1353ca
 	return 0;
1353ca
 }
1353ca
 
1353ca
-static int fsck_sha1(const unsigned char *sha1)
1353ca
-{
1353ca
-	struct object *obj = parse_object(sha1);
1353ca
-	if (!obj) {
1353ca
-		errors_found |= ERROR_OBJECT;
1353ca
-		return error("%s: object corrupt or missing",
1353ca
-			     sha1_to_hex(sha1));
1353ca
-	}
1353ca
-	obj->flags |= HAS_OBJ;
1353ca
-	return fsck_obj(obj);
1353ca
-}
1353ca
-
1353ca
 static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
1353ca
 			   unsigned long size, void *buffer, int *eaten)
1353ca
 {
1353ca
@@ -459,9 +447,41 @@ static void get_default_heads(void)
1353ca
 	}
1353ca
 }
1353ca
 
1353ca
+static struct object *parse_loose_object(const unsigned char *sha1,
1353ca
+					 const char *path)
1353ca
+{
1353ca
+	struct object *obj;
1353ca
+	void *contents;
1353ca
+	enum object_type type;
1353ca
+	unsigned long size;
1353ca
+	int eaten;
1353ca
+
1353ca
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
1353ca
+		return NULL;
1353ca
+
1353ca
+	if (!contents && type != OBJ_BLOB)
1353ca
+		die("BUG: read_loose_object streamed a non-blob");
1353ca
+
1353ca
+	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
1353ca
+
1353ca
+	if (!eaten)
1353ca
+		free(contents);
1353ca
+	return obj;
1353ca
+}
1353ca
+
1353ca
 static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
1353ca
 {
1353ca
-	if (fsck_sha1(sha1))
1353ca
+	struct object *obj = parse_loose_object(sha1, path);
1353ca
+
1353ca
+	if (!obj) {
1353ca
+		errors_found |= ERROR_OBJECT;
1353ca
+		error("%s: object corrupt or missing: %s",
1353ca
+		      sha1_to_hex(sha1), path);
1353ca
+		return 0; /* keep checking other objects */
1353ca
+	}
1353ca
+
1353ca
+	obj->flags = HAS_OBJ;
1353ca
+	if (fsck_obj(obj))
1353ca
 		errors_found |= ERROR_OBJECT;
1353ca
 	return 0;
1353ca
 }
1353ca
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
1353ca
index 7ee8ea0..17c4c77 100755
1353ca
--- a/t/t1450-fsck.sh
1353ca
+++ b/t/t1450-fsck.sh
1353ca
@@ -523,4 +523,20 @@ test_expect_success 'fsck --connectivity-only' '
1353ca
 	)
1353ca
 '
1353ca
 
1353ca
+test_expect_success 'fsck finds problems in duplicate loose objects' '
1353ca
+	rm -rf broken-duplicate &&
1353ca
+	git init broken-duplicate &&
1353ca
+	(
1353ca
+		cd broken-duplicate &&
1353ca
+		test_commit duplicate &&
1353ca
+		# no "-d" here, so we end up with duplicates
1353ca
+		git repack &&
1353ca
+		# now corrupt the loose copy
1353ca
+		file=$(sha1_file "$(git rev-parse HEAD)") &&
1353ca
+		rm "$file" &&
1353ca
+		echo broken >"$file" &&
1353ca
+		test_must_fail git fsck
1353ca
+	)
1353ca
+'
1353ca
+
1353ca
 test_done
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 6a26b1fcf92a6c825eb72c1a55399f38af5b7b79 Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Wed, 2 May 2018 16:37:09 -0400
1353ca
Subject: [PATCH 03/11] index-pack: make fsck error message more specific
1353ca
1353ca
commit db5a58c1bda5b20169b9958af1e8b05ddd178b01 upstream.
1353ca
1353ca
If fsck reports an error, we say only "Error in object".
1353ca
This isn't quite as bad as it might seem, since the fsck
1353ca
code would have dumped some errors to stderr already. But it
1353ca
might help to give a little more context. The earlier output
1353ca
would not have even mentioned "fsck", and that may be a clue
1353ca
that the "fsck.*" or "*.fsckObjects" config may be relevant.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/index-pack.c     | 2 +-
1353ca
 builtin/unpack-objects.c | 2 +-
1353ca
 2 files changed, 2 insertions(+), 2 deletions(-)
1353ca
1353ca
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
1353ca
index 1008d7f..81f3bc7 100644
1353ca
--- a/builtin/index-pack.c
1353ca
+++ b/builtin/index-pack.c
1353ca
@@ -841,7 +841,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
1353ca
 				die(_("invalid %s"), typename(type));
1353ca
 			if (do_fsck_object &&
1353ca
 			    fsck_object(obj, buf, size, &fsck_options))
1353ca
-				die(_("Error in object"));
1353ca
+				die(_("fsck error in packed object"));
1353ca
 			if (fsck_walk(obj, NULL, &fsck_options))
1353ca
 				die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
1353ca
 
1353ca
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
1353ca
index 875e7ed..7018829 100644
1353ca
--- a/builtin/unpack-objects.c
1353ca
+++ b/builtin/unpack-objects.c
1353ca
@@ -205,7 +205,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
1353ca
 	if (!obj_buf)
1353ca
 		die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
1353ca
 	if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
1353ca
-		die("Error in object");
1353ca
+		die("fsck error in packed object");
1353ca
 	fsck_options.walk = check_object;
1353ca
 	if (fsck_walk(obj, NULL, &fsck_options))
1353ca
 		die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 8ac12a6cd60b74748494beadd7750de39f727ff3 Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Sun, 13 May 2018 12:35:37 -0400
1353ca
Subject: [PATCH 04/11] fsck: simplify ".git" check
1353ca
1353ca
commit ed9c3220621d634d543bc4dd998d12167dfc57d4 upstream.
1353ca
1353ca
There's no need for us to manually check for ".git"; it's a
1353ca
subset of the other filesystem-specific tests. Dropping it
1353ca
makes our code slightly shorter. More importantly, the
1353ca
existing code may make a reader wonder why ".GIT" is not
1353ca
covered here, and whether that is a bug (it isn't, as it's
1353ca
also covered in the filesystem-specific tests).
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 fsck.c | 4 +---
1353ca
 1 file changed, 1 insertion(+), 3 deletions(-)
1353ca
1353ca
diff --git a/fsck.c b/fsck.c
1353ca
index 0531545..c807b7f 100644
1353ca
--- a/fsck.c
1353ca
+++ b/fsck.c
1353ca
@@ -460,9 +460,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
1353ca
 		has_empty_name |= !*name;
1353ca
 		has_dot |= !strcmp(name, ".");
1353ca
 		has_dotdot |= !strcmp(name, "..");
1353ca
-		has_dotgit |= (!strcmp(name, ".git") ||
1353ca
-			       is_hfs_dotgit(name) ||
1353ca
-			       is_ntfs_dotgit(name));
1353ca
+		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
1353ca
 		has_zero_pad |= *(char *)desc.buffer == '0';
1353ca
 		update_tree_entry(&desc);
1353ca
 
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 27d0f7bf0b6ba57e849353a65ff73887088b6d7b Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Wed, 2 May 2018 15:44:51 -0400
1353ca
Subject: [PATCH 05/11] fsck: actually fsck blob data
1353ca
1353ca
commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream.
1353ca
1353ca
Because fscking a blob has always been a noop, we didn't
1353ca
bother passing around the blob data. In preparation for
1353ca
content-level checks, let's fix up a few things:
1353ca
1353ca
  1. The fsck_object() function just returns success for any
1353ca
     blob. Let's a noop fsck_blob(), which we can fill in
1353ca
     with actual logic later.
1353ca
1353ca
  2. The fsck_loose() function in builtin/fsck.c
1353ca
     just threw away blob content after loading it. Let's
1353ca
     hold onto it until after we've called fsck_object().
1353ca
1353ca
     The easiest way to do this is to just drop the
1353ca
     parse_loose_object() helper entirely. Incidentally,
1353ca
     this also fixes a memory leak: if we successfully
1353ca
     loaded the object data but did not parse it, we would
1353ca
     have left the function without freeing it.
1353ca
1353ca
  3. When fsck_loose() loads the object data, it
1353ca
     does so with a custom read_loose_object() helper. This
1353ca
     function streams any blobs, regardless of size, under
1353ca
     the assumption that we're only checking the sha1.
1353ca
1353ca
     Instead, let's actually load blobs smaller than
1353ca
     big_file_threshold, as the normal object-reading
1353ca
     code-paths would do. This lets us fsck small files, and
1353ca
     a NULL return is an indication that the blob was so big
1353ca
     that it needed to be streamed, and we can pass that
1353ca
     information along to fsck_blob().
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/fsck.c | 39 +++++++++++++++++++--------------------
1353ca
 fsck.c         |  8 +++++++-
1353ca
 sha1_file.c    |  2 +-
1353ca
 3 files changed, 27 insertions(+), 22 deletions(-)
1353ca
1353ca
diff --git a/builtin/fsck.c b/builtin/fsck.c
1353ca
index 150597c..61ab1cd 100644
1353ca
--- a/builtin/fsck.c
1353ca
+++ b/builtin/fsck.c
1353ca
@@ -299,7 +299,7 @@ static void check_connectivity(void)
1353ca
 	}
1353ca
 }
1353ca
 
1353ca
-static int fsck_obj(struct object *obj)
1353ca
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
1353ca
 {
1353ca
 	if (obj->flags & SEEN)
1353ca
 		return 0;
1353ca
@@ -311,7 +311,7 @@ static int fsck_obj(struct object *obj)
1353ca
 
1353ca
 	if (fsck_walk(obj, NULL, &fsck_obj_options))
1353ca
 		objerror(obj, "broken links");
1353ca
-	if (fsck_object(obj, NULL, 0, &fsck_obj_options))
1353ca
+	if (fsck_object(obj, buffer, size, &fsck_obj_options))
1353ca
 		return -1;
1353ca
 
1353ca
 	if (obj->type == OBJ_TREE) {
1353ca
@@ -355,7 +355,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
1353ca
 		return error("%s: object corrupt or missing", sha1_to_hex(sha1));
1353ca
 	}
1353ca
 	obj->flags = HAS_OBJ;
1353ca
-	return fsck_obj(obj);
1353ca
+	return fsck_obj(obj, buffer, size);
1353ca
 }
1353ca
 
1353ca
 static int default_refs;
1353ca
@@ -447,43 +447,42 @@ static void get_default_heads(void)
1353ca
 	}
1353ca
 }
1353ca
 
1353ca
-static struct object *parse_loose_object(const unsigned char *sha1,
1353ca
-					 const char *path)
1353ca
+static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
1353ca
 {
1353ca
 	struct object *obj;
1353ca
-	void *contents;
1353ca
 	enum object_type type;
1353ca
 	unsigned long size;
1353ca
+	void *contents;
1353ca
 	int eaten;
1353ca
 
1353ca
-	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
1353ca
-		return NULL;
1353ca
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0) {
1353ca
+		errors_found |= ERROR_OBJECT;
1353ca
+		error("%s: object corrupt or missing: %s",
1353ca
+		      sha1_to_hex(sha1), path);
1353ca
+		return 0; /* keep checking other objects */
1353ca
+	}
1353ca
 
1353ca
 	if (!contents && type != OBJ_BLOB)
1353ca
 		die("BUG: read_loose_object streamed a non-blob");
1353ca
 
1353ca
 	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
1353ca
 
1353ca
-	if (!eaten)
1353ca
-		free(contents);
1353ca
-	return obj;
1353ca
-}
1353ca
-
1353ca
-static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
1353ca
-{
1353ca
-	struct object *obj = parse_loose_object(sha1, path);
1353ca
-
1353ca
 	if (!obj) {
1353ca
 		errors_found |= ERROR_OBJECT;
1353ca
-		error("%s: object corrupt or missing: %s",
1353ca
+		error("%s: object could not be parsed: %s",
1353ca
 		      sha1_to_hex(sha1), path);
1353ca
+		if (!eaten)
1353ca
+			free(contents);
1353ca
 		return 0; /* keep checking other objects */
1353ca
 	}
1353ca
 
1353ca
 	obj->flags = HAS_OBJ;
1353ca
-	if (fsck_obj(obj))
1353ca
+	if (fsck_obj(obj, contents, size))
1353ca
 		errors_found |= ERROR_OBJECT;
1353ca
-	return 0;
1353ca
+
1353ca
+	if (!eaten)
1353ca
+		free(contents);
1353ca
+	return 0; /* keep checking other objects, even if we saw an error */
1353ca
 }
1353ca
 
1353ca
 static int fsck_cruft(const char *basename, const char *path, void *data)
1353ca
diff --git a/fsck.c b/fsck.c
1353ca
index c807b7f..a520290 100644
1353ca
--- a/fsck.c
1353ca
+++ b/fsck.c
1353ca
@@ -795,6 +795,12 @@ static int fsck_tag(struct tag *tag, const char *data,
1353ca
 	return fsck_tag_buffer(tag, data, size, options);
1353ca
 }
1353ca
 
1353ca
+static int fsck_blob(struct blob *blob, const char *buf,
1353ca
+		     unsigned long size, struct fsck_options *options)
1353ca
+{
1353ca
+	return 0;
1353ca
+}
1353ca
+
1353ca
 int fsck_object(struct object *obj, void *data, unsigned long size,
1353ca
 	struct fsck_options *options)
1353ca
 {
1353ca
@@ -802,7 +808,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
1353ca
 		return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
1353ca
 
1353ca
 	if (obj->type == OBJ_BLOB)
1353ca
-		return 0;
1353ca
+		return fsck_blob((struct blob *)obj, data, size, options);
1353ca
 	if (obj->type == OBJ_TREE)
1353ca
 		return fsck_tree((struct tree *) obj, options);
1353ca
 	if (obj->type == OBJ_COMMIT)
1353ca
diff --git a/sha1_file.c b/sha1_file.c
1353ca
index 6d29840..a091644 100644
1353ca
--- a/sha1_file.c
1353ca
+++ b/sha1_file.c
1353ca
@@ -3744,7 +3744,7 @@ int read_loose_object(const char *path,
1353ca
 		goto out;
1353ca
 	}
1353ca
 
1353ca
-	if (*type == OBJ_BLOB) {
1353ca
+	if (*type == OBJ_BLOB && *size > big_file_threshold) {
1353ca
 		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
1353ca
 			goto out;
1353ca
 	} else {
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From d393612c63e6bdfd7ab54139c229be3db89c550d Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Wed, 2 May 2018 17:20:08 -0400
1353ca
Subject: [PATCH 06/11] fsck: detect gitmodules files
1353ca
1353ca
commit 159e7b080bfa5d34559467cacaa79df89a01afc0 upstream.
1353ca
1353ca
In preparation for performing fsck checks on .gitmodules
1353ca
files, this commit plumbs in the actual detection of the
1353ca
files. Note that unlike most other fsck checks, this cannot
1353ca
be a property of a single object: we must know that the
1353ca
object is found at a ".gitmodules" path at the root tree of
1353ca
a commit.
1353ca
1353ca
Since the fsck code only sees one object at a time, we have
1353ca
to mark the related objects to fit the puzzle together. When
1353ca
we see a commit we mark its tree as a root tree, and when
1353ca
we see a root tree with a .gitmodules file, we mark the
1353ca
corresponding blob to be checked.
1353ca
1353ca
In an ideal world, we'd check the objects in topological
1353ca
order: commits followed by trees followed by blobs. In that
1353ca
case we can avoid ever loading an object twice, since all
1353ca
markings would be complete by the time we get to the marked
1353ca
objects. And indeed, if we are checking a single packfile,
1353ca
this is the order in which Git will generally write the
1353ca
objects. But we can't count on that:
1353ca
1353ca
  1. git-fsck may show us the objects in arbitrary order
1353ca
     (loose objects are fed in sha1 order, but we may also
1353ca
     have multiple packs, and we process each pack fully in
1353ca
     sequence).
1353ca
1353ca
  2. The type ordering is just what git-pack-objects happens
1353ca
     to write now. The pack format does not require a
1353ca
     specific order, and it's possible that future versions
1353ca
     of Git (or a custom version trying to fool official
1353ca
     Git's fsck checks!) may order it differently.
1353ca
1353ca
  3. We may not even be fscking all of the relevant objects
1353ca
     at once. Consider pushing with transfer.fsckObjects,
1353ca
     where one push adds a blob at path "foo", and then a
1353ca
     second push adds the same blob at path ".gitmodules".
1353ca
     The blob is not part of the second push at all, but we
1353ca
     need to mark and check it.
1353ca
1353ca
So in the general case, we need to make up to three passes
1353ca
over the objects: once to make sure we've seen all commits,
1353ca
then once to cover any trees we might have missed, and then
1353ca
a final pass to cover any .gitmodules blobs we found in the
1353ca
second pass.
1353ca
1353ca
We can simplify things a bit by loosening the requirement
1353ca
that we find .gitmodules only at root trees. Technically
1353ca
a file like "subdir/.gitmodules" is not parsed by Git, but
1353ca
it's not unreasonable for us to declare that Git is aware of
1353ca
all ".gitmodules" files and make them eligible for checking.
1353ca
That lets us drop the root-tree requirement, which
1353ca
eliminates one pass entirely. And it makes our worst case
1353ca
much better: instead of potentially queueing every root tree
1353ca
to be re-examined, the worst case is that we queue each
1353ca
unique .gitmodules blob for a second look.
1353ca
1353ca
This patch just adds the boilerplate to find .gitmodules
1353ca
files. The actual content checks will come in a subsequent
1353ca
commit.
1353ca
1353ca
[jn: backported to 2.11.y:
1353ca
 - passing oid->hash instead of oid to lookup_blob
1353ca
 - using "struct hashmap" directly since "struct oidset" isn't
1353ca
   available]
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 fsck.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1353ca
 fsck.h |  7 ++++++
1353ca
 2 files changed, 98 insertions(+)
1353ca
1353ca
diff --git a/fsck.c b/fsck.c
1353ca
index a520290..f0a566a 100644
1353ca
--- a/fsck.c
1353ca
+++ b/fsck.c
1353ca
@@ -9,6 +9,42 @@
1353ca
 #include "refs.h"
1353ca
 #include "utf8.h"
1353ca
 #include "sha1-array.h"
1353ca
+#include "hashmap.h"
1353ca
+
1353ca
+struct oidhash_entry {
1353ca
+	struct hashmap_entry ent;
1353ca
+	struct object_id oid;
1353ca
+};
1353ca
+
1353ca
+static int oidhash_hashcmp(const void *va, const void *vb,
1353ca
+			   const void *vkey)
1353ca
+{
1353ca
+	const struct oidhash_entry *a = va, *b = vb;
1353ca
+	const struct object_id *key = vkey;
1353ca
+	return oidcmp(&a->oid, key ? key : &b->oid);
1353ca
+}
1353ca
+
1353ca
+static struct hashmap gitmodules_found;
1353ca
+static struct hashmap gitmodules_done;
1353ca
+
1353ca
+static void oidhash_insert(struct hashmap *h, const struct object_id *oid)
1353ca
+{
1353ca
+	struct oidhash_entry *e;
1353ca
+
1353ca
+	if (!h->tablesize)
1353ca
+		hashmap_init(h, oidhash_hashcmp, 0);
1353ca
+	e = xmalloc(sizeof(*e));
1353ca
+	hashmap_entry_init(&e->ent, sha1hash(oid->hash));
1353ca
+	oidcpy(&e->oid, oid);
1353ca
+	hashmap_add(h, e);
1353ca
+}
1353ca
+
1353ca
+static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
1353ca
+{
1353ca
+	return h->tablesize &&
1353ca
+		!!hashmap_get_from_hash(h, sha1hash(oid->hash), oid);
1353ca
+}
1353ca
+
1353ca
 
1353ca
 #define FSCK_FATAL -1
1353ca
 #define FSCK_INFO -2
1353ca
@@ -43,6 +79,7 @@
1353ca
 	FUNC(MISSING_TAG_ENTRY, ERROR) \
1353ca
 	FUNC(MISSING_TAG_OBJECT, ERROR) \
1353ca
 	FUNC(MISSING_TREE, ERROR) \
1353ca
+	FUNC(MISSING_TREE_OBJECT, ERROR) \
1353ca
 	FUNC(MISSING_TYPE, ERROR) \
1353ca
 	FUNC(MISSING_TYPE_ENTRY, ERROR) \
1353ca
 	FUNC(MULTIPLE_AUTHORS, ERROR) \
1353ca
@@ -50,6 +87,8 @@
1353ca
 	FUNC(TREE_NOT_SORTED, ERROR) \
1353ca
 	FUNC(UNKNOWN_TYPE, ERROR) \
1353ca
 	FUNC(ZERO_PADDED_DATE, ERROR) \
1353ca
+	FUNC(GITMODULES_MISSING, ERROR) \
1353ca
+	FUNC(GITMODULES_BLOB, ERROR) \
1353ca
 	/* warnings */ \
1353ca
 	FUNC(BAD_FILEMODE, WARN) \
1353ca
 	FUNC(EMPTY_NAME, WARN) \
1353ca
@@ -462,6 +501,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
1353ca
 		has_dotdot |= !strcmp(name, "..");
1353ca
 		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
1353ca
 		has_zero_pad |= *(char *)desc.buffer == '0';
1353ca
+
1353ca
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
1353ca
+			oidhash_insert(&gitmodules_found, oid);
1353ca
+
1353ca
 		update_tree_entry(&desc);
1353ca
 
1353ca
 		switch (mode) {
1353ca
@@ -831,3 +874,51 @@ int fsck_error_function(struct object *obj, int msg_type, const char *message)
1353ca
 	error("object %s: %s", oid_to_hex(&obj->oid), message);
1353ca
 	return 1;
1353ca
 }
1353ca
+
1353ca
+int fsck_finish(struct fsck_options *options)
1353ca
+{
1353ca
+	int ret = 0;
1353ca
+	struct hashmap_iter iter;
1353ca
+	const struct oidhash_entry *e;
1353ca
+
1353ca
+	hashmap_iter_init(&gitmodules_found, &iter);
1353ca
+	while ((e = hashmap_iter_next(&iter))) {
1353ca
+		const struct object_id *oid = &e->oid;
1353ca
+		struct blob *blob;
1353ca
+		enum object_type type;
1353ca
+		unsigned long size;
1353ca
+		char *buf;
1353ca
+
1353ca
+		if (oidhash_contains(&gitmodules_done, oid))
1353ca
+			continue;
1353ca
+
1353ca
+		blob = lookup_blob(oid->hash);
1353ca
+		if (!blob) {
1353ca
+			ret |= report(options, &blob->object,
1353ca
+				      FSCK_MSG_GITMODULES_BLOB,
1353ca
+				      "non-blob found at .gitmodules");
1353ca
+			continue;
1353ca
+		}
1353ca
+
1353ca
+		buf = read_sha1_file(oid->hash, &type, &size);
1353ca
+		if (!buf) {
1353ca
+			ret |= report(options, &blob->object,
1353ca
+				      FSCK_MSG_GITMODULES_MISSING,
1353ca
+				      "unable to read .gitmodules blob");
1353ca
+			continue;
1353ca
+		}
1353ca
+
1353ca
+		if (type == OBJ_BLOB)
1353ca
+			ret |= fsck_blob(blob, buf, size, options);
1353ca
+		else
1353ca
+			ret |= report(options, &blob->object,
1353ca
+				      FSCK_MSG_GITMODULES_BLOB,
1353ca
+				      "non-blob found at .gitmodules");
1353ca
+		free(buf);
1353ca
+	}
1353ca
+
1353ca
+
1353ca
+	hashmap_free(&gitmodules_found, 1);
1353ca
+	hashmap_free(&gitmodules_done, 1);
1353ca
+	return ret;
1353ca
+}
1353ca
diff --git a/fsck.h b/fsck.h
1353ca
index dded84b..3eebe56 100644
1353ca
--- a/fsck.h
1353ca
+++ b/fsck.h
1353ca
@@ -50,4 +50,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
1353ca
 int fsck_object(struct object *obj, void *data, unsigned long size,
1353ca
 	struct fsck_options *options);
1353ca
 
1353ca
+/*
1353ca
+ * Some fsck checks are context-dependent, and may end up queued; run this
1353ca
+ * after completing all fsck_object() calls in order to resolve any remaining
1353ca
+ * checks.
1353ca
+ */
1353ca
+int fsck_finish(struct fsck_options *options);
1353ca
+
1353ca
 #endif
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From ff398bff4000efa4275058c6b4f8c479831494f0 Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Wed, 2 May 2018 17:25:27 -0400
1353ca
Subject: [PATCH 07/11] fsck: check .gitmodules content
1353ca
1353ca
commit ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e upstream.
1353ca
1353ca
This patch detects and blocks submodule names which do not
1353ca
match the policy set forth in submodule-config. These should
1353ca
already be caught by the submodule code itself, but putting
1353ca
the check here means that newer versions of Git can protect
1353ca
older ones from malicious entries (e.g., a server with
1353ca
receive.fsckObjects will block the objects, protecting
1353ca
clients which fetch from it).
1353ca
1353ca
As a side effect, this means fsck will also complain about
1353ca
.gitmodules files that cannot be parsed (or were larger than
1353ca
core.bigFileThreshold).
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 fsck.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1353ca
 1 file changed, 60 insertions(+), 1 deletion(-)
1353ca
1353ca
diff --git a/fsck.c b/fsck.c
1353ca
index f0a566a..d366ad8 100644
1353ca
--- a/fsck.c
1353ca
+++ b/fsck.c
1353ca
@@ -10,6 +10,9 @@
1353ca
 #include "utf8.h"
1353ca
 #include "sha1-array.h"
1353ca
 #include "hashmap.h"
1353ca
+#include "submodule-config.h"
1353ca
+
1353ca
+#define CONFIG_ORIGIN_BLOB 0
1353ca
 
1353ca
 struct oidhash_entry {
1353ca
 	struct hashmap_entry ent;
1353ca
@@ -89,6 +92,8 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
1353ca
 	FUNC(ZERO_PADDED_DATE, ERROR) \
1353ca
 	FUNC(GITMODULES_MISSING, ERROR) \
1353ca
 	FUNC(GITMODULES_BLOB, ERROR) \
1353ca
+	FUNC(GITMODULES_PARSE, ERROR) \
1353ca
+	FUNC(GITMODULES_NAME, ERROR) \
1353ca
 	/* warnings */ \
1353ca
 	FUNC(BAD_FILEMODE, WARN) \
1353ca
 	FUNC(EMPTY_NAME, WARN) \
1353ca
@@ -838,10 +843,64 @@ static int fsck_tag(struct tag *tag, const char *data,
1353ca
 	return fsck_tag_buffer(tag, data, size, options);
1353ca
 }
1353ca
 
1353ca
+struct fsck_gitmodules_data {
1353ca
+	struct object *obj;
1353ca
+	struct fsck_options *options;
1353ca
+	int ret;
1353ca
+};
1353ca
+
1353ca
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
1353ca
+{
1353ca
+	struct fsck_gitmodules_data *data = vdata;
1353ca
+	const char *subsection, *key;
1353ca
+	int subsection_len;
1353ca
+	char *name;
1353ca
+
1353ca
+	if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
1353ca
+	    !subsection)
1353ca
+		return 0;
1353ca
+
1353ca
+	name = xmemdupz(subsection, subsection_len);
1353ca
+	if (check_submodule_name(name) < 0)
1353ca
+		data->ret |= report(data->options, data->obj,
1353ca
+				    FSCK_MSG_GITMODULES_NAME,
1353ca
+				    "disallowed submodule name: %s",
1353ca
+				    name);
1353ca
+	free(name);
1353ca
+
1353ca
+	return 0;
1353ca
+}
1353ca
+
1353ca
 static int fsck_blob(struct blob *blob, const char *buf,
1353ca
 		     unsigned long size, struct fsck_options *options)
1353ca
 {
1353ca
-	return 0;
1353ca
+	struct fsck_gitmodules_data data;
1353ca
+
1353ca
+	if (!oidhash_contains(&gitmodules_found, &blob->object.oid))
1353ca
+		return 0;
1353ca
+	oidhash_insert(&gitmodules_done, &blob->object.oid);
1353ca
+
1353ca
+	if (!buf) {
1353ca
+		/*
1353ca
+		 * A missing buffer here is a sign that the caller found the
1353ca
+		 * blob too gigantic to load into memory. Let's just consider
1353ca
+		 * that an error.
1353ca
+		 */
1353ca
+		return report(options, &blob->object,
1353ca
+			      FSCK_MSG_GITMODULES_PARSE,
1353ca
+			      ".gitmodules too large to parse");
1353ca
+	}
1353ca
+
1353ca
+	data.obj = &blob->object;
1353ca
+	data.options = options;
1353ca
+	data.ret = 0;
1353ca
+	if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1353ca
+				".gitmodules", buf, size, &data))
1353ca
+		data.ret |= report(options, &blob->object,
1353ca
+				   FSCK_MSG_GITMODULES_PARSE,
1353ca
+				   "could not parse gitmodules blob");
1353ca
+
1353ca
+	return data.ret;
1353ca
 }
1353ca
 
1353ca
 int fsck_object(struct object *obj, void *data, unsigned long size,
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 3f7875be4a5b68058e243a0525d75165620738cb Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Wed, 2 May 2018 17:20:35 -0400
1353ca
Subject: [PATCH 08/11] fsck: call fsck_finish() after fscking objects
1353ca
1353ca
commit 1995b5e03e1cc97116be58cdc0502d4a23547856 upstream.
1353ca
1353ca
Now that the internal fsck code is capable of checking
1353ca
.gitmodules files, we just need to teach its callers to use
1353ca
the "finish" function to check any queued objects.
1353ca
1353ca
With this, we can now catch the malicious case in t7415 with
1353ca
git-fsck.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/fsck.c             | 3 +++
1353ca
 t/t7415-submodule-names.sh | 4 ++++
1353ca
 2 files changed, 7 insertions(+)
1353ca
1353ca
diff --git a/builtin/fsck.c b/builtin/fsck.c
1353ca
index 61ab1cd..70f620b 100644
1353ca
--- a/builtin/fsck.c
1353ca
+++ b/builtin/fsck.c
1353ca
@@ -660,6 +660,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
1353ca
 			count += p->num_objects;
1353ca
 		}
1353ca
 		stop_progress(&progress);
1353ca
+
1353ca
+		if (fsck_finish(&fsck_obj_options))
1353ca
+			errors_found |= ERROR_OBJECT;
1353ca
 	}
1353ca
 
1353ca
 	heads = 0;
1353ca
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
1353ca
index b30fa33..5d61627 100755
1353ca
--- a/t/t7415-submodule-names.sh
1353ca
+++ b/t/t7415-submodule-names.sh
1353ca
@@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' '
1353ca
 	! grep "RUNNING POST CHECKOUT" output
1353ca
 '
1353ca
 
1353ca
+test_expect_success 'fsck detects evil superproject' '
1353ca
+	test_must_fail git fsck
1353ca
+'
1353ca
+
1353ca
 test_done
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 563989871514b892c2c9a4f4fd2ef515dd6adaee Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Fri, 4 May 2018 19:40:08 -0400
1353ca
Subject: [PATCH 09/11] unpack-objects: call fsck_finish() after fscking
1353ca
 objects
1353ca
1353ca
commit 6e328d6caef218db320978e3e251009135d87d0e upstream.
1353ca
1353ca
As with the previous commit, we must call fsck's "finish"
1353ca
function in order to catch any queued objects for
1353ca
.gitmodules checks.
1353ca
1353ca
This second pass will be able to access any incoming
1353ca
objects, because we will have exploded them to loose objects
1353ca
by now.
1353ca
1353ca
This isn't quite ideal, because it means that bad objects
1353ca
may have been written to the object database (and a
1353ca
subsequent operation could then reference them, even if the
1353ca
other side doesn't send the objects again). However, this is
1353ca
sufficient when used with receive.fsckObjects, since those
1353ca
loose objects will all be placed in a temporary quarantine
1353ca
area that will get wiped if we find any problems.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/unpack-objects.c   | 5 ++++-
1353ca
 t/t7415-submodule-names.sh | 7 +++++++
1353ca
 2 files changed, 11 insertions(+), 1 deletion(-)
1353ca
1353ca
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
1353ca
index 7018829..27ba86e 100644
1353ca
--- a/builtin/unpack-objects.c
1353ca
+++ b/builtin/unpack-objects.c
1353ca
@@ -560,8 +560,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
1353ca
 	unpack_all();
1353ca
 	git_SHA1_Update(&ctx, buffer, offset);
1353ca
 	git_SHA1_Final(sha1, &ctx;;
1353ca
-	if (strict)
1353ca
+	if (strict) {
1353ca
 		write_rest();
1353ca
+		if (fsck_finish(&fsck_options))
1353ca
+			die(_("fsck error in pack objects"));
1353ca
+	}
1353ca
 	if (hashcmp(fill(20), sha1))
1353ca
 		die("final sha1 did not match");
1353ca
 	use(20);
1353ca
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
1353ca
index 5d61627..a09087c 100755
1353ca
--- a/t/t7415-submodule-names.sh
1353ca
+++ b/t/t7415-submodule-names.sh
1353ca
@@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' '
1353ca
 	test_must_fail git fsck
1353ca
 '
1353ca
 
1353ca
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
1353ca
+	rm -rf dst.git &&
1353ca
+	git init --bare dst.git &&
1353ca
+	git -C dst.git config transfer.fsckObjects true &&
1353ca
+	test_must_fail git push dst.git HEAD
1353ca
+'
1353ca
+
1353ca
 test_done
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From 0e3a3c30f0ba16f3e3fd4581cea36b2d2d4d204a Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Fri, 4 May 2018 19:45:01 -0400
1353ca
Subject: [PATCH 10/11] index-pack: check .gitmodules files with --strict
1353ca
1353ca
commit 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 upstream.
1353ca
1353ca
Now that the internal fsck code has all of the plumbing we
1353ca
need, we can start checking incoming .gitmodules files.
1353ca
Naively, it seems like we would just need to add a call to
1353ca
fsck_finish() after we've processed all of the objects. And
1353ca
that would be enough to cover the initial test included
1353ca
here. But there are two extra bits:
1353ca
1353ca
  1. We currently don't bother calling fsck_object() at all
1353ca
     for blobs, since it has traditionally been a noop. We'd
1353ca
     actually catch these blobs in fsck_finish() at the end,
1353ca
     but it's more efficient to check them when we already
1353ca
     have the object loaded in memory.
1353ca
1353ca
  2. The second pass done by fsck_finish() needs to access
1353ca
     the objects, but we're actually indexing the pack in
1353ca
     this process. In theory we could give the fsck code a
1353ca
     special callback for accessing the in-pack data, but
1353ca
     it's actually quite tricky:
1353ca
1353ca
       a. We don't have an internal efficient index mapping
1353ca
	  oids to packfile offsets. We only generate it on
1353ca
	  the fly as part of writing out the .idx file.
1353ca
1353ca
       b. We'd still have to reconstruct deltas, which means
1353ca
          we'd basically have to replicate all of the
1353ca
	  reading logic in packfile.c.
1353ca
1353ca
     Instead, let's avoid running fsck_finish() until after
1353ca
     we've written out the .idx file, and then just add it
1353ca
     to our internal packed_git list.
1353ca
1353ca
     This does mean that the objects are "in the repository"
1353ca
     before we finish our fsck checks. But unpack-objects
1353ca
     already exhibits this same behavior, and it's an
1353ca
     acceptable tradeoff here for the same reason: the
1353ca
     quarantine mechanism means that pushes will be
1353ca
     fully protected.
1353ca
1353ca
In addition to a basic push test in t7415, we add a sneaky
1353ca
pack that reverses the usual object order in the pack,
1353ca
requiring that index-pack access the tree and blob during
1353ca
the "finish" step.
1353ca
1353ca
This already works for unpack-objects (since it will have
1353ca
written out loose objects), but we'll check it with this
1353ca
sneaky pack for good measure.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 builtin/index-pack.c       | 10 ++++++++++
1353ca
 t/lib-pack.sh              | 12 ++++++++++++
1353ca
 t/t7415-submodule-names.sh | 38 ++++++++++++++++++++++++++++++++++++++
1353ca
 3 files changed, 60 insertions(+)
1353ca
1353ca
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
1353ca
index 81f3bc7..f53bf22 100644
1353ca
--- a/builtin/index-pack.c
1353ca
+++ b/builtin/index-pack.c
1353ca
@@ -825,6 +825,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
1353ca
 				blob->object.flags |= FLAG_CHECKED;
1353ca
 			else
1353ca
 				die(_("invalid blob object %s"), sha1_to_hex(sha1));
1353ca
+			if (do_fsck_object &&
1353ca
+			    fsck_object(&blob->object, (void *)data, size, &fsck_options))
1353ca
+				die(_("fsck error in packed object"));
1353ca
 		} else {
1353ca
 			struct object *obj;
1353ca
 			int eaten;
1353ca
@@ -1440,6 +1443,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
1353ca
 	} else
1353ca
 		chmod(final_index_name, 0444);
1353ca
 
1353ca
+	if (do_fsck_object)
1353ca
+		add_packed_git(final_index_name, strlen(final_index_name), 0);
1353ca
+
1353ca
 	if (!from_stdin) {
1353ca
 		printf("%s\n", sha1_to_hex(sha1));
1353ca
 	} else {
1353ca
@@ -1775,6 +1781,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
1353ca
 		      pack_sha1);
1353ca
 	else
1353ca
 		close(input_fd);
1353ca
+
1353ca
+	if (do_fsck_object && fsck_finish(&fsck_options))
1353ca
+		die(_("fsck error in pack objects"));
1353ca
+
1353ca
 	free(objects);
1353ca
 	strbuf_release(&index_name_buf);
1353ca
 	strbuf_release(&keep_name_buf);
1353ca
diff --git a/t/lib-pack.sh b/t/lib-pack.sh
1353ca
index 7509846..4674899 100644
1353ca
--- a/t/lib-pack.sh
1353ca
+++ b/t/lib-pack.sh
1353ca
@@ -79,6 +79,18 @@ pack_obj () {
1353ca
 		;;
1353ca
 	esac
1353ca
 
1353ca
+	# If it's not a delta, we can convince pack-objects to generate a pack
1353ca
+	# with just our entry, and then strip off the header (12 bytes) and
1353ca
+	# trailer (20 bytes).
1353ca
+	if test -z "$2"
1353ca
+	then
1353ca
+		echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
1353ca
+		size=$(wc -c 
1353ca
+		dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
1353ca
+		rm -f pack_obj.tmp
1353ca
+		return
1353ca
+	fi
1353ca
+
1353ca
 	echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
1353ca
 	return 1
1353ca
 }
1353ca
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
1353ca
index a09087c..fca7091 100755
1353ca
--- a/t/t7415-submodule-names.sh
1353ca
+++ b/t/t7415-submodule-names.sh
1353ca
@@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a
1353ca
 real-world setup that confirms we catch this in practice.
1353ca
 '
1353ca
 . ./test-lib.sh
1353ca
+. "$TEST_DIRECTORY"/lib-pack.sh
1353ca
 
1353ca
 test_expect_success 'check names' '
1353ca
 	cat >expect <<-\EOF &&
1353ca
@@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
1353ca
 	test_must_fail git push dst.git HEAD
1353ca
 '
1353ca
 
1353ca
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
1353ca
+	rm -rf dst.git &&
1353ca
+	git init --bare dst.git &&
1353ca
+	git -C dst.git config transfer.fsckObjects true &&
1353ca
+	git -C dst.git config transfer.unpackLimit 1 &&
1353ca
+	test_must_fail git push dst.git HEAD
1353ca
+'
1353ca
+
1353ca
+# Normally our packs contain commits followed by trees followed by blobs. This
1353ca
+# reverses the order, which requires backtracking to find the context of a
1353ca
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
1353ca
+test_expect_success 'create oddly ordered pack' '
1353ca
+	git checkout --orphan odd &&
1353ca
+	git rm -rf --cached . &&
1353ca
+	git add .gitmodules &&
1353ca
+	git commit -m odd &&
1353ca
+	{
1353ca
+		pack_header 3 &&
1353ca
+		pack_obj $(git rev-parse HEAD:.gitmodules) &&
1353ca
+		pack_obj $(git rev-parse HEAD^{tree}) &&
1353ca
+		pack_obj $(git rev-parse HEAD)
1353ca
+	} >odd.pack &&
1353ca
+	pack_trailer odd.pack
1353ca
+'
1353ca
+
1353ca
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
1353ca
+	rm -rf dst.git &&
1353ca
+	git init --bare dst.git &&
1353ca
+	test_must_fail git -C dst.git unpack-objects --strict 
1353ca
+'
1353ca
+
1353ca
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
1353ca
+	rm -rf dst.git &&
1353ca
+	git init --bare dst.git &&
1353ca
+	test_must_fail git -C dst.git index-pack --strict --stdin 
1353ca
+'
1353ca
+
1353ca
 test_done
1353ca
-- 
1353ca
2.14.4
1353ca
1353ca
1353ca
From b3790cb51efbba6c393471feb21e9f9d9ec86377 Mon Sep 17 00:00:00 2001
1353ca
From: Jeff King <peff@peff.net>
1353ca
Date: Fri, 4 May 2018 20:03:35 -0400
1353ca
Subject: [PATCH 11/11] fsck: complain when .gitmodules is a symlink
1353ca
1353ca
commit b7b1fca175f1ed7933f361028c631b9ac86d868d upstream.
1353ca
1353ca
We've recently forbidden .gitmodules to be a symlink in
1353ca
verify_path(). And it's an easy way to circumvent our fsck
1353ca
checks for .gitmodules content. So let's complain when we
1353ca
see it.
1353ca
1353ca
Signed-off-by: Jeff King <peff@peff.net>
1353ca
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1353ca
---
1353ca
 fsck.c                     | 14 ++++++++++----
1353ca
 t/t7415-submodule-names.sh | 29 +++++++++++++++++++++++++++++
1353ca
 2 files changed, 39 insertions(+), 4 deletions(-)
1353ca
1353ca
diff --git a/fsck.c b/fsck.c
1353ca
index d366ad8..79694b0 100644
1353ca
--- a/fsck.c
1353ca
+++ b/fsck.c
1353ca
@@ -94,6 +94,7 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
1353ca
 	FUNC(GITMODULES_BLOB, ERROR) \
1353ca
 	FUNC(GITMODULES_PARSE, ERROR) \
1353ca
 	FUNC(GITMODULES_NAME, ERROR) \
1353ca
+	FUNC(GITMODULES_SYMLINK, ERROR) \
1353ca
 	/* warnings */ \
1353ca
 	FUNC(BAD_FILEMODE, WARN) \
1353ca
 	FUNC(EMPTY_NAME, WARN) \
1353ca
@@ -472,7 +473,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
1353ca
 
1353ca
 static int fsck_tree(struct tree *item, struct fsck_options *options)
1353ca
 {
1353ca
-	int retval;
1353ca
+	int retval = 0;
1353ca
 	int has_null_sha1 = 0;
1353ca
 	int has_full_path = 0;
1353ca
 	int has_empty_name = 0;
1353ca
@@ -507,8 +508,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
1353ca
 		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
1353ca
 		has_zero_pad |= *(char *)desc.buffer == '0';
1353ca
 
1353ca
-		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
1353ca
-			oidhash_insert(&gitmodules_found, oid);
1353ca
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
1353ca
+			if (!S_ISLNK(mode))
1353ca
+				oidhash_insert(&gitmodules_found, oid);
1353ca
+			else
1353ca
+				retval += report(options, &item->object,
1353ca
+						 FSCK_MSG_GITMODULES_SYMLINK,
1353ca
+						 ".gitmodules is a symbolic link");
1353ca
+		}
1353ca
 
1353ca
 		update_tree_entry(&desc);
1353ca
 
1353ca
@@ -551,7 +558,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
1353ca
 		o_name = name;
1353ca
 	}
1353ca
 
1353ca
-	retval = 0;
1353ca
 	if (has_null_sha1)
1353ca
 		retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
1353ca
 	if (has_full_path)
1353ca
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
1353ca
index fca7091..6ee7115 100755
1353ca
--- a/t/t7415-submodule-names.sh
1353ca
+++ b/t/t7415-submodule-names.sh
1353ca
@@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
1353ca
 	test_must_fail git -C dst.git index-pack --strict --stdin 
1353ca
 '
1353ca
 
1353ca
+test_expect_success 'fsck detects symlinked .gitmodules file' '
1353ca
+	git init symlink &&
1353ca
+	(
1353ca
+		cd symlink &&
1353ca
+
1353ca
+		# Make the tree directly to avoid index restrictions.
1353ca
+		#
1353ca
+		# Because symlinks store the target as a blob, choose
1353ca
+		# a pathname that could be parsed as a .gitmodules file
1353ca
+		# to trick naive non-symlink-aware checking.
1353ca
+		tricky="[foo]bar=true" &&
1353ca
+		content=$(git hash-object -w ../.gitmodules) &&
1353ca
+		target=$(printf "$tricky" | git hash-object -w --stdin) &&
1353ca
+		tree=$(
1353ca
+			{
1353ca
+				printf "100644 blob $content\t$tricky\n" &&
1353ca
+				printf "120000 blob $target\t.gitmodules\n"
1353ca
+			} | git mktree
1353ca
+		) &&
1353ca
+		commit=$(git commit-tree $tree) &&
1353ca
+
1353ca
+		# Check not only that we fail, but that it is due to the
1353ca
+		# symlink detector; this grep string comes from the config
1353ca
+		# variable name and will not be translated.
1353ca
+		test_must_fail git fsck 2>output &&
1353ca
+		grep gitmodulesSymlink output
1353ca
+	)
1353ca
+'
1353ca
+
1353ca
 test_done
1353ca
-- 
1353ca
2.14.4
1353ca