|
|
1353ca |
From ff9e25d40ff1f2a2942eac7b3dd108ca739f362a Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Fri, 13 Jan 2017 12:58:16 -0500
|
|
|
1353ca |
Subject: [PATCH 01/11] sha1_file: add read_loose_object() function
|
|
|
1353ca |
|
|
|
1353ca |
commit f6371f9210418f1beabc85b097e2a3470aeeb54d upstream.
|
|
|
1353ca |
|
|
|
1353ca |
It's surprisingly hard to ask the sha1_file code to open a
|
|
|
1353ca |
_specific_ incarnation of a loose object. Most of the
|
|
|
1353ca |
functions take a sha1, and loop over the various object
|
|
|
1353ca |
types (packed versus loose) and locations (local versus
|
|
|
1353ca |
alternates) at a low level.
|
|
|
1353ca |
|
|
|
1353ca |
However, some tools like fsck need to look at a specific
|
|
|
1353ca |
file. This patch gives them a function they can use to open
|
|
|
1353ca |
the loose object at a given path.
|
|
|
1353ca |
|
|
|
1353ca |
The implementation unfortunately ends up repeating bits of
|
|
|
1353ca |
related functions, but there's not a good way around it
|
|
|
1353ca |
without some major refactoring of the whole sha1_file stack.
|
|
|
1353ca |
We need to mmap the specific file, then partially read the
|
|
|
1353ca |
zlib stream to know whether we're streaming or not, and then
|
|
|
1353ca |
finally either stream it or copy the data to a buffer.
|
|
|
1353ca |
|
|
|
1353ca |
We can do that by assembling some of the more arcane
|
|
|
1353ca |
internal sha1_file functions, but we end up having to
|
|
|
1353ca |
essentially reimplement unpack_sha1_file(), along with the
|
|
|
1353ca |
streaming bits of check_sha1_signature().
|
|
|
1353ca |
|
|
|
1353ca |
Still, most of the ugliness is contained in the new
|
|
|
1353ca |
function, and the interface is clean enough that it may be
|
|
|
1353ca |
reusable (though it seems unlikely anything but git-fsck
|
|
|
1353ca |
would care about opening a specific file).
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Junio C Hamano <gitster@pobox.com>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
cache.h | 13 ++++++
|
|
|
1353ca |
sha1_file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
|
1353ca |
2 files changed, 143 insertions(+), 2 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/cache.h b/cache.h
|
|
|
1353ca |
index 61d43c6..fa53623 100644
|
|
|
1353ca |
--- a/cache.h
|
|
|
1353ca |
+++ b/cache.h
|
|
|
1353ca |
@@ -1113,6 +1113,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
|
|
|
1353ca |
|
|
|
1353ca |
extern int has_sha1_pack(const unsigned char *sha1);
|
|
|
1353ca |
|
|
|
1353ca |
+/*
|
|
|
1353ca |
+ * Open the loose object at path, check its sha1, and return the contents,
|
|
|
1353ca |
+ * type, and size. If the object is a blob, then "contents" may return NULL,
|
|
|
1353ca |
+ * to allow streaming of large blobs.
|
|
|
1353ca |
+ *
|
|
|
1353ca |
+ * Returns 0 on success, negative on error (details may be written to stderr).
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+int read_loose_object(const char *path,
|
|
|
1353ca |
+ const unsigned char *expected_sha1,
|
|
|
1353ca |
+ enum object_type *type,
|
|
|
1353ca |
+ unsigned long *size,
|
|
|
1353ca |
+ void **contents);
|
|
|
1353ca |
+
|
|
|
1353ca |
/*
|
|
|
1353ca |
* Return true iff we have an object named sha1, whether local or in
|
|
|
1353ca |
* an alternate object database, and whether packed or loose. This
|
|
|
1353ca |
diff --git a/sha1_file.c b/sha1_file.c
|
|
|
1353ca |
index cb571ac..6d29840 100644
|
|
|
1353ca |
--- a/sha1_file.c
|
|
|
1353ca |
+++ b/sha1_file.c
|
|
|
1353ca |
@@ -1547,12 +1547,21 @@ static int open_sha1_file(const unsigned char *sha1)
|
|
|
1353ca |
return -1;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
|
|
1353ca |
+/*
|
|
|
1353ca |
+ * Map the loose object at "path" if it is not NULL, or the path found by
|
|
|
1353ca |
+ * searching for a loose object named "sha1".
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+static void *map_sha1_file_1(const char *path,
|
|
|
1353ca |
+ const unsigned char *sha1,
|
|
|
1353ca |
+ unsigned long *size)
|
|
|
1353ca |
{
|
|
|
1353ca |
void *map;
|
|
|
1353ca |
int fd;
|
|
|
1353ca |
|
|
|
1353ca |
- fd = open_sha1_file(sha1);
|
|
|
1353ca |
+ if (path)
|
|
|
1353ca |
+ fd = git_open_noatime(path);
|
|
|
1353ca |
+ else
|
|
|
1353ca |
+ fd = open_sha1_file(sha1);
|
|
|
1353ca |
map = NULL;
|
|
|
1353ca |
if (fd >= 0) {
|
|
|
1353ca |
struct stat st;
|
|
|
1353ca |
@@ -1571,6 +1580,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
|
|
1353ca |
return map;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ return map_sha1_file_1(NULL, sha1, size);
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
unsigned long unpack_object_header_buffer(const unsigned char *buf,
|
|
|
1353ca |
unsigned long len, enum object_type *type, unsigned long *sizep)
|
|
|
1353ca |
{
|
|
|
1353ca |
@@ -3644,3 +3658,117 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
|
|
|
1353ca |
}
|
|
|
1353ca |
return r ? r : pack_errors;
|
|
|
1353ca |
}
|
|
|
1353ca |
+
|
|
|
1353ca |
+static int check_stream_sha1(git_zstream *stream,
|
|
|
1353ca |
+ const char *hdr,
|
|
|
1353ca |
+ unsigned long size,
|
|
|
1353ca |
+ const char *path,
|
|
|
1353ca |
+ const unsigned char *expected_sha1)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ git_SHA_CTX c;
|
|
|
1353ca |
+ unsigned char real_sha1[GIT_SHA1_RAWSZ];
|
|
|
1353ca |
+ unsigned char buf[4096];
|
|
|
1353ca |
+ unsigned long total_read;
|
|
|
1353ca |
+ int status = Z_OK;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ git_SHA1_Init(&c);
|
|
|
1353ca |
+ git_SHA1_Update(&c, hdr, stream->total_out);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ /*
|
|
|
1353ca |
+ * We already read some bytes into hdr, but the ones up to the NUL
|
|
|
1353ca |
+ * do not count against the object's content size.
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+ total_read = stream->total_out - strlen(hdr) - 1;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ /*
|
|
|
1353ca |
+ * This size comparison must be "<=" to read the final zlib packets;
|
|
|
1353ca |
+ * see the comment in unpack_sha1_rest for details.
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+ while (total_read <= size &&
|
|
|
1353ca |
+ (status == Z_OK || status == Z_BUF_ERROR)) {
|
|
|
1353ca |
+ stream->next_out = buf;
|
|
|
1353ca |
+ stream->avail_out = sizeof(buf);
|
|
|
1353ca |
+ if (size - total_read < stream->avail_out)
|
|
|
1353ca |
+ stream->avail_out = size - total_read;
|
|
|
1353ca |
+ status = git_inflate(stream, Z_FINISH);
|
|
|
1353ca |
+ git_SHA1_Update(&c, buf, stream->next_out - buf);
|
|
|
1353ca |
+ total_read += stream->next_out - buf;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+ git_inflate_end(stream);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (status != Z_STREAM_END) {
|
|
|
1353ca |
+ error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
|
|
|
1353ca |
+ return -1;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ git_SHA1_Final(real_sha1, &c);
|
|
|
1353ca |
+ if (hashcmp(expected_sha1, real_sha1)) {
|
|
|
1353ca |
+ error("sha1 mismatch for %s (expected %s)", path,
|
|
|
1353ca |
+ sha1_to_hex(expected_sha1));
|
|
|
1353ca |
+ return -1;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ return 0;
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
+int read_loose_object(const char *path,
|
|
|
1353ca |
+ const unsigned char *expected_sha1,
|
|
|
1353ca |
+ enum object_type *type,
|
|
|
1353ca |
+ unsigned long *size,
|
|
|
1353ca |
+ void **contents)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ int ret = -1;
|
|
|
1353ca |
+ int fd = -1;
|
|
|
1353ca |
+ void *map = NULL;
|
|
|
1353ca |
+ unsigned long mapsize;
|
|
|
1353ca |
+ git_zstream stream;
|
|
|
1353ca |
+ char hdr[32];
|
|
|
1353ca |
+
|
|
|
1353ca |
+ *contents = NULL;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ map = map_sha1_file_1(path, NULL, &mapsize);
|
|
|
1353ca |
+ if (!map) {
|
|
|
1353ca |
+ error_errno("unable to mmap %s", path);
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
|
|
|
1353ca |
+ error("unable to unpack header of %s", path);
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ *type = parse_sha1_header(hdr, size);
|
|
|
1353ca |
+ if (*type < 0) {
|
|
|
1353ca |
+ error("unable to parse header of %s", path);
|
|
|
1353ca |
+ git_inflate_end(&stream);
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (*type == OBJ_BLOB) {
|
|
|
1353ca |
+ if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ } else {
|
|
|
1353ca |
+ *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
|
|
|
1353ca |
+ if (!*contents) {
|
|
|
1353ca |
+ error("unable to unpack contents of %s", path);
|
|
|
1353ca |
+ git_inflate_end(&stream);
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+ if (check_sha1_signature(expected_sha1, *contents,
|
|
|
1353ca |
+ *size, typename(*type))) {
|
|
|
1353ca |
+ error("sha1 mismatch for %s (expected %s)", path,
|
|
|
1353ca |
+ sha1_to_hex(expected_sha1));
|
|
|
1353ca |
+ free(*contents);
|
|
|
1353ca |
+ goto out;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ ret = 0; /* everything checks out */
|
|
|
1353ca |
+
|
|
|
1353ca |
+out:
|
|
|
1353ca |
+ if (map)
|
|
|
1353ca |
+ munmap(map, mapsize);
|
|
|
1353ca |
+ if (fd >= 0)
|
|
|
1353ca |
+ close(fd);
|
|
|
1353ca |
+ return ret;
|
|
|
1353ca |
+}
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 65074f3eda3338445c7674b7d5b176844970b5de Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Fri, 13 Jan 2017 12:59:44 -0500
|
|
|
1353ca |
Subject: [PATCH 02/11] fsck: parse loose object paths directly
|
|
|
1353ca |
|
|
|
1353ca |
commit c68b489e56431cf27f7719913ab09ddc62f95912 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
When we iterate over the list of loose objects to check, we
|
|
|
1353ca |
get the actual path of each object. But we then throw it
|
|
|
1353ca |
away and pass just the sha1 to fsck_sha1(), which will do a
|
|
|
1353ca |
fresh lookup. Usually it would find the same object, but it
|
|
|
1353ca |
may not if an object exists both as a loose and a packed
|
|
|
1353ca |
object. We may end up checking the packed object twice, and
|
|
|
1353ca |
never look at the loose one.
|
|
|
1353ca |
|
|
|
1353ca |
In practice this isn't too terrible, because if fsck doesn't
|
|
|
1353ca |
complain, it means you have at least one good copy. But
|
|
|
1353ca |
since the point of fsck is to look for corruption, we should
|
|
|
1353ca |
be thorough.
|
|
|
1353ca |
|
|
|
1353ca |
The new read_loose_object() interface can help us get the
|
|
|
1353ca |
data from disk, and then we replace parse_object() with
|
|
|
1353ca |
parse_object_buffer(). As a bonus, our error messages now
|
|
|
1353ca |
mention the path to a corrupted object, which should make it
|
|
|
1353ca |
easier to track down errors when they do happen.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Junio C Hamano <gitster@pobox.com>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/fsck.c | 46 +++++++++++++++++++++++++++++++++-------------
|
|
|
1353ca |
t/t1450-fsck.sh | 16 ++++++++++++++++
|
|
|
1353ca |
2 files changed, 49 insertions(+), 13 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/fsck.c b/builtin/fsck.c
|
|
|
1353ca |
index 9923b10..150597c 100644
|
|
|
1353ca |
--- a/builtin/fsck.c
|
|
|
1353ca |
+++ b/builtin/fsck.c
|
|
|
1353ca |
@@ -341,18 +341,6 @@ static int fsck_obj(struct object *obj)
|
|
|
1353ca |
return 0;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
-static int fsck_sha1(const unsigned char *sha1)
|
|
|
1353ca |
-{
|
|
|
1353ca |
- struct object *obj = parse_object(sha1);
|
|
|
1353ca |
- if (!obj) {
|
|
|
1353ca |
- errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
- return error("%s: object corrupt or missing",
|
|
|
1353ca |
- sha1_to_hex(sha1));
|
|
|
1353ca |
- }
|
|
|
1353ca |
- obj->flags |= HAS_OBJ;
|
|
|
1353ca |
- return fsck_obj(obj);
|
|
|
1353ca |
-}
|
|
|
1353ca |
-
|
|
|
1353ca |
static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
|
|
|
1353ca |
unsigned long size, void *buffer, int *eaten)
|
|
|
1353ca |
{
|
|
|
1353ca |
@@ -459,9 +447,41 @@ static void get_default_heads(void)
|
|
|
1353ca |
}
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
+static struct object *parse_loose_object(const unsigned char *sha1,
|
|
|
1353ca |
+ const char *path)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ struct object *obj;
|
|
|
1353ca |
+ void *contents;
|
|
|
1353ca |
+ enum object_type type;
|
|
|
1353ca |
+ unsigned long size;
|
|
|
1353ca |
+ int eaten;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
|
|
|
1353ca |
+ return NULL;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!contents && type != OBJ_BLOB)
|
|
|
1353ca |
+ die("BUG: read_loose_object streamed a non-blob");
|
|
|
1353ca |
+
|
|
|
1353ca |
+ obj = parse_object_buffer(sha1, type, size, contents, &eaten);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!eaten)
|
|
|
1353ca |
+ free(contents);
|
|
|
1353ca |
+ return obj;
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
|
|
|
1353ca |
{
|
|
|
1353ca |
- if (fsck_sha1(sha1))
|
|
|
1353ca |
+ struct object *obj = parse_loose_object(sha1, path);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!obj) {
|
|
|
1353ca |
+ errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
+ error("%s: object corrupt or missing: %s",
|
|
|
1353ca |
+ sha1_to_hex(sha1), path);
|
|
|
1353ca |
+ return 0; /* keep checking other objects */
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ obj->flags = HAS_OBJ;
|
|
|
1353ca |
+ if (fsck_obj(obj))
|
|
|
1353ca |
errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
return 0;
|
|
|
1353ca |
}
|
|
|
1353ca |
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
|
|
|
1353ca |
index 7ee8ea0..17c4c77 100755
|
|
|
1353ca |
--- a/t/t1450-fsck.sh
|
|
|
1353ca |
+++ b/t/t1450-fsck.sh
|
|
|
1353ca |
@@ -523,4 +523,20 @@ test_expect_success 'fsck --connectivity-only' '
|
|
|
1353ca |
)
|
|
|
1353ca |
'
|
|
|
1353ca |
|
|
|
1353ca |
+test_expect_success 'fsck finds problems in duplicate loose objects' '
|
|
|
1353ca |
+ rm -rf broken-duplicate &&
|
|
|
1353ca |
+ git init broken-duplicate &&
|
|
|
1353ca |
+ (
|
|
|
1353ca |
+ cd broken-duplicate &&
|
|
|
1353ca |
+ test_commit duplicate &&
|
|
|
1353ca |
+ # no "-d" here, so we end up with duplicates
|
|
|
1353ca |
+ git repack &&
|
|
|
1353ca |
+ # now corrupt the loose copy
|
|
|
1353ca |
+ file=$(sha1_file "$(git rev-parse HEAD)") &&
|
|
|
1353ca |
+ rm "$file" &&
|
|
|
1353ca |
+ echo broken >"$file" &&
|
|
|
1353ca |
+ test_must_fail git fsck
|
|
|
1353ca |
+ )
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
test_done
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 6a26b1fcf92a6c825eb72c1a55399f38af5b7b79 Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Wed, 2 May 2018 16:37:09 -0400
|
|
|
1353ca |
Subject: [PATCH 03/11] index-pack: make fsck error message more specific
|
|
|
1353ca |
|
|
|
1353ca |
commit db5a58c1bda5b20169b9958af1e8b05ddd178b01 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
If fsck reports an error, we say only "Error in object".
|
|
|
1353ca |
This isn't quite as bad as it might seem, since the fsck
|
|
|
1353ca |
code would have dumped some errors to stderr already. But it
|
|
|
1353ca |
might help to give a little more context. The earlier output
|
|
|
1353ca |
would not have even mentioned "fsck", and that may be a clue
|
|
|
1353ca |
that the "fsck.*" or "*.fsckObjects" config may be relevant.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/index-pack.c | 2 +-
|
|
|
1353ca |
builtin/unpack-objects.c | 2 +-
|
|
|
1353ca |
2 files changed, 2 insertions(+), 2 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
|
|
|
1353ca |
index 1008d7f..81f3bc7 100644
|
|
|
1353ca |
--- a/builtin/index-pack.c
|
|
|
1353ca |
+++ b/builtin/index-pack.c
|
|
|
1353ca |
@@ -841,7 +841,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
|
|
|
1353ca |
die(_("invalid %s"), typename(type));
|
|
|
1353ca |
if (do_fsck_object &&
|
|
|
1353ca |
fsck_object(obj, buf, size, &fsck_options))
|
|
|
1353ca |
- die(_("Error in object"));
|
|
|
1353ca |
+ die(_("fsck error in packed object"));
|
|
|
1353ca |
if (fsck_walk(obj, NULL, &fsck_options))
|
|
|
1353ca |
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
|
|
|
1353ca |
index 875e7ed..7018829 100644
|
|
|
1353ca |
--- a/builtin/unpack-objects.c
|
|
|
1353ca |
+++ b/builtin/unpack-objects.c
|
|
|
1353ca |
@@ -205,7 +205,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
|
|
|
1353ca |
if (!obj_buf)
|
|
|
1353ca |
die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
|
|
|
1353ca |
if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
|
|
|
1353ca |
- die("Error in object");
|
|
|
1353ca |
+ die("fsck error in packed object");
|
|
|
1353ca |
fsck_options.walk = check_object;
|
|
|
1353ca |
if (fsck_walk(obj, NULL, &fsck_options))
|
|
|
1353ca |
die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 8ac12a6cd60b74748494beadd7750de39f727ff3 Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Sun, 13 May 2018 12:35:37 -0400
|
|
|
1353ca |
Subject: [PATCH 04/11] fsck: simplify ".git" check
|
|
|
1353ca |
|
|
|
1353ca |
commit ed9c3220621d634d543bc4dd998d12167dfc57d4 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
There's no need for us to manually check for ".git"; it's a
|
|
|
1353ca |
subset of the other filesystem-specific tests. Dropping it
|
|
|
1353ca |
makes our code slightly shorter. More importantly, the
|
|
|
1353ca |
existing code may make a reader wonder why ".GIT" is not
|
|
|
1353ca |
covered here, and whether that is a bug (it isn't, as it's
|
|
|
1353ca |
also covered in the filesystem-specific tests).
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
fsck.c | 4 +---
|
|
|
1353ca |
1 file changed, 1 insertion(+), 3 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/fsck.c b/fsck.c
|
|
|
1353ca |
index 0531545..c807b7f 100644
|
|
|
1353ca |
--- a/fsck.c
|
|
|
1353ca |
+++ b/fsck.c
|
|
|
1353ca |
@@ -460,9 +460,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|
|
1353ca |
has_empty_name |= !*name;
|
|
|
1353ca |
has_dot |= !strcmp(name, ".");
|
|
|
1353ca |
has_dotdot |= !strcmp(name, "..");
|
|
|
1353ca |
- has_dotgit |= (!strcmp(name, ".git") ||
|
|
|
1353ca |
- is_hfs_dotgit(name) ||
|
|
|
1353ca |
- is_ntfs_dotgit(name));
|
|
|
1353ca |
+ has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
|
|
|
1353ca |
has_zero_pad |= *(char *)desc.buffer == '0';
|
|
|
1353ca |
update_tree_entry(&desc);
|
|
|
1353ca |
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 27d0f7bf0b6ba57e849353a65ff73887088b6d7b Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Wed, 2 May 2018 15:44:51 -0400
|
|
|
1353ca |
Subject: [PATCH 05/11] fsck: actually fsck blob data
|
|
|
1353ca |
|
|
|
1353ca |
commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
Because fscking a blob has always been a noop, we didn't
|
|
|
1353ca |
bother passing around the blob data. In preparation for
|
|
|
1353ca |
content-level checks, let's fix up a few things:
|
|
|
1353ca |
|
|
|
1353ca |
1. The fsck_object() function just returns success for any
|
|
|
1353ca |
blob. Let's a noop fsck_blob(), which we can fill in
|
|
|
1353ca |
with actual logic later.
|
|
|
1353ca |
|
|
|
1353ca |
2. The fsck_loose() function in builtin/fsck.c
|
|
|
1353ca |
just threw away blob content after loading it. Let's
|
|
|
1353ca |
hold onto it until after we've called fsck_object().
|
|
|
1353ca |
|
|
|
1353ca |
The easiest way to do this is to just drop the
|
|
|
1353ca |
parse_loose_object() helper entirely. Incidentally,
|
|
|
1353ca |
this also fixes a memory leak: if we successfully
|
|
|
1353ca |
loaded the object data but did not parse it, we would
|
|
|
1353ca |
have left the function without freeing it.
|
|
|
1353ca |
|
|
|
1353ca |
3. When fsck_loose() loads the object data, it
|
|
|
1353ca |
does so with a custom read_loose_object() helper. This
|
|
|
1353ca |
function streams any blobs, regardless of size, under
|
|
|
1353ca |
the assumption that we're only checking the sha1.
|
|
|
1353ca |
|
|
|
1353ca |
Instead, let's actually load blobs smaller than
|
|
|
1353ca |
big_file_threshold, as the normal object-reading
|
|
|
1353ca |
code-paths would do. This lets us fsck small files, and
|
|
|
1353ca |
a NULL return is an indication that the blob was so big
|
|
|
1353ca |
that it needed to be streamed, and we can pass that
|
|
|
1353ca |
information along to fsck_blob().
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/fsck.c | 39 +++++++++++++++++++--------------------
|
|
|
1353ca |
fsck.c | 8 +++++++-
|
|
|
1353ca |
sha1_file.c | 2 +-
|
|
|
1353ca |
3 files changed, 27 insertions(+), 22 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/fsck.c b/builtin/fsck.c
|
|
|
1353ca |
index 150597c..61ab1cd 100644
|
|
|
1353ca |
--- a/builtin/fsck.c
|
|
|
1353ca |
+++ b/builtin/fsck.c
|
|
|
1353ca |
@@ -299,7 +299,7 @@ static void check_connectivity(void)
|
|
|
1353ca |
}
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
-static int fsck_obj(struct object *obj)
|
|
|
1353ca |
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
|
|
|
1353ca |
{
|
|
|
1353ca |
if (obj->flags & SEEN)
|
|
|
1353ca |
return 0;
|
|
|
1353ca |
@@ -311,7 +311,7 @@ static int fsck_obj(struct object *obj)
|
|
|
1353ca |
|
|
|
1353ca |
if (fsck_walk(obj, NULL, &fsck_obj_options))
|
|
|
1353ca |
objerror(obj, "broken links");
|
|
|
1353ca |
- if (fsck_object(obj, NULL, 0, &fsck_obj_options))
|
|
|
1353ca |
+ if (fsck_object(obj, buffer, size, &fsck_obj_options))
|
|
|
1353ca |
return -1;
|
|
|
1353ca |
|
|
|
1353ca |
if (obj->type == OBJ_TREE) {
|
|
|
1353ca |
@@ -355,7 +355,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
|
|
|
1353ca |
return error("%s: object corrupt or missing", sha1_to_hex(sha1));
|
|
|
1353ca |
}
|
|
|
1353ca |
obj->flags = HAS_OBJ;
|
|
|
1353ca |
- return fsck_obj(obj);
|
|
|
1353ca |
+ return fsck_obj(obj, buffer, size);
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
static int default_refs;
|
|
|
1353ca |
@@ -447,43 +447,42 @@ static void get_default_heads(void)
|
|
|
1353ca |
}
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
-static struct object *parse_loose_object(const unsigned char *sha1,
|
|
|
1353ca |
- const char *path)
|
|
|
1353ca |
+static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
|
|
|
1353ca |
{
|
|
|
1353ca |
struct object *obj;
|
|
|
1353ca |
- void *contents;
|
|
|
1353ca |
enum object_type type;
|
|
|
1353ca |
unsigned long size;
|
|
|
1353ca |
+ void *contents;
|
|
|
1353ca |
int eaten;
|
|
|
1353ca |
|
|
|
1353ca |
- if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
|
|
|
1353ca |
- return NULL;
|
|
|
1353ca |
+ if (read_loose_object(path, sha1, &type, &size, &contents) < 0) {
|
|
|
1353ca |
+ errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
+ error("%s: object corrupt or missing: %s",
|
|
|
1353ca |
+ sha1_to_hex(sha1), path);
|
|
|
1353ca |
+ return 0; /* keep checking other objects */
|
|
|
1353ca |
+ }
|
|
|
1353ca |
|
|
|
1353ca |
if (!contents && type != OBJ_BLOB)
|
|
|
1353ca |
die("BUG: read_loose_object streamed a non-blob");
|
|
|
1353ca |
|
|
|
1353ca |
obj = parse_object_buffer(sha1, type, size, contents, &eaten);
|
|
|
1353ca |
|
|
|
1353ca |
- if (!eaten)
|
|
|
1353ca |
- free(contents);
|
|
|
1353ca |
- return obj;
|
|
|
1353ca |
-}
|
|
|
1353ca |
-
|
|
|
1353ca |
-static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
|
|
|
1353ca |
-{
|
|
|
1353ca |
- struct object *obj = parse_loose_object(sha1, path);
|
|
|
1353ca |
-
|
|
|
1353ca |
if (!obj) {
|
|
|
1353ca |
errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
- error("%s: object corrupt or missing: %s",
|
|
|
1353ca |
+ error("%s: object could not be parsed: %s",
|
|
|
1353ca |
sha1_to_hex(sha1), path);
|
|
|
1353ca |
+ if (!eaten)
|
|
|
1353ca |
+ free(contents);
|
|
|
1353ca |
return 0; /* keep checking other objects */
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
obj->flags = HAS_OBJ;
|
|
|
1353ca |
- if (fsck_obj(obj))
|
|
|
1353ca |
+ if (fsck_obj(obj, contents, size))
|
|
|
1353ca |
errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
- return 0;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!eaten)
|
|
|
1353ca |
+ free(contents);
|
|
|
1353ca |
+ return 0; /* keep checking other objects, even if we saw an error */
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
static int fsck_cruft(const char *basename, const char *path, void *data)
|
|
|
1353ca |
diff --git a/fsck.c b/fsck.c
|
|
|
1353ca |
index c807b7f..a520290 100644
|
|
|
1353ca |
--- a/fsck.c
|
|
|
1353ca |
+++ b/fsck.c
|
|
|
1353ca |
@@ -795,6 +795,12 @@ static int fsck_tag(struct tag *tag, const char *data,
|
|
|
1353ca |
return fsck_tag_buffer(tag, data, size, options);
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
+static int fsck_blob(struct blob *blob, const char *buf,
|
|
|
1353ca |
+ unsigned long size, struct fsck_options *options)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ return 0;
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
int fsck_object(struct object *obj, void *data, unsigned long size,
|
|
|
1353ca |
struct fsck_options *options)
|
|
|
1353ca |
{
|
|
|
1353ca |
@@ -802,7 +808,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
|
|
|
1353ca |
return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
|
|
|
1353ca |
|
|
|
1353ca |
if (obj->type == OBJ_BLOB)
|
|
|
1353ca |
- return 0;
|
|
|
1353ca |
+ return fsck_blob((struct blob *)obj, data, size, options);
|
|
|
1353ca |
if (obj->type == OBJ_TREE)
|
|
|
1353ca |
return fsck_tree((struct tree *) obj, options);
|
|
|
1353ca |
if (obj->type == OBJ_COMMIT)
|
|
|
1353ca |
diff --git a/sha1_file.c b/sha1_file.c
|
|
|
1353ca |
index 6d29840..a091644 100644
|
|
|
1353ca |
--- a/sha1_file.c
|
|
|
1353ca |
+++ b/sha1_file.c
|
|
|
1353ca |
@@ -3744,7 +3744,7 @@ int read_loose_object(const char *path,
|
|
|
1353ca |
goto out;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
- if (*type == OBJ_BLOB) {
|
|
|
1353ca |
+ if (*type == OBJ_BLOB && *size > big_file_threshold) {
|
|
|
1353ca |
if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
|
|
|
1353ca |
goto out;
|
|
|
1353ca |
} else {
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From d393612c63e6bdfd7ab54139c229be3db89c550d Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Wed, 2 May 2018 17:20:08 -0400
|
|
|
1353ca |
Subject: [PATCH 06/11] fsck: detect gitmodules files
|
|
|
1353ca |
|
|
|
1353ca |
commit 159e7b080bfa5d34559467cacaa79df89a01afc0 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
In preparation for performing fsck checks on .gitmodules
|
|
|
1353ca |
files, this commit plumbs in the actual detection of the
|
|
|
1353ca |
files. Note that unlike most other fsck checks, this cannot
|
|
|
1353ca |
be a property of a single object: we must know that the
|
|
|
1353ca |
object is found at a ".gitmodules" path at the root tree of
|
|
|
1353ca |
a commit.
|
|
|
1353ca |
|
|
|
1353ca |
Since the fsck code only sees one object at a time, we have
|
|
|
1353ca |
to mark the related objects to fit the puzzle together. When
|
|
|
1353ca |
we see a commit we mark its tree as a root tree, and when
|
|
|
1353ca |
we see a root tree with a .gitmodules file, we mark the
|
|
|
1353ca |
corresponding blob to be checked.
|
|
|
1353ca |
|
|
|
1353ca |
In an ideal world, we'd check the objects in topological
|
|
|
1353ca |
order: commits followed by trees followed by blobs. In that
|
|
|
1353ca |
case we can avoid ever loading an object twice, since all
|
|
|
1353ca |
markings would be complete by the time we get to the marked
|
|
|
1353ca |
objects. And indeed, if we are checking a single packfile,
|
|
|
1353ca |
this is the order in which Git will generally write the
|
|
|
1353ca |
objects. But we can't count on that:
|
|
|
1353ca |
|
|
|
1353ca |
1. git-fsck may show us the objects in arbitrary order
|
|
|
1353ca |
(loose objects are fed in sha1 order, but we may also
|
|
|
1353ca |
have multiple packs, and we process each pack fully in
|
|
|
1353ca |
sequence).
|
|
|
1353ca |
|
|
|
1353ca |
2. The type ordering is just what git-pack-objects happens
|
|
|
1353ca |
to write now. The pack format does not require a
|
|
|
1353ca |
specific order, and it's possible that future versions
|
|
|
1353ca |
of Git (or a custom version trying to fool official
|
|
|
1353ca |
Git's fsck checks!) may order it differently.
|
|
|
1353ca |
|
|
|
1353ca |
3. We may not even be fscking all of the relevant objects
|
|
|
1353ca |
at once. Consider pushing with transfer.fsckObjects,
|
|
|
1353ca |
where one push adds a blob at path "foo", and then a
|
|
|
1353ca |
second push adds the same blob at path ".gitmodules".
|
|
|
1353ca |
The blob is not part of the second push at all, but we
|
|
|
1353ca |
need to mark and check it.
|
|
|
1353ca |
|
|
|
1353ca |
So in the general case, we need to make up to three passes
|
|
|
1353ca |
over the objects: once to make sure we've seen all commits,
|
|
|
1353ca |
then once to cover any trees we might have missed, and then
|
|
|
1353ca |
a final pass to cover any .gitmodules blobs we found in the
|
|
|
1353ca |
second pass.
|
|
|
1353ca |
|
|
|
1353ca |
We can simplify things a bit by loosening the requirement
|
|
|
1353ca |
that we find .gitmodules only at root trees. Technically
|
|
|
1353ca |
a file like "subdir/.gitmodules" is not parsed by Git, but
|
|
|
1353ca |
it's not unreasonable for us to declare that Git is aware of
|
|
|
1353ca |
all ".gitmodules" files and make them eligible for checking.
|
|
|
1353ca |
That lets us drop the root-tree requirement, which
|
|
|
1353ca |
eliminates one pass entirely. And it makes our worst case
|
|
|
1353ca |
much better: instead of potentially queueing every root tree
|
|
|
1353ca |
to be re-examined, the worst case is that we queue each
|
|
|
1353ca |
unique .gitmodules blob for a second look.
|
|
|
1353ca |
|
|
|
1353ca |
This patch just adds the boilerplate to find .gitmodules
|
|
|
1353ca |
files. The actual content checks will come in a subsequent
|
|
|
1353ca |
commit.
|
|
|
1353ca |
|
|
|
1353ca |
[jn: backported to 2.11.y:
|
|
|
1353ca |
- passing oid->hash instead of oid to lookup_blob
|
|
|
1353ca |
- using "struct hashmap" directly since "struct oidset" isn't
|
|
|
1353ca |
available]
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
fsck.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
1353ca |
fsck.h | 7 ++++++
|
|
|
1353ca |
2 files changed, 98 insertions(+)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/fsck.c b/fsck.c
|
|
|
1353ca |
index a520290..f0a566a 100644
|
|
|
1353ca |
--- a/fsck.c
|
|
|
1353ca |
+++ b/fsck.c
|
|
|
1353ca |
@@ -9,6 +9,42 @@
|
|
|
1353ca |
#include "refs.h"
|
|
|
1353ca |
#include "utf8.h"
|
|
|
1353ca |
#include "sha1-array.h"
|
|
|
1353ca |
+#include "hashmap.h"
|
|
|
1353ca |
+
|
|
|
1353ca |
+struct oidhash_entry {
|
|
|
1353ca |
+ struct hashmap_entry ent;
|
|
|
1353ca |
+ struct object_id oid;
|
|
|
1353ca |
+};
|
|
|
1353ca |
+
|
|
|
1353ca |
+static int oidhash_hashcmp(const void *va, const void *vb,
|
|
|
1353ca |
+ const void *vkey)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ const struct oidhash_entry *a = va, *b = vb;
|
|
|
1353ca |
+ const struct object_id *key = vkey;
|
|
|
1353ca |
+ return oidcmp(&a->oid, key ? key : &b->oid);
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
+static struct hashmap gitmodules_found;
|
|
|
1353ca |
+static struct hashmap gitmodules_done;
|
|
|
1353ca |
+
|
|
|
1353ca |
+static void oidhash_insert(struct hashmap *h, const struct object_id *oid)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ struct oidhash_entry *e;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!h->tablesize)
|
|
|
1353ca |
+ hashmap_init(h, oidhash_hashcmp, 0);
|
|
|
1353ca |
+ e = xmalloc(sizeof(*e));
|
|
|
1353ca |
+ hashmap_entry_init(&e->ent, sha1hash(oid->hash));
|
|
|
1353ca |
+ oidcpy(&e->oid, oid);
|
|
|
1353ca |
+ hashmap_add(h, e);
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
+static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ return h->tablesize &&
|
|
|
1353ca |
+ !!hashmap_get_from_hash(h, sha1hash(oid->hash), oid);
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
|
|
|
1353ca |
#define FSCK_FATAL -1
|
|
|
1353ca |
#define FSCK_INFO -2
|
|
|
1353ca |
@@ -43,6 +79,7 @@
|
|
|
1353ca |
FUNC(MISSING_TAG_ENTRY, ERROR) \
|
|
|
1353ca |
FUNC(MISSING_TAG_OBJECT, ERROR) \
|
|
|
1353ca |
FUNC(MISSING_TREE, ERROR) \
|
|
|
1353ca |
+ FUNC(MISSING_TREE_OBJECT, ERROR) \
|
|
|
1353ca |
FUNC(MISSING_TYPE, ERROR) \
|
|
|
1353ca |
FUNC(MISSING_TYPE_ENTRY, ERROR) \
|
|
|
1353ca |
FUNC(MULTIPLE_AUTHORS, ERROR) \
|
|
|
1353ca |
@@ -50,6 +87,8 @@
|
|
|
1353ca |
FUNC(TREE_NOT_SORTED, ERROR) \
|
|
|
1353ca |
FUNC(UNKNOWN_TYPE, ERROR) \
|
|
|
1353ca |
FUNC(ZERO_PADDED_DATE, ERROR) \
|
|
|
1353ca |
+ FUNC(GITMODULES_MISSING, ERROR) \
|
|
|
1353ca |
+ FUNC(GITMODULES_BLOB, ERROR) \
|
|
|
1353ca |
/* warnings */ \
|
|
|
1353ca |
FUNC(BAD_FILEMODE, WARN) \
|
|
|
1353ca |
FUNC(EMPTY_NAME, WARN) \
|
|
|
1353ca |
@@ -462,6 +501,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|
|
1353ca |
has_dotdot |= !strcmp(name, "..");
|
|
|
1353ca |
has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
|
|
|
1353ca |
has_zero_pad |= *(char *)desc.buffer == '0';
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
|
|
|
1353ca |
+ oidhash_insert(&gitmodules_found, oid);
|
|
|
1353ca |
+
|
|
|
1353ca |
update_tree_entry(&desc);
|
|
|
1353ca |
|
|
|
1353ca |
switch (mode) {
|
|
|
1353ca |
@@ -831,3 +874,51 @@ int fsck_error_function(struct object *obj, int msg_type, const char *message)
|
|
|
1353ca |
error("object %s: %s", oid_to_hex(&obj->oid), message);
|
|
|
1353ca |
return 1;
|
|
|
1353ca |
}
|
|
|
1353ca |
+
|
|
|
1353ca |
+int fsck_finish(struct fsck_options *options)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ int ret = 0;
|
|
|
1353ca |
+ struct hashmap_iter iter;
|
|
|
1353ca |
+ const struct oidhash_entry *e;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ hashmap_iter_init(&gitmodules_found, &iter);
|
|
|
1353ca |
+ while ((e = hashmap_iter_next(&iter))) {
|
|
|
1353ca |
+ const struct object_id *oid = &e->oid;
|
|
|
1353ca |
+ struct blob *blob;
|
|
|
1353ca |
+ enum object_type type;
|
|
|
1353ca |
+ unsigned long size;
|
|
|
1353ca |
+ char *buf;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (oidhash_contains(&gitmodules_done, oid))
|
|
|
1353ca |
+ continue;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ blob = lookup_blob(oid->hash);
|
|
|
1353ca |
+ if (!blob) {
|
|
|
1353ca |
+ ret |= report(options, &blob->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_BLOB,
|
|
|
1353ca |
+ "non-blob found at .gitmodules");
|
|
|
1353ca |
+ continue;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ buf = read_sha1_file(oid->hash, &type, &size);
|
|
|
1353ca |
+ if (!buf) {
|
|
|
1353ca |
+ ret |= report(options, &blob->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_MISSING,
|
|
|
1353ca |
+ "unable to read .gitmodules blob");
|
|
|
1353ca |
+ continue;
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (type == OBJ_BLOB)
|
|
|
1353ca |
+ ret |= fsck_blob(blob, buf, size, options);
|
|
|
1353ca |
+ else
|
|
|
1353ca |
+ ret |= report(options, &blob->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_BLOB,
|
|
|
1353ca |
+ "non-blob found at .gitmodules");
|
|
|
1353ca |
+ free(buf);
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+
|
|
|
1353ca |
+ hashmap_free(&gitmodules_found, 1);
|
|
|
1353ca |
+ hashmap_free(&gitmodules_done, 1);
|
|
|
1353ca |
+ return ret;
|
|
|
1353ca |
+}
|
|
|
1353ca |
diff --git a/fsck.h b/fsck.h
|
|
|
1353ca |
index dded84b..3eebe56 100644
|
|
|
1353ca |
--- a/fsck.h
|
|
|
1353ca |
+++ b/fsck.h
|
|
|
1353ca |
@@ -50,4 +50,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
|
|
|
1353ca |
int fsck_object(struct object *obj, void *data, unsigned long size,
|
|
|
1353ca |
struct fsck_options *options);
|
|
|
1353ca |
|
|
|
1353ca |
+/*
|
|
|
1353ca |
+ * Some fsck checks are context-dependent, and may end up queued; run this
|
|
|
1353ca |
+ * after completing all fsck_object() calls in order to resolve any remaining
|
|
|
1353ca |
+ * checks.
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+int fsck_finish(struct fsck_options *options);
|
|
|
1353ca |
+
|
|
|
1353ca |
#endif
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From ff398bff4000efa4275058c6b4f8c479831494f0 Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Wed, 2 May 2018 17:25:27 -0400
|
|
|
1353ca |
Subject: [PATCH 07/11] fsck: check .gitmodules content
|
|
|
1353ca |
|
|
|
1353ca |
commit ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e upstream.
|
|
|
1353ca |
|
|
|
1353ca |
This patch detects and blocks submodule names which do not
|
|
|
1353ca |
match the policy set forth in submodule-config. These should
|
|
|
1353ca |
already be caught by the submodule code itself, but putting
|
|
|
1353ca |
the check here means that newer versions of Git can protect
|
|
|
1353ca |
older ones from malicious entries (e.g., a server with
|
|
|
1353ca |
receive.fsckObjects will block the objects, protecting
|
|
|
1353ca |
clients which fetch from it).
|
|
|
1353ca |
|
|
|
1353ca |
As a side effect, this means fsck will also complain about
|
|
|
1353ca |
.gitmodules files that cannot be parsed (or were larger than
|
|
|
1353ca |
core.bigFileThreshold).
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
fsck.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
|
1353ca |
1 file changed, 60 insertions(+), 1 deletion(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/fsck.c b/fsck.c
|
|
|
1353ca |
index f0a566a..d366ad8 100644
|
|
|
1353ca |
--- a/fsck.c
|
|
|
1353ca |
+++ b/fsck.c
|
|
|
1353ca |
@@ -10,6 +10,9 @@
|
|
|
1353ca |
#include "utf8.h"
|
|
|
1353ca |
#include "sha1-array.h"
|
|
|
1353ca |
#include "hashmap.h"
|
|
|
1353ca |
+#include "submodule-config.h"
|
|
|
1353ca |
+
|
|
|
1353ca |
+#define CONFIG_ORIGIN_BLOB 0
|
|
|
1353ca |
|
|
|
1353ca |
struct oidhash_entry {
|
|
|
1353ca |
struct hashmap_entry ent;
|
|
|
1353ca |
@@ -89,6 +92,8 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
|
|
|
1353ca |
FUNC(ZERO_PADDED_DATE, ERROR) \
|
|
|
1353ca |
FUNC(GITMODULES_MISSING, ERROR) \
|
|
|
1353ca |
FUNC(GITMODULES_BLOB, ERROR) \
|
|
|
1353ca |
+ FUNC(GITMODULES_PARSE, ERROR) \
|
|
|
1353ca |
+ FUNC(GITMODULES_NAME, ERROR) \
|
|
|
1353ca |
/* warnings */ \
|
|
|
1353ca |
FUNC(BAD_FILEMODE, WARN) \
|
|
|
1353ca |
FUNC(EMPTY_NAME, WARN) \
|
|
|
1353ca |
@@ -838,10 +843,64 @@ static int fsck_tag(struct tag *tag, const char *data,
|
|
|
1353ca |
return fsck_tag_buffer(tag, data, size, options);
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
+struct fsck_gitmodules_data {
|
|
|
1353ca |
+ struct object *obj;
|
|
|
1353ca |
+ struct fsck_options *options;
|
|
|
1353ca |
+ int ret;
|
|
|
1353ca |
+};
|
|
|
1353ca |
+
|
|
|
1353ca |
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
|
|
|
1353ca |
+{
|
|
|
1353ca |
+ struct fsck_gitmodules_data *data = vdata;
|
|
|
1353ca |
+ const char *subsection, *key;
|
|
|
1353ca |
+ int subsection_len;
|
|
|
1353ca |
+ char *name;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
|
|
|
1353ca |
+ !subsection)
|
|
|
1353ca |
+ return 0;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ name = xmemdupz(subsection, subsection_len);
|
|
|
1353ca |
+ if (check_submodule_name(name) < 0)
|
|
|
1353ca |
+ data->ret |= report(data->options, data->obj,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_NAME,
|
|
|
1353ca |
+ "disallowed submodule name: %s",
|
|
|
1353ca |
+ name);
|
|
|
1353ca |
+ free(name);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ return 0;
|
|
|
1353ca |
+}
|
|
|
1353ca |
+
|
|
|
1353ca |
static int fsck_blob(struct blob *blob, const char *buf,
|
|
|
1353ca |
unsigned long size, struct fsck_options *options)
|
|
|
1353ca |
{
|
|
|
1353ca |
- return 0;
|
|
|
1353ca |
+ struct fsck_gitmodules_data data;
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!oidhash_contains(&gitmodules_found, &blob->object.oid))
|
|
|
1353ca |
+ return 0;
|
|
|
1353ca |
+ oidhash_insert(&gitmodules_done, &blob->object.oid);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (!buf) {
|
|
|
1353ca |
+ /*
|
|
|
1353ca |
+ * A missing buffer here is a sign that the caller found the
|
|
|
1353ca |
+ * blob too gigantic to load into memory. Let's just consider
|
|
|
1353ca |
+ * that an error.
|
|
|
1353ca |
+ */
|
|
|
1353ca |
+ return report(options, &blob->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_PARSE,
|
|
|
1353ca |
+ ".gitmodules too large to parse");
|
|
|
1353ca |
+ }
|
|
|
1353ca |
+
|
|
|
1353ca |
+ data.obj = &blob->object;
|
|
|
1353ca |
+ data.options = options;
|
|
|
1353ca |
+ data.ret = 0;
|
|
|
1353ca |
+ if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
|
|
|
1353ca |
+ ".gitmodules", buf, size, &data))
|
|
|
1353ca |
+ data.ret |= report(options, &blob->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_PARSE,
|
|
|
1353ca |
+ "could not parse gitmodules blob");
|
|
|
1353ca |
+
|
|
|
1353ca |
+ return data.ret;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
int fsck_object(struct object *obj, void *data, unsigned long size,
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 3f7875be4a5b68058e243a0525d75165620738cb Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Wed, 2 May 2018 17:20:35 -0400
|
|
|
1353ca |
Subject: [PATCH 08/11] fsck: call fsck_finish() after fscking objects
|
|
|
1353ca |
|
|
|
1353ca |
commit 1995b5e03e1cc97116be58cdc0502d4a23547856 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
Now that the internal fsck code is capable of checking
|
|
|
1353ca |
.gitmodules files, we just need to teach its callers to use
|
|
|
1353ca |
the "finish" function to check any queued objects.
|
|
|
1353ca |
|
|
|
1353ca |
With this, we can now catch the malicious case in t7415 with
|
|
|
1353ca |
git-fsck.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/fsck.c | 3 +++
|
|
|
1353ca |
t/t7415-submodule-names.sh | 4 ++++
|
|
|
1353ca |
2 files changed, 7 insertions(+)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/fsck.c b/builtin/fsck.c
|
|
|
1353ca |
index 61ab1cd..70f620b 100644
|
|
|
1353ca |
--- a/builtin/fsck.c
|
|
|
1353ca |
+++ b/builtin/fsck.c
|
|
|
1353ca |
@@ -660,6 +660,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
|
|
|
1353ca |
count += p->num_objects;
|
|
|
1353ca |
}
|
|
|
1353ca |
stop_progress(&progress);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (fsck_finish(&fsck_obj_options))
|
|
|
1353ca |
+ errors_found |= ERROR_OBJECT;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
heads = 0;
|
|
|
1353ca |
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
|
|
|
1353ca |
index b30fa33..5d61627 100755
|
|
|
1353ca |
--- a/t/t7415-submodule-names.sh
|
|
|
1353ca |
+++ b/t/t7415-submodule-names.sh
|
|
|
1353ca |
@@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' '
|
|
|
1353ca |
! grep "RUNNING POST CHECKOUT" output
|
|
|
1353ca |
'
|
|
|
1353ca |
|
|
|
1353ca |
+test_expect_success 'fsck detects evil superproject' '
|
|
|
1353ca |
+ test_must_fail git fsck
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
test_done
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 563989871514b892c2c9a4f4fd2ef515dd6adaee Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Fri, 4 May 2018 19:40:08 -0400
|
|
|
1353ca |
Subject: [PATCH 09/11] unpack-objects: call fsck_finish() after fscking
|
|
|
1353ca |
objects
|
|
|
1353ca |
|
|
|
1353ca |
commit 6e328d6caef218db320978e3e251009135d87d0e upstream.
|
|
|
1353ca |
|
|
|
1353ca |
As with the previous commit, we must call fsck's "finish"
|
|
|
1353ca |
function in order to catch any queued objects for
|
|
|
1353ca |
.gitmodules checks.
|
|
|
1353ca |
|
|
|
1353ca |
This second pass will be able to access any incoming
|
|
|
1353ca |
objects, because we will have exploded them to loose objects
|
|
|
1353ca |
by now.
|
|
|
1353ca |
|
|
|
1353ca |
This isn't quite ideal, because it means that bad objects
|
|
|
1353ca |
may have been written to the object database (and a
|
|
|
1353ca |
subsequent operation could then reference them, even if the
|
|
|
1353ca |
other side doesn't send the objects again). However, this is
|
|
|
1353ca |
sufficient when used with receive.fsckObjects, since those
|
|
|
1353ca |
loose objects will all be placed in a temporary quarantine
|
|
|
1353ca |
area that will get wiped if we find any problems.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/unpack-objects.c | 5 ++++-
|
|
|
1353ca |
t/t7415-submodule-names.sh | 7 +++++++
|
|
|
1353ca |
2 files changed, 11 insertions(+), 1 deletion(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
|
|
|
1353ca |
index 7018829..27ba86e 100644
|
|
|
1353ca |
--- a/builtin/unpack-objects.c
|
|
|
1353ca |
+++ b/builtin/unpack-objects.c
|
|
|
1353ca |
@@ -560,8 +560,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
|
|
|
1353ca |
unpack_all();
|
|
|
1353ca |
git_SHA1_Update(&ctx, buffer, offset);
|
|
|
1353ca |
git_SHA1_Final(sha1, &ctx;;
|
|
|
1353ca |
- if (strict)
|
|
|
1353ca |
+ if (strict) {
|
|
|
1353ca |
write_rest();
|
|
|
1353ca |
+ if (fsck_finish(&fsck_options))
|
|
|
1353ca |
+ die(_("fsck error in pack objects"));
|
|
|
1353ca |
+ }
|
|
|
1353ca |
if (hashcmp(fill(20), sha1))
|
|
|
1353ca |
die("final sha1 did not match");
|
|
|
1353ca |
use(20);
|
|
|
1353ca |
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
|
|
|
1353ca |
index 5d61627..a09087c 100755
|
|
|
1353ca |
--- a/t/t7415-submodule-names.sh
|
|
|
1353ca |
+++ b/t/t7415-submodule-names.sh
|
|
|
1353ca |
@@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' '
|
|
|
1353ca |
test_must_fail git fsck
|
|
|
1353ca |
'
|
|
|
1353ca |
|
|
|
1353ca |
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
|
|
|
1353ca |
+ rm -rf dst.git &&
|
|
|
1353ca |
+ git init --bare dst.git &&
|
|
|
1353ca |
+ git -C dst.git config transfer.fsckObjects true &&
|
|
|
1353ca |
+ test_must_fail git push dst.git HEAD
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
test_done
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From 0e3a3c30f0ba16f3e3fd4581cea36b2d2d4d204a Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Fri, 4 May 2018 19:45:01 -0400
|
|
|
1353ca |
Subject: [PATCH 10/11] index-pack: check .gitmodules files with --strict
|
|
|
1353ca |
|
|
|
1353ca |
commit 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 upstream.
|
|
|
1353ca |
|
|
|
1353ca |
Now that the internal fsck code has all of the plumbing we
|
|
|
1353ca |
need, we can start checking incoming .gitmodules files.
|
|
|
1353ca |
Naively, it seems like we would just need to add a call to
|
|
|
1353ca |
fsck_finish() after we've processed all of the objects. And
|
|
|
1353ca |
that would be enough to cover the initial test included
|
|
|
1353ca |
here. But there are two extra bits:
|
|
|
1353ca |
|
|
|
1353ca |
1. We currently don't bother calling fsck_object() at all
|
|
|
1353ca |
for blobs, since it has traditionally been a noop. We'd
|
|
|
1353ca |
actually catch these blobs in fsck_finish() at the end,
|
|
|
1353ca |
but it's more efficient to check them when we already
|
|
|
1353ca |
have the object loaded in memory.
|
|
|
1353ca |
|
|
|
1353ca |
2. The second pass done by fsck_finish() needs to access
|
|
|
1353ca |
the objects, but we're actually indexing the pack in
|
|
|
1353ca |
this process. In theory we could give the fsck code a
|
|
|
1353ca |
special callback for accessing the in-pack data, but
|
|
|
1353ca |
it's actually quite tricky:
|
|
|
1353ca |
|
|
|
1353ca |
a. We don't have an internal efficient index mapping
|
|
|
1353ca |
oids to packfile offsets. We only generate it on
|
|
|
1353ca |
the fly as part of writing out the .idx file.
|
|
|
1353ca |
|
|
|
1353ca |
b. We'd still have to reconstruct deltas, which means
|
|
|
1353ca |
we'd basically have to replicate all of the
|
|
|
1353ca |
reading logic in packfile.c.
|
|
|
1353ca |
|
|
|
1353ca |
Instead, let's avoid running fsck_finish() until after
|
|
|
1353ca |
we've written out the .idx file, and then just add it
|
|
|
1353ca |
to our internal packed_git list.
|
|
|
1353ca |
|
|
|
1353ca |
This does mean that the objects are "in the repository"
|
|
|
1353ca |
before we finish our fsck checks. But unpack-objects
|
|
|
1353ca |
already exhibits this same behavior, and it's an
|
|
|
1353ca |
acceptable tradeoff here for the same reason: the
|
|
|
1353ca |
quarantine mechanism means that pushes will be
|
|
|
1353ca |
fully protected.
|
|
|
1353ca |
|
|
|
1353ca |
In addition to a basic push test in t7415, we add a sneaky
|
|
|
1353ca |
pack that reverses the usual object order in the pack,
|
|
|
1353ca |
requiring that index-pack access the tree and blob during
|
|
|
1353ca |
the "finish" step.
|
|
|
1353ca |
|
|
|
1353ca |
This already works for unpack-objects (since it will have
|
|
|
1353ca |
written out loose objects), but we'll check it with this
|
|
|
1353ca |
sneaky pack for good measure.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
builtin/index-pack.c | 10 ++++++++++
|
|
|
1353ca |
t/lib-pack.sh | 12 ++++++++++++
|
|
|
1353ca |
t/t7415-submodule-names.sh | 38 ++++++++++++++++++++++++++++++++++++++
|
|
|
1353ca |
3 files changed, 60 insertions(+)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
|
|
|
1353ca |
index 81f3bc7..f53bf22 100644
|
|
|
1353ca |
--- a/builtin/index-pack.c
|
|
|
1353ca |
+++ b/builtin/index-pack.c
|
|
|
1353ca |
@@ -825,6 +825,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
|
|
|
1353ca |
blob->object.flags |= FLAG_CHECKED;
|
|
|
1353ca |
else
|
|
|
1353ca |
die(_("invalid blob object %s"), sha1_to_hex(sha1));
|
|
|
1353ca |
+ if (do_fsck_object &&
|
|
|
1353ca |
+ fsck_object(&blob->object, (void *)data, size, &fsck_options))
|
|
|
1353ca |
+ die(_("fsck error in packed object"));
|
|
|
1353ca |
} else {
|
|
|
1353ca |
struct object *obj;
|
|
|
1353ca |
int eaten;
|
|
|
1353ca |
@@ -1440,6 +1443,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
|
|
|
1353ca |
} else
|
|
|
1353ca |
chmod(final_index_name, 0444);
|
|
|
1353ca |
|
|
|
1353ca |
+ if (do_fsck_object)
|
|
|
1353ca |
+ add_packed_git(final_index_name, strlen(final_index_name), 0);
|
|
|
1353ca |
+
|
|
|
1353ca |
if (!from_stdin) {
|
|
|
1353ca |
printf("%s\n", sha1_to_hex(sha1));
|
|
|
1353ca |
} else {
|
|
|
1353ca |
@@ -1775,6 +1781,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
|
|
|
1353ca |
pack_sha1);
|
|
|
1353ca |
else
|
|
|
1353ca |
close(input_fd);
|
|
|
1353ca |
+
|
|
|
1353ca |
+ if (do_fsck_object && fsck_finish(&fsck_options))
|
|
|
1353ca |
+ die(_("fsck error in pack objects"));
|
|
|
1353ca |
+
|
|
|
1353ca |
free(objects);
|
|
|
1353ca |
strbuf_release(&index_name_buf);
|
|
|
1353ca |
strbuf_release(&keep_name_buf);
|
|
|
1353ca |
diff --git a/t/lib-pack.sh b/t/lib-pack.sh
|
|
|
1353ca |
index 7509846..4674899 100644
|
|
|
1353ca |
--- a/t/lib-pack.sh
|
|
|
1353ca |
+++ b/t/lib-pack.sh
|
|
|
1353ca |
@@ -79,6 +79,18 @@ pack_obj () {
|
|
|
1353ca |
;;
|
|
|
1353ca |
esac
|
|
|
1353ca |
|
|
|
1353ca |
+ # If it's not a delta, we can convince pack-objects to generate a pack
|
|
|
1353ca |
+ # with just our entry, and then strip off the header (12 bytes) and
|
|
|
1353ca |
+ # trailer (20 bytes).
|
|
|
1353ca |
+ if test -z "$2"
|
|
|
1353ca |
+ then
|
|
|
1353ca |
+ echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
|
|
|
1353ca |
+ size=$(wc -c
|
|
|
1353ca |
+ dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
|
|
|
1353ca |
+ rm -f pack_obj.tmp
|
|
|
1353ca |
+ return
|
|
|
1353ca |
+ fi
|
|
|
1353ca |
+
|
|
|
1353ca |
echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
|
|
|
1353ca |
return 1
|
|
|
1353ca |
}
|
|
|
1353ca |
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
|
|
|
1353ca |
index a09087c..fca7091 100755
|
|
|
1353ca |
--- a/t/t7415-submodule-names.sh
|
|
|
1353ca |
+++ b/t/t7415-submodule-names.sh
|
|
|
1353ca |
@@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a
|
|
|
1353ca |
real-world setup that confirms we catch this in practice.
|
|
|
1353ca |
'
|
|
|
1353ca |
. ./test-lib.sh
|
|
|
1353ca |
+. "$TEST_DIRECTORY"/lib-pack.sh
|
|
|
1353ca |
|
|
|
1353ca |
test_expect_success 'check names' '
|
|
|
1353ca |
cat >expect <<-\EOF &&
|
|
|
1353ca |
@@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
|
|
|
1353ca |
test_must_fail git push dst.git HEAD
|
|
|
1353ca |
'
|
|
|
1353ca |
|
|
|
1353ca |
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
|
|
|
1353ca |
+ rm -rf dst.git &&
|
|
|
1353ca |
+ git init --bare dst.git &&
|
|
|
1353ca |
+ git -C dst.git config transfer.fsckObjects true &&
|
|
|
1353ca |
+ git -C dst.git config transfer.unpackLimit 1 &&
|
|
|
1353ca |
+ test_must_fail git push dst.git HEAD
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
+# Normally our packs contain commits followed by trees followed by blobs. This
|
|
|
1353ca |
+# reverses the order, which requires backtracking to find the context of a
|
|
|
1353ca |
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
|
|
|
1353ca |
+test_expect_success 'create oddly ordered pack' '
|
|
|
1353ca |
+ git checkout --orphan odd &&
|
|
|
1353ca |
+ git rm -rf --cached . &&
|
|
|
1353ca |
+ git add .gitmodules &&
|
|
|
1353ca |
+ git commit -m odd &&
|
|
|
1353ca |
+ {
|
|
|
1353ca |
+ pack_header 3 &&
|
|
|
1353ca |
+ pack_obj $(git rev-parse HEAD:.gitmodules) &&
|
|
|
1353ca |
+ pack_obj $(git rev-parse HEAD^{tree}) &&
|
|
|
1353ca |
+ pack_obj $(git rev-parse HEAD)
|
|
|
1353ca |
+ } >odd.pack &&
|
|
|
1353ca |
+ pack_trailer odd.pack
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
|
|
|
1353ca |
+ rm -rf dst.git &&
|
|
|
1353ca |
+ git init --bare dst.git &&
|
|
|
1353ca |
+ test_must_fail git -C dst.git unpack-objects --strict
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
|
|
|
1353ca |
+ rm -rf dst.git &&
|
|
|
1353ca |
+ git init --bare dst.git &&
|
|
|
1353ca |
+ test_must_fail git -C dst.git index-pack --strict --stdin
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
test_done
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|
|
|
1353ca |
|
|
|
1353ca |
From b3790cb51efbba6c393471feb21e9f9d9ec86377 Mon Sep 17 00:00:00 2001
|
|
|
1353ca |
From: Jeff King <peff@peff.net>
|
|
|
1353ca |
Date: Fri, 4 May 2018 20:03:35 -0400
|
|
|
1353ca |
Subject: [PATCH 11/11] fsck: complain when .gitmodules is a symlink
|
|
|
1353ca |
|
|
|
1353ca |
commit b7b1fca175f1ed7933f361028c631b9ac86d868d upstream.
|
|
|
1353ca |
|
|
|
1353ca |
We've recently forbidden .gitmodules to be a symlink in
|
|
|
1353ca |
verify_path(). And it's an easy way to circumvent our fsck
|
|
|
1353ca |
checks for .gitmodules content. So let's complain when we
|
|
|
1353ca |
see it.
|
|
|
1353ca |
|
|
|
1353ca |
Signed-off-by: Jeff King <peff@peff.net>
|
|
|
1353ca |
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
|
|
|
1353ca |
---
|
|
|
1353ca |
fsck.c | 14 ++++++++++----
|
|
|
1353ca |
t/t7415-submodule-names.sh | 29 +++++++++++++++++++++++++++++
|
|
|
1353ca |
2 files changed, 39 insertions(+), 4 deletions(-)
|
|
|
1353ca |
|
|
|
1353ca |
diff --git a/fsck.c b/fsck.c
|
|
|
1353ca |
index d366ad8..79694b0 100644
|
|
|
1353ca |
--- a/fsck.c
|
|
|
1353ca |
+++ b/fsck.c
|
|
|
1353ca |
@@ -94,6 +94,7 @@ static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
|
|
|
1353ca |
FUNC(GITMODULES_BLOB, ERROR) \
|
|
|
1353ca |
FUNC(GITMODULES_PARSE, ERROR) \
|
|
|
1353ca |
FUNC(GITMODULES_NAME, ERROR) \
|
|
|
1353ca |
+ FUNC(GITMODULES_SYMLINK, ERROR) \
|
|
|
1353ca |
/* warnings */ \
|
|
|
1353ca |
FUNC(BAD_FILEMODE, WARN) \
|
|
|
1353ca |
FUNC(EMPTY_NAME, WARN) \
|
|
|
1353ca |
@@ -472,7 +473,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
|
|
|
1353ca |
|
|
|
1353ca |
static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|
|
1353ca |
{
|
|
|
1353ca |
- int retval;
|
|
|
1353ca |
+ int retval = 0;
|
|
|
1353ca |
int has_null_sha1 = 0;
|
|
|
1353ca |
int has_full_path = 0;
|
|
|
1353ca |
int has_empty_name = 0;
|
|
|
1353ca |
@@ -507,8 +508,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|
|
1353ca |
has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
|
|
|
1353ca |
has_zero_pad |= *(char *)desc.buffer == '0';
|
|
|
1353ca |
|
|
|
1353ca |
- if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
|
|
|
1353ca |
- oidhash_insert(&gitmodules_found, oid);
|
|
|
1353ca |
+ if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
|
|
|
1353ca |
+ if (!S_ISLNK(mode))
|
|
|
1353ca |
+ oidhash_insert(&gitmodules_found, oid);
|
|
|
1353ca |
+ else
|
|
|
1353ca |
+ retval += report(options, &item->object,
|
|
|
1353ca |
+ FSCK_MSG_GITMODULES_SYMLINK,
|
|
|
1353ca |
+ ".gitmodules is a symbolic link");
|
|
|
1353ca |
+ }
|
|
|
1353ca |
|
|
|
1353ca |
update_tree_entry(&desc);
|
|
|
1353ca |
|
|
|
1353ca |
@@ -551,7 +558,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|
|
1353ca |
o_name = name;
|
|
|
1353ca |
}
|
|
|
1353ca |
|
|
|
1353ca |
- retval = 0;
|
|
|
1353ca |
if (has_null_sha1)
|
|
|
1353ca |
retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
|
|
|
1353ca |
if (has_full_path)
|
|
|
1353ca |
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
|
|
|
1353ca |
index fca7091..6ee7115 100755
|
|
|
1353ca |
--- a/t/t7415-submodule-names.sh
|
|
|
1353ca |
+++ b/t/t7415-submodule-names.sh
|
|
|
1353ca |
@@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
|
|
|
1353ca |
test_must_fail git -C dst.git index-pack --strict --stdin
|
|
|
1353ca |
'
|
|
|
1353ca |
|
|
|
1353ca |
+test_expect_success 'fsck detects symlinked .gitmodules file' '
|
|
|
1353ca |
+ git init symlink &&
|
|
|
1353ca |
+ (
|
|
|
1353ca |
+ cd symlink &&
|
|
|
1353ca |
+
|
|
|
1353ca |
+ # Make the tree directly to avoid index restrictions.
|
|
|
1353ca |
+ #
|
|
|
1353ca |
+ # Because symlinks store the target as a blob, choose
|
|
|
1353ca |
+ # a pathname that could be parsed as a .gitmodules file
|
|
|
1353ca |
+ # to trick naive non-symlink-aware checking.
|
|
|
1353ca |
+ tricky="[foo]bar=true" &&
|
|
|
1353ca |
+ content=$(git hash-object -w ../.gitmodules) &&
|
|
|
1353ca |
+ target=$(printf "$tricky" | git hash-object -w --stdin) &&
|
|
|
1353ca |
+ tree=$(
|
|
|
1353ca |
+ {
|
|
|
1353ca |
+ printf "100644 blob $content\t$tricky\n" &&
|
|
|
1353ca |
+ printf "120000 blob $target\t.gitmodules\n"
|
|
|
1353ca |
+ } | git mktree
|
|
|
1353ca |
+ ) &&
|
|
|
1353ca |
+ commit=$(git commit-tree $tree) &&
|
|
|
1353ca |
+
|
|
|
1353ca |
+ # Check not only that we fail, but that it is due to the
|
|
|
1353ca |
+ # symlink detector; this grep string comes from the config
|
|
|
1353ca |
+ # variable name and will not be translated.
|
|
|
1353ca |
+ test_must_fail git fsck 2>output &&
|
|
|
1353ca |
+ grep gitmodulesSymlink output
|
|
|
1353ca |
+ )
|
|
|
1353ca |
+'
|
|
|
1353ca |
+
|
|
|
1353ca |
test_done
|
|
|
1353ca |
--
|
|
|
1353ca |
2.14.4
|
|
|
1353ca |
|