Blob Blame History Raw
From 17f3a3cfacbc2a8c212062f3bbb1b03d668189c0 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 30 Apr 2018 03:25:25 -0400
Subject: [PATCH 01/11] submodule-config: verify submodule names as paths

Submodule "names" come from the untrusted .gitmodules file,
but we blindly append them to $GIT_DIR/modules to create our
on-disk repo paths. This means you can do bad things by
putting "../" into the name (among other things).

Let's sanity-check these names to avoid building a path that
can be exploited. There are two main decisions:

  1. What should the allowed syntax be?

     It's tempting to reuse verify_path(), since submodule
     names typically come from in-repo paths. But there are
     two reasons not to:

       a. It's technically more strict than what we need, as
          we really care only about breaking out of the
          $GIT_DIR/modules/ hierarchy.  E.g., having a
          submodule named "foo/.git" isn't actually
          dangerous, and it's possible that somebody has
          manually given such a funny name.

       b. Since we'll eventually use this checking logic in
          fsck to prevent downstream repositories, it should
          be consistent across platforms. Because
          verify_path() relies on is_dir_sep(), it wouldn't
          block "foo\..\bar" on a non-Windows machine.

  2. Where should we enforce it? These days most of the
     .gitmodules reads go through submodule-config.c, so
     I've put it there in the reading step. That should
     cover all of the C code.

     We also construct the name for "git submodule add"
     inside the git-submodule.sh script. This is probably
     not a big deal for security since the name is coming
     from the user anyway, but it would be polite to remind
     them if the name they pick is invalid (and we need to
     expose the name-checker to the shell anyway for our
     test scripts).

     This patch issues a warning when reading .gitmodules
     and just ignores the related config entry completely.
     This will generally end up producing a sensible error,
     as it works the same as a .gitmodules file which is
     missing a submodule entry (so "submodule update" will
     barf, but "git clone --recurse-submodules" will print
     an error but not abort the clone.

     There is one minor oddity, which is that we print the
     warning once per malformed config key (since that's how
     the config subsystem gives us the entries). So in the
     new test, for example, the user would see three
     warnings. That's OK, since the intent is that this case
     should never come up outside of malicious repositories
     (and then it might even benefit the user to see the
     message multiple times).

Credit for finding this vulnerability and the proof of
concept from which the test script was adapted goes to
Etienne Stalmans.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/submodule--helper.c | 27 +++++++++++++++-
 git-submodule.sh            |  5 +++
 submodule-config.c          | 31 ++++++++++++++++++
 submodule-config.h          |  8 +++++
 t/t7415-submodule-names.sh  | 76 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100755 t/t7415-submodule-names.sh

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 926d205..5883e6c 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -842,6 +842,30 @@ static int resolve_relative_path(int argc, const char **argv, const char *prefix
 	return 0;
 }
 
+/*
+ * Exit non-zero if any of the submodule names given on the command line is
+ * invalid. If no names are given, filter stdin to print only valid names
+ * (which is primarily intended for testing).
+ */
+static int check_name(int argc, const char **argv, const char *prefix)
+{
+	if (argc > 1) {
+		while (*++argv) {
+			if (check_submodule_name(*argv) < 0)
+				return 1;
+		}
+	} else {
+		struct strbuf buf = STRBUF_INIT;
+		while (strbuf_getline(&buf, stdin) != EOF) {
+			if (!check_submodule_name(buf.buf))
+				printf("%s\n", buf.buf);
+		}
+		strbuf_release(&buf);
+	}
+	return 0;
+}
+
+
 struct cmd_struct {
 	const char *cmd;
 	int (*fn)(int, const char **, const char *);
@@ -855,7 +879,8 @@ static struct cmd_struct commands[] = {
 	{"relative-path", resolve_relative_path},
 	{"resolve-relative-url", resolve_relative_url},
 	{"resolve-relative-url-test", resolve_relative_url_test},
-	{"init", module_init}
+	{"init", module_init},
+	{"check-name", check_name}
 };
 
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
diff --git a/git-submodule.sh b/git-submodule.sh
index 78fdac9..d82da42 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -225,6 +225,11 @@ Use -f if you really want to add it." >&2
 		sm_name="$sm_path"
 	fi
 
+	if ! git submodule--helper check-name "$sm_name"
+	then
+		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
+	fi
+
 	# perhaps the path exists and is already a git repo, else clone it
 	if test -e "$sm_path"
 	then
diff --git a/submodule-config.c b/submodule-config.c
index 93dd364..dbba74a 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -162,6 +162,31 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache,
 	return NULL;
 }
 
+int check_submodule_name(const char *name)
+{
+	/* Disallow empty names */
+	if (!*name)
+		return -1;
+
+	/*
+	 * Look for '..' as a path component. Check both '/' and '\\' as
+	 * separators rather than is_dir_sep(), because we want the name rules
+	 * to be consistent across platforms.
+	 */
+	goto in_component; /* always start inside component */
+	while (*name) {
+		char c = *name++;
+		if (c == '/' || c == '\\') {
+in_component:
+			if (name[0] == '.' && name[1] == '.' &&
+			    (!name[2] || name[2] == '/' || name[2] == '\\'))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
 static int name_and_item_from_var(const char *var, struct strbuf *name,
 				  struct strbuf *item)
 {
@@ -173,6 +198,12 @@ static int name_and_item_from_var(const char *var, struct strbuf *name,
 		return 0;
 
 	strbuf_add(name, subsection, subsection_len);
+	if (check_submodule_name(name->buf) < 0) {
+		warning(_("ignoring suspicious submodule name: %s"), name->buf);
+		strbuf_release(name);
+		return 0;
+	}
+
 	strbuf_addstr(item, key);
 
 	return 1;
diff --git a/submodule-config.h b/submodule-config.h
index e4857f5..6ed7a3c 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -29,4 +29,12 @@ const struct submodule *submodule_from_path(const unsigned char *commit_sha1,
 		const char *path);
 void submodule_free(void);
 
+/*
+ * Returns 0 if the name is syntactically acceptable as a submodule "name"
+ * (e.g., that may be found in the subsection of a .gitmodules file) and -1
+ * otherwise.
+ */
+int check_submodule_name(const char *name);
+
+
 #endif /* SUBMODULE_CONFIG_H */
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
new file mode 100755
index 0000000..75fa071
--- /dev/null
+++ b/t/t7415-submodule-names.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+test_description='check handling of .. in submodule names
+
+Exercise the name-checking function on a variety of names, and then give a
+real-world setup that confirms we catch this in practice.
+'
+. ./test-lib.sh
+
+test_expect_success 'check names' '
+	cat >expect <<-\EOF &&
+	valid
+	valid/with/paths
+	EOF
+
+	git submodule--helper check-name >actual <<-\EOF &&
+	valid
+	valid/with/paths
+
+	../foo
+	/../foo
+	..\foo
+	\..\foo
+	foo/..
+	foo/../
+	foo\..
+	foo\..\
+	foo/../bar
+	EOF
+
+	test_cmp expect actual
+'
+
+test_expect_success 'create innocent subrepo' '
+	git init innocent &&
+	git -C innocent commit --allow-empty -m foo
+'
+
+test_expect_success 'submodule add refuses invalid names' '
+	test_must_fail \
+		git submodule add --name ../../modules/evil "$PWD/innocent" evil
+'
+
+test_expect_success 'add evil submodule' '
+	git submodule add "$PWD/innocent" evil &&
+
+	mkdir modules &&
+	cp -r .git/modules/evil modules &&
+	write_script modules/evil/hooks/post-checkout <<-\EOF &&
+	echo >&2 "RUNNING POST CHECKOUT"
+	EOF
+
+	git config -f .gitmodules submodule.evil.update checkout &&
+	git config -f .gitmodules --rename-section \
+		submodule.evil submodule.../../modules/evil &&
+	git add modules &&
+	git commit -am evil
+'
+
+# This step seems like it shouldn't be necessary, since the payload is
+# contained entirely in the evil submodule. But due to the vagaries of the
+# submodule code, checking out the evil module will fail unless ".git/modules"
+# exists. Adding another submodule (with a name that sorts before "evil") is an
+# easy way to make sure this is the case in the victim clone.
+test_expect_success 'add other submodule' '
+	git submodule add "$PWD/innocent" another-module &&
+	git add another-module &&
+	git commit -am another
+'
+
+test_expect_success 'clone evil superproject' '
+	git clone --recurse-submodules . victim >output 2>&1 &&
+	! grep "RUNNING POST CHECKOUT" output
+'
+
+test_done
-- 
2.14.4


From 42410051db39f2009980ba293ff0f4f1755df06a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 13 May 2018 12:09:42 -0400
Subject: [PATCH 02/11] is_ntfs_dotgit: use a size_t for traversing string

We walk through the "name" string using an int, which can
wrap to a negative value and cause us to read random memory
before our array (e.g., by creating a tree with a name >2GB,
since "int" is still 32 bits even on most 64-bit platforms).
Worse, this is easy to trigger during the fsck_tree() check,
which is supposed to be protecting us from malicious
garbage.

Note one bit of trickiness in the existing code: we
sometimes assign -1 to "len" at the end of the loop, and
then rely on the "len++" in the for-loop's increment to take
it back to 0. This is still legal with a size_t, since
assigning -1 will turn into SIZE_MAX, which then wraps
around to 0 on increment.

Signed-off-by: Jeff King <peff@peff.net>
---
 path.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/path.c b/path.c
index 0778ff0..3f41416 100644
--- a/path.c
+++ b/path.c
@@ -1205,7 +1205,7 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip)
 
 int is_ntfs_dotgit(const char *name)
 {
-	int len;
+	size_t len;
 
 	for (len = 0; ; len++)
 		if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) {
-- 
2.14.4


From e63ce01d9a387c60754349508352f3bca086bbec Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 15:23:45 -0400
Subject: [PATCH 03/11] is_hfs_dotgit: match other .git files

Both verify_path() and fsck match ".git", ".GIT", and other
variants specific to HFS+. Let's allow matching other
special files like ".gitmodules", which we'll later use to
enforce extra restrictions via verify_path() and fsck.

Signed-off-by: Jeff King <peff@peff.net>
---
 utf8.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++------------
 utf8.h |  5 +++++
 2 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/utf8.c b/utf8.c
index 00e10c8..0d406b1 100644
--- a/utf8.c
+++ b/utf8.c
@@ -605,28 +605,33 @@ static ucs_char_t next_hfs_char(const char **in)
 	}
 }
 
-int is_hfs_dotgit(const char *path)
+static int is_hfs_dot_generic(const char *path,
+			      const char *needle, size_t needle_len)
 {
 	ucs_char_t c;
 
 	c = next_hfs_char(&path);
 	if (c != '.')
 		return 0;
-	c = next_hfs_char(&path);
 
 	/*
 	 * there's a great deal of other case-folding that occurs
-	 * in HFS+, but this is enough to catch anything that will
-	 * convert to ".git"
+	 * in HFS+, but this is enough to catch our fairly vanilla
+	 * hard-coded needles.
 	 */
-	if (c != 'g' && c != 'G')
-		return 0;
-	c = next_hfs_char(&path);
-	if (c != 'i' && c != 'I')
-		return 0;
-	c = next_hfs_char(&path);
-	if (c != 't' && c != 'T')
-		return 0;
+	for (; needle_len > 0; needle++, needle_len--) {
+		c = next_hfs_char(&path);
+
+		/*
+		 * We know our needles contain only ASCII, so we clamp here to
+		 * make the results of tolower() sane.
+		 */
+		if (c > 127)
+			return 0;
+		if (tolower(c) != *needle)
+			return 0;
+	}
+
 	c = next_hfs_char(&path);
 	if (c && !is_dir_sep(c))
 		return 0;
@@ -634,6 +639,35 @@ int is_hfs_dotgit(const char *path)
 	return 1;
 }
 
+/*
+ * Inline wrapper to make sure the compiler resolves strlen() on literals at
+ * compile time.
+ */
+static inline int is_hfs_dot_str(const char *path, const char *needle)
+{
+	return is_hfs_dot_generic(path, needle, strlen(needle));
+}
+
+int is_hfs_dotgit(const char *path)
+{
+	return is_hfs_dot_str(path, "git");
+}
+
+int is_hfs_dotgitmodules(const char *path)
+{
+	return is_hfs_dot_str(path, "gitmodules");
+}
+
+int is_hfs_dotgitignore(const char *path)
+{
+	return is_hfs_dot_str(path, "gitignore");
+}
+
+int is_hfs_dotgitattributes(const char *path)
+{
+	return is_hfs_dot_str(path, "gitattributes");
+}
+
 const char utf8_bom[] = "\357\273\277";
 
 int skip_utf8_bom(char **text, size_t len)
diff --git a/utf8.h b/utf8.h
index 6bbcf31..da19b43 100644
--- a/utf8.h
+++ b/utf8.h
@@ -52,8 +52,13 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);
  * The path should be NUL-terminated, but we will match variants of both ".git\0"
  * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck
  * and verify_path().
+ *
+ * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo".
  */
 int is_hfs_dotgit(const char *path);
+int is_hfs_dotgitmodules(const char *path);
+int is_hfs_dotgitignore(const char *path);
+int is_hfs_dotgitattributes(const char *path);
 
 typedef enum {
 	ALIGN_LEFT,
-- 
2.14.4


From c914f82f17ce42ea03ae1754f263c600e4faa7f3 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 11 May 2018 16:03:54 +0200
Subject: [PATCH 04/11] is_ntfs_dotgit: match other .git files

When we started to catch NTFS short names that clash with .git, we only
looked for GIT~1. This is sufficient because we only ever clone into an
empty directory, so .git is guaranteed to be the first subdirectory or
file in that directory.

However, even with a fresh clone, .gitmodules is *not* necessarily the
first file to be written that would want the NTFS short name GITMOD~1: a
malicious repository can add .gitmodul0000 and friends, which sorts
before `.gitmodules` and is therefore checked out *first*. For that
reason, we have to test not only for ~1 short names, but for others,
too.

It's hard to just adapt the existing checks in is_ntfs_dotgit(): since
Windows 2000 (i.e., in all Windows versions still supported by Git),
NTFS short names are only generated in the <prefix>~<number> form up to
number 4. After that, a *different* prefix is used, calculated from the
long file name using an undocumented, but stable algorithm.

For example, the short name of .gitmodules would be GITMOD~1, but if it
is taken, and all of ~2, ~3 and ~4 are taken, too, the short name
GI7EBA~1 will be used. From there, collisions are handled by
incrementing the number, shortening the prefix as needed (until ~9999999
is reached, in which case NTFS will not allow the file to be created).

We'd also want to handle .gitignore and .gitattributes, which suffer
from a similar problem, using the fall-back short names GI250A~1 and
GI7D29~1, respectively.

To accommodate for that, we could reimplement the hashing algorithm, but
it is just safer and simpler to provide the known prefixes. This
algorithm has been reverse-engineered and described at
https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but
still available via https://web.archive.org/.

These can be recomputed by running the following Perl script:

-- snip --
use warnings;
use strict;

sub compute_short_name_hash ($) {
        my $checksum = 0;
        foreach (split('', $_[0])) {
                $checksum = ($checksum * 0x25 + ord($_)) & 0xffff;
        }

        $checksum = ($checksum * 314159269) & 0xffffffff;
        $checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000);
        $checksum -= (($checksum * 1152921497) >> 60) * 1000000007;

        return scalar reverse sprintf("%x", $checksum & 0xffff);
}

print compute_short_name_hash($ARGV[0]);
-- snap --

E.g., running that with the argument ".gitignore" will
result in "250a" (which then becomes "gi250a" in the code).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
---
 cache.h | 10 +++++++-
 path.c  | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 17afd25..fbbe933 100644
--- a/cache.h
+++ b/cache.h
@@ -1032,7 +1032,15 @@ int normalize_path_copy(char *dst, const char *src);
 int longest_ancestor_length(const char *path, struct string_list *prefixes);
 char *strip_path_suffix(const char *path, const char *suffix);
 int daemon_avoid_alias(const char *path);
-extern int is_ntfs_dotgit(const char *name);
+
+/*
+ * These functions match their is_hfs_dotgit() counterparts; see utf8.h for
+ * details.
+ */
+int is_ntfs_dotgit(const char *name);
+int is_ntfs_dotgitmodules(const char *name);
+int is_ntfs_dotgitignore(const char *name);
+int is_ntfs_dotgitattributes(const char *name);
 
 /*
  * Returns true iff "str" could be confused as a command-line option when
diff --git a/path.c b/path.c
index 3f41416..34d95a4 100644
--- a/path.c
+++ b/path.c
@@ -1222,6 +1222,90 @@ int is_ntfs_dotgit(const char *name)
 		}
 }
 
+static int is_ntfs_dot_generic(const char *name,
+			       const char *dotgit_name,
+			       size_t len,
+			       const char *dotgit_ntfs_shortname_prefix)
+{
+	int saw_tilde;
+	size_t i;
+
+	if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) {
+		i = len + 1;
+only_spaces_and_periods:
+		for (;;) {
+			char c = name[i++];
+			if (!c)
+				return 1;
+			if (c != ' ' && c != '.')
+				return 0;
+		}
+	}
+
+	/*
+	 * Is it a regular NTFS short name, i.e. shortened to 6 characters,
+	 * followed by ~1, ... ~4?
+	 */
+	if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' &&
+	    name[7] >= '1' && name[7] <= '4') {
+		i = 8;
+		goto only_spaces_and_periods;
+	}
+
+	/*
+	 * Is it a fall-back NTFS short name (for details, see
+	 * https://en.wikipedia.org/wiki/8.3_filename?
+	 */
+	for (i = 0, saw_tilde = 0; i < 8; i++)
+		if (name[i] == '\0')
+			return 0;
+		else if (saw_tilde) {
+			if (name[i] < '0' || name[i] > '9')
+				return 0;
+		} else if (name[i] == '~') {
+			if (name[++i] < '1' || name[i] > '9')
+				return 0;
+			saw_tilde = 1;
+		} else if (i >= 6)
+			return 0;
+		else if (name[i] < 0) {
+			/*
+			 * We know our needles contain only ASCII, so we clamp
+			 * here to make the results of tolower() sane.
+			 */
+			return 0;
+		} else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i])
+			return 0;
+
+	goto only_spaces_and_periods;
+}
+
+/*
+ * Inline helper to make sure compiler resolves strlen() on literals at
+ * compile time.
+ */
+static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name,
+				  const char *dotgit_ntfs_shortname_prefix)
+{
+	return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name),
+				   dotgit_ntfs_shortname_prefix);
+}
+
+int is_ntfs_dotgitmodules(const char *name)
+{
+	return is_ntfs_dot_str(name, "gitmodules", "gi7eba");
+}
+
+int is_ntfs_dotgitignore(const char *name)
+{
+	return is_ntfs_dot_str(name, "gitignore", "gi250a");
+}
+
+int is_ntfs_dotgitattributes(const char *name)
+{
+	return is_ntfs_dot_str(name, "gitattributes", "gi7d29");
+}
+
 int looks_like_command_line_option(const char *str)
 {
 	return str && str[0] == '-';
-- 
2.14.4


From 4c8e72db9864e150f163d1b9da7b0e4e15f49585 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 12 May 2018 22:16:51 +0200
Subject: [PATCH 05/11] is_{hfs,ntfs}_dotgitmodules: add tests

This tests primarily for NTFS issues, but also adds one example of an
HFS+ issue.

Thanks go to Congyi Wu for coming up with the list of examples where
NTFS would possibly equate the filename with `.gitmodules`.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
---
 t/helper/test-path-utils.c | 20 +++++++++++
 t/t0060-path-utils.sh      | 86 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c
index ba805b3..e75b146 100644
--- a/t/helper/test-path-utils.c
+++ b/t/helper/test-path-utils.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "string-list.h"
+#include "utf8.h"
 
 /*
  * A "string_list_each_func_t" function that normalizes an entry from
@@ -156,6 +157,11 @@ static struct test_data dirname_data[] = {
 	{ NULL,              NULL     }
 };
 
+static int is_dotgitmodules(const char *path)
+{
+	return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path);
+}
+
 int main(int argc, char **argv)
 {
 	if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) {
@@ -256,6 +262,20 @@ int main(int argc, char **argv)
 	if (argc == 2 && !strcmp(argv[1], "dirname"))
 		return test_function(dirname_data, dirname, argv[1]);
 
+	if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) {
+		int res = 0, expect = 1, i;
+		for (i = 2; i < argc; i++)
+			if (!strcmp("--not", argv[i]))
+				expect = !expect;
+			else if (expect != is_dotgitmodules(argv[i]))
+				res = error("'%s' is %s.gitmodules", argv[i],
+					    expect ? "not " : "");
+			else
+				fprintf(stderr, "ok: '%s' is %s.gitmodules\n",
+					argv[i], expect ? "" : "not ");
+		return !!res;
+	}
+
 	fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
 		argv[1] ? argv[1] : "(there was none)");
 	return 1;
diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index bf2deee..a1c9bbd 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -344,4 +344,90 @@ test_submodule_relative_url "(null)" "ssh://hostname:22/repo" "../subrepo" "ssh:
 test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo"
 test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo"
 
+test_expect_success 'match .gitmodules' '
+	test-path-utils is_dotgitmodules \
+		.gitmodules \
+		\
+		.git${u200c}modules \
+		\
+		.Gitmodules \
+		.gitmoduleS \
+		\
+		".gitmodules " \
+		".gitmodules." \
+		".gitmodules  " \
+		".gitmodules. " \
+		".gitmodules ." \
+		".gitmodules.." \
+		".gitmodules   " \
+		".gitmodules.  " \
+		".gitmodules . " \
+		".gitmodules  ." \
+		\
+		".Gitmodules " \
+		".Gitmodules." \
+		".Gitmodules  " \
+		".Gitmodules. " \
+		".Gitmodules ." \
+		".Gitmodules.." \
+		".Gitmodules   " \
+		".Gitmodules.  " \
+		".Gitmodules . " \
+		".Gitmodules  ." \
+		\
+		GITMOD~1 \
+		gitmod~1 \
+		GITMOD~2 \
+		gitmod~3 \
+		GITMOD~4 \
+		\
+		"GITMOD~1 " \
+		"gitmod~2." \
+		"GITMOD~3  " \
+		"gitmod~4. " \
+		"GITMOD~1 ." \
+		"gitmod~2   " \
+		"GITMOD~3.  " \
+		"gitmod~4 . " \
+		\
+		GI7EBA~1 \
+		gi7eba~9 \
+		\
+		GI7EB~10 \
+		GI7EB~11 \
+		GI7EB~99 \
+		GI7EB~10 \
+		GI7E~100 \
+		GI7E~101 \
+		GI7E~999 \
+		~1000000 \
+		~9999999 \
+		\
+		--not \
+		".gitmodules x"  \
+		".gitmodules .x" \
+		\
+		" .gitmodules" \
+		\
+		..gitmodules \
+		\
+		gitmodules \
+		\
+		.gitmodule \
+		\
+		".gitmodules x " \
+		".gitmodules .x" \
+		\
+		GI7EBA~ \
+		GI7EBA~0 \
+		GI7EBA~~1 \
+		GI7EBA~X \
+		Gx7EBA~1 \
+		GI7EBX~1 \
+		\
+		GI7EB~1 \
+		GI7EB~01 \
+		GI7EB~1X
+'
+
 test_done
-- 
2.14.4


From f57dd60d921e31b78e42ad9f91c333ab19138deb Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 13 May 2018 12:57:14 -0400
Subject: [PATCH 06/11] skip_prefix: add case-insensitive variant

We have the convenient skip_prefix() helper, but if you want
to do case-insensitive matching, you're stuck doing it by
hand. We could add an extra parameter to the function to
let callers ask for this, but the function is small and
somewhat performance-critical. Let's just re-implement it
for the case-insensitive version.

Signed-off-by: Jeff King <peff@peff.net>
---
 git-compat-util.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/git-compat-util.h b/git-compat-util.h
index 49d4029..7ad6e59 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -909,6 +909,23 @@ static inline int sane_iscase(int x, int is_lower)
 		return (x & 0x20) == 0;
 }
 
+/*
+ * Like skip_prefix, but compare case-insensitively. Note that the comparison
+ * is done via tolower(), so it is strictly ASCII (no multi-byte characters or
+ * locale-specific conversions).
+ */
+static inline int skip_iprefix(const char *str, const char *prefix,
+			       const char **out)
+{
+	do {
+		if (!*prefix) {
+			*out = str;
+			return 1;
+		}
+	} while (tolower(*str++) == tolower(*prefix++));
+	return 0;
+}
+
 static inline int strtoul_ui(char const *s, int base, unsigned int *result)
 {
 	unsigned long ul;
-- 
2.14.4


From da0dec01dc5fb1d43732ad997d30bed853025a6c Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 13 May 2018 13:00:23 -0400
Subject: [PATCH 07/11] verify_path: drop clever fallthrough

We check ".git" and ".." in the same switch statement, and
fall through the cases to share the end-of-component check.
While this saves us a line or two, it makes modifying the
function much harder. Let's just write it out.

Signed-off-by: Jeff King <peff@peff.net>
---
 read-cache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index db27766..b8a2a06 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -787,8 +787,7 @@ static int verify_dotfile(const char *rest)
 
 	switch (*rest) {
 	/*
-	 * ".git" followed by  NUL or slash is bad. This
-	 * shares the path end test with the ".." case.
+	 * ".git" followed by NUL or slash is bad.
 	 */
 	case 'g':
 	case 'G':
@@ -796,8 +795,9 @@ static int verify_dotfile(const char *rest)
 			break;
 		if (rest[2] != 't' && rest[2] != 'T')
 			break;
-		rest += 2;
-	/* fallthrough */
+		if (rest[3] == '\0' || is_dir_sep(rest[3]))
+			return 0;
+		break;
 	case '.':
 		if (rest[1] == '\0' || is_dir_sep(rest[1]))
 			return 0;
-- 
2.14.4


From 392a029f659dc735a445ca16349b98c0c63bcee6 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 15 May 2018 09:56:50 -0400
Subject: [PATCH 08/11] verify_dotfile: mention case-insensitivity in comment

We're more restrictive than we need to be in matching ".GIT"
on case-sensitive filesystems; let's make a note that this
is intentional.

Signed-off-by: Jeff King <peff@peff.net>
---
 read-cache.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index b8a2a06..88ed49c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -787,7 +787,10 @@ static int verify_dotfile(const char *rest)
 
 	switch (*rest) {
 	/*
-	 * ".git" followed by NUL or slash is bad.
+	 * ".git" followed by NUL or slash is bad. Note that we match
+	 * case-insensitively here, even if ignore_case is not set.
+	 * This outlaws ".GIT" everywhere out of an abundance of caution,
+	 * since there's really no good reason to allow it.
 	 */
 	case 'g':
 	case 'G':
-- 
2.14.4


From 95a1ce8bd49ad78d9266af969fa6a9b5476ad118 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 14 May 2018 11:00:56 -0400
Subject: [PATCH 09/11] update-index: stat updated files earlier

In the update_one(), we check verify_path() on the proposed
path before doing anything else. In preparation for having
verify_path() look at the file mode, let's stat the file
earlier, so we can check the mode accurately.

This is made a bit trickier by the fact that this function
only does an lstat in a few code paths (the ones that flow
down through process_path()). So we can speculatively do the
lstat() here and pass the results down, and just use a dummy
mode for cases where we won't actually be updating the index
from the filesystem.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/update-index.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index b8b8522..0c5be84 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -354,10 +354,9 @@ static int process_directory(const char *path, int len, struct stat *st)
 	return error("%s: is a directory - add files inside instead", path);
 }
 
-static int process_path(const char *path)
+static int process_path(const char *path, struct stat *st, int stat_errno)
 {
 	int pos, len;
-	struct stat st;
 	const struct cache_entry *ce;
 
 	len = strlen(path);
@@ -381,13 +380,13 @@ static int process_path(const char *path)
 	 * First things first: get the stat information, to decide
 	 * what to do about the pathname!
 	 */
-	if (lstat(path, &st) < 0)
-		return process_lstat_error(path, errno);
+	if (stat_errno)
+		return process_lstat_error(path, stat_errno);
 
-	if (S_ISDIR(st.st_mode))
-		return process_directory(path, len, &st);
+	if (S_ISDIR(st->st_mode))
+		return process_directory(path, len, st);
 
-	return add_one_path(ce, path, len, &st);
+	return add_one_path(ce, path, len, st);
 }
 
 static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
@@ -451,6 +450,16 @@ static void chmod_path(int flip, const char *path)
 
 static void update_one(const char *path)
 {
+	int stat_errno = 0;
+	struct stat st;
+
+	if (mark_valid_only || mark_skip_worktree_only || force_remove)
+		st.st_mode = 0;
+	else if (lstat(path, &st) < 0) {
+		st.st_mode = 0;
+		stat_errno = errno;
+	} /* else stat is valid */
+
 	if (!verify_path(path)) {
 		fprintf(stderr, "Ignoring path %s\n", path);
 		return;
@@ -472,7 +481,7 @@ static void update_one(const char *path)
 		report("remove '%s'", path);
 		return;
 	}
-	if (process_path(path))
+	if (process_path(path, &st, stat_errno))
 		die("Unable to process path %s", path);
 	report("add '%s'", path);
 }
-- 
2.14.4


From 1d5af177a0853d286069176b11f661ed0e069faa Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 4 May 2018 20:03:35 -0400
Subject: [PATCH 10/11] verify_path: disallow symlinks in .gitmodules

There are a few reasons it's not a good idea to make
.gitmodules a symlink, including:

  1. It won't be portable to systems without symlinks.

  2. It may behave inconsistently, since Git may look at
     this file in the index or a tree without bothering to
     resolve any symbolic links. We don't do this _yet_, but
     the config infrastructure is there and it's planned for
     the future.

With some clever code, we could make (2) work. And some
people may not care about (1) if they only work on one
platform. But there are a few security reasons to simply
disallow it:

  a. A symlinked .gitmodules file may circumvent any fsck
     checks of the content.

  b. Git may read and write from the on-disk file without
     sanity checking the symlink target. So for example, if
     you link ".gitmodules" to "../oops" and run "git
     submodule add", we'll write to the file "oops" outside
     the repository.

Again, both of those are problems that _could_ be solved
with sufficient code, but given the complications in (1) and
(2), we're better off just outlawing it explicitly.

Note the slightly tricky call to verify_path() in
update-index's update_one(). There we may not have a mode if
we're not updating from the filesystem (e.g., we might just
be removing the file). Passing "0" as the mode there works
fine; since it's not a symlink, we'll just skip the extra
checks.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/apply.c        |  4 ++--
 builtin/update-index.c |  6 +++---
 cache.h                |  2 +-
 read-cache.c           | 40 +++++++++++++++++++++++++++++++---------
 4 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index c770d7d..8602b98 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3683,9 +3683,9 @@ static void die_on_unsafe_path(struct patch *patch)
 	if (!patch->is_delete)
 		new_name = patch->new_name;
 
-	if (old_name && !verify_path(old_name))
+	if (old_name && !verify_path(old_name, patch->old_mode))
 		die(_("invalid path '%s'"), old_name);
-	if (new_name && !verify_path(new_name))
+	if (new_name && !verify_path(new_name, patch->new_mode))
 		die(_("invalid path '%s'"), new_name);
 }
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 0c5be84..c54bada 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -395,7 +395,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
 	int size, len, option;
 	struct cache_entry *ce;
 
-	if (!verify_path(path))
+	if (!verify_path(path, mode))
 		return error("Invalid path '%s'", path);
 
 	len = strlen(path);
@@ -460,7 +460,7 @@ static void update_one(const char *path)
 		stat_errno = errno;
 	} /* else stat is valid */
 
-	if (!verify_path(path)) {
+	if (!verify_path(path, st.st_mode)) {
 		fprintf(stderr, "Ignoring path %s\n", path);
 		return;
 	}
@@ -550,7 +550,7 @@ static void read_index_info(int nul_term_line)
 			path_name = uq.buf;
 		}
 
-		if (!verify_path(path_name)) {
+		if (!verify_path(path_name, mode)) {
 			fprintf(stderr, "Ignoring path %s\n", path_name);
 			continue;
 		}
diff --git a/cache.h b/cache.h
index fbbe933..61d43c6 100644
--- a/cache.h
+++ b/cache.h
@@ -560,7 +560,7 @@ extern int read_index_unmerged(struct index_state *);
 extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags);
 extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
-extern int verify_path(const char *path);
+extern int verify_path(const char *path, unsigned mode);
 extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
 extern void adjust_dirname_case(struct index_state *istate, char *name);
 extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
diff --git a/read-cache.c b/read-cache.c
index 88ed49c..d6dedd5 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -738,7 +738,7 @@ struct cache_entry *make_cache_entry(unsigned int mode,
 	int size, len;
 	struct cache_entry *ce, *ret;
 
-	if (!verify_path(path)) {
+	if (!verify_path(path, mode)) {
 		error("Invalid path '%s'", path);
 		return NULL;
 	}
@@ -773,7 +773,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b)
  * Also, we don't want double slashes or slashes at the
  * end that can make pathnames ambiguous.
  */
-static int verify_dotfile(const char *rest)
+static int verify_dotfile(const char *rest, unsigned mode)
 {
 	/*
 	 * The first character was '.', but that
@@ -791,6 +791,9 @@ static int verify_dotfile(const char *rest)
 	 * case-insensitively here, even if ignore_case is not set.
 	 * This outlaws ".GIT" everywhere out of an abundance of caution,
 	 * since there's really no good reason to allow it.
+	 *
+	 * Once we've seen ".git", we can also find ".gitmodules", etc (also
+	 * case-insensitively).
 	 */
 	case 'g':
 	case 'G':
@@ -800,6 +803,12 @@ static int verify_dotfile(const char *rest)
 			break;
 		if (rest[3] == '\0' || is_dir_sep(rest[3]))
 			return 0;
+		if (S_ISLNK(mode)) {
+			rest += 3;
+			if (skip_iprefix(rest, "modules", &rest) &&
+			    (*rest == '\0' || is_dir_sep(*rest)))
+				return 0;
+		}
 		break;
 	case '.':
 		if (rest[1] == '\0' || is_dir_sep(rest[1]))
@@ -808,7 +817,7 @@ static int verify_dotfile(const char *rest)
 	return 1;
 }
 
-int verify_path(const char *path)
+int verify_path(const char *path, unsigned mode)
 {
 	char c;
 
@@ -821,12 +830,25 @@ int verify_path(const char *path)
 			return 1;
 		if (is_dir_sep(c)) {
 inside:
-			if (protect_hfs && is_hfs_dotgit(path))
-				return 0;
-			if (protect_ntfs && is_ntfs_dotgit(path))
-				return 0;
+			if (protect_hfs) {
+				if (is_hfs_dotgit(path))
+					return 0;
+				if (S_ISLNK(mode)) {
+					if (is_hfs_dotgitmodules(path))
+						return 0;
+				}
+			}
+			if (protect_ntfs) {
+				if (is_ntfs_dotgit(path))
+					return 0;
+				if (S_ISLNK(mode)) {
+					if (is_ntfs_dotgitmodules(path))
+						return 0;
+				}
+			}
+
 			c = *path++;
-			if ((c == '.' && !verify_dotfile(path)) ||
+			if ((c == '.' && !verify_dotfile(path, mode)) ||
 			    is_dir_sep(c) || c == '\0')
 				return 0;
 		}
@@ -1008,7 +1030,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 
 	if (!ok_to_add)
 		return -1;
-	if (!verify_path(ce->name))
+	if (!verify_path(ce->name, ce->ce_mode))
 		return error("Invalid path '%s'", ce->name);
 
 	if (!skip_df_check &&
-- 
2.14.4


From ab1977d8fdcf7b259e1a4ede2f99423dca97ea41 Mon Sep 17 00:00:00 2001
From: Sebastian Kisela <skisela@redhat.com>
Date: Wed, 20 Jun 2018 15:03:18 +0200
Subject: [PATCH 11/11] submodules: make t7415 test accept fail exit code

The command that performs the cloning fails with non zero exit
code, but the exit code itself is not important.
The command may fail but the general output must be as expected.
---
 t/t7415-submodule-names.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index 75fa071..b30fa33 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -69,7 +69,7 @@ test_expect_success 'add other submodule' '
 '
 
 test_expect_success 'clone evil superproject' '
-	git clone --recurse-submodules . victim >output 2>&1 &&
+	test_might_fail git clone --recurse-submodules . victim >output 2>&1 &&
 	! grep "RUNNING POST CHECKOUT" output
 '
 
-- 
2.14.4