Blob Blame History Raw
From 692455ed04743fbf2c102ae4bcaa0ac7bcc433a0 Mon Sep 17 00:00:00 2001
From: Christian Couder <chriscool@tuxfamily.org>
Date: Sun, 1 Dec 2013 08:49:16 +0100
Subject: [PATCH 01/11] strbuf: introduce starts_with() and ends_with()

prefixcmp() and suffixcmp() share the common "cmp" suffix that
typically are used to name functions that can be used for ordering,
but they can't, because they are not antisymmetric:

        prefixcmp("foo", "foobar") < 0
        prefixcmp("foobar", "foo") == 0

We in fact do not use these functions for ordering.  Replace them
with functions that just check for equality.

Add starts_with() and end_with() that will be used to replace
prefixcmp() and suffixcmp(), respectively, as the first step.  These
are named after corresponding functions/methods in programming
languages, like Java, Python and Ruby.

In vcs-svn/fast_export.c, there was already an ends_with() function
that did the same thing. Let's use the new one instead while at it.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-compat-util.h     |  2 ++
 strbuf.c              | 18 ++++++++++++++++++
 vcs-svn/fast_export.c | 11 +----------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index bd62ab07a8..c4f0afc104 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -341,7 +341,9 @@ extern void set_die_routine(NORETURN_PTR void (*routine)(const char *err, va_lis
 extern void set_error_routine(void (*routine)(const char *err, va_list params));
 extern void set_die_is_recursing_routine(int (*routine)(void));
 
+extern int starts_with(const char *str, const char *prefix);
 extern int prefixcmp(const char *str, const char *prefix);
+extern int ends_with(const char *str, const char *suffix);
 extern int suffixcmp(const char *str, const char *suffix);
 
 static inline const char *skip_prefix(const char *str, const char *prefix)
diff --git a/strbuf.c b/strbuf.c
index 1170d01c43..83caf4a914 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -1,6 +1,15 @@
 #include "cache.h"
 #include "refs.h"
 
+int starts_with(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 1;
+		else if (*str != *prefix)
+			return 0;
+}
+
 int prefixcmp(const char *str, const char *prefix)
 {
 	for (; ; str++, prefix++)
@@ -10,6 +19,15 @@ int prefixcmp(const char *str, const char *prefix)
 			return (unsigned char)*prefix - (unsigned char)*str;
 }
 
+int ends_with(const char *str, const char *suffix)
+{
+	int len = strlen(str), suflen = strlen(suffix);
+	if (len < suflen)
+		return 0;
+	else
+		return !strcmp(str + len - suflen, suffix);
+}
+
 int suffixcmp(const char *str, const char *suffix)
 {
 	int len = strlen(str), suflen = strlen(suffix);
diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index f2b23c81de..bd0f2c2b86 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -162,22 +162,13 @@ static void die_short_read(struct line_buffer *input)
 	die("invalid dump: unexpected end of file");
 }
 
-static int ends_with(const char *s, size_t len, const char *suffix)
-{
-	const size_t suffixlen = strlen(suffix);
-	if (len < suffixlen)
-		return 0;
-	return !memcmp(s + len - suffixlen, suffix, suffixlen);
-}
-
 static int parse_cat_response_line(const char *header, off_t *len)
 {
-	size_t headerlen = strlen(header);
 	uintmax_t n;
 	const char *type;
 	const char *end;
 
-	if (ends_with(header, headerlen, " missing"))
+	if (ends_with(header, " missing"))
 		return error("cat-blob reports missing blob: %s", header);
 	type = strstr(header, " blob ");
 	if (!type)
-- 
2.26.2


From 6a8afa912b4d2fe5169fbb904537d1a50a3eb66f Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 12 Mar 2020 01:31:11 -0400
Subject: [PATCH 02/11] credential: detect unrepresentable values when parsing
 urls

The credential protocol can't represent newlines in values, but URLs can
embed percent-encoded newlines in various components. A previous commit
taught the low-level writing routines to die() when encountering this,
but we can be a little friendlier to the user by detecting them earlier
and handling them gracefully.

This patch teaches credential_from_url() to notice such components,
issue a warning, and blank the credential (which will generally result
in prompting the user for a username and password). We blank the whole
credential in this case. Another option would be to blank only the
invalid component. However, we're probably better off not feeding a
partially-parsed URL result to a credential helper. We don't know how a
given helper would handle it, so we're better off to err on the side of
matching nothing rather than something unexpected.

The die() call in credential_write() is _probably_ impossible to reach
after this patch. Values should end up in credential structs only by URL
parsing (which is covered here), or by reading credential protocol input
(which by definition cannot read a newline into a value). But we should
definitely keep the low-level check, as it's our final and most accurate
line of defense against protocol injection attacks. Arguably it could
become a BUG(), but it probably doesn't matter much either way.

Note that the public interface of credential_from_url() grows a little
more than we need here. We'll use the extra flexibility in a future
patch to help fsck catch these cases.
---
 credential.c           | 36 ++++++++++++++++++++++++++++++++++--
 credential.h           | 16 ++++++++++++++++
 t/t0300-credentials.sh | 12 ++++++++++--
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/credential.c b/credential.c
index 096cc5efe9..f56a0a2c02 100644
--- a/credential.c
+++ b/credential.c
@@ -317,7 +317,22 @@ void credential_reject(struct credential *c)
 	c->approved = 0;
 }
 
-void credential_from_url(struct credential *c, const char *url)
+static int check_url_component(const char *url, int quiet,
+			       const char *name, const char *value)
+{
+	if (!value)
+		return 0;
+	if (!strchr(value, '\n'))
+		return 0;
+
+	if (!quiet)
+		warning(_("url contains a newline in its %s component: %s"),
+			name, url);
+	return -1;
+}
+
+int credential_from_url_gently(struct credential *c, const char *url,
+			       int quiet)
 {
 	const char *at, *colon, *cp, *slash, *host, *proto_end;
 
@@ -331,7 +346,7 @@ void credential_from_url(struct credential *c, const char *url)
 	 */
 	proto_end = strstr(url, "://");
 	if (!proto_end)
-		return;
+		return 0;
 	cp = proto_end + 3;
 	at = strchr(cp, '@');
 	colon = strchr(cp, ':');
@@ -366,4 +381,21 @@ void credential_from_url(struct credential *c, const char *url)
 		while (p > c->path && *p == '/')
 			*p-- = '\0';
 	}
+
+	if (check_url_component(url, quiet, "username", c->username) < 0 ||
+	    check_url_component(url, quiet, "password", c->password) < 0 ||
+	    check_url_component(url, quiet, "protocol", c->protocol) < 0 ||
+	    check_url_component(url, quiet, "host", c->host) < 0 ||
+	    check_url_component(url, quiet, "path", c->path) < 0)
+		return -1;
+
+	return 0;
+}
+
+void credential_from_url(struct credential *c, const char *url)
+{
+	if (credential_from_url_gently(c, url, 0) < 0) {
+		warning(_("skipping credential lookup for url: %s"), url);
+		credential_clear(c);
+	}
 }
diff --git a/credential.h b/credential.h
index 0c3e85e8e4..2846e47cc8 100644
--- a/credential.h
+++ b/credential.h
@@ -27,7 +27,23 @@ void credential_reject(struct credential *);
 
 int credential_read(struct credential *, FILE *);
 void credential_write(const struct credential *, FILE *);
+
+/*
+ * Parse a url into a credential struct, replacing any existing contents.
+ *
+ * Ifthe url can't be parsed (e.g., a missing "proto://" component), the
+ * resulting credential will be empty but we'll still return success from the
+ * "gently" form.
+ *
+ * If we encounter a component which cannot be represented as a credential
+ * value (e.g., because it contains a newline), the "gently" form will return
+ * an error but leave the broken state in the credential object for further
+ * examination.  The non-gentle form will issue a warning to stderr and return
+ * an empty credential.
+ */
 void credential_from_url(struct credential *, const char *url);
+int credential_from_url_gently(struct credential *, const char *url, int quiet);
+
 int credential_match(const struct credential *have,
 		     const struct credential *want);
 
diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh
index acc167c774..88bcf6534b 100755
--- a/t/t0300-credentials.sh
+++ b/t/t0300-credentials.sh
@@ -289,9 +289,17 @@ test_expect_success 'http paths can be part of context' '
 	EOF
 '
 
-test_expect_success 'url parser rejects embedded newlines' '
-	test_must_fail git credential fill <<-\EOF
+test_expect_success 'url parser ignores embedded newlines' '
+	check fill <<-EOF
 	url=https://one.example.com?%0ahost=two.example.com/
+	--
+	username=askpass-username
+	password=askpass-password
+	--
+	warning: url contains a newline in its host component: https://one.example.com?%0ahost=two.example.com/
+	warning: skipping credential lookup for url: https://one.example.com?%0ahost=two.example.com/
+	askpass: Username:
+	askpass: Password:
 	EOF
 '
 
-- 
2.26.2


From 3b80da45e70d60a4abc91acc45d23d18092708a4 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 11 Mar 2020 18:48:24 -0400
Subject: [PATCH 03/11] fsck: detect gitmodules URLs with embedded newlines

The credential protocol can't handle values with newlines. We already
detect and block any such URLs from being used with credential helpers,
but let's also add an fsck check to detect and block gitmodules files
with such URLs. That will let us notice the problem earlier when
transfer.fsckObjects is turned on. And in particular it will prevent bad
objects from spreading, which may protect downstream users running older
versions of Git.

We'll file this under the existing gitmodulesUrl flag, which covers URLs
with option injection. There's really no need to distinguish the exact
flaw in the URL in this context. Likewise, I've expanded the description
of t7416 to cover all types of bogus URLs.
---
 fsck.c                        | 16 +++++++++++++++-
 t/t7416-submodule-dash-url.sh | 18 +++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fsck.c b/fsck.c
index 1ed7172ee6..b72faea0c2 100644
--- a/fsck.c
+++ b/fsck.c
@@ -8,6 +8,7 @@
 #include "fsck.h"
 #include "hashmap.h"
 #include "submodule.h"
+#include "credential.h"
 
 struct oidhash_entry {
 	struct hashmap_entry ent;
@@ -419,6 +420,19 @@ static int fsck_tag(struct tag *tag, const char *data,
 	return 0;
 }
 
+static int check_submodule_url(const char *url)
+{
+	struct credential c = CREDENTIAL_INIT;
+	int ret;
+
+	if (looks_like_command_line_option(url))
+		return -1;
+
+	ret = credential_from_url_gently(&c, url, 1);
+	credential_clear(&c);
+	return ret;
+}
+
 struct fsck_gitmodules_data {
 	struct object *obj;
 	fsck_error error_func;
@@ -442,7 +456,7 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
 					      "disallowed submodule name: %s",
 					      name);
 	if (!strcmp(key, "url") && value &&
-	    looks_like_command_line_option(value))
+	    check_submodule_url(value) < 0)
 		data->ret += data->error_func(data->obj, FSCK_ERROR,
 					      "disallowed submodule url: %s",
 					      value);
diff --git a/t/t7416-submodule-dash-url.sh b/t/t7416-submodule-dash-url.sh
index e85f2e9d29..5356d8766a 100755
--- a/t/t7416-submodule-dash-url.sh
+++ b/t/t7416-submodule-dash-url.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-test_description='check handling of .gitmodule url with dash'
+test_description='check handling of disallowed .gitmodule urls'
 . ./test-lib.sh
 
 test_expect_success 'create submodule with protected dash in url' '
@@ -46,4 +46,20 @@ test_expect_success 'fsck rejects unprotected dash' '
 	test_i18ngrep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck rejects embedded newline in url' '
+	# create an orphan branch to avoid existing .gitmodules objects
+	git checkout --orphan newline &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+	url = "https://one.example.com?%0ahost=two.example.com/foo.git"
+	EOF
+	git add .gitmodules &&
+	git commit -m "gitmodules with newline" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
 test_done
-- 
2.26.2


From a44bd3335d9dd715e2d141a593704aababcc83e4 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sat, 18 Apr 2020 20:47:30 -0700
Subject: [PATCH 04/11] t0300: use more realistic inputs

Many of the tests in t0300 give partial inputs to git-credential,
omitting a protocol or hostname. We're checking only high-level things
like whether and how helpers are invoked at all, and we don't care about
specific hosts. However, in preparation for tightening up the rules
about when we're willing to run a helper, let's start using input that's
a bit more realistic: pretend as if http://example.com is being
examined.

This shouldn't change the point of any of the tests, but do note we have
to adjust the expected output to accommodate this (filling a credential
will repeat back the protocol/host fields to stdout, and the helper
debug messages and askpass prompt will change on stderr).

Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 t/t0300-credentials.sh | 76 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 3 deletions(-)

diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh
index 88bcf6534b..2f0ff6bcae 100755
--- a/t/t0300-credentials.sh
+++ b/t/t0300-credentials.sh
@@ -35,43 +35,71 @@ test_expect_success 'setup helper scripts' '
 
 test_expect_success 'credential_fill invokes helper' '
 	check fill "verbatim foo bar" <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=foo
 	password=bar
 	--
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	EOF
 '
 
 test_expect_success 'credential_fill invokes multiple helpers' '
 	check fill useless "verbatim foo bar" <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=foo
 	password=bar
 	--
 	useless: get
+	useless: protocol=http
+	useless: host=example.com
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	EOF
 '
 
 test_expect_success 'credential_fill stops when we get a full response' '
 	check fill "verbatim one two" "verbatim three four" <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=one
 	password=two
 	--
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	EOF
 '
 
 test_expect_success 'credential_fill continues through partial response' '
 	check fill "verbatim one \"\"" "verbatim two three" <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=two
 	password=three
 	--
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: username=one
 	EOF
 '
@@ -97,14 +125,20 @@ test_expect_success 'credential_fill passes along metadata' '
 
 test_expect_success 'credential_approve calls all helpers' '
 	check approve useless "verbatim one two" <<-\EOF
+	protocol=http
+	host=example.com
 	username=foo
 	password=bar
 	--
 	--
 	useless: store
+	useless: protocol=http
+	useless: host=example.com
 	useless: username=foo
 	useless: password=bar
 	verbatim: store
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: username=foo
 	verbatim: password=bar
 	EOF
@@ -112,6 +146,8 @@ test_expect_success 'credential_approve calls all helpers' '
 
 test_expect_success 'do not bother storing password-less credential' '
 	check approve useless <<-\EOF
+	protocol=http
+	host=example.com
 	username=foo
 	--
 	--
@@ -121,14 +157,20 @@ test_expect_success 'do not bother storing password-less credential' '
 
 test_expect_success 'credential_reject calls all helpers' '
 	check reject useless "verbatim one two" <<-\EOF
+	protocol=http
+	host=example.com
 	username=foo
 	password=bar
 	--
 	--
 	useless: erase
+	useless: protocol=http
+	useless: host=example.com
 	useless: username=foo
 	useless: password=bar
 	verbatim: erase
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: username=foo
 	verbatim: password=bar
 	EOF
@@ -136,33 +178,49 @@ test_expect_success 'credential_reject calls all helpers' '
 
 test_expect_success 'usernames can be preserved' '
 	check fill "verbatim \"\" three" <<-\EOF
+	protocol=http
+	host=example.com
 	username=one
 	--
+	protocol=http
+	host=example.com
 	username=one
 	password=three
 	--
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: username=one
 	EOF
 '
 
 test_expect_success 'usernames can be overridden' '
 	check fill "verbatim two three" <<-\EOF
+	protocol=http
+	host=example.com
 	username=one
 	--
+	protocol=http
+	host=example.com
 	username=two
 	password=three
 	--
 	verbatim: get
+	verbatim: protocol=http
+	verbatim: host=example.com
 	verbatim: username=one
 	EOF
 '
 
 test_expect_success 'do not bother completing already-full credential' '
 	check fill "verbatim three four" <<-\EOF
+	protocol=http
+	host=example.com
 	username=one
 	password=two
 	--
+	protocol=http
+	host=example.com
 	username=one
 	password=two
 	--
@@ -174,23 +232,31 @@ test_expect_success 'do not bother completing already-full credential' '
 # askpass helper is run, we know the internal getpass is working.
 test_expect_success 'empty helper list falls back to internal getpass' '
 	check fill <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=askpass-username
 	password=askpass-password
 	--
-	askpass: Username:
-	askpass: Password:
+	askpass: Username for '\''http://example.com'\'':
+	askpass: Password for '\''http://askpass-username@example.com'\'':
 	EOF
 '
 
 test_expect_success 'internal getpass does not ask for known username' '
 	check fill <<-\EOF
+	protocol=http
+	host=example.com
 	username=foo
 	--
+	protocol=http
+	host=example.com
 	username=foo
 	password=askpass-password
 	--
-	askpass: Password:
+	askpass: Password for '\''http://foo@example.com'\'':
 	EOF
 '
 
@@ -202,7 +268,11 @@ HELPER="!f() {
 test_expect_success 'respect configured credentials' '
 	test_config credential.helper "$HELPER" &&
 	check fill <<-\EOF
+	protocol=http
+	host=example.com
 	--
+	protocol=http
+	host=example.com
 	username=foo
 	password=bar
 	--
-- 
2.26.2


From 6eae9cd97a8f481c04dd4f8ad507eeff69a335da Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sat, 18 Apr 2020 20:48:05 -0700
Subject: [PATCH 05/11] credential: parse URL without host as empty host, not
 unset

We may feed a URL like "cert:///path/to/cert.pem" into the credential
machinery to get the key for a client-side certificate. That
credential has no hostname field, which is about to be disallowed (to
avoid confusion with protocols where a helper _would_ expect a
hostname).

This means as of the next patch, credential helpers won't work for
unlocking certs. Let's fix that by doing two things:

  - when we parse a url with an empty host, set the host field to the
    empty string (asking only to match stored entries with an empty
    host) rather than NULL (asking to match _any_ host).

  - when we build a cert:// credential by hand, similarly assign an
    empty string

It's the latter that is more likely to impact real users in practice,
since it's what's used for http connections. But we don't have good
infrastructure to test it.

The url-parsing version will help anybody using git-credential in a
script, and is easy to test.

Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 credential.c           |  3 +--
 http.c                 |  1 +
 t/t0300-credentials.sh | 17 +++++++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/credential.c b/credential.c
index f56a0a2c02..745b0e6a6d 100644
--- a/credential.c
+++ b/credential.c
@@ -369,8 +369,7 @@ int credential_from_url_gently(struct credential *c, const char *url,
 
 	if (proto_end - url > 0)
 		c->protocol = xmemdupz(url, proto_end - url);
-	if (slash - host > 0)
-		c->host = url_decode_mem(host, slash - host);
+	c->host = url_decode_mem(host, slash - host);
 	/* Trim leading and trailing slashes from path */
 	while (*slash == '/')
 		slash++;
diff --git a/http.c b/http.c
index 3320590e4b..1533c9338c 100644
--- a/http.c
+++ b/http.c
@@ -269,6 +269,7 @@ static int has_cert_password(void)
 		return 0;
 	if (!cert_auth.password) {
 		cert_auth.protocol = xstrdup("cert");
+		cert_auth.host = xstrdup("");
 		cert_auth.username = xstrdup("");
 		cert_auth.path = xstrdup(ssl_cert);
 		credential_fill(&cert_auth);
diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh
index 2f0ff6bcae..0bce92af98 100755
--- a/t/t0300-credentials.sh
+++ b/t/t0300-credentials.sh
@@ -373,4 +373,21 @@ test_expect_success 'url parser ignores embedded newlines' '
 	EOF
 '
 
+test_expect_success 'host-less URLs are parsed as empty host' '
+	check fill "verbatim foo bar" <<-\EOF
+	url=cert:///path/to/cert.pem
+	--
+	protocol=cert
+	host=
+	path=path/to/cert.pem
+	username=foo
+	password=bar
+	--
+	verbatim: get
+	verbatim: protocol=cert
+	verbatim: host=
+	verbatim: path=path/to/cert.pem
+	EOF
+'
+
 test_done
-- 
2.26.2


From 215df2b679d0f0d3458fafe27f4e3a3f05544400 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sat, 18 Apr 2020 20:50:48 -0700
Subject: [PATCH 06/11] credential: refuse to operate when missing host or
 protocol

The credential helper protocol was designed to be very flexible: the
fields it takes as input are treated as a pattern, and any missing
fields are taken as wildcards. This allows unusual things like:

  echo protocol=https | git credential reject

to delete all stored https credentials (assuming the helpers themselves
treat the input that way). But when helpers are invoked automatically by
Git, this flexibility works against us. If for whatever reason we don't
have a "host" field, then we'd match _any_ host. When you're filling a
credential to send to a remote server, this is almost certainly not what
you want.

Prevent this at the layer that writes to the credential helper. Add a
check to the credential API that the host and protocol are always passed
in, and add an assertion to the credential_write function that speaks
credential helper protocol to be doubly sure.

There are a few ways this can be triggered in practice:

  - the "git credential" command passes along arbitrary credential
    parameters it reads from stdin.

  - until the previous patch, when the host field of a URL is empty, we
    would leave it unset (rather than setting it to the empty string)

  - a URL like "example.com/foo.git" is treated by curl as if "http://"
    was present, but our parser sees it as a non-URL and leaves all
    fields unset

  - the recent fix for URLs with embedded newlines blanks the URL but
    otherwise continues. Rather than having the desired effect of
    looking up no credential at all, many helpers will return _any_
    credential

Our earlier test for an embedded newline didn't catch this because it
only checked that the credential was cleared, but didn't configure an
actual helper. Configuring the "verbatim" helper in the test would show
that it is invoked (it's obviously a silly helper which doesn't look at
its input, but the point is that it shouldn't be run at all). Since
we're switching this case to die(), we don't need to bother with a
helper. We can see the new behavior just by checking that the operation
fails.

We'll add new tests covering partial input as well (these can be
triggered through various means with url-parsing, but it's simpler to
just check them directly, as we know we are covered even if the url
parser changes behavior in the future).

[jn: changed to die() instead of logging and showing a manual
 username/password prompt]

Reported-by: Carlo Arenas <carenas@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 credential.c           | 20 ++++++++++++++------
 t/t0300-credentials.sh | 34 ++++++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/credential.c b/credential.c
index 745b0e6a6d..930f8b9fe9 100644
--- a/credential.c
+++ b/credential.c
@@ -85,6 +85,11 @@ static int proto_is_http(const char *s)
 
 static void credential_apply_config(struct credential *c)
 {
+	if (!c->host)
+		die(_("refusing to work with credential missing host field"));
+	if (!c->protocol)
+		die(_("refusing to work with credential missing protocol field"));
+
 	if (c->configured)
 		return;
 	git_config(credential_config_callback, c);
@@ -186,8 +191,11 @@ int credential_read(struct credential *c, FILE *fp)
 	return 0;
 }
 
-static void credential_write_item(FILE *fp, const char *key, const char *value)
+static void credential_write_item(FILE *fp, const char *key, const char *value,
+				  int required)
 {
+	if (!value && required)
+		die("BUG: credential value for %s is missing", key);
 	if (!value)
 		return;
 	if (strchr(value, '\n'))
@@ -197,11 +205,11 @@ static void credential_write_item(FILE *fp, const char *key, const char *value)
 
 void credential_write(const struct credential *c, FILE *fp)
 {
-	credential_write_item(fp, "protocol", c->protocol);
-	credential_write_item(fp, "host", c->host);
-	credential_write_item(fp, "path", c->path);
-	credential_write_item(fp, "username", c->username);
-	credential_write_item(fp, "password", c->password);
+	credential_write_item(fp, "protocol", c->protocol, 1);
+	credential_write_item(fp, "host", c->host, 1);
+	credential_write_item(fp, "path", c->path, 0);
+	credential_write_item(fp, "username", c->username, 0);
+	credential_write_item(fp, "password", c->password, 0);
 }
 
 static int run_credential_helper(struct credential *c,
diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh
index 0bce92af98..fa903bc9ba 100755
--- a/t/t0300-credentials.sh
+++ b/t/t0300-credentials.sh
@@ -359,18 +359,16 @@ test_expect_success 'http paths can be part of context' '
 	EOF
 '
 
-test_expect_success 'url parser ignores embedded newlines' '
-	check fill <<-EOF
+test_expect_success 'url parser rejects embedded newlines' '
+	test_must_fail git credential fill 2>stderr <<-\EOF &&
 	url=https://one.example.com?%0ahost=two.example.com/
-	--
-	username=askpass-username
-	password=askpass-password
-	--
+	EOF
+	cat >expect <<-\EOF &&
 	warning: url contains a newline in its host component: https://one.example.com?%0ahost=two.example.com/
 	warning: skipping credential lookup for url: https://one.example.com?%0ahost=two.example.com/
-	askpass: Username:
-	askpass: Password:
+	fatal: refusing to work with credential missing host field
 	EOF
+	test_i18ncmp expect stderr
 '
 
 test_expect_success 'host-less URLs are parsed as empty host' '
@@ -390,4 +388,24 @@ test_expect_success 'host-less URLs are parsed as empty host' '
 	EOF
 '
 
+test_expect_success 'credential system refuses to work with missing host' '
+	test_must_fail git credential fill 2>stderr <<-\EOF &&
+	protocol=http
+	EOF
+	cat >expect <<-\EOF &&
+	fatal: refusing to work with credential missing host field
+	EOF
+	test_i18ncmp expect stderr
+'
+
+test_expect_success 'credential system refuses to work with missing protocol' '
+	test_must_fail git credential fill 2>stderr <<-\EOF &&
+	host=example.com
+	EOF
+	cat >expect <<-\EOF &&
+	fatal: refusing to work with credential missing protocol field
+	EOF
+	test_i18ncmp expect stderr
+'
+
 test_done
-- 
2.26.2


From 38cf97b5bb0fe73ae21f9a840caeb26c600cb624 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Sat, 18 Apr 2020 20:52:34 -0700
Subject: [PATCH 07/11] fsck: convert gitmodules url to URL passed to curl

In 07259e74ec1 (fsck: detect gitmodules URLs with embedded newlines,
2020-03-11), git fsck learned to check whether URLs in .gitmodules could
be understood by the credential machinery when they are handled by
git-remote-curl.

However, the check is overbroad: it checks all URLs instead of only
URLs that would be passed to git-remote-curl. In principle a git:// or
file:/// URL does not need to follow the same conventions as an http://
URL; in particular, git:// and file:// protocols are not succeptible to
issues in the credential API because they do not support attaching
credentials.

In the HTTP case, the URL in .gitmodules does not always match the URL
that would be passed to git-remote-curl and the credential machinery:
Git's URL syntax allows specifying a remote helper followed by a "::"
delimiter and a URL to be passed to it, so that

	git ls-remote http::https://example.com/repo.git

invokes git-remote-http with https://example.com/repo.git as its URL
argument. With today's checks, that distinction does not make a
difference, but for a check we are about to introduce (for empty URL
schemes) it will matter.

.gitmodules files also support relative URLs. To ensure coverage for the
https based embedded-newline attack, urldecode and check them directly
for embedded newlines.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
---
 fsck.c                        | 94 +++++++++++++++++++++++++++++++++--
 t/t7416-submodule-dash-url.sh | 29 +++++++++++
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/fsck.c b/fsck.c
index b72faea0c2..a265568eda 100644
--- a/fsck.c
+++ b/fsck.c
@@ -8,6 +8,7 @@
 #include "fsck.h"
 #include "hashmap.h"
 #include "submodule.h"
+#include "url.h"
 #include "credential.h"
 
 struct oidhash_entry {
@@ -420,17 +421,100 @@ static int fsck_tag(struct tag *tag, const char *data,
 	return 0;
 }
 
+/*
+ * Like builtin/submodule--helper.c's starts_with_dot_slash, but without
+ * relying on the platform-dependent is_dir_sep helper.
+ *
+ * This is for use in checking whether a submodule URL is interpreted as
+ * relative to the current directory on any platform, since \ is a
+ * directory separator on Windows but not on other platforms.
+ */
+static int starts_with_dot_slash(const char *str)
+{
+	return str[0] == '.' && (str[1] == '/' || str[1] == '\\');
+}
+
+/*
+ * Like starts_with_dot_slash, this is a variant of submodule--helper's
+ * helper of the same name with the twist that it accepts backslash as a
+ * directory separator even on non-Windows platforms.
+ */
+static int starts_with_dot_dot_slash(const char *str)
+{
+	return str[0] == '.' && starts_with_dot_slash(str + 1);
+}
+
+static int submodule_url_is_relative(const char *url)
+{
+	return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
+}
+
+/*
+ * Check whether a transport is implemented by git-remote-curl.
+ *
+ * If it is, returns 1 and writes the URL that would be passed to
+ * git-remote-curl to the "out" parameter.
+ *
+ * Otherwise, returns 0 and leaves "out" untouched.
+ *
+ * Examples:
+ *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
+ *   https://example.com/repo.git -> 1, https://example.com/repo.git
+ *   git://example.com/repo.git -> 0
+ *
+ * This is for use in checking for previously exploitable bugs that
+ * required a submodule URL to be passed to git-remote-curl.
+ */
+static int url_to_curl_url(const char *url, const char **out)
+{
+	/*
+	 * We don't need to check for case-aliases, "http.exe", and so
+	 * on because in the default configuration, is_transport_allowed
+	 * prevents URLs with those schemes from being cloned
+	 * automatically.
+	 */
+	if ((*out = skip_prefix(url, "http::")) ||
+	    (*out = skip_prefix(url, "https::")) ||
+	    (*out = skip_prefix(url, "ftp::")) ||
+	    (*out = skip_prefix(url, "ftps::")))
+		return 1;
+	if (starts_with(url, "http://") ||
+	    starts_with(url, "https://") ||
+	    starts_with(url, "ftp://") ||
+	    starts_with(url, "ftps://")) {
+		*out = url;
+		return 1;
+	}
+	return 0;
+}
+
 static int check_submodule_url(const char *url)
 {
-	struct credential c = CREDENTIAL_INIT;
-	int ret;
+	const char *curl_url;
 
 	if (looks_like_command_line_option(url))
 		return -1;
 
-	ret = credential_from_url_gently(&c, url, 1);
-	credential_clear(&c);
-	return ret;
+	if (submodule_url_is_relative(url)) {
+		/*
+		 * This could be appended to an http URL and url-decoded;
+		 * check for malicious characters.
+		 */
+		char *decoded = url_decode(url);
+		int has_nl = !!strchr(decoded, '\n');
+		free(decoded);
+		if (has_nl)
+			return -1;
+	}
+
+	else if (url_to_curl_url(url, &curl_url)) {
+		struct credential c = CREDENTIAL_INIT;
+		int ret = credential_from_url_gently(&c, curl_url, 1);
+		credential_clear(&c);
+		return ret;
+	}
+
+	return 0;
 }
 
 struct fsck_gitmodules_data {
diff --git a/t/t7416-submodule-dash-url.sh b/t/t7416-submodule-dash-url.sh
index 5356d8766a..41bec56953 100755
--- a/t/t7416-submodule-dash-url.sh
+++ b/t/t7416-submodule-dash-url.sh
@@ -46,6 +46,20 @@ test_expect_success 'fsck rejects unprotected dash' '
 	test_i18ngrep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck permits embedded newline with unrecognized scheme' '
+	git checkout --orphan newscheme &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = "data://acjbkd%0akajfdickajkd"
+	EOF
+	git add .gitmodules &&
+	git commit -m "gitmodules with unrecognized scheme" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	git push dst HEAD
+'
+
 test_expect_success 'fsck rejects embedded newline in url' '
 	# create an orphan branch to avoid existing .gitmodules objects
 	git checkout --orphan newline &&
@@ -62,4 +76,19 @@ test_expect_success 'fsck rejects embedded newline in url' '
 	grep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck rejects embedded newline in relative url' '
+	git checkout --orphan relative-newline &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = "./%0ahost=two.example.com/foo.git"
+	EOF
+	git add .gitmodules &&
+	git commit -m "relative url with newline" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
 test_done
-- 
2.26.2


From d98d29c00f88d6fe399dd11fd77b88074bd49924 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sat, 18 Apr 2020 20:53:09 -0700
Subject: [PATCH 08/11] credential: die() when parsing invalid urls

When we try to initialize credential loading by URL and find that the
URL is invalid, we set all fields to NULL in order to avoid acting on
malicious input. Later when we request credentials, we diagonse the
erroneous input:

	fatal: refusing to work with credential missing host field

This is problematic in two ways:

- The message doesn't tell the user *why* we are missing the host
  field, so they can't tell from this message alone how to recover.
  There can be intervening messages after the original warning of
  bad input, so the user may not have the context to put two and two
  together.

- The error only occurs when we actually need to get a credential.  If
  the URL permits anonymous access, the only encouragement the user gets
  to correct their bogus URL is a quiet warning.

  This is inconsistent with the check we perform in fsck, where any use
  of such a URL as a submodule is an error.

When we see such a bogus URL, let's not try to be nice and continue
without helpers. Instead, die() immediately. This is simpler and
obviously safe. And there's very little chance of disrupting a normal
workflow.

It's _possible_ that somebody has a legitimate URL with a raw newline in
it. It already wouldn't work with credential helpers, so this patch
steps that up from an inconvenience to "we will refuse to work with it
at all". If such a case does exist, we should figure out a way to work
with it (especially if the newline is only in the path component, which
we normally don't even pass to helpers). But until we see a real report,
we're better off being defensive.

Reported-by: Carlo Arenas <carenas@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 credential.c           | 6 ++----
 t/t0300-credentials.sh | 3 +--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/credential.c b/credential.c
index 930f8b9fe9..725e917b2d 100644
--- a/credential.c
+++ b/credential.c
@@ -401,8 +401,6 @@ int credential_from_url_gently(struct credential *c, const char *url,
 
 void credential_from_url(struct credential *c, const char *url)
 {
-	if (credential_from_url_gently(c, url, 0) < 0) {
-		warning(_("skipping credential lookup for url: %s"), url);
-		credential_clear(c);
-	}
+	if (credential_from_url_gently(c, url, 0) < 0)
+		die(_("credential url cannot be parsed: %s"), url);
 }
diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh
index fa903bc9ba..9892464eba 100755
--- a/t/t0300-credentials.sh
+++ b/t/t0300-credentials.sh
@@ -365,8 +365,7 @@ test_expect_success 'url parser rejects embedded newlines' '
 	EOF
 	cat >expect <<-\EOF &&
 	warning: url contains a newline in its host component: https://one.example.com?%0ahost=two.example.com/
-	warning: skipping credential lookup for url: https://one.example.com?%0ahost=two.example.com/
-	fatal: refusing to work with credential missing host field
+	fatal: credential url cannot be parsed: https://one.example.com?%0ahost=two.example.com/
 	EOF
 	test_i18ncmp expect stderr
 '
-- 
2.26.2


From accb9edcd64007cf940be7b9e6a209ebcc650e8c Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Sat, 18 Apr 2020 20:54:13 -0700
Subject: [PATCH 09/11] credential: treat URL without scheme as invalid

libcurl permits making requests without a URL scheme specified.  In
this case, it guesses the URL from the hostname, so I can run

	git ls-remote http::ftp.example.com/path/to/repo

and it would make an FTP request.

Any user intentionally using such a URL is likely to have made a typo.
Unfortunately, credential_from_url is not able to determine the host and
protocol in order to determine appropriate credentials to send, and
until "credential: refuse to operate when missing host or protocol",
this resulted in another host's credentials being leaked to the named
host.

Teach credential_from_url_gently to consider such a URL to be invalid
so that fsck can detect and block gitmodules files with such URLs,
allowing server operators to avoid serving them to downstream users
running older versions of Git.

This also means that when such URLs are passed on the command line, Git
will print a clearer error so affected users can switch to the simpler
URL that explicitly specifies the host and protocol they intend.

One subtlety: .gitmodules files can contain relative URLs, representing
a URL relative to the URL they were cloned from.  The relative URL
resolver used for .gitmodules can follow ".." components out of the path
part and past the host part of a URL, meaning that such a relative URL
can be used to traverse from a https://foo.example.com/innocent
superproject to a https::attacker.example.com/exploit submodule.
Fortunately a leading ':' in the first path component after a series of
leading './' and '../' components is unlikely to show up in other
contexts, so we can catch this by detecting that pattern.

Reported-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
---
 credential.c                  |  7 ++++--
 fsck.c                        | 47 +++++++++++++++++++++++++++++++++--
 t/t5550-http-fetch.sh         |  5 ++++
 t/t7416-submodule-dash-url.sh | 32 ++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/credential.c b/credential.c
index 725e917b2d..f857f7d1e1 100644
--- a/credential.c
+++ b/credential.c
@@ -353,8 +353,11 @@ int credential_from_url_gently(struct credential *c, const char *url,
 	 *   (3) proto://<user>:<pass>@<host>/...
 	 */
 	proto_end = strstr(url, "://");
-	if (!proto_end)
-		return 0;
+	if (!proto_end) {
+		if (!quiet)
+			warning(_("url has no scheme: %s"), url);
+		return -1;
+	}
 	cp = proto_end + 3;
 	at = strchr(cp, '@');
 	colon = strchr(cp, ':');
diff --git a/fsck.c b/fsck.c
index a265568eda..90aea117c9 100644
--- a/fsck.c
+++ b/fsck.c
@@ -449,6 +449,34 @@ static int submodule_url_is_relative(const char *url)
 	return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
 }
 
+/*
+ * Count directory components that a relative submodule URL should chop
+ * from the remote_url it is to be resolved against.
+ *
+ * In other words, this counts "../" components at the start of a
+ * submodule URL.
+ *
+ * Returns the number of directory components to chop and writes a
+ * pointer to the next character of url after all leading "./" and
+ * "../" components to out.
+ */
+static int count_leading_dotdots(const char *url, const char **out)
+{
+	int result = 0;
+	while (1) {
+		if (starts_with_dot_dot_slash(url)) {
+			result++;
+			url += strlen("../");
+			continue;
+		}
+		if (starts_with_dot_slash(url)) {
+			url += strlen("./");
+			continue;
+		}
+		*out = url;
+		return result;
+	}
+}
 /*
  * Check whether a transport is implemented by git-remote-curl.
  *
@@ -496,15 +524,30 @@ static int check_submodule_url(const char *url)
 		return -1;
 
 	if (submodule_url_is_relative(url)) {
+		char *decoded;
+		const char *next;
+		int has_nl;
+
 		/*
 		 * This could be appended to an http URL and url-decoded;
 		 * check for malicious characters.
 		 */
-		char *decoded = url_decode(url);
-		int has_nl = !!strchr(decoded, '\n');
+		decoded = url_decode(url);
+		has_nl = !!strchr(decoded, '\n');
+
 		free(decoded);
 		if (has_nl)
 			return -1;
+
+		/*
+		 * URLs which escape their root via "../" can overwrite
+		 * the host field and previous components, resolving to
+		 * URLs like https::example.com/submodule.git that were
+		 * susceptible to CVE-2020-11008.
+		 */
+		if (count_leading_dotdots(url, &next) > 0 &&
+		    *next == ':')
+			return -1;
 	}
 
 	else if (url_to_curl_url(url, &curl_url)) {
diff --git a/t/t5550-http-fetch.sh b/t/t5550-http-fetch.sh
index f7d0f146f0..29fdae9a68 100755
--- a/t/t5550-http-fetch.sh
+++ b/t/t5550-http-fetch.sh
@@ -172,5 +172,10 @@ test_expect_success 'did not use upload-pack service' '
 	test_cmp exp act
 '
 
+test_expect_success 'remote-http complains cleanly about malformed urls' '
+	test_must_fail git remote-http http::/example.com/repo.git 2>stderr &&
+	test_i18ngrep "url has no scheme" stderr
+'
+
 stop_httpd
 test_done
diff --git a/t/t7416-submodule-dash-url.sh b/t/t7416-submodule-dash-url.sh
index 41bec56953..1dc8333eb3 100755
--- a/t/t7416-submodule-dash-url.sh
+++ b/t/t7416-submodule-dash-url.sh
@@ -46,6 +46,38 @@ test_expect_success 'fsck rejects unprotected dash' '
 	test_i18ngrep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck rejects missing URL scheme' '
+	git checkout --orphan missing-scheme &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = http::one.example.com/foo.git
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "gitmodules with missing URL scheme" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
+test_expect_success 'fsck rejects relative URL resolving to missing scheme' '
+	git checkout --orphan relative-missing-scheme &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = "..\\../.\\../:one.example.com/foo.git"
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "gitmodules with relative URL that strips off scheme" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
 test_expect_success 'fsck permits embedded newline with unrecognized scheme' '
 	git checkout --orphan newscheme &&
 	cat >.gitmodules <<-\EOF &&
-- 
2.26.2


From 8bd36d7015bc9ffbd8ef58227ae5d9928fa708cb Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Sat, 18 Apr 2020 20:54:57 -0700
Subject: [PATCH 10/11] credential: treat URL with empty scheme as invalid

Until "credential: refuse to operate when missing host or protocol",
Git's credential handling code interpreted URLs with empty scheme to
mean "give me credentials matching this host for any protocol".

Luckily libcurl does not recognize such URLs (it tries to look for a
protocol named "" and fails). Just in case that changes, let's reject
them within Git as well. This way, credential_from_url is guaranteed to
always produce a "struct credential" with protocol and host set.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 credential.c                  |  5 ++---
 t/t5550-http-fetch.sh         |  9 +++++++++
 t/t7416-submodule-dash-url.sh | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/credential.c b/credential.c
index f857f7d1e1..323f9025f0 100644
--- a/credential.c
+++ b/credential.c
@@ -353,7 +353,7 @@ int credential_from_url_gently(struct credential *c, const char *url,
 	 *   (3) proto://<user>:<pass>@<host>/...
 	 */
 	proto_end = strstr(url, "://");
-	if (!proto_end) {
+	if (!proto_end || proto_end == url) {
 		if (!quiet)
 			warning(_("url has no scheme: %s"), url);
 		return -1;
@@ -378,8 +378,7 @@ int credential_from_url_gently(struct credential *c, const char *url,
 		host = at + 1;
 	}
 
-	if (proto_end - url > 0)
-		c->protocol = xmemdupz(url, proto_end - url);
+	c->protocol = xmemdupz(url, proto_end - url);
 	c->host = url_decode_mem(host, slash - host);
 	/* Trim leading and trailing slashes from path */
 	while (*slash == '/')
diff --git a/t/t5550-http-fetch.sh b/t/t5550-http-fetch.sh
index 29fdae9a68..afeb90fa66 100755
--- a/t/t5550-http-fetch.sh
+++ b/t/t5550-http-fetch.sh
@@ -177,5 +177,14 @@ test_expect_success 'remote-http complains cleanly about malformed urls' '
 	test_i18ngrep "url has no scheme" stderr
 '
 
+# NEEDSWORK: Writing commands to git-remote-curl can race against the latter
+# erroring out, producing SIGPIPE. Remove "ok=sigpipe" once transport-helper has
+# learned to handle early remote helper failures more cleanly.
+test_expect_success 'remote-http complains cleanly about empty scheme' '
+	test_must_fail ok=sigpipe git ls-remote \
+		http::${HTTPD_URL#http}/dumb/repo.git 2>stderr &&
+	test_i18ngrep "url has no scheme" stderr
+'
+
 stop_httpd
 test_done
diff --git a/t/t7416-submodule-dash-url.sh b/t/t7416-submodule-dash-url.sh
index 1dc8333eb3..9c7a8113fa 100755
--- a/t/t7416-submodule-dash-url.sh
+++ b/t/t7416-submodule-dash-url.sh
@@ -78,6 +78,38 @@ test_expect_success 'fsck rejects relative URL resolving to missing scheme' '
 	grep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck rejects empty URL scheme' '
+	git checkout --orphan empty-scheme &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = http::://one.example.com/foo.git
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "gitmodules with empty URL scheme" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
+test_expect_success 'fsck rejects relative URL resolving to empty scheme' '
+	git checkout --orphan relative-empty-scheme &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = ../../../:://one.example.com/foo.git
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "relative gitmodules URL resolving to empty scheme" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
 test_expect_success 'fsck permits embedded newline with unrecognized scheme' '
 	git checkout --orphan newscheme &&
 	cat >.gitmodules <<-\EOF &&
-- 
2.26.2


From e9d4a9cf1445cff676b0147578a62be6a916f675 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Sat, 18 Apr 2020 20:57:22 -0700
Subject: [PATCH 11/11] fsck: reject URL with empty host in .gitmodules

Git's URL parser interprets

	https:///example.com/repo.git

to have no host and a path of "example.com/repo.git".  Curl, on the
other hand, internally redirects it to https://example.com/repo.git.  As
a result, until "credential: parse URL without host as empty host, not
unset", tricking a user into fetching from such a URL would cause Git to
send credentials for another host to example.com.

Teach fsck to block and detect .gitmodules files using such a URL to
prevent sharing them with Git versions that are not yet protected.

A relative URL in a .gitmodules file could also be used to trigger this.
The relative URL resolver used for .gitmodules does not normalize
sequences of slashes and can follow ".." components out of the path part
and to the host part of a URL, meaning that such a relative URL can be
used to traverse from a https://foo.example.com/innocent superproject to
a https:///attacker.example.com/exploit submodule. Fortunately,
redundant extra slashes in .gitmodules are rare, so we can catch this by
detecting one after a leading sequence of "./" and "../" components.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
---
 fsck.c                        | 10 +++++++---
 t/t7416-submodule-dash-url.sh | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/fsck.c b/fsck.c
index 90aea117c9..b5e0a79cda 100644
--- a/fsck.c
+++ b/fsck.c
@@ -542,17 +542,21 @@ static int check_submodule_url(const char *url)
 		/*
 		 * URLs which escape their root via "../" can overwrite
 		 * the host field and previous components, resolving to
-		 * URLs like https::example.com/submodule.git that were
+		 * URLs like https::example.com/submodule.git and
+		 * https:///example.com/submodule.git that were
 		 * susceptible to CVE-2020-11008.
 		 */
 		if (count_leading_dotdots(url, &next) > 0 &&
-		    *next == ':')
+		    (*next == ':' || *next == '/'))
 			return -1;
 	}
 
 	else if (url_to_curl_url(url, &curl_url)) {
 		struct credential c = CREDENTIAL_INIT;
-		int ret = credential_from_url_gently(&c, curl_url, 1);
+		int ret = 0;
+		if (credential_from_url_gently(&c, curl_url, 1) ||
+		    !*c.host)
+			ret = -1;
 		credential_clear(&c);
 		return ret;
 	}
diff --git a/t/t7416-submodule-dash-url.sh b/t/t7416-submodule-dash-url.sh
index 9c7a8113fa..e65238f3c5 100755
--- a/t/t7416-submodule-dash-url.sh
+++ b/t/t7416-submodule-dash-url.sh
@@ -110,6 +110,38 @@ test_expect_success 'fsck rejects relative URL resolving to empty scheme' '
 	grep "disallowed submodule url" err
 '
 
+test_expect_success 'fsck rejects empty hostname' '
+	git checkout --orphan empty-host &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = http:///one.example.com/foo.git
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "gitmodules with extra slashes" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
+test_expect_success 'fsck rejects relative url that produced empty hostname' '
+	git checkout --orphan messy-relative &&
+	cat >.gitmodules <<-\EOF &&
+	[submodule "foo"]
+		url = ../../..//one.example.com/foo.git
+	EOF
+	git add .gitmodules &&
+	test_tick &&
+	git commit -m "gitmodules abusing relative_path" &&
+	test_when_finished "rm -rf dst" &&
+	git init --bare dst &&
+	( cd dst && git config transfer.fsckObjects true ) &&
+	test_must_fail git push dst HEAD 2>err &&
+	grep "disallowed submodule url" err
+'
+
 test_expect_success 'fsck permits embedded newline with unrecognized scheme' '
 	git checkout --orphan newscheme &&
 	cat >.gitmodules <<-\EOF &&
-- 
2.26.2