git/connect.c

1525 lines
38 KiB
C
Raw Normal View History

#include "git-compat-util.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "pkt-line.h"
#include "quote.h"
#include "refs.h"
#include "run-command.h"
#include "remote.h"
#include "connect.h"
#include "url.h"
#include "string-list.h"
#include "oid-array.h"
#include "path.h"
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows. If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-16 17:12:52 +00:00
#include "transport.h"
#include "trace2.h"
#include "strbuf.h"
#include "version.h"
#include "protocol.h"
#include "alias.h"
#include "bundle-uri.h"
/*
 * Capability text remembered for protocol v0/v1: a copy of whatever follows
 * the NUL byte on the first advertisement line (see process_capabilities()).
 */
static char *server_capabilities_v1;
/* Capability lines remembered for protocol v2, one entry per pkt-line read. */
static struct strvec server_capabilities_v2 = STRVEC_INIT;
/* Defined further down; declared here because it is used before its definition. */
static const char *next_server_feature_value(const char *feature, size_t *len, size_t *offset);
/*
 * Decide whether the ref called `name` passes the REF_* filter in `flags`.
 *
 * A zero `flags` accepts every name. Otherwise the name must live under
 * "refs/"; REF_NORMAL additionally requires a well-formed refname, and
 * REF_HEADS / REF_TAGS select the "heads/" and "tags/" namespaces.
 *
 * Returns 1 if the ref is wanted, 0 if it is filtered out.
 */
static int check_ref(const char *name, unsigned int flags)
{
	const char *rest;
	unsigned int type_bits = flags & ~REF_NORMAL;

	if (!flags)
		return 1;

	if (!skip_prefix(name, "refs/", &rest))
		return 0;

	/* With REF_NORMAL the magic fake tag refs must be filtered out. */
	if ((flags & REF_NORMAL) &&
	    check_refname_format(rest, REFNAME_ALLOW_ONELEVEL))
		return 0;

	/* Branch heads and tags are accepted when their type bit is set. */
	if (((flags & REF_HEADS) && starts_with(rest, "heads/")) ||
	    ((flags & REF_TAGS) && starts_with(rest, "tags/")))
		return 1;

	/* No type bit requested at all: anything under refs/ is fine. */
	return type_bits == 0;
}
/*
 * Public wrapper around check_ref(): apply the REF_* filter in `flags` to
 * the name of `ref`. Returns 1 if the ref passes, 0 otherwise.
 */
int check_ref_type(const struct ref *ref, int flags)
{
	const char *refname = ref->name;

	return check_ref(refname, flags);
}
/*
 * Abort with a message explaining why the first exchange with the remote
 * failed.
 *
 * `unexpected` is non-zero when the hang-up came after the other side had
 * already sent some response, which shows it was willing to talk to us.
 * A hang-up before any response does not necessarily mean an ACL problem,
 * so the more general access-rights hint is printed in that case.
 */
static NORETURN void die_initial_contact(int unexpected)
{
	if (!unexpected)
		die(_("Could not read from remote repository.\n\n"
		      "Please make sure you have the correct access rights\n"
		      "and the repository exists."));

	die(_("the remote end hung up upon initial contact"));
}
/* Checks if the server supports the capability 'c' */
int server_supports_v2(const char *c)
{
int i;
for (i = 0; i < server_capabilities_v2.nr; i++) {
const char *out;
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
(!*out || *out == '='))
return 1;
}
return 0;
}
/* Die unless the server advertised v2 capability `c`. */
void ensure_server_supports_v2(const char *c)
{
	if (server_supports_v2(c))
		return;

	die(_("server doesn't support '%s'"), c);
}
int server_feature_v2(const char *c, const char **v)
{
int i;
for (i = 0; i < server_capabilities_v2.nr; i++) {
const char *out;
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
(*out == '=')) {
*v = out + 1;
return 1;
}
}
return 0;
}
int server_supports_feature(const char *c, const char *feature,
int die_on_error)
{
int i;
for (i = 0; i < server_capabilities_v2.nr; i++) {
const char *out;
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
(!*out || *(out++) == '=')) {
if (parse_feature_request(out, feature))
return 1;
else
break;
}
}
if (die_on_error)
die(_("server doesn't support feature '%s'"), feature);
return 0;
}
/*
 * Read the v2 capability advertisement: store every normal pkt-line in
 * server_capabilities_v2 and die unless the list ends with a flush packet.
 */
static void process_capabilities_v2(struct packet_reader *reader)
{
	for (;;) {
		if (packet_reader_read(reader) != PACKET_READ_NORMAL)
			break;
		strvec_push(&server_capabilities_v2, reader->line);
	}

	if (reader->status != PACKET_READ_FLUSH)
		die(_("expected flush after capabilities"));
}
/*
 * Work out which protocol version the server speaks by peeking at the first
 * line of its response, then consume whatever belongs to the version
 * handshake: the capability advertisement for v2, the version line for v1,
 * nothing for v0.
 *
 * Dies if the connection is already at EOF. The negotiated version is
 * recorded via trace2 and returned.
 */
enum protocol_version discover_version(struct packet_reader *reader)
{
	enum protocol_version version = protocol_unknown_version;

	switch (packet_reader_peek(reader)) {
	case PACKET_READ_NORMAL:
		version = determine_protocol_version_client(reader->line);
		break;
	case PACKET_READ_FLUSH:
	case PACKET_READ_DELIM:
	case PACKET_READ_RESPONSE_END:
		/* No version line at all: treat it as v0. */
		version = protocol_v0;
		break;
	case PACKET_READ_EOF:
		die_initial_contact(0);
	}

	if (version == protocol_v2)
		process_capabilities_v2(reader);
	else if (version == protocol_v1)
		packet_reader_read(reader);	/* drop the peeked version line */
	else if (version == protocol_unknown_version)
		BUG("unknown protocol version");

	trace2_data_intmax("transfer", NULL, "negotiated-version", version);

	return version;
}
/*
 * Parse one "symref" capability value (`len` bytes at `val`, e.g.
 * "HEAD:refs/heads/master") and, if it is a well-formed "<name>:<target>"
 * pair, append it to `symref`. The item's string is the name and its util
 * pointer is the target; both live in a single allocation handed over to
 * the list. Malformed values are silently ignored.
 */
static void parse_one_symref_info(struct string_list *symref, const char *val, int len)
{
	char *pair, *colon;

	if (!len)
		return; /* just "symref" */

	pair = xmemdupz(val, len);
	colon = strchr(pair, ':');
	if (colon) {
		char *dest = colon + 1;

		*colon = '\0';
		if (!check_refname_format(pair, REFNAME_ALLOW_ONELEVEL) &&
		    !check_refname_format(dest, REFNAME_ALLOW_ONELEVEL)) {
			struct string_list_item *entry;

			entry = string_list_append_nodup(symref, pair);
			entry->util = dest;
			return;
		}
	}

	/* Either "symref=something" without a colon, or a bogus pair. */
	free(pair);
}
/*
 * Collect every "symref" value from the v0/v1 capabilities and, for each
 * ref in the list starting at `ref` whose name has a symref entry, store a
 * copy of the advertised target in ref->symref.
 */
static void annotate_refs_with_symref_info(struct ref *ref)
{
	struct string_list symref = STRING_LIST_INIT_DUP;
	size_t offset = 0;
	size_t len;
	const char *val;
	struct ref *cur;

	while ((val = next_server_feature_value("symref", &len, &offset)) != NULL)
		parse_one_symref_info(&symref, val, len);

	/* string_list_lookup() needs a sorted list. */
	string_list_sort(&symref);

	for (cur = ref; cur; cur = cur->next) {
		struct string_list_item *hit = string_list_lookup(&symref, cur->name);

		if (hit)
			cur->symref = xstrdup((char *)hit->util);
	}

	string_list_clear(&symref, 0);
}
/*
 * Split the capability list off the first v0/v1 advertisement line.
 *
 * The capabilities follow a NUL byte embedded in the packet. `*linelen` is
 * the packet length on entry. If the visible text already spans the whole
 * packet there are no capabilities and nothing is changed. Otherwise the
 * capability text is remembered in server_capabilities_v1, `*linelen` is
 * shortened to the visible text, and reader->hash_algo is chosen from the
 * "object-format" capability (SHA-1 when the capability is absent; left
 * unchanged when its value names an unknown algorithm).
 */
static void process_capabilities(struct packet_reader *reader, int *linelen)
{
	const char *feat_val;
	size_t feat_len;
	const char *line = reader->line;
	int nul_location = strlen(line);

	if (nul_location == *linelen)
		return;

	/*
	 * Release capabilities remembered from an earlier advertisement in
	 * this process before replacing them, otherwise they leak.
	 */
	free(server_capabilities_v1);
	server_capabilities_v1 = xstrdup(line + nul_location + 1);
	*linelen = nul_location;

	feat_val = server_feature_value("object-format", &feat_len);
	if (feat_val) {
		char *hash_name = xstrndup(feat_val, feat_len);
		int hash_algo = hash_algo_by_name(hash_name);

		if (hash_algo != GIT_HASH_UNKNOWN)
			reader->hash_algo = &hash_algos[hash_algo];
		free(hash_name);
	} else {
		reader->hash_algo = &hash_algos[GIT_HASH_SHA1];
	}
}
/*
 * Recognize the placeholder advertisement line
 *
 *     <null-oid> capabilities^{}
 *
 * which a server may send in place of a first ref, purely to carry the
 * capability list.
 *
 * Returns 1 if the current line of `reader` is that placeholder, 0
 * otherwise.
 */
static int process_dummy_ref(const struct packet_reader *reader)
{
	const char *line = reader->line;
	struct object_id oid;
	const char *name;

	if (parse_oid_hex_algop(line, &oid, &name, reader->hash_algo))
		return 0;
	if (*name != ' ')
		return 0;
	name++;

	/*
	 * Compare against the null OID of the algorithm the line was parsed
	 * with (reader->hash_algo), not the local repository's default: a
	 * SHA-1 server talking to a SHA-256 client would otherwise never
	 * match.
	 */
	return oideq(reader->hash_algo->null_oid, &oid) &&
		!strcmp(name, "capabilities^{}");
}
/*
 * Warn when an advertisement line other than the first one carries
 * capabilities, i.e. when the packet (`len` bytes at `line`) has more data
 * hidden behind an embedded NUL. Such capabilities are ignored.
 */
static void check_no_capabilities(const char *line, int len)
{
	size_t visible = strlen(line);
	const char *ignored = "";

	if (visible == (size_t)len)
		return;

	/*
	 * The ignored text starts one byte past the embedded NUL; pointing
	 * at the NUL itself would always print an empty string.
	 */
	if (len > 0 && visible < (size_t)len)
		ignored = line + visible + 1;

	warning(_("ignoring capabilities after first line '%s'"), ignored);
}
/*
 * Try to handle the current line of `reader` as a v0/v1 ref advertisement
 * ("<oid> <name>").
 *
 * ".have" entries are collected in `extra_have` when it is non-NULL, a
 * "capabilities^{}" entry is a fatal protocol error here, and names
 * accepted by check_ref(name, flags) are appended to `*list`. `len` is the
 * packet length, used to warn about capabilities on a non-first line.
 *
 * Returns 1 if the line had the ref-line shape, 0 otherwise.
 */
static int process_ref(const struct packet_reader *reader, int len,
		       struct ref ***list, unsigned int flags,
		       struct oid_array *extra_have)
{
	const char *line = reader->line;
	const char *name;
	struct object_id old_oid;

	if (parse_oid_hex_algop(line, &old_oid, &name, reader->hash_algo) ||
	    *name != ' ')
		return 0;
	name++;

	if (extra_have && !strcmp(name, ".have")) {
		oid_array_append(extra_have, &old_oid);
	} else {
		if (!strcmp(name, "capabilities^{}"))
			die(_("protocol error: unexpected capabilities^{}"));

		if (check_ref(name, flags)) {
			struct ref *entry = alloc_ref(name);

			oidcpy(&entry->old_oid, &old_oid);
			/* Link at the tail and advance the tail pointer. */
			**list = entry;
			*list = &entry->next;
		}
	}

	check_no_capabilities(line, len);
	return 1;
}
static int process_shallow(const struct packet_reader *reader, int len,
struct oid_array *shallow_points)
{
const char *line = reader->line;
const char *arg;
struct object_id old_oid;
if (!skip_prefix(line, "shallow ", &arg))
return 0;
if (get_oid_hex_algop(arg, &old_oid, reader->hash_algo))
die(_("protocol error: expected shallow sha-1, got '%s'"), arg);
if (!shallow_points)
die(_("repository on the other end cannot be shallow"));
oid_array_append(shallow_points, &old_oid);
check_no_capabilities(line, len);
return 1;
}
enum get_remote_heads_state {
EXPECTING_FIRST_REF = 0,
EXPECTING_REF,
EXPECTING_SHALLOW,
EXPECTING_DONE,
};
/*
 * Read the v0/v1 ref advertisement from `reader` up to the flush packet.
 *
 * Ref lines accepted by check_ref(name, flags) are appended at `list`;
 * ".have" lines go to `extra_have` when it is non-NULL, and "shallow" lines
 * go to `shallow_points` (a shallow line with a NULL `shallow_points` is
 * fatal). Capabilities are taken from the first line only. Once the listing
 * is complete, the refs are annotated with any advertised "symref" values.
 *
 * Returns the tail of the list, i.e. the slot where the next ref would be
 * linked. Dies on EOF, on delim/response-end packets, and on a line that is
 * neither a ref nor a shallow entry.
 */
struct ref **get_remote_heads(struct packet_reader *reader,
struct ref **list, unsigned int flags,
struct oid_array *extra_have,
struct oid_array *shallow_points)
{
struct ref **orig_list = list;
int len = 0;
enum get_remote_heads_state state = EXPECTING_FIRST_REF;
*list = NULL;
while (state != EXPECTING_DONE) {
/* First classify the packet itself ... */
switch (packet_reader_read(reader)) {
case PACKET_READ_EOF:
die_initial_contact(1);
case PACKET_READ_NORMAL:
len = reader->pktlen;
break;
case PACKET_READ_FLUSH:
state = EXPECTING_DONE;
break;
case PACKET_READ_DELIM:
case PACKET_READ_RESPONSE_END:
die(_("invalid packet"));
}
/*
 * ... then interpret its payload. Each state falls through to the next
 * one when the line is not of the kind it expects, so a single line is
 * tried as ref first and as shallow entry second.
 */
switch (state) {
case EXPECTING_FIRST_REF:
/* May shorten len to exclude the capabilities behind the NUL. */
process_capabilities(reader, &len);
if (process_dummy_ref(reader)) {
/* Placeholder line: no refs follow, only shallow lines may. */
state = EXPECTING_SHALLOW;
break;
}
state = EXPECTING_REF;
/* fallthrough */
case EXPECTING_REF:
if (process_ref(reader, len, &list, flags, extra_have))
break;
state = EXPECTING_SHALLOW;
/* fallthrough */
case EXPECTING_SHALLOW:
if (process_shallow(reader, len, shallow_points))
break;
die(_("protocol error: unexpected '%s'"), reader->line);
case EXPECTING_DONE:
break;
}
}
annotate_refs_with_symref_info(*orig_list);
return list;
}
/*
 * Parse the current line of a protocol v2 ls-refs response.
 *
 * A regular line "<oid> <name> [attr...]" appends a ref to `*list`; a
 * "peeled:<oid>" attribute additionally appends a "<name>^{}" ref, and a
 * "symref-target:<target>" attribute is stored in ref->symref. An
 * "unborn HEAD symref-target:<target>" line adds no ref; when
 * `unborn_head_target` is non-NULL it receives a copy of <target>.
 *
 * Returns 1 if the line was understood, 0 if it is malformed.
 */
static int process_ref_v2(struct packet_reader *reader, struct ref ***list,
			  const char **unborn_head_target)
{
	int ret = 1;
	int i = 0;
	struct object_id old_oid;
	struct ref *ref;
	struct string_list line_sections = STRING_LIST_INIT_DUP;
	const char *end;
	const char *line = reader->line;

	/*
	 * Ref lines have a number of fields which are space delimited. The
	 * first field is the OID of the ref. The second field is the ref
	 * name. Subsequent fields (symref-target and peeled) are optional and
	 * don't have a particular order.
	 */
	if (string_list_split(&line_sections, line, ' ', -1) < 2) {
		ret = 0;
		goto out;
	}

	if (!strcmp("unborn", line_sections.items[i].string)) {
		i++;
		if (unborn_head_target &&
		    !strcmp("HEAD", line_sections.items[i++].string)) {
			/*
			 * Look for the symref target (if any). If found,
			 * return it to the caller.
			 */
			for (; i < line_sections.nr; i++) {
				const char *arg = line_sections.items[i].string;

				if (skip_prefix(arg, "symref-target:", &arg)) {
					*unborn_head_target = xstrdup(arg);
					break;
				}
			}
		}
		goto out;
	}

	if (parse_oid_hex_algop(line_sections.items[i++].string, &old_oid, &end, reader->hash_algo) ||
	    *end) {
		ret = 0;
		goto out;
	}

	ref = alloc_ref(line_sections.items[i++].string);

	memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz);
	**list = ref;
	*list = &ref->next;

	for (; i < line_sections.nr; i++) {
		const char *arg = line_sections.items[i].string;

		/*
		 * Each field is exactly one kind of attribute. skip_prefix()
		 * advances `arg`, so the checks must be exclusive or a symref
		 * target that itself starts with "peeled:" would be parsed as
		 * a peeled OID too.
		 */
		if (skip_prefix(arg, "symref-target:", &arg)) {
			/* The last target wins; do not leak an earlier one. */
			free(ref->symref);
			ref->symref = xstrdup(arg);
		} else if (skip_prefix(arg, "peeled:", &arg)) {
			struct object_id peeled_oid;
			char *peeled_name;
			struct ref *peeled;

			if (parse_oid_hex_algop(arg, &peeled_oid, &end,
						reader->hash_algo) || *end) {
				ret = 0;
				goto out;
			}

			peeled_name = xstrfmt("%s^{}", ref->name);
			peeled = alloc_ref(peeled_name);

			memcpy(peeled->old_oid.hash, peeled_oid.hash,
			       reader->hash_algo->rawsz);
			**list = peeled;
			*list = &peeled->next;

			free(peeled_name);
		}
	}

out:
	string_list_clear(&line_sections, 0);
	return ret;
}
stateless-connect: send response end packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated before the transaction is complete, remote-curl will blindly forward the packets before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the transaction and continue reading, expecting more input to continue the transaction. This results in a deadlock between the two processes. This can be seen in the following command which does not terminate: $ git -c protocol.version=2 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... whereas the v1 version does terminate as expected: $ git -c protocol.version=1 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... fatal: the remote end hung up unexpectedly Instead of blindly forwarding packets, make remote-curl insert a response end packet after proxying the responses from the remote server when using stateless_connect(). On the RPC client side, ensure that each response ends as described. A separate control packet is chosen because we need to be able to differentiate between what the remote server sends and remote-curl's control packets. By ensuring in the remote-curl code that a server cannot send response end packets, we prevent a malicious server from being able to perform a denial of service attack in which they spoof a response end packet and cause the described deadlock to happen. Reported-by: Force Charlie <charlieio@outlook.com> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Denton Liu <liu.denton@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-19 10:54:00 +00:00
/*
 * In stateless-RPC mode every response must be followed by a response-end
 * packet; read the next packet and die with `error` if it is anything else.
 * Outside stateless mode no delimiter is expected and nothing is read.
 */
void check_stateless_delimiter(int stateless_rpc,
			       struct packet_reader *reader,
			       const char *error)
{
	if (stateless_rpc &&
	    packet_reader_read(reader) != PACKET_READ_RESPONSE_END)
		die("%s", error);
}
static void send_capabilities(int fd_out, struct packet_reader *reader)
{
const char *hash_name;
if (server_supports_v2("agent"))
packet_write_fmt(fd_out, "agent=%s", git_user_agent_sanitized());
if (server_feature_v2("object-format", &hash_name)) {
int hash_algo = hash_algo_by_name(hash_name);
if (hash_algo == GIT_HASH_UNKNOWN)
die(_("unknown object format '%s' specified by server"), hash_name);
reader->hash_algo = &hash_algos[hash_algo];
packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name);
} else {
reader->hash_algo = &hash_algos[GIT_HASH_SHA1];
}
}
/*
 * Issue the v2 "bundle-uri" command on `fd_out` and feed every response
 * line from `reader` to bundle_uri_parse_line(), which fills `bundles`.
 *
 * Dies if the server does not advertise "bundle-uri". Returns 0 on success,
 * or the result of error() when a response line cannot be parsed or the
 * listing does not end with a flush packet. In stateless-RPC mode the
 * trailing response-end packet is checked as well.
 */
int get_remote_bundle_uri(int fd_out, struct packet_reader *reader,
			  struct bundle_list *bundles, int stateless_rpc)
{
	/* Number of response lines read so far; the first line is line 1. */
	int line_nr = 0;

	/* Assert bundle-uri support */
	ensure_server_supports_v2("bundle-uri");

	/* (Re-)send capabilities */
	send_capabilities(fd_out, reader);

	/* Send command */
	packet_write_fmt(fd_out, "command=bundle-uri\n");
	packet_delim(fd_out);

	packet_flush(fd_out);

	/* Process response from server */
	while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
		const char *line = reader->line;

		line_nr++;
		if (!bundle_uri_parse_line(bundles, line))
			continue;

		return error(_("error on bundle-uri response line %d: %s"),
			     line_nr, line);
	}

	if (reader->status != PACKET_READ_FLUSH)
		return error(_("expected flush after bundle-uri listing"));

	/*
	 * Might die(), but obscure enough that that's OK, e.g. in
	 * serve.c we'll call BUG() on its equivalent (the
	 * PACKET_READ_RESPONSE_END check).
	 */
	check_stateless_delimiter(stateless_rpc, reader,
				  _("expected response end packet after bundle-uri listing"));

	return 0;
}
struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
struct ref **list, int for_push,
struct transport_ls_refs_options *transport_options,
stateless-connect: send response end packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated before the transaction is complete, remote-curl will blindly forward the packets before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the transaction and continue reading, expecting more input to continue the transaction. This results in a deadlock between the two processes. This can be seen in the following command which does not terminate: $ git -c protocol.version=2 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... whereas the v1 version does terminate as expected: $ git -c protocol.version=1 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... fatal: the remote end hung up unexpectedly Instead of blindly forwarding packets, make remote-curl insert a response end packet after proxying the responses from the remote server when using stateless_connect(). On the RPC client side, ensure that each response ends as described. A separate control packet is chosen because we need to be able to differentiate between what the remote server sends and remote-curl's control packets. By ensuring in the remote-curl code that a server cannot send response end packets, we prevent a malicious server from being able to perform a denial of service attack in which they spoof a response end packet and cause the described deadlock to happen. Reported-by: Force Charlie <charlieio@outlook.com> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Denton Liu <liu.denton@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-19 10:54:00 +00:00
const struct string_list *server_options,
int stateless_rpc)
{
int i;
struct strvec *ref_prefixes = transport_options ?
&transport_options->ref_prefixes : NULL;
const char **unborn_head_target = transport_options ?
&transport_options->unborn_head_target : NULL;
*list = NULL;
ensure_server_supports_v2("ls-refs");
packet_write_fmt(fd_out, "command=ls-refs\n");
/* Send capabilities */
send_capabilities(fd_out, reader);
if (server_options && server_options->nr) {
ensure_server_supports_v2("server-option");
for (i = 0; i < server_options->nr; i++)
packet_write_fmt(fd_out, "server-option=%s",
server_options->items[i].string);
}
packet_delim(fd_out);
/* When pushing we don't want to request the peeled tags */
if (!for_push)
packet_write_fmt(fd_out, "peel\n");
packet_write_fmt(fd_out, "symrefs\n");
if (server_supports_feature("ls-refs", "unborn", 0))
packet_write_fmt(fd_out, "unborn\n");
for (i = 0; ref_prefixes && i < ref_prefixes->nr; i++) {
packet_write_fmt(fd_out, "ref-prefix %s\n",
ref_prefixes->v[i]);
}
packet_flush(fd_out);
/* Process response from server */
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
if (!process_ref_v2(reader, &list, unborn_head_target))
die(_("invalid ls-refs response: %s"), reader->line);
}
if (reader->status != PACKET_READ_FLUSH)
die(_("expected flush after ref listing"));
stateless-connect: send response end packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated before the transaction is complete, remote-curl will blindly forward the packets before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the transaction and continue reading, expecting more input to continue the transaction. This results in a deadlock between the two processes. This can be seen in the following command which does not terminate: $ git -c protocol.version=2 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... whereas the v1 version does terminate as expected: $ git -c protocol.version=1 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... fatal: the remote end hung up unexpectedly Instead of blindly forwarding packets, make remote-curl insert a response end packet after proxying the responses from the remote server when using stateless_connect(). On the RPC client side, ensure that each response ends as described. A separate control packet is chosen because we need to be able to differentiate between what the remote server sends and remote-curl's control packets. By ensuring in the remote-curl code that a server cannot send response end packets, we prevent a malicious server from being able to perform a denial of service attack in which they spoof a response end packet and cause the described deadlock to happen. Reported-by: Force Charlie <charlieio@outlook.com> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Denton Liu <liu.denton@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-19 10:54:00 +00:00
check_stateless_delimiter(stateless_rpc, reader,
_("expected response end packet after ref listing"));
return list;
}
/*
 * Find `feature` in the whitespace-separated capability string
 * `feature_list`.
 *
 * A match must be a whole token: either bare ("thin-pack") or followed by
 * "=<value>" ("agent=git/1.2.3").
 *
 * `lenp`   - if non-NULL, receives the length of the value (0 for a bare
 *            feature).
 * `offset` - if non-NULL, is both input and output: the search starts
 *            `*offset` bytes into `feature_list`, and on a match `*offset`
 *            is set just past the matched token, so repeated calls walk
 *            over every occurrence of a multi-valued capability.
 *
 * Returns a pointer to the value inside `feature_list` (not
 * NUL-terminated; use `*lenp`), or NULL if `feature_list` is NULL or holds
 * no further occurrence of `feature`.
 */
const char *parse_feature_value(const char *feature_list, const char *feature, size_t *lenp, size_t *offset)
{
	/*
	 * Offsets are always measured from the unmodified start of the
	 * string. `feature_list` itself is advanced below, both by the
	 * incoming offset and past near-misses, so using it as the base
	 * would hand back offsets that are too small and make a caller
	 * re-parse the same entry forever.
	 */
	const char *orig_start = feature_list;
	size_t len;

	if (!feature_list)
		return NULL;

	len = strlen(feature);
	if (offset)
		feature_list += *offset;

	while (*feature_list) {
		const char *found = strstr(feature_list, feature);

		if (!found)
			return NULL;

		/*
		 * A token can only begin at the very start of the list or
		 * right after whitespace. This must be judged against
		 * orig_start rather than the moving search cursor, or a hit
		 * directly behind a near-miss would pass as a token start.
		 */
		if (found == orig_start || isspace((unsigned char)found[-1])) {
			const char *value = found + len;

			/* feature with no value (e.g., "thin-pack") */
			if (!*value || isspace((unsigned char)*value)) {
				if (lenp)
					*lenp = 0;
				if (offset)
					*offset = found + len - orig_start;
				return value;
			}
			/* feature with a value (e.g., "agent=git/1.2.3") */
			else if (*value == '=') {
				size_t end;

				value++;
				end = strcspn(value, " \t\n");
				if (lenp)
					*lenp = end;
				if (offset)
					*offset = value + end - orig_start;
				return value;
			}
			/*
			 * otherwise we matched a substring of another feature;
			 * keep looking
			 */
		}
		feature_list = found + 1;
	}
	return NULL;
}
/*
 * Check whether the server advertised "desired" as a value of its
 * "object-format" capability.
 *
 * If feature_supported is non-NULL, it receives 1 when at least one
 * "object-format" value was advertised and 0 otherwise.  When the
 * capability is absent altogether, "desired" is compared against the
 * name of hash_algos[GIT_HASH_SHA1] instead.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int server_supports_hash(const char *desired, int *feature_supported)
{
	size_t offset = 0;
	size_t len;
	const char *algo;

	algo = next_server_feature_value("object-format", &len, &offset);
	if (feature_supported)
		*feature_supported = algo ? 1 : 0;

	if (!algo) {
		/* Nothing advertised: fall back to the SHA-1 algorithm name. */
		algo = hash_algos[GIT_HASH_SHA1].name;
		len = strlen(algo);
	}

	/* Walk every advertised value; "offset" carries the resume position. */
	for (; algo;
	     algo = next_server_feature_value("object-format", &len, &offset)) {
		if (xstrncmpz(desired, algo, len) == 0)
			return 1;
	}
	return 0;
}
2012-08-14 01:59:27 +00:00
/*
 * Return 1 if parse_feature_value() finds "feature" in "feature_list",
 * 0 otherwise.  The feature's value, if any, is not reported.
 */
int parse_feature_request(const char *feature_list, const char *feature)
{
	const char *hit = parse_feature_value(feature_list, feature, NULL, NULL);

	return hit != NULL;
}
/*
 * Look up "feature" in server_capabilities_v1 via parse_feature_value().
 *
 * "len" and "offset" are handed through unchanged; "offset" is an
 * in/out position within the capability string, so repeated calls with
 * the same variable step through multiple occurrences of "feature".
 *
 * Returns whatever parse_feature_value() returns (NULL when there is no
 * further match).
 */
static const char *next_server_feature_value(const char *feature, size_t *len, size_t *offset)
{
	return parse_feature_value(server_capabilities_v1, feature, len, offset);
}
const char *server_feature_value(const char *feature, size_t *len)
2012-08-14 01:59:27 +00:00
{
return parse_feature_value(server_capabilities_v1, feature, len, NULL);
2012-08-14 01:59:27 +00:00
}
/*
 * Return 1 if server_feature_value() finds "feature" among the server's
 * capabilities, 0 otherwise.
 */
int server_supports(const char *feature)
{
	return server_feature_value(feature, NULL) != NULL;
}
/* Transport kinds that a connection URL can resolve to. */
enum protocol {
	PROTO_LOCAL = 1,	/* no "scheme://" prefix and not host:path syntax */
	PROTO_FILE = 2,		/* "file://" */
	PROTO_SSH = 3,		/* "ssh://" (and aliases) or host:path syntax */
	PROTO_GIT = 4		/* "git://" */
};
/*
 * Decide whether a scheme-less "url" names a local path rather than
 * using "host:path" syntax.
 *
 * Returns nonzero when any of these holds:
 *  - the string contains no ':' at all;
 *  - a '/' appears before the first ':';
 *  - the string has a DOS drive prefix and is_valid_path() accepts it
 *    (the validity check keeps strings such as "[::1]:repo" from being
 *    mistaken for a drive-letter path).
 */
int url_is_local_not_ssh(const char *url)
{
	const char *colon = strchr(url, ':');
	const char *slash = strchr(url, '/');

	if (!colon)
		return 1;
	if (slash && slash < colon)
		return 1;
	return has_dos_drive_prefix(url) && is_valid_path(url);
}
/*
 * Map a protocol value to its display name.  PROTO_LOCAL and PROTO_FILE
 * both map to "file"; values outside the enum yield "unknown protocol".
 * The returned string is a literal and must not be freed.
 */
static const char *prot_name(enum protocol protocol)
{
	if (protocol == PROTO_LOCAL || protocol == PROTO_FILE)
		return "file";
	if (protocol == PROTO_SSH)
		return "ssh";
	if (protocol == PROTO_GIT)
		return "git";
	return "unknown protocol";
}
static enum protocol get_protocol(const char *name)
{
if (!strcmp(name, "ssh"))
return PROTO_SSH;
if (!strcmp(name, "git"))
return PROTO_GIT;
if (!strcmp(name, "git+ssh")) /* deprecated - do not use */
return PROTO_SSH;
if (!strcmp(name, "ssh+git")) /* deprecated - do not use */
return PROTO_SSH;
if (!strcmp(name, "file"))
return PROTO_FILE;
die(_("protocol '%s' is not supported"), name);
}
/*
 * Find the end of a bracketed host ("[...]", optionally preceded by
 * "user@") at the start of *hoststart.
 *
 * Returns:
 *  - *hoststart itself when there is no bracketed host, or the opening
 *    '[' has no matching ']';
 *  - a pointer to the ']' when removebrackets is zero (string untouched);
 *  - a pointer to the byte just after the original ']' when
 *    removebrackets is nonzero.  In that case the brackets are removed
 *    in place and the host part is NUL-terminated.
 */
static char *host_end(char **hoststart, int removebrackets)
{
	char *host = *hoststart;
	char *at_bracket = strstr(host, "@[");
	/* For "user@[...", step over the '@' so we look at the '['. */
	char *lbracket = at_bracket ? at_bracket + 1 : host;
	char *rbracket;

	if (*lbracket != '[')
		return host;

	rbracket = strchr(lbracket + 1, ']');
	if (!rbracket)
		return host;
	if (!removebrackets)
		return rbracket;

	/*
	 * Turn ']' into a terminator, then slide the bracket contents
	 * (terminator included) one byte to the left, over the '['.
	 */
	*rbracket = 0;
	memmove(lbracket, lbracket + 1, rbracket - lbracket);
	return rbracket + 1;
}
/*
 * Two-step stringification: STR() lets its argument be macro-expanded
 * first, and STR_() then turns the resulting tokens into a string
 * literal (e.g. STR(DEFAULT_GIT_PORT) yields the port as a string).
 */
#define STR_(s) # s
#define STR(s) STR_(s)
/*
 * Split "host[:port]" held in the writable string *host.
 *
 * Brackets around the host are removed in place (host_end() with
 * removebrackets set).  If what follows the host is ":<digits>" forming
 * a decimal number in [0, 65535], the string is cut at the ':' and *port
 * is pointed at the digits.  A trailing ':' with nothing after it is
 * simply cut off.  In every other case the string and *port are left as
 * they were, so the caller's default port survives.
 */
static void get_host_and_port(char **host, const char **port)
{
	char *after_host = host_end(host, 1);
	char *colon = strchr(after_host, ':');
	char *digits, *rest;
	long portnr;

	if (!colon)
		return;

	digits = colon + 1;
	portnr = strtol(digits, &rest, 10);
	if (rest != digits && *rest == '\0' && portnr >= 0 && portnr < 65536) {
		*colon = 0;
		*port = digits;
	} else if (*digits == '\0') {
		/* "host:" with an empty port: drop the dangling colon. */
		*colon = 0;
	}
}
/*
 * Turn on SO_KEEPALIVE for sockfd.  Failure is reported through
 * error_errno() but is otherwise not fatal.
 */
static void enable_keepalive(int sockfd)
{
	int enabled = 1;
	int rc = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE,
			    &enabled, sizeof(enabled));

	if (rc < 0)
		error_errno(_("unable to set SO_KEEPALIVE on socket"));
}
#ifndef NO_IPV6
/*
 * Format the address of "ai" in numeric form.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call; "(unknown)" is returned when getnameinfo() fails.
 */
static const char *ai_name(const struct addrinfo *ai)
{
	static char addr[NI_MAXHOST];
	int rc = getnameinfo(ai->ai_addr, ai->ai_addrlen, addr, sizeof(addr),
			     NULL, 0, NI_NUMERICHOST);

	if (rc != 0)
		xsnprintf(addr, sizeof(addr), "(unknown)");
	return addr;
}
/*
 * Returns a connected socket() fd, or else die()s.
 *
 * "host" is a writable "host[:port]" string; it is split in place by
 * get_host_and_port(), with DEFAULT_GIT_PORT used when no port is given.
 * CONNECT_IPV4 / CONNECT_IPV6 in "flags" restrict the address family and
 * CONNECT_VERBOSE enables progress messages on stderr.  Every address
 * returned by getaddrinfo() is tried in order until one connects; the
 * per-address failures are collected for the final error message.
 */
static int git_tcp_connect_sock(char *host, int flags)
{
	struct strbuf error_message = STRBUF_INIT;
	const char *port = STR(DEFAULT_GIT_PORT);
	struct addrinfo hints, *results, *cand;
	const int verbose = flags & CONNECT_VERBOSE;
	int sockfd = -1;
	int attempt = 0;
	int gai;

	get_host_and_port(&host, &port);
	if (!*port)
		port = "<none>";

	memset(&hints, 0, sizeof(hints));
	if (flags & CONNECT_IPV4)
		hints.ai_family = AF_INET;
	else if (flags & CONNECT_IPV6)
		hints.ai_family = AF_INET6;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;

	if (verbose)
		fprintf(stderr, _("Looking up %s ... "), host);

	gai = getaddrinfo(host, port, &hints, &results);
	if (gai)
		die(_("unable to look up %s (port %s) (%s)"), host, port, gai_strerror(gai));

	if (verbose)
		/* TRANSLATORS: this is the end of "Looking up %s ... " */
		fprintf(stderr, _("done.\nConnecting to %s (port %s) ... "), host, port);

	for (cand = results; cand; cand = cand->ai_next, attempt++) {
		sockfd = socket(cand->ai_family,
				cand->ai_socktype, cand->ai_protocol);
		if (sockfd >= 0 &&
		    connect(sockfd, cand->ai_addr, cand->ai_addrlen) >= 0) {
			if (verbose)
				fprintf(stderr, "%s ", ai_name(cand));
			break;
		}

		/* Note the failure before close() gets a chance to touch errno. */
		strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
			    host, attempt, ai_name(cand), strerror(errno));
		if (sockfd >= 0)
			close(sockfd);
		sockfd = -1;
	}

	freeaddrinfo(results);

	if (sockfd < 0)
		die(_("unable to connect to %s:\n%s"), host, error_message.buf);

	enable_keepalive(sockfd);

	if (verbose)
		/* TRANSLATORS: this is the end of "Connecting to %s (port %s) ... " */
		fprintf_ln(stderr, _("done."));

	strbuf_release(&error_message);

	return sockfd;
}
#else /* NO_IPV6 */
/*
 * Returns a connected socket() fd, or else die()s.
 *
 * Variant used when NO_IPV6 is defined: the host is resolved with
 * gethostbyname() and each returned address is tried in order.  "host"
 * is a writable "host[:port]" string that get_host_and_port() splits in
 * place; DEFAULT_GIT_PORT is used when no port is given.  A port that is
 * not purely numeric is looked up as a TCP service name.
 * CONNECT_VERBOSE in "flags" enables progress messages on stderr.
 */
static int git_tcp_connect_sock(char *host, int flags)
{
	struct strbuf error_message = STRBUF_INIT;
	int sockfd = -1;
	const char *port = STR(DEFAULT_GIT_PORT);
	char *ep;
	struct hostent *he;
	struct sockaddr_in sa;
	char **ap;
	unsigned int nport;
	int cnt;

	get_host_and_port(&host, &port);

	if (flags & CONNECT_VERBOSE)
		fprintf(stderr, _("Looking up %s ... "), host);

	he = gethostbyname(host);
	if (!he)
		die(_("unable to look up %s (%s)"), host, hstrerror(h_errno));
	nport = strtoul(port, &ep, 10);
	if (ep == port || *ep) {
		/* Not numeric */
		struct servent *se = getservbyname(port, "tcp");
		if (!se)
			die(_("unknown port %s"), port);
		/*
		 * s_port is stored in network byte order.  Bring it back to
		 * host order so that the htons() below, which is needed for
		 * the numeric case, does not swap the bytes a second time.
		 */
		nport = ntohs(se->s_port);
	}

	if (flags & CONNECT_VERBOSE)
		/* TRANSLATORS: this is the end of "Looking up %s ... " */
		fprintf(stderr, _("done.\nConnecting to %s (port %s) ... "), host, port);

	for (cnt = 0, ap = he->h_addr_list; *ap; ap++, cnt++) {
		memset(&sa, 0, sizeof sa);
		sa.sin_family = he->h_addrtype;
		sa.sin_port = htons(nport);
		memcpy(&sa.sin_addr, *ap, he->h_length);

		sockfd = socket(he->h_addrtype, SOCK_STREAM, 0);
		if ((sockfd < 0) ||
		    connect(sockfd, (struct sockaddr *)&sa, sizeof sa) < 0) {
			/* Remember why this address failed, then try the next. */
			strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
				host,
				cnt,
				inet_ntoa(*(struct in_addr *)&sa.sin_addr),
				strerror(errno));
			if (0 <= sockfd)
				close(sockfd);
			sockfd = -1;
			continue;
		}
		if (flags & CONNECT_VERBOSE)
			fprintf(stderr, "%s ",
				inet_ntoa(*(struct in_addr *)&sa.sin_addr));
		break;
	}

	if (sockfd < 0)
		die(_("unable to connect to %s:\n%s"), host, error_message.buf);

	enable_keepalive(sockfd);

	if (flags & CONNECT_VERBOSE)
		/* TRANSLATORS: this is the end of "Connecting to %s (port %s) ... " */
		fprintf_ln(stderr, _("done."));

	/* Failures recorded for earlier addresses are no longer needed. */
	strbuf_release(&error_message);

	return sockfd;
}
#endif /* NO_IPV6 */
/*
 * Dummy child_process returned by git_connect() if the transport protocol
 * does not need fork(2).
 *
 * Only its address matters: git_tcp_connect() returns a pointer to it and
 * git_connection_is_socket() recognizes such connections by comparing
 * against that address.
 */
static struct child_process no_fork = CHILD_PROCESS_INIT;
/*
 * Return 1 if "conn" is the no_fork placeholder, i.e. the connection was
 * set up without a child process, and 0 otherwise.
 */
int git_connection_is_socket(struct child_process *conn)
{
	return (conn == &no_fork) ? 1 : 0;
}
/*
 * Open a TCP connection to "host" and store descriptors for it in fd[]:
 * fd[0] is the connected socket and fd[1] is a dup() of it.
 *
 * Returns the no_fork placeholder, since no child process is involved.
 */
static struct child_process *git_tcp_connect(int fd[2], char *host, int flags)
{
	int sock = git_tcp_connect_sock(host, flags);

	fd[0] = sock;
	fd[1] = dup(sock);

	return &no_fork;
}
/*
 * Command used to proxy git:// connections.  git_use_proxy() seeds it
 * from the GIT_PROXY_COMMAND environment variable, and
 * git_proxy_command_options() fills it in from core.gitproxy when it is
 * still unset.
 */
static char *git_proxy_command;
static int git_proxy_command_options(const char *var, const char *value,
config: add ctx arg to config_fn_t Add a new "const struct config_context *ctx" arg to config_fn_t to hold additional information about the config iteration operation. config_context has a "struct key_value_info kvi" member that holds metadata about the config source being read (e.g. what kind of config source it is, the filename, etc). In this series, we're only interested in .kvi, so we could have just used "struct key_value_info" as an arg, but config_context makes it possible to add/adjust members in the future without changing the config_fn_t signature. We could also consider other ways of organizing the args (e.g. moving the config name and value into config_context or key_value_info), but in my experiments, the incremental benefit doesn't justify the added complexity (e.g. a config_fn_t will sometimes invoke another config_fn_t but with a different config value). In subsequent commits, the .kvi member will replace the global "struct config_reader" in config.c, making config iteration a global-free operation. It requires much more work for the machinery to provide meaningful values of .kvi, so for now, merely change the signature and call sites, pass NULL as a placeholder value, and don't rely on the arg in any meaningful way. Most of the changes are performed by contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every config_fn_t: - Modifies the signature to accept "const struct config_context *ctx" - Passes "ctx" to any inner config_fn_t, if needed - Adds UNUSED attributes to "ctx", if needed Most config_fn_t instances are easily identified by seeing if they are called by the various config functions. Most of the remaining ones are manually named in the .cocci patch. Manual cleanups are still needed, but the majority of it is trivial; it's either adjusting config_fn_t that the .cocci patch didn't catch, or adding forward declarations of "struct config_context ctx" to make the signatures make sense. 
The non-trivial changes are in cases where we are invoking a config_fn_t outside of config machinery, and we now need to decide what value of "ctx" to pass. These cases are: - trace2/tr2_cfg.c:tr2_cfg_set_fl() This is indirectly called by git_config_set() so that the trace2 machinery can notice the new config values and update its settings using the tr2 config parsing function, i.e. tr2_cfg_cb(). - builtin/checkout.c:checkout_main() This calls git_xmerge_config() as a shorthand for parsing a CLI arg. This might be worth refactoring away in the future, since git_xmerge_config() can call git_default_config(), which can do much more than just parsing. Handle them by creating a KVI_INIT macro that initializes "struct key_value_info" to a reasonable default, and use that to construct the "ctx" arg. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
const struct config_context *ctx, void *cb)
{
if (!strcmp(var, "core.gitproxy")) {
const char *for_pos;
int matchlen = -1;
int hostlen;
const char *rhost_name = cb;
int rhost_len = strlen(rhost_name);
if (git_proxy_command)
return 0;
if (!value)
return config_error_nonbool(var);
/* [core]
* ;# matches www.kernel.org as well
* gitproxy = netcatter-1 for kernel.org
* gitproxy = netcatter-2 for sample.xz
* gitproxy = netcatter-default
*/
for_pos = strstr(value, " for ");
if (!for_pos)
/* matches everybody */
matchlen = strlen(value);
else {
hostlen = strlen(for_pos + 5);
if (rhost_len < hostlen)
matchlen = -1;
else if (!strncmp(for_pos + 5,
rhost_name + rhost_len - hostlen,
hostlen) &&
((rhost_len == hostlen) ||
rhost_name[rhost_len - hostlen -1] == '.'))
matchlen = for_pos - value;
else
matchlen = -1;
}
if (0 <= matchlen) {
/* core.gitproxy = none for kernel.org */
if (matchlen == 4 &&
!memcmp(value, "none", 4))
matchlen = 0;
git_proxy_command = xmemdupz(value, matchlen);
}
return 0;
}
config: add ctx arg to config_fn_t Add a new "const struct config_context *ctx" arg to config_fn_t to hold additional information about the config iteration operation. config_context has a "struct key_value_info kvi" member that holds metadata about the config source being read (e.g. what kind of config source it is, the filename, etc). In this series, we're only interested in .kvi, so we could have just used "struct key_value_info" as an arg, but config_context makes it possible to add/adjust members in the future without changing the config_fn_t signature. We could also consider other ways of organizing the args (e.g. moving the config name and value into config_context or key_value_info), but in my experiments, the incremental benefit doesn't justify the added complexity (e.g. a config_fn_t will sometimes invoke another config_fn_t but with a different config value). In subsequent commits, the .kvi member will replace the global "struct config_reader" in config.c, making config iteration a global-free operation. It requires much more work for the machinery to provide meaningful values of .kvi, so for now, merely change the signature and call sites, pass NULL as a placeholder value, and don't rely on the arg in any meaningful way. Most of the changes are performed by contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every config_fn_t: - Modifies the signature to accept "const struct config_context *ctx" - Passes "ctx" to any inner config_fn_t, if needed - Adds UNUSED attributes to "ctx", if needed Most config_fn_t instances are easily identified by seeing if they are called by the various config functions. Most of the remaining ones are manually named in the .cocci patch. Manual cleanups are still needed, but the majority of it is trivial; it's either adjusting config_fn_t that the .cocci patch didn't catch, or adding forward declarations of "struct config_context ctx" to make the signatures make sense. 
The non-trivial changes are in cases where we are invoking a config_fn_t outside of config machinery, and we now need to decide what value of "ctx" to pass. These cases are: - trace2/tr2_cfg.c:tr2_cfg_set_fl() This is indirectly called by git_config_set() so that the trace2 machinery can notice the new config values and update its settings using the tr2 config parsing function, i.e. tr2_cfg_cb(). - builtin/checkout.c:checkout_main() This calls git_xmerge_config() as a shorthand for parsing a CLI arg. This might be worth refactoring away in the future, since git_xmerge_config() can call git_default_config(), which can do much more than just parsing. Handle them by creating a KVI_INIT macro that initializes "struct key_value_info" to a reasonable default, and use that to construct the "ctx" arg. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
return git_default_config(var, value, ctx, cb);
}
/*
 * Work out whether connections to "host" should go through a proxy.
 *
 * git_proxy_command is first taken from GIT_PROXY_COMMAND; the config
 * is then read with git_proxy_command_options(), which may fill it in
 * from core.gitproxy.  Returns 1 when the resulting command is a
 * non-empty string, 0 otherwise.
 */
static int git_use_proxy(const char *host)
{
	git_proxy_command = getenv("GIT_PROXY_COMMAND");
	git_config(git_proxy_command_options, (void*)host);

	if (!git_proxy_command)
		return 0;
	return *git_proxy_command != '\0';
}
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 06:46:07 +00:00
static struct child_process *git_proxy_connect(int fd[2], char *host)
{
const char *port = STR(DEFAULT_GIT_PORT);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 06:46:07 +00:00
struct child_process *proxy;
get_host_and_port(&host, &port);
if (looks_like_command_line_option(host))
die(_("strange hostname '%s' blocked"), host);
if (looks_like_command_line_option(port))
die(_("strange port '%s' blocked"), port);
proxy = xmalloc(sizeof(*proxy));
child_process_init(proxy);
strvec_push(&proxy->args, git_proxy_command);
strvec_push(&proxy->args, host);
strvec_push(&proxy->args, port);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 06:46:07 +00:00
proxy->in = -1;
proxy->out = -1;
if (start_command(proxy))
die(_("cannot start proxy %s"), git_proxy_command);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 06:46:07 +00:00
fd[0] = proxy->out; /* read from proxy stdout */
fd[1] = proxy->in; /* write to proxy stdin */
return proxy;
}
/*
 * Split a trailing ":<port>" off the writable string "host".
 *
 * If the text after the first ':' is a decimal number in [0, 65535]
 * that runs to the end of the string, the ':' is replaced by NUL and a
 * pointer to the digits is returned.  Otherwise "host" is left
 * untouched and NULL is returned.
 */
static char *get_port(char *host)
{
	char *sep = strchr(host, ':');
	char *digits, *rest;
	long value;

	if (!sep)
		return NULL;

	digits = sep + 1;
	value = strtol(digits, &rest, 10);
	if (rest == digits || *rest != '\0')
		return NULL;
	if (value < 0 || value >= 65536)
		return NULL;

	*sep = '\0';
	return digits;
}
/*
 * Extract protocol and relevant parts from the specified connection URL.
 * The caller must free() the returned strings.
 *
 * url_orig is not modified; all in-place edits happen on a private copy
 * that is freed before returning.  *ret_host receives the host part and
 * *ret_path the path part, both freshly allocated.  The detected
 * protocol is returned.  die()s when the scheme is unsupported (via
 * get_protocol()) or when no path can be found.
 */
static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
char **ret_path)
{
char *url;
char *host, *path;
char *end;
int separator = '/';
enum protocol protocol = PROTO_LOCAL;
/* Only strings recognized by is_url() are passed through url_decode(). */
if (is_url(url_orig))
url = url_decode(url_orig);
else
url = xstrdup(url_orig);
host = strstr(url, "://");
if (host) {
/* Explicit scheme: terminate it, map it, and step past "://". */
*host = '\0';
protocol = get_protocol(url);
host += 3;
} else {
/*
 * No scheme: the string stays PROTO_LOCAL unless it is in
 * "host:path" form, in which case ':' separates host from path.
 */
host = url;
if (!url_is_local_not_ssh(url)) {
protocol = PROTO_SSH;
separator = ':';
}
}
/*
* Don't do destructive transforms as protocol code does
* '[]' unwrapping in get_host_and_port()
*/
end = host_end(&host, 0);
if (protocol == PROTO_LOCAL)
path = end;
else if (protocol == PROTO_FILE && *host != '/' &&
!has_dos_drive_prefix(host) &&
offset_1st_component(host - 2) > 1)
path = host - 2; /* include the leading "//" */
else if (protocol == PROTO_FILE && has_dos_drive_prefix(end))
path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */
else
path = strchr(end, separator);
if (!path || !*path)
die(_("no path specified; see 'git help pull' for valid url syntax"));
/*
* null-terminate hostname and point path to ~ for URL's like this:
* ssh://host.xz/~user/repo
*/
end = path; /* Need to \0 terminate host here */
if (separator == ':')
path++; /* path starts after ':' */
if (protocol == PROTO_GIT || protocol == PROTO_SSH) {
if (path[1] == '~')
path++;
}
/*
 * Duplicate the path before cutting the working copy at the host/path
 * boundary: "end" may point at the first byte of the path itself.
 */
path = xstrdup(path);
*end = '\0';
*ret_host = xstrdup(host);
*ret_path = path;
free(url);
return protocol;
}
/*
 * Return the user-configured ssh command, or NULL if there is none.
 *
 * The GIT_SSH_COMMAND environment variable takes precedence over the
 * core.sshcommand config setting.  The returned string is not allocated
 * for the caller and must not be freed; presumably the config-sourced
 * pointer is only valid for a short time (see
 * git_config_get_string_tmp()).
 */
static const char *get_ssh_command(void)
{
	const char *ssh;

	if ((ssh = getenv("GIT_SSH_COMMAND")))
		return ssh;

	if (!git_config_get_string_tmp("core.sshcommand", &ssh))
		return ssh;

	return NULL;
}
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
/*
 * Flavor of ssh client being driven. The variant determines which
 * command-line options (e.g. "-o SendEnv=GIT_PROTOCOL", "-p"/"-P" for the
 * port, "-4"/"-6") may be passed to the ssh command.
 *
 * The enumerators are kept in their original order, so their implicit
 * values are unchanged (VARIANT_AUTO is 0).
 */
enum ssh_variant {
	/*
	 * Default: not decided yet. The command's basename is checked
	 * against the known names first; if it is not recognized, the
	 * command is probed with "-G <options> <host>" and then treated
	 * as VARIANT_SSH (probe succeeded) or VARIANT_SIMPLE (probe failed).
	 */
	VARIANT_AUTO,
	/*
	 * Minimal helper: only "[username@]host command" is passed, no
	 * options at all.
	 */
	VARIANT_SIMPLE,
	/* OpenSSH-compatible client; accepts the OpenSSH-style options. */
	VARIANT_SSH,
	/*
	 * PuTTY family. Per the history of this code these need a
	 * different port option ('P' instead of 'p'), and some need a
	 * batch flag. NOTE(review): the exact per-variant option handling
	 * lives in the callers, confirm there.
	 */
	VARIANT_PLINK,
	VARIANT_PUTTY,
	VARIANT_TORTOISEPLINK,
};
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
static void override_ssh_variant(enum ssh_variant *ssh_variant)
{
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
const char *variant = getenv("GIT_SSH_VARIANT");
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
config: fix leaks from git_config_get_string_const() There are two functions to get a single config string: - git_config_get_string() - git_config_get_string_const() One might naively think that the first one allocates a new string and the second one just points us to the internal configset storage. But in fact they both allocate a new copy; the second one exists only to avoid having to cast when using it with a const global which we never intend to free. The documentation for the function explains that clearly, but it seems I'm not alone in being surprised by this. Of 17 calls to the function, 13 of them leak the resulting value. We could obviously fix these by adding the appropriate free(). But it would be simpler still if we actually had a non-allocating way to get the string. There's git_config_get_value() but that doesn't quite do what we want. If the config key is present but is a boolean with no value (e.g., "[foo]bar" in the file), then we'll get NULL (whereas the string versions will print an error and die). So let's introduce a new variant, git_config_get_string_tmp(), that behaves as these callers expect. We need a new name because we have new semantics but the same function signature (so even if we converted the four remaining callers, topics in flight might be surprised). The "tmp" is because this value should only be held onto for a short time. In practice it's rare for us to clear and refresh the configset, invalidating the pointer, but hopefully the "tmp" makes callers think about the lifetime. In each of the converted cases here the value only needs to last within the local function or its immediate caller. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-14 16:17:36 +00:00
if (!variant && git_config_get_string_tmp("ssh.variant", &variant))
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
return;
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
if (!strcmp(variant, "auto"))
*ssh_variant = VARIANT_AUTO;
else if (!strcmp(variant, "plink"))
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
*ssh_variant = VARIANT_PLINK;
else if (!strcmp(variant, "putty"))
*ssh_variant = VARIANT_PUTTY;
else if (!strcmp(variant, "tortoiseplink"))
*ssh_variant = VARIANT_TORTOISEPLINK;
else if (!strcmp(variant, "simple"))
*ssh_variant = VARIANT_SIMPLE;
else
*ssh_variant = VARIANT_SSH;
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
}
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
static enum ssh_variant determine_ssh_variant(const char *ssh_command,
int is_cmdline)
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
{
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
enum ssh_variant ssh_variant = VARIANT_AUTO;
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
const char *variant;
char *p = NULL;
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
override_ssh_variant(&ssh_variant);
if (ssh_variant != VARIANT_AUTO)
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
return ssh_variant;
connect.c: stop conflating ssh command names and overrides dd33e07766 ("connect: Add the envvar GIT_SSH_VARIANT and ssh.variant config", 2017-02-01) attempted to add support for configuration and environment variable to override the different handling of port_option and needs_batch settings suitable for variants of the ssh implementation that was autodetected by looking at the ssh command name. Because it piggybacked on the code that turns command name to specific override (e.g. "plink.exe" and "plink" means port_option needs to be set to 'P' instead of the default 'p'), yet it defined a separate namespace for these overrides (e.g. "putty" can be usable to signal that port_option needs to be 'P'), however, it made the auto-detection based on the command name less robust (e.g. the code now accepts "putty" as a SSH command name and applies the same override). Separate the code that interprets the override that was read from the configuration & environment from the original code that handles the command names, as they are in separate namespaces, to fix this confusion. This incidentally also makes it easier for future enhancement of the override syntax (e.g. "port_option=p,needs_batch=1" may want to be accepted as a more explicit syntax) without affecting the code for auto-detection based on the command name. While at it, update the return type of the handle_ssh_variant() helper function to void; the caller does not use it, and the function does not return any meaningful value. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-09 17:20:25 +00:00
if (!is_cmdline) {
p = xstrdup(ssh_command);
variant = basename(p);
} else {
const char **ssh_argv;
p = xstrdup(ssh_command);
if (split_cmdline(p, &ssh_argv) > 0) {
variant = basename((char *)ssh_argv[0]);
/*
* At this point, variant points into the buffer
* referenced by p, hence we do not need ssh_argv
* any longer.
*/
free(ssh_argv);
} else {
free(p);
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
return ssh_variant;
}
}
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
if (!strcasecmp(variant, "ssh") ||
!strcasecmp(variant, "ssh.exe"))
ssh_variant = VARIANT_SSH;
else if (!strcasecmp(variant, "plink") ||
!strcasecmp(variant, "plink.exe"))
ssh_variant = VARIANT_PLINK;
else if (!strcasecmp(variant, "tortoiseplink") ||
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
!strcasecmp(variant, "tortoiseplink.exe"))
ssh_variant = VARIANT_TORTOISEPLINK;
free(p);
ssh: introduce a 'simple' ssh variant When using the 'ssh' transport, the '-o' option is used to specify an environment variable which should be set on the remote end. This allows git to send additional information when contacting the server, requesting the use of a different protocol version via the 'GIT_PROTOCOL' environment variable like so: "-o SendEnv=GIT_PROTOCOL". Unfortunately not all ssh variants support the sending of environment variables to the remote end. To account for this, only use the '-o' option for ssh variants which are OpenSSH compliant. This is done by checking that the basename of the ssh command is 'ssh' or the ssh variant is overridden to be 'ssh' (via the ssh.variant config). Other options like '-p' and '-P', which are used to specify a specific port to use, or '-4' and '-6', which are used to indicate that IPV4 or IPV6 addresses should be used, may also not be supported by all ssh variants. Currently if an ssh command's basename wasn't 'plink' or 'tortoiseplink' git assumes that the command is an OpenSSH variant. Since user configured ssh commands may not be OpenSSH compliant, tighten this constraint and assume a variant of 'simple' if the basename of the command doesn't match the variants known to git. The new ssh variant 'simple' will only have the host and command to execute ([username@]host command) passed as parameters to the ssh command. Update the Documentation to better reflect the command-line options sent to ssh commands based on their variant. Reported-by: Jeffrey Yasskin <jyasskin@google.com> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:31 +00:00
return ssh_variant;
}
/*
 * Open a connection using Git's native protocol.
 *
 * The caller is responsible for freeing hostandport, but this function may
 * modify it (for example, to truncate it to remove the port part).
 *
 * The connection is made with git_proxy_connect() when git_use_proxy()
 * reports a proxy for hostandport, and with git_tcp_connect() otherwise.
 * Once connected, a single request packet is written to fd[1]:
 *
 *	<prog> SP <path> NUL "host=" <target_host> NUL
 *
 * followed, when version > 0, by one more NUL and "version=<n>" NUL.
 * <target_host> is the value of GIT_OVERRIDE_VIRTUAL_HOST if that is set
 * in the environment, and hostandport otherwise.
 *
 * Dies if the target host or the path contains a newline. Returns the
 * child_process handed back by whichever connect helper was used.
 */
static struct child_process *git_connect_git(int fd[2], char *hostandport,
const char *path, const char *prog,
enum protocol_version version,
int flags)
{
struct child_process *conn;
struct strbuf request = STRBUF_INIT;
/*
* Set up virtual host information based on where we will
* connect, unless the user has overridden us in
* the environment.
*
* Either way we keep our own copy (released at the end of this
* function); it is taken before the connect helpers below get a
* chance to touch hostandport.
*/
char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
if (target_host)
target_host = xstrdup(target_host);
else
target_host = xstrdup(hostandport);
transport_check_allowed("git");
git_connect_git(): forbid newlines in host and path When we connect to a git:// server, we send an initial request that looks something like: 002dgit-upload-pack repo.git\0host=example.com If the repo path contains a newline, then it's included literally, and we get: 002egit-upload-pack repo .git\0host=example.com This works fine if you really do have a newline in your repository name; the server side uses the pktline framing to parse the string, not newlines. However, there are many _other_ protocols in the wild that do parse on newlines, such as HTTP. So a carefully constructed git:// URL can actually turn into a valid HTTP request. For example: git://localhost:1234/%0d%0a%0d%0aGET%20/%20HTTP/1.1 %0d%0aHost:localhost%0d%0a%0d%0a becomes: 0050git-upload-pack / GET / HTTP/1.1 Host:localhost host=localhost:1234 on the wire. Again, this isn't a problem for a real Git server, but it does mean that feeding a malicious URL to Git (e.g., through a submodule) can cause it to make unexpected cross-protocol requests. Since repository names with newlines are presumably quite rare (and indeed, we already disallow them in git-over-http), let's just disallow them over this protocol. Hostnames could likewise inject a newline, but this is unlikely a problem in practice; we'd try resolving the hostname with a newline in it, which wouldn't work. Still, it doesn't hurt to err on the side of caution there, since we would not expect them to work in the first place. The ssh and local code paths are unaffected by this patch. In both cases we're trying to run upload-pack via a shell, and will quote the newline so that it makes it intact. An attacker can point an ssh url at an arbitrary port, of course, but unless there's an actual ssh server there, we'd never get as far as sending our shell command anyway. We _could_ similarly restrict newlines in those protocols out of caution, but there seems little benefit to doing so. 
The new test here is run alongside the git-daemon tests, which cover the same protocol, but it shouldn't actually contact the daemon at all. In theory we could make the test more robust by setting up an actual repository with a newline in it (so that our clone would succeed if our new check didn't kick in). But a repo directory with newline in it is likely not portable across all filesystems. Likewise, we could check git-daemon's log that it was not contacted at all, but we do not currently record the log (and anyway, it would make the test racy with the daemon's log write). We'll just check the client-side stderr to make sure we hit the expected code path. Reported-by: Harold Kim <h.kim@flatt.tech> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 09:43:58 +00:00
if (strchr(target_host, '\n') || strchr(path, '\n'))
die(_("newline is forbidden in git:// hosts and repo paths"));
/*
* These underlying connection commands die() if they
* cannot connect.
*/
if (git_use_proxy(hostandport))
conn = git_proxy_connect(fd, hostandport);
else
conn = git_tcp_connect(fd, hostandport, flags);
/*
* Separate original protocol components prog and path
* from extended host header with a NUL byte.
*
* Note: Do not add any other headers here! Doing so
* will cause older git-daemon servers to crash.
*/
strbuf_addf(&request,
"%s %s%chost=%s%c",
prog, path, 0,
target_host, 0);
/* If using a new version put that stuff here after a second null byte */
if (version > 0) {
strbuf_addch(&request, '\0');
strbuf_addf(&request, "version=%d%c",
version, '\0');
}
packet_write(fd[1], request.buf, request.len);
free(target_host);
strbuf_release(&request);
return conn;
}
/*
 * Translate the connection parameters into command-line options (appended
 * to `args`) and environment entries (appended to `env`) understood by the
 * given ssh `variant`.
 *
 * VARIANT_AUTO must already have been resolved by the caller whenever an
 * address family or a port is requested; the 'simple' variant can express
 * neither of them, so we die() when asked to.
 */
static void push_ssh_options(struct strvec *args, struct strvec *env,
			     enum ssh_variant variant, const char *port,
			     enum protocol_version version, int flags)
{
	const char *family_opt = NULL;
	const char *port_opt = NULL;

	/* Only OpenSSH is asked to pass the protocol version to the remote. */
	if (version > 0 && variant == VARIANT_SSH) {
		strvec_push(args, "-o");
		strvec_push(args, "SendEnv=" GIT_PROTOCOL_ENVIRONMENT);
		strvec_pushf(env, GIT_PROTOCOL_ENVIRONMENT "=version=%d",
			     version);
	}

	/* IPv4 wins when both address-family flags are set. */
	if (flags & CONNECT_IPV4)
		family_opt = "-4";
	else if (flags & CONNECT_IPV6)
		family_opt = "-6";

	if (family_opt) {
		switch (variant) {
		case VARIANT_AUTO:
			BUG("VARIANT_AUTO passed to push_ssh_options");
		case VARIANT_SIMPLE:
			if (flags & CONNECT_IPV4)
				die(_("ssh variant 'simple' does not support -4"));
			else
				die(_("ssh variant 'simple' does not support -6"));
		case VARIANT_TORTOISEPLINK:
		case VARIANT_PUTTY:
		case VARIANT_PLINK:
		case VARIANT_SSH:
			strvec_push(args, family_opt);
			break;
		}
	}

	/* TortoisePlink must be told not to prompt interactively. */
	if (variant == VARIANT_TORTOISEPLINK)
		strvec_push(args, "-batch");

	if (!port)
		return;

	/* OpenSSH spells the port option "-p"; the PuTTY family uses "-P". */
	switch (variant) {
	case VARIANT_AUTO:
		BUG("VARIANT_AUTO passed to push_ssh_options");
	case VARIANT_SIMPLE:
		die(_("ssh variant 'simple' does not support setting port"));
	case VARIANT_SSH:
		port_opt = "-p";
		break;
	case VARIANT_TORTOISEPLINK:
	case VARIANT_PUTTY:
	case VARIANT_PLINK:
		port_opt = "-P";
		break;
	}
	if (port_opt)
		strvec_push(args, port_opt);
	strvec_push(args, port);
}
/*
 * Prepare a child_process for use by Git's SSH-tunneled transport.
 *
 * Appends to conn->args, in order: the ssh program, the options that
 * push_ssh_options() selects for the detected variant, and ssh_host.
 * Environment entries chosen by push_ssh_options() go to conn->env.
 * Dies if ssh_host looks like a command-line option.
 *
 * The program is whatever get_ssh_command() returns when that is
 * non-NULL; otherwise it is $GIT_SSH, falling back to "ssh", and
 * conn->use_shell is cleared.
 *
 * If determine_ssh_variant() leaves the variant as VARIANT_AUTO, the
 * program is probed: it is run with "-G", the options an OpenSSH-style
 * ssh would get, and the host, with no_stdin, no_stdout and no_stderr
 * set on the child. A zero result from run_command() selects
 * VARIANT_SSH; anything else selects VARIANT_SIMPLE.
 */
static void fill_ssh_args(struct child_process *conn, const char *ssh_host,
const char *port, enum protocol_version version,
int flags)
{
const char *ssh;
enum ssh_variant variant;
/* Refuse hosts that ssh could mistake for one of its own options. */
if (looks_like_command_line_option(ssh_host))
die(_("strange hostname '%s' blocked"), ssh_host);
ssh = get_ssh_command();
if (ssh) {
/* Second argument 1: have the variant detection treat ssh as a command line. */
variant = determine_ssh_variant(ssh, 1);
} else {
/*
* GIT_SSH is the no-shell version of
* GIT_SSH_COMMAND (and must remain so for
* historical compatibility).
*/
conn->use_shell = 0;
ssh = getenv("GIT_SSH");
if (!ssh)
ssh = "ssh";
/* Second argument 0: ssh is a single program name or path. */
variant = determine_ssh_variant(ssh, 0);
}
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
if (variant == VARIANT_AUTO) {
struct child_process detect = CHILD_PROCESS_INIT;
detect.use_shell = conn->use_shell;
detect.no_stdin = detect.no_stdout = detect.no_stderr = 1;
strvec_push(&detect.args, ssh);
strvec_push(&detect.args, "-G");
push_ssh_options(&detect.args, &detect.env,
VARIANT_SSH, port, version, flags);
strvec_push(&detect.args, ssh_host);
ssh: 'auto' variant to select between 'ssh' and 'simple' Android's "repo" tool is a tool for managing a large codebase consisting of multiple smaller repositories, similar to Git's submodule feature. Starting with Git 94b8ae5a (ssh: introduce a 'simple' ssh variant, 2017-10-16), users noticed that it stopped handling the port in ssh:// URLs. The cause: when it encounters ssh:// URLs, repo pre-connects to the server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses that connection. Before 94b8ae5a, the helper was assumed to support OpenSSH options for lack of a better guess and got passed a -p option to set the port. After that patch, it uses the new default of a simple helper that does not accept an option to set the port. The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid that. But users of old versions and of other similar GIT_SSH implementations would not get the benefit of that fix. So update the default to use OpenSSH options again, with a twist. As observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles OpenSSH options: common helpers such as travis-ci's dpl[*] are configured using GIT_SSH and do not accept OpenSSH options. So make the default a new variant "auto", with the following behavior: 1. First, check for a recognized basename, like today. 2. If the basename is not recognized, check whether $GIT_SSH supports OpenSSH options by running $GIT_SSH -G <options> <host> This returns status 0 and prints configuration in OpenSSH if it recognizes all <options> and returns status 255 if it encounters an unrecognized option. A wrapper script like exec ssh -- "$@" would fail with ssh: Could not resolve hostname -g: Name or service not known , correctly reflecting that it does not support OpenSSH options. The command is run with stdin, stdout, and stderr redirected to /dev/null so even a command that expects a terminal would exit immediately. 3. 
Based on the result from step (2), behave like "ssh" (if it succeeded) or "simple" (if it failed). This way, the default ssh variant for unrecognized commands can handle both the repo and dpl cases as intended. This autodetection has been running on Google workstations since 2017-10-23 with no reported negative effects. [*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215 Reported-by: William Yan <wyan@google.com> Improved-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-20 21:30:04 +00:00
variant = run_command(&detect) ? VARIANT_SIMPLE : VARIANT_SSH;
}
strvec_push(&conn->args, ssh);
push_ssh_options(&conn->args, &conn->env, variant, port, version,
flags);
strvec_push(&conn->args, ssh_host);
}
/*
* This returns the dummy child_process `no_fork` if the transport protocol
* does not need fork(2), or a struct child_process object if it does. Once
* done, finish the connection with finish_connect() with the value returned
* from this function (it is safe to call finish_connect() with NULL to
* support the former case).
*
* If it returns, the connect is successful; it just dies on errors (this
* will hopefully be changed in a libification effort, to return NULL when
* the connection failed).
*/
struct child_process *git_connect(int fd[2], const char *url,
git_connect(): fix corner cases in downgrading v2 to v0 There's code in git_connect() that checks whether we are doing a push with protocol_v2, and if so, drops us to protocol_v0 (since we know how to do v2 only for fetches). But it misses some corner cases: 1. it checks the "prog" variable, which is actually the path to receive-pack on the remote side. By default this is just "git-receive-pack", but it could be an arbitrary string (like "/path/to/git receive-pack", etc). We'd accidentally stay in v2 mode in this case. 2. besides "receive-pack" and "upload-pack", there's one other value we'd expect: "upload-archive" for handling "git archive --remote". Like receive-pack, this doesn't understand v2, and should use the v0 protocol. In practice, neither of these causes bugs in the real world so far. We do send a "we understand v2" probe to the server, but since no server implements v2 for anything but upload-pack, it's simply ignored. But this would eventually become a problem if we do implement v2 for those endpoints, as older clients would falsely claim to understand it, leading to a server response they can't parse. We can fix (1) by passing in both the program path and the "name" of the operation. I treat the name as a string here, because that's the pattern set in transport_connect(), which is one of our callers (we were simply throwing away the "name" value there before). We can fix (2) by allowing only known-v2 protocols ("upload-pack"), rather than blocking unknown ones ("receive-pack" and "upload-archive"). That will mean whoever eventually implements v2 push will have to adjust this list, but that's reasonable. We'll do the safe, conservative thing (sticking to v0) by default, and anybody working on v2 will quickly realize this spot needs to be updated. 
The new tests cover the receive-pack and upload-archive cases above, and re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that already worked before this patch, of course, but it would be an easy thing to break if we flipped the allow/block logic without also handling "name" separately). Here are a few miscellaneous implementation notes, since I had to do a little head-scratching to understand who calls what: - transport_connect() is called only for git-upload-archive. For non-http git remotes, that resolves to the virtual connect_git() function (which then calls git_connect(); confused yet?). So plumbing through "name" in connect_git() covers that. - for regular fetches and pushes, callers use higher-level functions like transport_fetch_refs(). For non-http git remotes, that means calling git_connect() under the hood via connect_setup(). And that uses the "for_push" flag to decide which name to use. - likewise, plumbing like fetch-pack and send-pack may call git_connect() directly; they each know which name to use. - for remote helpers (including http), we already have separate parameters for "name" and "exec" (another name for "prog"). In process_connect_service(), we feed the "name" to the helper via "connect" or "stateless-connect" directives. There's also a "servpath" option, which can be used to tell the helper about the "exec" path. But no helpers we implement support it! For http it would be useless anyway (no reasonable server implementation will allow you to send a shell command to run the server). In theory it would be useful for more obscure helpers like remote-ext, but even there it is not implemented. It's tempting to get rid of it simply to reduce confusion, but we have publicly documented it since it was added in fa8c097cc9 (Support remote helpers implementing smart transports, 2009-12-09), so it's possible some helper in the wild is using it. 
- So for v2, helpers (again, including http) are mainly used via stateless-connect, driven by the main program. But they do still need to decide whether to do a v2 probe. And so there's similar logic in remote-curl.c's discover_refs() that looks for "git-receive-pack". But it's not buggy in the same way. Since it doesn't support servpath, it is always dealing with a "service" string like "git-receive-pack". And since it doesn't support straight "connect", it can't be used for "upload-archive". So we could leave that spot alone. But I've updated it here to match the logic we're changing in connect_git(). That seems like the least confusing thing for somebody who has to touch both of these spots later (say, to add v2 push support). I didn't add a new test to make sure this doesn't break anything; we already have several tests (in t5551 and elsewhere) that make sure we are using v2 over http. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-17 19:08:51 +00:00
const char *name,
const char *prog, int flags)
{
char *hostandport, *path;
struct child_process *conn;
enum protocol protocol;
enum protocol_version version = get_protocol_version_config();
/*
* NEEDSWORK: If we are trying to use protocol v2 and we are planning
git_connect(): fix corner cases in downgrading v2 to v0 There's code in git_connect() that checks whether we are doing a push with protocol_v2, and if so, drops us to protocol_v0 (since we know how to do v2 only for fetches). But it misses some corner cases: 1. it checks the "prog" variable, which is actually the path to receive-pack on the remote side. By default this is just "git-receive-pack", but it could be an arbitrary string (like "/path/to/git receive-pack", etc). We'd accidentally stay in v2 mode in this case. 2. besides "receive-pack" and "upload-pack", there's one other value we'd expect: "upload-archive" for handling "git archive --remote". Like receive-pack, this doesn't understand v2, and should use the v0 protocol. In practice, neither of these causes bugs in the real world so far. We do send a "we understand v2" probe to the server, but since no server implements v2 for anything but upload-pack, it's simply ignored. But this would eventually become a problem if we do implement v2 for those endpoints, as older clients would falsely claim to understand it, leading to a server response they can't parse. We can fix (1) by passing in both the program path and the "name" of the operation. I treat the name as a string here, because that's the pattern set in transport_connect(), which is one of our callers (we were simply throwing away the "name" value there before). We can fix (2) by allowing only known-v2 protocols ("upload-pack"), rather than blocking unknown ones ("receive-pack" and "upload-archive"). That will mean whoever eventually implements v2 push will have to adjust this list, but that's reasonable. We'll do the safe, conservative thing (sticking to v0) by default, and anybody working on v2 will quickly realize this spot needs to be updated. 
The new tests cover the receive-pack and upload-archive cases above, and re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that already worked before this patch, of course, but it would be an easy thing to break if we flipped the allow/block logic without also handling "name" separately). Here are a few miscellaneous implementation notes, since I had to do a little head-scratching to understand who calls what: - transport_connect() is called only for git-upload-archive. For non-http git remotes, that resolves to the virtual connect_git() function (which then calls git_connect(); confused yet?). So plumbing through "name" in connect_git() covers that. - for regular fetches and pushes, callers use higher-level functions like transport_fetch_refs(). For non-http git remotes, that means calling git_connect() under the hood via connect_setup(). And that uses the "for_push" flag to decide which name to use. - likewise, plumbing like fetch-pack and send-pack may call git_connect() directly; they each know which name to use. - for remote helpers (including http), we already have separate parameters for "name" and "exec" (another name for "prog"). In process_connect_service(), we feed the "name" to the helper via "connect" or "stateless-connect" directives. There's also a "servpath" option, which can be used to tell the helper about the "exec" path. But no helpers we implement support it! For http it would be useless anyway (no reasonable server implementation will allow you to send a shell command to run the server). In theory it would be useful for more obscure helpers like remote-ext, but even there it is not implemented. It's tempting to get rid of it simply to reduce confusion, but we have publicly documented it since it was added in fa8c097cc9 (Support remote helpers implementing smart transports, 2009-12-09), so it's possible some helper in the wild is using it. 
- So for v2, helpers (again, including http) are mainly used via stateless-connect, driven by the main program. But they do still need to decide whether to do a v2 probe. And so there's similar logic in remote-curl.c's discover_refs() that looks for "git-receive-pack". But it's not buggy in the same way. Since it doesn't support servpath, it is always dealing with a "service" string like "git-receive-pack". And since it doesn't support straight "connect", it can't be used for "upload-archive". So we could leave that spot alone. But I've updated it here to match the logic we're changing in connect_git(). That seems like the least confusing thing for somebody who has to touch both of these spots later (say, to add v2 push support). I didn't add a new test to make sure this doesn't break anything; we already have several tests (in t5551 and elsewhere) that make sure we are using v2 over http. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-17 19:08:51 +00:00
* to perform any operation that doesn't involve upload-pack (i.e., a
* fetch, ls-remote, etc), then fallback to v0 since we don't know how
* to do anything else (like push or remote archive) via v2.
*/
git_connect(): fix corner cases in downgrading v2 to v0 There's code in git_connect() that checks whether we are doing a push with protocol_v2, and if so, drops us to protocol_v0 (since we know how to do v2 only for fetches). But it misses some corner cases: 1. it checks the "prog" variable, which is actually the path to receive-pack on the remote side. By default this is just "git-receive-pack", but it could be an arbitrary string (like "/path/to/git receive-pack", etc). We'd accidentally stay in v2 mode in this case. 2. besides "receive-pack" and "upload-pack", there's one other value we'd expect: "upload-archive" for handling "git archive --remote". Like receive-pack, this doesn't understand v2, and should use the v0 protocol. In practice, neither of these causes bugs in the real world so far. We do send a "we understand v2" probe to the server, but since no server implements v2 for anything but upload-pack, it's simply ignored. But this would eventually become a problem if we do implement v2 for those endpoints, as older clients would falsely claim to understand it, leading to a server response they can't parse. We can fix (1) by passing in both the program path and the "name" of the operation. I treat the name as a string here, because that's the pattern set in transport_connect(), which is one of our callers (we were simply throwing away the "name" value there before). We can fix (2) by allowing only known-v2 protocols ("upload-pack"), rather than blocking unknown ones ("receive-pack" and "upload-archive"). That will mean whoever eventually implements v2 push will have to adjust this list, but that's reasonable. We'll do the safe, conservative thing (sticking to v0) by default, and anybody working on v2 will quickly realize this spot needs to be updated. 
The new tests cover the receive-pack and upload-archive cases above, and re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that already worked before this patch, of course, but it would be an easy thing to break if we flipped the allow/block logic without also handling "name" separately). Here are a few miscellaneous implementation notes, since I had to do a little head-scratching to understand who calls what: - transport_connect() is called only for git-upload-archive. For non-http git remotes, that resolves to the virtual connect_git() function (which then calls git_connect(); confused yet?). So plumbing through "name" in connect_git() covers that. - for regular fetches and pushes, callers use higher-level functions like transport_fetch_refs(). For non-http git remotes, that means calling git_connect() under the hood via connect_setup(). And that uses the "for_push" flag to decide which name to use. - likewise, plumbing like fetch-pack and send-pack may call git_connect() directly; they each know which name to use. - for remote helpers (including http), we already have separate parameters for "name" and "exec" (another name for "prog"). In process_connect_service(), we feed the "name" to the helper via "connect" or "stateless-connect" directives. There's also a "servpath" option, which can be used to tell the helper about the "exec" path. But no helpers we implement support it! For http it would be useless anyway (no reasonable server implementation will allow you to send a shell command to run the server). In theory it would be useful for more obscure helpers like remote-ext, but even there it is not implemented. It's tempting to get rid of it simply to reduce confusion, but we have publicly documented it since it was added in fa8c097cc9 (Support remote helpers implementing smart transports, 2009-12-09), so it's possible some helper in the wild is using it. 
- So for v2, helpers (again, including http) are mainly used via stateless-connect, driven by the main program. But they do still need to decide whether to do a v2 probe. And so there's similar logic in remote-curl.c's discover_refs() that looks for "git-receive-pack". But it's not buggy in the same way. Since it doesn't support servpath, it is always dealing with a "service" string like "git-receive-pack". And since it doesn't support straight "connect", it can't be used for "upload-archive". So we could leave that spot alone. But I've updated it here to match the logic we're changing in connect_git(). That seems like the least confusing thing for somebody who has to touch both of these spots later (say, to add v2 push support). I didn't add a new test to make sure this doesn't break anything; we already have several tests (in t5551 and elsewhere) that make sure we are using v2 over http. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-17 19:08:51 +00:00
if (version == protocol_v2 && strcmp("git-upload-pack", name))
version = protocol_v0;
/* Without this we cannot rely on waitpid() to tell
* what happened to our children.
*/
signal(SIGCHLD, SIG_DFL);
protocol = parse_connect_url(url, &hostandport, &path);
if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL");
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
conn = git_connect_git(fd, hostandport, path, prog, version, flags);
conn->trace2_child_class = "transport/git";
} else {
struct strbuf cmd = STRBUF_INIT;
connect: tell server that the client understands v1 Teach the connection logic to tell a server that it understands protocol v1. This is done in 2 different ways for the builtin transports, both of which ultimately set 'GIT_PROTOCOL' to 'version=1' on the server. 1. git:// A normal request to git-daemon is structured as "command path/to/repo\0host=..\0" and due to a bug introduced in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) we aren't able to place any extra arguments (separated by NULs) besides the host otherwise the parsing of those arguments would enter an infinite loop. This bug was fixed in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) but a check was put in place to disallow extra arguments so that new clients wouldn't trigger this bug in older servers. In order to get around this limitation git-daemon was taught to recognize additional request arguments hidden behind a second NUL byte. Requests can then be structured like: "command path/to/repo\0host=..\0\0version=1\0key=value\0". git-daemon can then parse out the extra arguments and set 'GIT_PROTOCOL' accordingly. By placing these extra arguments behind a second NUL byte we can skirt around both the infinite loop bug in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) as well as the explicit disallowing of extra arguments introduced in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) because both of these versions of git-daemon check for a single NUL byte after the host argument before terminating the argument parsing. 2. ssh://, file:// Set 'GIT_PROTOCOL' environment variable with the desired protocol version. With the file:// transport, 'GIT_PROTOCOL' can be set explicitly in the locally running git-upload-pack or git-receive-pack processes. 
With the ssh:// transport and OpenSSH compliant ssh programs, 'GIT_PROTOCOL' can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and having the server whitelist this environment variable. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:28 +00:00
const char *const *var;
conn = xmalloc(sizeof(*conn));
child_process_init(conn);
if (looks_like_command_line_option(path))
die(_("strange pathname '%s' blocked"), path);
strbuf_addstr(&cmd, prog);
strbuf_addch(&cmd, ' ');
sq_quote_buf(&cmd, path);
/* remove repo-local variables from the environment */
connect: tell server that the client understands v1 Teach the connection logic to tell a server that it understands protocol v1. This is done in 2 different ways for the builtin transports, both of which ultimately set 'GIT_PROTOCOL' to 'version=1' on the server. 1. git:// A normal request to git-daemon is structured as "command path/to/repo\0host=..\0" and due to a bug introduced in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) we aren't able to place any extra arguments (separated by NULs) besides the host otherwise the parsing of those arguments would enter an infinite loop. This bug was fixed in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) but a check was put in place to disallow extra arguments so that new clients wouldn't trigger this bug in older servers. In order to get around this limitation git-daemon was taught to recognize additional request arguments hidden behind a second NUL byte. Requests can then be structured like: "command path/to/repo\0host=..\0\0version=1\0key=value\0". git-daemon can then parse out the extra arguments and set 'GIT_PROTOCOL' accordingly. By placing these extra arguments behind a second NUL byte we can skirt around both the infinite loop bug in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) as well as the explicit disallowing of extra arguments introduced in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) because both of these versions of git-daemon check for a single NUL byte after the host argument before terminating the argument parsing. 2. ssh://, file:// Set 'GIT_PROTOCOL' environment variable with the desired protocol version. With the file:// transport, 'GIT_PROTOCOL' can be set explicitly in the locally running git-upload-pack or git-receive-pack processes. 
With the ssh:// transport and OpenSSH compliant ssh programs, 'GIT_PROTOCOL' can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and having the server whitelist this environment variable. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:28 +00:00
for (var = local_repo_env; *var; var++)
strvec_push(&conn->env, *var);
connect: tell server that the client understands v1 Teach the connection logic to tell a server that it understands protocol v1. This is done in 2 different ways for the builtin transports, both of which ultimately set 'GIT_PROTOCOL' to 'version=1' on the server. 1. git:// A normal request to git-daemon is structured as "command path/to/repo\0host=..\0" and due to a bug introduced in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) we aren't able to place any extra arguments (separated by NULs) besides the host otherwise the parsing of those arguments would enter an infinite loop. This bug was fixed in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) but a check was put in place to disallow extra arguments so that new clients wouldn't trigger this bug in older servers. In order to get around this limitation git-daemon was taught to recognize additional request arguments hidden behind a second NUL byte. Requests can then be structured like: "command path/to/repo\0host=..\0\0version=1\0key=value\0". git-daemon can then parse out the extra arguments and set 'GIT_PROTOCOL' accordingly. By placing these extra arguments behind a second NUL byte we can skirt around both the infinite loop bug in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) as well as the explicit disallowing of extra arguments introduced in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) because both of these versions of git-daemon check for a single NUL byte after the host argument before terminating the argument parsing. 2. ssh://, file:// Set 'GIT_PROTOCOL' environment variable with the desired protocol version. With the file:// transport, 'GIT_PROTOCOL' can be set explicitly in the locally running git-upload-pack or git-receive-pack processes. 
With the ssh:// transport and OpenSSH compliant ssh programs, 'GIT_PROTOCOL' can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and having the server whitelist this environment variable. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:28 +00:00
conn->use_shell = 1;
conn->in = conn->out = -1;
if (protocol == PROTO_SSH) {
char *ssh_host = hostandport;
const char *port = NULL;
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows. If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-16 17:12:52 +00:00
transport_check_allowed("ssh");
get_host_and_port(&ssh_host, &port);
if (!port)
port = get_port(ssh_host);
if (flags & CONNECT_DIAG_URL) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL");
printf("Diag: port=%s\n", port ? port : "NONE");
printf("Diag: path=%s\n", path ? path : "NULL");
free(hostandport);
free(path);
free(conn);
strbuf_release(&cmd);
return NULL;
}
conn->trace2_child_class = "transport/ssh";
fill_ssh_args(conn, ssh_host, port, version, flags);
} else {
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows. If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-16 17:12:52 +00:00
transport_check_allowed("file");
conn->trace2_child_class = "transport/file";
if (version > 0) {
strvec_pushf(&conn->env,
GIT_PROTOCOL_ENVIRONMENT "=version=%d",
version);
connect: tell server that the client understands v1 Teach the connection logic to tell a server that it understands protocol v1. This is done in 2 different ways for the builtin transports, both of which ultimately set 'GIT_PROTOCOL' to 'version=1' on the server. 1. git:// A normal request to git-daemon is structured as "command path/to/repo\0host=..\0" and due to a bug introduced in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) we aren't able to place any extra arguments (separated by NULs) besides the host otherwise the parsing of those arguments would enter an infinite loop. This bug was fixed in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) but a check was put in place to disallow extra arguments so that new clients wouldn't trigger this bug in older servers. In order to get around this limitation git-daemon was taught to recognize additional request arguments hidden behind a second NUL byte. Requests can then be structured like: "command path/to/repo\0host=..\0\0version=1\0key=value\0". git-daemon can then parse out the extra arguments and set 'GIT_PROTOCOL' accordingly. By placing these extra arguments behind a second NUL byte we can skirt around both the infinite loop bug in 49ba83fb6 (Add virtualization support to git-daemon, 2006-09-19) as well as the explicit disallowing of extra arguments introduced in 73bb33a94 (daemon: Strictly parse the "extra arg" part of the command, 2009-06-04) because both of these versions of git-daemon check for a single NUL byte after the host argument before terminating the argument parsing. 2. ssh://, file:// Set 'GIT_PROTOCOL' environment variable with the desired protocol version. With the file:// transport, 'GIT_PROTOCOL' can be set explicitly in the locally running git-upload-pack or git-receive-pack processes. 
With the ssh:// transport and OpenSSH compliant ssh programs, 'GIT_PROTOCOL' can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and having the server whitelist this environment variable. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 17:55:28 +00:00
}
}
strvec_push(&conn->args, cmd.buf);
if (start_command(conn))
die(_("unable to fork"));
fd[0] = conn->out; /* read from child's stdout */
fd[1] = conn->in; /* write to child's stdin */
strbuf_release(&cmd);
}
free(hostandport);
free(path);
return conn;
}
/*
 * Finish the connection described by `conn`.
 *
 * Nothing is done, and 0 is returned, when `conn` is NULL or when
 * git_connection_is_socket(conn) is true.  Otherwise finish_command()
 * is run on `conn`, the `conn` allocation is freed, and the value
 * finish_command() produced is returned to the caller.
 */
int finish_connect(struct child_process *conn)
{
	int status = 0;

	if (conn && !git_connection_is_socket(conn)) {
		status = finish_command(conn);
		/* conn must not be used by the caller after this point */
		free(conn);
	}

	return status;
}