From 1387fd4105b242fa2d24ad99d10a5b1af23f293e Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Wed, 7 Dec 2022 18:52:00 -0800 Subject: [PATCH] Validate SSH host keys --- Cargo.toml | 7 +- crates/cargo-test-support/Cargo.toml | 2 +- src/cargo/sources/git/known_hosts.rs | 439 +++++++++++++++++++++ src/cargo/sources/git/mod.rs | 1 + src/cargo/sources/git/utils.rs | 12 +- src/doc/src/appendix/git-authentication.md | 23 ++ 6 files changed, 479 insertions(+), 5 deletions(-) create mode 100644 src/cargo/sources/git/known_hosts.rs diff --git a/Cargo.toml b/Cargo.toml index 54b4f97bd..b3f55479d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ name = "cargo" path = "src/cargo/lib.rs" [dependencies] +base64 = "0.13.1" bytesize = "1.0" cargo-platform = { path = "crates/cargo-platform", version = "0.1.2" } cargo-util = { path = "crates/cargo-util", version = "0.2.3" } @@ -27,8 +28,8 @@ pretty_env_logger = { version = "0.4", optional = true } anyhow = "1.0.47" filetime = "0.2.9" flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] } -git2 = "0.15.0" -git2-curl = "0.16.0" +git2 = "0.16.0" +git2-curl = "0.17.0" glob = "0.3.0" hex = "0.4" home = "0.5" @@ -42,7 +43,7 @@ jobserver = "0.1.24" lazycell = "1.2.0" libc = "0.2" log = "0.4.6" -libgit2-sys = "0.14.0" +libgit2-sys = "0.14.1" memchr = "2.1.3" opener = "0.5" os_info = "3.5.0" diff --git a/crates/cargo-test-support/Cargo.toml b/crates/cargo-test-support/Cargo.toml index 65e0f7566..653c5db4d 100644 --- a/crates/cargo-test-support/Cargo.toml +++ b/crates/cargo-test-support/Cargo.toml @@ -17,7 +17,7 @@ filetime = "0.2" flate2 = { version = "1.0", default-features = false, features = ["zlib"] } pasetors = { version = "0.6.4", features = ["v3", "paserk", "std", "serde"] } time = { version = "0.3", features = ["parsing", "formatting"]} -git2 = "0.15.0" +git2 = "0.16.0" glob = "0.3" itertools = "0.10.0" lazy_static = "1.0" diff --git a/src/cargo/sources/git/known_hosts.rs b/src/cargo/sources/git/known_hosts.rs new file mode 100644 index 000000000..875dcf63f --- /dev/null +++ b/src/cargo/sources/git/known_hosts.rs @@ -0,0 +1,439 @@ +//! SSH host key validation support. +//! +//! A primary goal with this implementation is to provide user-friendly error +//! messages, guiding them to understand the issue and how to resolve it. +//! +//! Note that there are a lot of limitations here. This reads OpenSSH +//! known_hosts files from well-known locations, but it does not read OpenSSH +//! config files. The config file can change the behavior of how OpenSSH +//! handles known_hosts files. For example, some things we don't handle: +//! +//! - `GlobalKnownHostsFile` — Changes the location of the global host file. +//! - `UserKnownHostsFile` — Changes the location of the user's host file. +//! - `KnownHostsCommand` — A command to fetch known hosts. +//! - `CheckHostIP` — DNS spoofing checks. +//! - `VisualHostKey` — Shows a visual ascii-art key. +//! - `VerifyHostKeyDNS` — Uses SSHFP DNS records to fetch a host key. +//! +//! There's also a number of things that aren't supported but could be easily +//! added (it just adds a little complexity). For example, hashed hostnames, +//! hostname patterns, and revoked markers. See "FIXME" comments littered in +//! this file. + +use git2::cert::Cert; +use git2::CertificateCheckStatus; +use std::collections::HashSet; +use std::fmt::Write; +use std::path::{Path, PathBuf}; + +/// These are host keys that are hard-coded in cargo to provide convenience. +/// +/// If GitHub ever publishes new keys, the user can add them to their own +/// configuration file to use those instead. +/// +/// The GitHub keys are sourced from or +/// . +/// +/// These will be ignored if the user adds their own entries for `github.com`, +/// which can be useful if GitHub ever revokes their old keys. +static BUNDLED_KEYS: &[(&str, &str, &str)] = &[ + ("github.com", "ssh-ed25519", "AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl"), + ("github.com", "ecdsa-sha2-nistp256", "AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg="), + ("github.com", "ssh-rsa", "AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=="), +]; + +enum KnownHostError { + /// Some general error happened while validating the known hosts. + CheckError(anyhow::Error), + /// The host key was not found. + HostKeyNotFound { + hostname: String, + key_type: git2::cert::SshHostKeyType, + remote_host_key: String, + remote_fingerprint: String, + other_hosts: Vec, + }, + /// The host key was found, but does not match the remote's key. + HostKeyHasChanged { + hostname: String, + key_type: git2::cert::SshHostKeyType, + old_known_host: KnownHost, + remote_host_key: String, + remote_fingerprint: String, + }, +} + +impl From for KnownHostError { + fn from(err: anyhow::Error) -> KnownHostError { + KnownHostError::CheckError(err.into()) + } +} + +/// The location where a host key was located. +#[derive(Clone)] +enum KnownHostLocation { + /// Loaded from a file from disk. + File { path: PathBuf, lineno: u32 }, + /// Part of the hard-coded bundled keys in Cargo. + Bundled, +} + +/// The git2 callback used to validate a certificate (only ssh known hosts are validated). +pub fn certificate_check( + cert: &Cert<'_>, + host: &str, + port: Option, +) -> Result { + let Some(host_key) = cert.as_hostkey() else { + // Return passthrough for TLS X509 certificates to use whatever validation + // was done in git2. + return Ok(CertificateCheckStatus::CertificatePassthrough) + }; + // If a nonstandard port is in use, check for that first. + // The fallback to check without a port is handled in the HostKeyNotFound handler. + let host_maybe_port = match port { + Some(port) if port != 22 => format!("[{host}]:{port}"), + _ => host.to_string(), + }; + // The error message must be constructed as a string to pass through the libgit2 C API. + let err_msg = match check_ssh_known_hosts(host_key, &host_maybe_port) { + Ok(()) => { + return Ok(CertificateCheckStatus::CertificateOk); + } + Err(KnownHostError::CheckError(e)) => { + format!("error: failed to validate host key:\n{:#}", e) + } + Err(KnownHostError::HostKeyNotFound { + hostname, + key_type, + remote_host_key, + remote_fingerprint, + other_hosts, + }) => { + // Try checking without the port. + if port.is_some() + && !matches!(port, Some(22)) + && check_ssh_known_hosts(host_key, host).is_ok() + { + return Ok(CertificateCheckStatus::CertificateOk); + } + let key_type_short_name = key_type.short_name(); + let key_type_name = key_type.name(); + let known_hosts_location = user_known_host_location_to_add(); + let other_hosts_message = if other_hosts.is_empty() { + String::new() + } else { + let mut msg = String::from( + "Note: This host key was found, \ + but is associated with a different host:\n", + ); + for known_host in other_hosts { + let loc = match known_host.location { + KnownHostLocation::File { path, lineno } => { + format!("{} line {lineno}", path.display()) + } + KnownHostLocation::Bundled => format!("bundled with cargo"), + }; + write!(msg, " {loc}: {}\n", known_host.patterns).unwrap(); + } + msg + }; + format!("error: unknown SSH host key\n\ + The SSH host key for `{hostname}` is not known and cannot be validated.\n\ + \n\ + To resolve this issue, add the host key to {known_hosts_location}\n\ + \n\ + The key to add is:\n\ + \n\ + {hostname} {key_type_name} {remote_host_key}\n\ + \n\ + The {key_type_short_name} key fingerprint is: SHA256:{remote_fingerprint}\n\ + This fingerprint should be validated with the server administrator that it is correct.\n\ + {other_hosts_message}\n\ + See https://doc.rust-lang.org/nightly/cargo/appendix/git-authentication.html#ssh-known-hosts \ + for more information.\n\ + ") + } + Err(KnownHostError::HostKeyHasChanged { + hostname, + key_type, + old_known_host, + remote_host_key, + remote_fingerprint, + }) => { + let key_type_short_name = key_type.short_name(); + let key_type_name = key_type.name(); + let known_hosts_location = user_known_host_location_to_add(); + let old_key_resolution = match old_known_host.location { + KnownHostLocation::File { path, lineno } => { + let old_key_location = path.display(); + format!( + "removing the old {key_type_name} key for `{hostname}` \ + located at {old_key_location} line {lineno}, \ + and adding the new key to {known_hosts_location}", + ) + } + KnownHostLocation::Bundled => { + format!( + "adding the new key to {known_hosts_location}\n\ + The current host key is bundled as part of Cargo." + ) + } + }; + format!("error: SSH host key has changed for `{hostname}`\n\ + *********************************\n\ + * WARNING: HOST KEY HAS CHANGED *\n\ + *********************************\n\ + This may be caused by a man-in-the-middle attack, or the \ + server may have changed its host key.\n\ + \n\ + The {key_type_short_name} fingerprint for the key from the remote host is:\n\ + SHA256:{remote_fingerprint}\n\ + \n\ + You are strongly encouraged to contact the server \ + administrator for `{hostname}` to verify that this new key is \ + correct.\n\ + \n\ + If you can verify that the server has a new key, you can \ + resolve this error by {old_key_resolution}\n\ + \n\ + The key provided by the remote host is:\n\ + \n\ + {hostname} {key_type_name} {remote_host_key}\n\ + \n\ + See https://doc.rust-lang.org/nightly/cargo/appendix/git-authentication.html#ssh-known-hosts \ + for more information.\n\ + ") + } + }; + Err(git2::Error::new( + git2::ErrorCode::GenericError, + git2::ErrorClass::Callback, + err_msg, + )) +} + +/// Checks if the given host/host key pair is known. +fn check_ssh_known_hosts( + cert_host_key: &git2::cert::CertHostkey<'_>, + host: &str, +) -> Result<(), KnownHostError> { + let Some(remote_host_key) = cert_host_key.hostkey() else { + return Err(anyhow::format_err!("remote host key is not available").into()); + }; + let remote_key_type = cert_host_key.hostkey_type().unwrap(); + // `changed_key` keeps track of any entries where the key has changed. + let mut changed_key = None; + // `other_hosts` keeps track of any entries that have an identical key, + // but a different hostname. + let mut other_hosts = Vec::new(); + + // Collect all the known host entries from disk. + let mut known_hosts = Vec::new(); + for path in known_host_files() { + if !path.exists() { + continue; + } + let hosts = load_hostfile(&path)?; + known_hosts.extend(hosts); + } + // Load the bundled keys. Don't add keys for hosts that the user has + // configured, which gives them the option to override them. This could be + // useful if the keys are ever revoked. + let configured_hosts: HashSet<_> = known_hosts + .iter() + .flat_map(|known_host| { + known_host + .patterns + .split(',') + .map(|pattern| pattern.to_lowercase()) + }) + .collect(); + for (patterns, key_type, key) in BUNDLED_KEYS { + if !configured_hosts.contains(*patterns) { + let key = base64::decode(key).unwrap(); + known_hosts.push(KnownHost { + location: KnownHostLocation::Bundled, + patterns: patterns.to_string(), + key_type: key_type.to_string(), + key, + }); + } + } + + for known_host in known_hosts { + // The key type from libgit2 needs to match the key type from the host file. + if known_host.key_type != remote_key_type.name() { + continue; + } + let key_matches = known_host.key == remote_host_key; + if !known_host.host_matches(host) { + // `name` can be None for hashed hostnames (which libgit2 does not expose). + if key_matches { + other_hosts.push(known_host.clone()); + } + continue; + } + if key_matches { + return Ok(()); + } + // The host and key type matched, but the key itself did not. + // This indicates the key has changed. + // This is only reported as an error if no subsequent lines have a + // correct key. + changed_key = Some(known_host.clone()); + } + // Older versions of OpenSSH (before 6.8, March 2015) showed MD5 + // fingerprints (see FingerprintHash ssh config option). Here we only + // support SHA256. + let mut remote_fingerprint = cargo_util::Sha256::new(); + remote_fingerprint.update(remote_host_key); + let remote_fingerprint = + base64::encode_config(remote_fingerprint.finish(), base64::STANDARD_NO_PAD); + let remote_host_key = base64::encode(remote_host_key); + // FIXME: Ideally the error message should include the IP address of the + // remote host (to help the user validate that they are connecting to the + // host they were expecting to). However, I don't see a way to obtain that + // information from libgit2. + match changed_key { + Some(old_known_host) => Err(KnownHostError::HostKeyHasChanged { + hostname: host.to_string(), + key_type: remote_key_type, + old_known_host, + remote_host_key, + remote_fingerprint, + }), + None => Err(KnownHostError::HostKeyNotFound { + hostname: host.to_string(), + key_type: remote_key_type, + remote_host_key, + remote_fingerprint, + other_hosts, + }), + } +} + +/// Returns a list of files to try loading OpenSSH-formatted known hosts. +fn known_host_files() -> Vec { + let mut result = Vec::new(); + if cfg!(unix) { + result.push(PathBuf::from("/etc/ssh/ssh_known_hosts")); + } else if cfg!(windows) { + // The msys/cygwin version of OpenSSH uses `/etc` from the posix root + // filesystem there (such as `C:\msys64\etc\ssh\ssh_known_hosts`). + // However, I do not know of a way to obtain that location from + // Windows-land. The ProgramData version here is what the PowerShell + // port of OpenSSH does. + if let Some(progdata) = std::env::var_os("ProgramData") { + let mut progdata = PathBuf::from(progdata); + progdata.push("ssh"); + progdata.push("ssh_known_hosts"); + result.push(progdata) + } + } + result.extend(user_known_host_location()); + result +} + +/// The location of the user's known_hosts file. +fn user_known_host_location() -> Option { + // NOTE: This is a potentially inaccurate prediction of what the user + // actually wants. The actual location depends on several factors: + // + // - Windows OpenSSH Powershell version: I believe this looks up the home + // directory via ProfileImagePath in the registry, falling back to + // `GetWindowsDirectoryW` if that fails. + // - OpenSSH Portable (under msys): This is very complicated. I got lost + // after following it through some ldap/active directory stuff. + // - OpenSSH (most unix platforms): Uses `pw->pw_dir` from `getpwuid()`. + // + // This doesn't do anything close to that. home_dir's behavior is: + // - Windows: $USERPROFILE, or SHGetFolderPathW() + // - Unix: $HOME, or getpwuid_r() + // + // Since there is a mismatch here, the location returned here might be + // different than what the user's `ssh` CLI command uses. We may want to + // consider trying to align it better. + home::home_dir().map(|mut home| { + home.push(".ssh"); + home.push("known_hosts"); + home + }) +} + +/// The location to display in an error message instructing the user where to +/// add the new key. +fn user_known_host_location_to_add() -> String { + // Note that we don't bother with the legacy known_hosts2 files. + match user_known_host_location() { + Some(path) => path.to_str().expect("utf-8 home").to_string(), + None => "~/.ssh/known_hosts".to_string(), + } +} + +/// A single known host entry. +#[derive(Clone)] +struct KnownHost { + location: KnownHostLocation, + /// The hostname. May be comma separated to match multiple hosts. + patterns: String, + key_type: String, + key: Vec, +} + +impl KnownHost { + /// Returns whether or not the given host matches this known host entry. + fn host_matches(&self, host: &str) -> bool { + let mut match_found = false; + let host = host.to_lowercase(); + // FIXME: support hashed hostnames + for pattern in self.patterns.split(',') { + let pattern = pattern.to_lowercase(); + // FIXME: support * and ? wildcards + if let Some(pattern) = pattern.strip_prefix('!') { + if pattern == host { + return false; + } + } else { + match_found = pattern == host; + } + } + match_found + } +} + +/// Loads an OpenSSH known_hosts file. +fn load_hostfile(path: &Path) -> Result, anyhow::Error> { + let contents = cargo_util::paths::read(path)?; + let entries = contents + .lines() + .enumerate() + .filter_map(|(lineno, line)| { + let location = KnownHostLocation::File { + path: path.to_path_buf(), + lineno: lineno as u32 + 1, + }; + parse_known_hosts_line(line, location) + }) + .collect(); + Ok(entries) +} + +fn parse_known_hosts_line(line: &str, location: KnownHostLocation) -> Option { + let line = line.trim(); + // FIXME: @revoked and @cert-authority is currently not supported. + if line.is_empty() || line.starts_with('#') || line.starts_with('@') { + return None; + } + let mut parts = line.split([' ', '\t']).filter(|s| !s.is_empty()); + let Some(patterns) = parts.next() else { return None }; + let Some(key_type) = parts.next() else { return None }; + let Some(key) = parts.next() else { return None }; + let Ok(key) = base64::decode(key) else { return None }; + Some(KnownHost { + location, + patterns: patterns.to_string(), + key_type: key_type.to_string(), + key, + }) +} diff --git a/src/cargo/sources/git/mod.rs b/src/cargo/sources/git/mod.rs index b32dbb17b..225bad167 100644 --- a/src/cargo/sources/git/mod.rs +++ b/src/cargo/sources/git/mod.rs @@ -1,4 +1,5 @@ pub use self::source::GitSource; pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote}; +mod known_hosts; mod source; mod utils; diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 623caceb8..831c43be6 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -683,7 +683,6 @@ where | ErrorClass::Submodule | ErrorClass::FetchHead | ErrorClass::Ssh - | ErrorClass::Callback | ErrorClass::Http => { let mut msg = "network failure seems to have happened\n".to_string(); msg.push_str( @@ -694,6 +693,13 @@ where ); err = err.context(msg); } + ErrorClass::Callback => { + // This unwraps the git2 error. We're using the callback error + // specifically to convey errors from Rust land through the C + // callback interface. We don't need the `; class=Callback + // (26)` that gets tacked on to the git2 error message. + err = anyhow::format_err!("{}", e.message()); + } _ => {} } } @@ -722,12 +728,16 @@ pub fn with_fetch_options( let mut progress = Progress::new("Fetch", config); network::with_retry(config, || { with_authentication(url, git_config, |f| { + let port = Url::parse(url).ok().and_then(|url| url.port()); let mut last_update = Instant::now(); let mut rcb = git2::RemoteCallbacks::new(); // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms` // sliding window for tracking the data transfer rate (in bytes/s). let mut counter = MetricsCounter::<10>::new(0, last_update); rcb.credentials(f); + rcb.certificate_check(|cert, host| { + super::known_hosts::certificate_check(cert, host, port) + }); rcb.transfer_progress(|stats| { let indexed_deltas = stats.indexed_deltas(); let msg = if indexed_deltas > 0 { diff --git a/src/doc/src/appendix/git-authentication.md b/src/doc/src/appendix/git-authentication.md index 7fe1f149a..a7db1ac7f 100644 --- a/src/doc/src/appendix/git-authentication.md +++ b/src/doc/src/appendix/git-authentication.md @@ -58,9 +58,32 @@ on how to start `ssh-agent` and to add keys. > used by Cargo's built-in SSH library. More advanced requirements should use > [`net.git-fetch-with-cli`]. +### SSH Known Hosts + +When connecting to an SSH host, Cargo must verify the identity of the host +using "known hosts", which are a list of host keys. Cargo can look for these +known hosts in OpenSSH-style `known_hosts` files located in their standard +locations (`.ssh/known_hosts` in your home directory, or +`/etc/ssh/ssh_known_hosts` on Unix-like platforms or +`%PROGRAMDATA%\ssh\ssh_known_hosts` on Windows). More information about these +files can be found in the [sshd man page]. + +When connecting to an SSH host before the known hosts has been configured, +Cargo will display an error message instructing you how to add the host key. +This also includes a "fingerprint", which is a smaller hash of the host key, +which should be easier to visually verify. The server administrator can get +the fingerprint by running `ssh-keygen` against the public key (for example, +`ssh-keygen -l -f /etc/ssh/ssh_host_ecdsa_key.pub`). Well-known sites may +publish their fingerprints on the web; for example GitHub posts theirs at +. + +Cargo comes with the host keys for [github.com](https://github.com) built-in. +If those ever change, you can add the new keys to your known_hosts file. + [`credential.helper`]: https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage [`net.git-fetch-with-cli`]: ../reference/config.md#netgit-fetch-with-cli [GCM]: https://github.com/microsoft/Git-Credential-Manager-Core/ [PuTTY]: https://www.chiark.greenend.org.uk/~sgtatham/putty/ [Microsoft installation documentation]: https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_install_firstuse [key management]: https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_keymanagement +[sshd man page]: https://man.openbsd.org/sshd#SSH_KNOWN_HOSTS_FILE_FORMAT