From 44eb16d3b13fb3916492f7e6a5d157101e13f39f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 13 Jul 2015 12:14:05 -0700 Subject: [PATCH] Backtrack more aggressively when resolving Resolving a dependency graph may involve quite a bit of backtracking to select different versions of crates, but the previous implementation of resolution would not backtrack enough. Once a dependency is completely resolved it's possible that any number of candidates for its transitive dependencies were left out of the resolution process (e.g. we didn't hit them yet). These candidates were not previously used for backtracking (because resolution overall of the dependency finished), but this commit alters the implementation to instead consider these as candidates for backtracking. Architecturally this changes the code to CPS to pass around a `finished` continuation to allow tweaking the behavior whenever a dependency successfully resolves. The key change is then that whenever a candidate for a dependency is activated, we ensure the recursive step for the rest of the graph happens as a sub-call. This means that if *anything* in the recursive call fails (including some previous up-the-stack resolution) we'll retry the next candidate. Closes #1800 --- src/cargo/core/resolver/mod.rs | 485 +++++++++++++++++++++------------ tests/resolve.rs | 20 ++ 2 files changed, 337 insertions(+), 168 deletions(-) diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 7b04e93ae..2f884bccf 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -1,8 +1,103 @@ +//! Resolution of the entire dependency graph for a crate +//! +//! This module implements the core logic in taking the world of crates and +//! constraints and creating a resolved graph with locked versions for all +//! crates and their dependencies. This is separate from the registry module +//! which is more worried about discovering crates from various sources, this +//! module just uses the Registry trait as a source to learn about crates from. +//! +//! Actually solving a constraint graph is an NP-hard (or NP-complete, I forget +//! which) problem, this the algorithm is basically a nice heuristic to make +//! sure we get roughly the best answer most of the time. The constraints that +//! we're working with are: +//! +//! 1. Each crate can have any number of dependencies. Each dependency can +//! declare a version range that it is compatible with. +//! 2. Crates can be activated with multiple version (e.g. show up in the +//! dependency graph twice) so long as each pairwise instance have +//! semver-incompatible versions. +//! +//! The algorithm employed here is fairly simple, we simply do a DFS, activating +//! the "newest crate" (highest version) first and then going to the next +//! option. The heuristics we employ are: +//! +//! * Never try to activate a crate version which is incompatible. This means we +//! only try crates which will actually satisfy a dependency and we won't ever +//! try to activate a crate that's semver compatible with something else +//! activatd (as we're only allowed to have one). +//! * Always try to activate the highest version crate first. The default +//! dependency in Cargo (e.g. when you write `foo = "0.1.2"`) is +//! semver-compatible, so selecting the highest version possible will allow us +//! to hopefully satisfy as many dependencies at once. +//! +//! Beyond that, what's implemented below is just a naive backtracking version +//! which should in theory try all possible combinations of dependencies and +//! versions to see if one works. The first resolution that works causes +//! everything to bail out immediately and return success, and only if *nothing* +//! works do we actually return an error up the stack. +//! +//! ## Performance +//! +//! Note that this is a relatively performance-critical portion of Cargo. The +//! data that we're processing is proportional to the size of the dependency +//! graph, which can often be quite large (e.g. take a look at Servo). To make +//! matters worse the DFS algorithm we're implemented is inherently quite +//! inefficient and recursive. When we add the requirement of backtracking on +//! top it means that we're implementing something that's very recursive and +//! probably shouldn't be allocating all over the place. +//! +//! Once we've avoided too many unnecessary allocations, however (e.g. using +//! references, using reference counting, etc), it turns out that the +//! performance in this module largely comes down to stack sizes due to the +//! recursive nature of the implementation. +//! +//! ### Small Stack Sizes (e.g. y u inline(never)) +//! +//! One of the most important optimizations in this module right now is the +//! attempt to minimize the stack space taken up by the `activate` and +//! `activate_deps` functions. These two functions are mutually recursive in a +//! CPS fashion. +//! +//! The recursion depth, if I'm getting this right, is something along the order +//! of O(E) where E is the number of edges in the dependency graph, and that's +//! on the order of O(N^2) where N is the number of crates in the graph. As a +//! result we need to watch our stack size! +//! +//! Currently rustc is not great at producing small stacks because of landing +//! pads and filling drop, so the first attempt at making small stacks is having +//! literally small functions with very few owned values on the stack. This is +//! also why there are many #[inline(never)] annotations in this module. By +//! preventing these functions from being inlined we can make sure that these +//! stack sizes stay small as the number of locals are under control. +//! +//! Another hazard when watching out for small stacks is passing around large +//! structures by value. For example the `Context` below is a relatively large +//! struct, so we always place it behind a `Box` to ensure the size at runtime +//! is just a word (e.g. very easy to pass around). +//! +//! Combined together these tricks (plus a very recent version of LLVM) allow us +//! to have a relatively small stack footprint for this implementation. Possible +//! future optimizations include: +//! +//! * Turn off landing pads for all of Cargo +//! * Wait for dynamic drop +//! * Use a manual stack instead of the OS stack (I suspect this will be super +//! painful to implement) +//! * Spawn a new thread with a very large stack (this is what the compiler +//! does) +//! * Implement a form of segmented stacks where we manually check the stack +//! limit every so often. +//! +//! For now the current implementation of this module gets us past Servo's +//! dependency graph (one of the largest known ones), so hopefully it'll work +//! for a bit longer as well! + use std::cell::RefCell; use std::collections::HashSet; use std::collections::hash_map::HashMap; use std::fmt; use std::rc::Rc; +use std::slice; use semver; use core::{PackageId, Registry, SourceId, Summary, Dependency}; @@ -32,12 +127,24 @@ pub struct Resolve { #[derive(Clone, Copy)] pub enum Method<'a> { Everything, - Required{ dev_deps: bool, - features: &'a [String], - uses_default_features: bool, - target_platform: Option<&'a str>}, + Required { + dev_deps: bool, + features: &'a [String], + uses_default_features: bool, + target_platform: Option<&'a str>, + }, } +// Err(..) == standard transient error (e.g. I/O error) +// Ok(Err(..)) == resolve error, but is human readable +// Ok(Ok(..)) == success in resolving +type ResolveResult = CargoResult>>; + +// Information about the dependencies for a crate, a tuple of: +// +// (dependency info, candidates, features activated) +type DepInfo<'a> = (&'a Dependency, Vec>, Vec); + impl Resolve { fn new(root: PackageId) -> Resolve { let mut g = Graph::new(); @@ -140,7 +247,7 @@ pub fn resolve(summary: &Summary, method: Method, visited: Rc::new(RefCell::new(HashSet::new())), }); let _p = profile::start(format!("resolving: {}", summary.package_id())); - match try!(activate(cx, registry, &summary, method)) { + match try!(activate(cx, registry, &summary, method, &mut |cx, _| Ok(Ok(cx)))) { Ok(cx) => { debug!("resolved: {:?}", cx.resolve); Ok(cx.resolve) @@ -149,11 +256,18 @@ pub fn resolve(summary: &Summary, method: Method, } } +/// Attempts to activate the summary `parent` in the context `cx`. +/// +/// This function will pull dependency summaries from the registry provided, and +/// the dependencies of the package will be determined by the `method` provided. +/// Once the resolution of this package has finished **entirely**, the current +/// context will be passed to the `finished` callback provided. fn activate(mut cx: Box, registry: &mut Registry, parent: &Rc, - method: Method) - -> CargoResult>> { + method: Method, + finished: &mut FnMut(Box, &mut Registry) -> ResolveResult) + -> ResolveResult { // Dependency graphs are required to be a DAG, so we keep a set of // packages we're visiting and bail if we hit a dupe. let id = parent.package_id(); @@ -163,110 +277,65 @@ fn activate(mut cx: Box, } // If we're already activated, then that was easy! - if flag_activated(&mut *cx, parent, &method) { + if cx.flag_activated(parent, &method) { cx.visited.borrow_mut().remove(id); - return Ok(Ok(cx)) + return finished(cx, registry) } debug!("activating {}", parent.package_id()); + let deps = try!(cx.build_deps(registry, parent, method)); + // Extracting the platform request. let platform = match method { - Method::Required{target_platform: platform, ..} => platform, + Method::Required { target_platform, .. } => target_platform, Method::Everything => None, }; - // First, figure out our set of dependencies based on the requsted set of - // features. This also calculates what features we're going to enable for - // our own dependencies. - let deps = try!(resolve_features(&mut cx, parent, method)); - - // Next, transform all dependencies into a list of possible candidates which - // can satisfy that dependency. - let mut deps = try!(deps.into_iter().map(|(dep, features)| { - let mut candidates = try!(registry.query(dep)); - // When we attempt versions for a package, we'll want to start at the - // maximum version and work our way down. - candidates.sort_by(|a, b| { - b.version().cmp(a.version()) - }); - let candidates = candidates.into_iter().map(Rc::new).collect::>(); - Ok((dep, candidates, features)) - }).collect::>>()); - - // When we recurse, attempt to resolve dependencies with fewer candidates - // before recursing on dependencies with more candidates. This way if the - // dependency with only one candidate can't be resolved we don't have to do - // a bunch of work before we figure that out. - deps.sort_by(|&(_, ref a, _), &(_, ref b, _)| { - a.len().cmp(&b.len()) - }); - - // Workaround compilation error: `deps` does not live long enough - let platform = platform.map(|s| &*s); - - Ok(match try!(activate_deps(cx, registry, parent, platform, &deps, 0)) { - Ok(cx) => { - cx.visited.borrow_mut().remove(parent.package_id()); - Ok(cx) - } - Err(e) => Err(e), + activate_deps(cx, registry, parent, platform, deps.iter(), 0, + &mut |cx, registry| { + cx.visited.borrow_mut().remove(parent.package_id()); + finished(cx, registry) }) } -// Activate this summary by inserting it into our list of known activations. -// -// Returns if this summary with the given method is already activated. -fn flag_activated(cx: &mut Context, - summary: &Rc, - method: &Method) -> bool { - let id = summary.package_id(); - let key = (id.name().to_string(), id.source_id().clone()); - let prev = cx.activations.entry(key).or_insert(Vec::new()); - if !prev.iter().any(|c| c == summary) { - cx.resolve.graph.add(id.clone(), &[]); - prev.push(summary.clone()); - return false - } - debug!("checking if {} is already activated", summary.package_id()); - let (features, use_default) = match *method { - Method::Required { features, uses_default_features, .. } => { - (features, uses_default_features) - } - Method::Everything => return false, - }; - - let has_default_feature = summary.features().contains_key("default"); - match cx.resolve.features(id) { - Some(prev) => { - features.iter().all(|f| prev.contains(f)) && - (!use_default || prev.contains("default") || !has_default_feature) - } - None => features.len() == 0 && (!use_default || !has_default_feature) - } -} - +/// Activates the dependencies for a package, one by one in turn. +/// +/// This function will attempt to activate all possible candidates for each +/// dependency of the package specified by `parent`. The `deps` iterator +/// provided is an iterator over all dependencies where each element yielded +/// informs us what the candidates are for the dependency in question. +/// +/// The `platform` argument is the target platform that the dependencies are +/// being activated for. +/// +/// If all dependencies can be activated and resolved to a version in the +/// dependency graph the `finished` callback is invoked with the current state +/// of the world. fn activate_deps<'a>(cx: Box, registry: &mut Registry, parent: &Summary, platform: Option<&'a str>, - deps: &'a [(&Dependency, Vec>, Vec)], - cur: usize) -> CargoResult>> { - if cur == deps.len() { return Ok(Ok(cx)) } - let (dep, ref candidates, ref features) = deps[cur]; + mut deps: slice::Iter<'a, DepInfo>, + cur: usize, + finished: &mut FnMut(Box, &mut Registry) -> ResolveResult) + -> ResolveResult { + let &(dep, ref candidates, ref features) = match deps.next() { + Some(info) => info, + None => return finished(cx, registry), + }; - let method = Method::Required{ + let method = Method::Required { dev_deps: false, - features: &features, + features: features, uses_default_features: dep.uses_default_features(), - target_platform: platform}; + target_platform: platform, + }; - let key = (dep.name().to_string(), dep.source_id().clone()); - let prev_active = cx.activations.get(&key) - .map(|v| &v[..]).unwrap_or(&[]); + let prev_active = cx.prev_active(dep); trace!("{}[{}]>{} {} candidates", parent.name(), cur, dep.name(), - candidates.len()); + candidates.len()); trace!("{}[{}]>{} {} prev activations", parent.name(), cur, - dep.name(), prev_active.len()); + dep.name(), prev_active.len()); // Filter the set of candidates based on the previously activated // versions for this dependency. We can actually use a version if it @@ -296,7 +365,7 @@ fn activate_deps<'a>(cx: Box, let mut last_err = None; for candidate in my_candidates { trace!("{}[{}]>{} trying {}", parent.name(), cur, dep.name(), - candidate.version()); + candidate.version()); let mut my_cx = cx.clone(); my_cx.resolve.graph.link(parent.package_id().clone(), candidate.package_id().clone()); @@ -306,18 +375,17 @@ fn activate_deps<'a>(cx: Box, if !dep.is_transitive() { my_cx.visited.borrow_mut().clear(); } - let my_cx = match try!(activate(my_cx, registry, candidate, method)) { - Ok(cx) => cx, - Err(e) => { last_err = Some(e); continue } - }; - match try!(activate_deps(my_cx, registry, parent, platform, deps, - cur + 1)) { + let my_cx = try!(activate(my_cx, registry, candidate, method, + &mut |cx, registry| { + activate_deps(cx, registry, parent, platform, deps.clone(), cur + 1, + finished) + })); + match my_cx { Ok(cx) => return Ok(Ok(cx)), Err(e) => { last_err = Some(e); } } } - trace!("{}[{}]>{} -- {:?}", parent.name(), cur, dep.name(), - last_err); + trace!("{}[{}]>{} -- {:?}", parent.name(), cur, dep.name(), last_err); // Oh well, we couldn't activate any of the candidates, so we just can't // activate this dependency at all @@ -325,6 +393,8 @@ fn activate_deps<'a>(cx: Box, &candidates)) } +#[inline(never)] // see notes at the top of the module +#[allow(deprecated)] // connect => join in 1.3 fn activation_error(cx: &Context, registry: &mut Registry, err: Option>, @@ -370,6 +440,7 @@ fn activation_error(cx: &Context, return Err(human(msg)) } + // Once we're all the way down here, we're definitely lost in the // weeds! We didn't actually use any candidates above, so we need to // give an error message that nothing was found. @@ -428,76 +499,6 @@ fn compatible(a: &semver::Version, b: &semver::Version) -> bool { a.patch == b.patch } -fn resolve_features<'a>(cx: &mut Context, parent: &'a Summary, - method: Method) - -> CargoResult)>> { - let dev_deps = match method { - Method::Everything => true, - Method::Required { dev_deps, .. } => dev_deps, - }; - - // First, filter by dev-dependencies - let deps = parent.dependencies(); - let deps = deps.iter().filter(|d| d.is_transitive() || dev_deps); - - // Second, ignoring dependencies that should not be compiled for this platform - let deps = deps.filter(|d| { - match method { - Method::Required{target_platform: Some(ref platform), ..} => { - d.is_active_for_platform(platform) - }, - _ => true - } - }); - - let (mut feature_deps, used_features) = try!(build_features(parent, method)); - let mut ret = Vec::new(); - - // Next, sanitize all requested features by whitelisting all the requested - // features that correspond to optional dependencies - for dep in deps { - // weed out optional dependencies, but not those required - if dep.is_optional() && !feature_deps.contains_key(dep.name()) { - continue - } - let mut base = feature_deps.remove(dep.name()).unwrap_or(Vec::new()); - for feature in dep.features().iter() { - base.push(feature.clone()); - if feature.contains("/") { - return Err(human(format!("features in dependencies \ - cannot enable features in \ - other dependencies: `{}`", - feature))); - } - } - ret.push((dep, base)); - } - - // All features can only point to optional dependencies, in which case they - // should have all been weeded out by the above iteration. Any remaining - // features are bugs in that the package does not actually have those - // features. - if feature_deps.len() > 0 { - let unknown = feature_deps.keys().map(|s| &s[..]) - .collect::>(); - if unknown.len() > 0 { - let features = unknown.connect(", "); - return Err(human(format!("Package `{}` does not have these features: \ - `{}`", parent.package_id(), features))) - } - } - - // Record what list of features is active for this package. - if used_features.len() > 0 { - let pkgid = parent.package_id(); - cx.resolve.features.entry(pkgid.clone()) - .or_insert(HashSet::new()) - .extend(used_features); - } - - Ok(ret) -} - // Returns a pair of (feature dependencies, all used features) // // The feature dependencies map is a mapping of package name to list of features @@ -584,3 +585,151 @@ fn build_features(s: &Summary, method: Method) Ok(()) } } + +impl Context { + // Activate this summary by inserting it into our list of known activations. + // + // Returns if this summary with the given method is already activated. + #[inline(never)] // see notes at the top of the module + fn flag_activated(&mut self, + summary: &Rc, + method: &Method) -> bool { + let id = summary.package_id(); + let key = (id.name().to_string(), id.source_id().clone()); + let prev = self.activations.entry(key).or_insert(Vec::new()); + if !prev.iter().any(|c| c == summary) { + self.resolve.graph.add(id.clone(), &[]); + prev.push(summary.clone()); + return false + } + debug!("checking if {} is already activated", summary.package_id()); + let (features, use_default) = match *method { + Method::Required { features, uses_default_features, .. } => { + (features, uses_default_features) + } + Method::Everything => return false, + }; + + let has_default_feature = summary.features().contains_key("default"); + match self.resolve.features(id) { + Some(prev) => { + features.iter().all(|f| prev.contains(f)) && + (!use_default || prev.contains("default") || + !has_default_feature) + } + None => features.len() == 0 && (!use_default || !has_default_feature) + } + } + + #[inline(never)] // see notes at the top of the module + fn build_deps<'a>(&mut self, registry: &mut Registry, + parent: &'a Summary, + method: Method) -> CargoResult>> { + // First, figure out our set of dependencies based on the requsted set + // of features. This also calculates what features we're going to enable + // for our own dependencies. + let deps = try!(self.resolve_features(parent, method)); + + // Next, transform all dependencies into a list of possible candidates + // which can satisfy that dependency. + let mut deps = try!(deps.into_iter().map(|(dep, features)| { + let mut candidates = try!(registry.query(dep)); + // When we attempt versions for a package, we'll want to start at + // the maximum version and work our way down. + candidates.sort_by(|a, b| { + b.version().cmp(a.version()) + }); + let candidates = candidates.into_iter().map(Rc::new).collect(); + Ok((dep, candidates, features)) + }).collect::>>>()); + + // When we recurse, attempt to resolve dependencies with fewer + // candidates before recursing on dependencies with more candidates. + // This way if the dependency with only one candidate can't be resolved + // we don't have to do a bunch of work before we figure that out. + deps.sort_by(|&(_, ref a, _), &(_, ref b, _)| { + a.len().cmp(&b.len()) + }); + + Ok(deps) + } + + #[inline(never)] // see notes at the top of the module + fn prev_active(&self, dep: &Dependency) -> &[Rc] { + let key = (dep.name().to_string(), dep.source_id().clone()); + self.activations.get(&key).map(|v| &v[..]).unwrap_or(&[]) + } + + #[allow(deprecated)] // connect => join in 1.3 + fn resolve_features<'a>(&mut self, parent: &'a Summary, method: Method) + -> CargoResult)>> { + let dev_deps = match method { + Method::Everything => true, + Method::Required { dev_deps, .. } => dev_deps, + }; + + // First, filter by dev-dependencies + let deps = parent.dependencies(); + let deps = deps.iter().filter(|d| d.is_transitive() || dev_deps); + + // Second, ignoring dependencies that should not be compiled for this + // platform + let deps = deps.filter(|d| { + match method { + Method::Required{target_platform: Some(ref platform), ..} => { + d.is_active_for_platform(platform) + }, + _ => true + } + }); + + let (mut feature_deps, used_features) = + try!(build_features(parent, method)); + let mut ret = Vec::new(); + + // Next, sanitize all requested features by whitelisting all the + // requested features that correspond to optional dependencies + for dep in deps { + // weed out optional dependencies, but not those required + if dep.is_optional() && !feature_deps.contains_key(dep.name()) { + continue + } + let mut base = feature_deps.remove(dep.name()).unwrap_or(vec![]); + for feature in dep.features().iter() { + base.push(feature.clone()); + if feature.contains("/") { + return Err(human(format!("features in dependencies \ + cannot enable features in \ + other dependencies: `{}`", + feature))); + } + } + ret.push((dep, base)); + } + + // All features can only point to optional dependencies, in which case + // they should have all been weeded out by the above iteration. Any + // remaining features are bugs in that the package does not actually + // have those features. + if feature_deps.len() > 0 { + let unknown = feature_deps.keys().map(|s| &s[..]) + .collect::>(); + if unknown.len() > 0 { + let features = unknown.connect(", "); + return Err(human(format!("Package `{}` does not have these \ + features: `{}`", parent.package_id(), + features))) + } + } + + // Record what list of features is active for this package. + if used_features.len() > 0 { + let pkgid = parent.package_id(); + self.resolve.features.entry(pkgid.clone()) + .or_insert(HashSet::new()) + .extend(used_features); + } + + Ok(ret) + } +} diff --git a/tests/resolve.rs b/tests/resolve.rs index 768bb14c8..45fe82103 100644 --- a/tests/resolve.rs +++ b/tests/resolve.rs @@ -349,3 +349,23 @@ fn resolving_cycle() { dep_req("foo", "1"), ], &mut reg); } + +#[test] +fn hard_equality() { + extern crate env_logger; + let mut reg = registry(vec!( + pkg!(("foo", "1.0.1")), + pkg!(("foo", "1.0.0")), + + pkg!(("bar", "1.0.0") => [dep_req("foo", "1.0.0")]), + )); + + let res = resolve(pkg_id("root"), vec![ + dep_req("bar", "1"), + dep_req("foo", "=1.0.0"), + ], &mut reg).unwrap(); + + assert_that(&res, contains(names(&[("root", "1.0.0"), + ("foo", "1.0.0"), + ("bar", "1.0.0")]))); +}