From 36ba0a35eecec73c3347c43aeb18f3ea4849117d Mon Sep 17 00:00:00 2001 From: Ben Wiederhake Date: Sat, 7 Mar 2020 17:38:16 +0100 Subject: [PATCH] Travis: Cache toolchain This should give a significant boost to Travis speeds, because most of the compile time is spent building the toolchain over and over again. However, the toolchain (or libc or libm) changes only rarely, so most rebuilds can skip this step. The hashing has been put into a separate file to keep it as decoupled as possible from BuildIt.sh. --- .travis.yml | 4 +- Toolchain/.gitignore | 2 + Toolchain/BuildIt.sh | 68 +++++++++++++++- Toolchain/ComputeDependenciesHash.sh | 111 +++++++++++++++++++++++++++ 4 files changed, 182 insertions(+), 3 deletions(-) create mode 100755 Toolchain/ComputeDependenciesHash.sh diff --git a/.travis.yml b/.travis.yml index 16a20242a7..a84e3752c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ compiler: cache: directories: - /var/cache/apt/archives/*.deb + - Toolchain/Cache/ notifications: irc: @@ -30,7 +31,8 @@ before_install: script: - cd Toolchain -- ./BuildIt.sh +- TRY_USE_LOCAL_TOOLCHAIN=y ./BuildIt.sh - cd ../Kernel - ./makeall.sh - ../Meta/lint-shell-scripts.sh +- du -ch ../Toolchain/Cache/* || true diff --git a/Toolchain/.gitignore b/Toolchain/.gitignore index 0146895ec6..b4441196e5 100644 --- a/Toolchain/.gitignore +++ b/Toolchain/.gitignore @@ -1,3 +1,5 @@ # Created by QEMU build config-temp config.log +# For caching the entire toolchain (useful on Travis) +Cache/ diff --git a/Toolchain/BuildIt.sh b/Toolchain/BuildIt.sh index 6eb6c3a4c9..5ac3b8274a 100755 --- a/Toolchain/BuildIt.sh +++ b/Toolchain/BuildIt.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash set -e - # This file will need to be run in bash, for now. 
+ +# === CONFIGURATION AND SETUP === + DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" echo "$DIR" @@ -43,6 +45,44 @@ GCC_NAME="gcc-$GCC_VERSION" GCC_PKG="${GCC_NAME}.tar.gz" GCC_BASE_URL="http://ftp.gnu.org/gnu/gcc" + +# === CHECK CACHE AND REUSE === + +pushd "$DIR" + if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then + echo "Checking cached toolchain:" + + DEPS_CONFIG=" + uname=$(uname),TARGET=${TARGET}, + BuildItHash=$($MD5SUM $(basename $0)), + MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC}, + CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS}, + BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM}, + GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}" + echo "Config is:${DEPS_CONFIG}" + if ! DEPS_HASH=$($DIR/ComputeDependenciesHash.sh $MD5SUM <<<"${DEPS_CONFIG}"); then + echo "Dependency hashing failed" + echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT." + echo "Someone should look into this, but for now it'll work, albeit inefficient." + # Should be empty anyway, but just to make sure: + DEPS_HASH="" + elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then + echo "Cache at Cache/ToolchainLocal_${DEPS_HASH}.tar.gz exists!" + echo "Extracting toolchain from cache:" + tar xzf "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" + echo "Done 'building' the toolchain." + exit 0 + else + echo "Cache at Cache/ToolchainLocal_${DEPS_HASH}.tar.gz does not exist." + echo "Will rebuild toolchain from scratch, and save the result." + fi + + fi +popd + + +# === DOWNLOAD AND PATCH === + pushd "$DIR/Tarballs" md5="$($MD5SUM $BINUTILS_PKG | cut -f1 -d' ')" echo "bu md5='$md5'" @@ -98,8 +138,10 @@ pushd "$DIR/Tarballs" popd -mkdir -p "$PREFIX" +# === COMPILE AND INSTALL === + +mkdir -p "$PREFIX" mkdir -p "$DIR/Build/binutils" mkdir -p "$DIR/Build/gcc" @@ -162,3 +204,25 @@ pushd "$DIR/Build/" popd popd + +# == SAVE TO CACHE == + +pushd "$DIR" + if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then + # TODO: Compress with -z. 
It's factor 3, and costs no time. + echo "Caching toolchain:" + + if [ -z "${DEPS_HASH}" ] ; then + echo "NOT SAVED, because hashing failed." + echo "It's computed in the beginning; see there for the error message." + elif [ -e "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then + # Note: This checks for *existence*. Initially we checked for + # *readability*. If Travis borks permissions, there's not much we can do. + echo "Cache exists but was not used?!" + echo "Not touching cache then." + else + mkdir -p Cache/ + tar czf "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" Local/ + fi + fi +popd diff --git a/Toolchain/ComputeDependenciesHash.sh b/Toolchain/ComputeDependenciesHash.sh new file mode 100755 index 0000000000..abb9a99b22 --- /dev/null +++ b/Toolchain/ComputeDependenciesHash.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +set -eu +# This file will need to be run in bash, for now. + +if [ $# -lt 1 ] ; then + echo "USAGE: echo \"YOURCONFIG\" | $0 " >&2 + echo "Example: echo \"uname=Linux,TARGET=i686-pc-serenity\" | $0 md5sum" >&2 + echo "Example: echo \"uname=OpenBSD,TARGET=i686-pc-serenity\" | $0 md5 -q" >&2 + exit 1 +fi + +DIR=$( cd "$( dirname "$0" )" && pwd ) +cd "${DIR}/.." +if [ ! -r LICENSE ] ; then + echo "$0: Got confused by the directories, giving up." >&2 + exit 1 +fi + +# Ensure cleanup +DEPLIST_FILE=$(mktemp /tmp/serenity_deps_XXXXXXXX.lst) +function finish { + rm -f "${DEPLIST_FILE}" +} +trap finish EXIT + +# libstdc++ depends on libc and libm, so we pessimistically assume it depends +# on *all* of their implementation and recursive dependencies. +# Scan all files for potential dependencies. +# Thinking in graphs, this computes the edge list: +cat <(find AK/ Libraries/ Servers/ Kernel/ -name '*.h') \ + <(find Libraries/LibC/ Libraries/LibM/ -name '*.cpp' ! 
-name 'Test*.cpp' ) | \ + xargs grep -F '#include ' | \ + sed -r \ + -e 's,^(.*/)([^/]+:)#include "(.*)",\1\2\1\3,' \ + -e 's^#include <(Kernel/.*)>^\1^' \ + -e 's^#include <(AK/.*)>^\1^' \ + -e 's^#include <(Lib[A-Za-z]+/.*)>^Libraries/\1^' \ + -e 's^#include <((bits|netinet|sys|arpa|net)/.*)>^Libraries/LibC/\1^' \ + -e 's^#include ^Libraries/LibC/fd_set.h^' \ + -e 's^#include <([a-z]{3,10}(_numbers)?\.h)>^Libraries/LibC/\1^' \ + -e 's^#include <([A-Z][a-z]+Server/.*)>^Servers/\1^' \ + -e 's^#include <(.*)>^UNRESOLVED_I/\1^' \ + -e 's^#include "(.*)"^UNRESOLVED_L/\1^' > "${DEPLIST_FILE}" +# Some #include's cannot be resolved. However, these are only +# a problem if they turn up as a transitive dependency of libc and libm. +# We will check for that when the time comes. + +# The initial guess is pessimistic: *all* of libc and libm. +FILE_LIST=$(find Libraries/LibC/ Libraries/LibM/ \( -name '*.cpp' -o -name '*.c' -o -name '*.h' \) ! -name 'Test*') +echo "$0: Exploring dependencies of libstdc++" >&2 +FILE_LIST_COMPLETE="n" +# In each iteration, we extend FILE_LIST by the dependencies not listed yet in +# FILE_LIST. Note that the results are always semantically the same, +# but the order depends on the initial `find` runs. +for _ in $(seq 10) ; do + FILE_REGEX=$(echo "${FILE_LIST}" | sed -zr -e 's,\n$,,' -e 's,\.,\\.,g' -e 's,\n,|,g') + FURTHER_FILE_LIST=$(grep -P "^(${FILE_REGEX}):" "${DEPLIST_FILE}" | grep -Pv ":(${FILE_REGEX})\$" | sed -re 's,^.*:(.*)$,\1,' | sort -u) + if [ -n "${FURTHER_FILE_LIST}" ] ; then + # FILE_LIST should grow to a maximum of "number of all .cpp and .c and .h files", + # i.e. roughly 700 lines. This should be manageable, even as the project grows. + FILE_LIST="${FILE_LIST} +${FURTHER_FILE_LIST}" + else + FILE_LIST_COMPLETE="y" + break + fi +done +FURTHER_FILE_LIST="" +FILE_REGEX="" +if [ "${FILE_LIST_COMPLETE}" != "y" ] ; then + # Dependency chains might grow very long. 
Also, if for some reason we fail + # to filter out the already listed files, the FILE_LIST would grow + # exponentially. Both of these unpleasant cases are handled by capping the + # iteration count to 10 and giving up: + echo "$0: Dependencies don't seem to converge, giving up." >&2 + exit 1 +fi + +# Sort for reproducibility, +FILE_LIST=$(echo "${FILE_LIST}" | LC_ALL=C sort -u) +if grep -F 'UNRESOLVED' <&2 ; then +${FILE_LIST} +EOLIST + echo "$0: Unresolved dependency, giving up." + exit 1 +fi + +echo "$0: Computing hashes" >&2 +# "$@" is the md5sum invocation. The piping might hide non-zero exit-codes, +# but thankfully only the first command can reasonably fail. +# Also, abuse the deplist file as a temporary buffer. +cat /dev/stdin > "${DEPLIST_FILE}" +HASHES=$(xargs "$@" <&2 +echo "${HASHES}" >&2 +echo "$0: Toolchain hash:" >&2 +cat <&2