Travis: Toolchain only depends on headers, not impls

When libstdc++ was added in 4977fd22b8, just calling
'make install' was the easiest way to install the headers. And the headers are all
that is needed for libstdc++ to determine the ABI. Since then, BuildIt.sh was
rewritten again and again, and somehow everyone just silently assumed that
libstdc++ also depends on libc.a and libm.a, because surely it does?

Turns out, it doesn't! This massively reduces the dependencies of libstdc++,
hopefully meaning that the Toolchain doesn't need to be rebuilt so often on Travis.

Furthermore, the old method of trying to determine the dependency tree with
bash/grep/etc. has finally broken anyway:

    https://travis-ci.com/github/SerenityOS/serenity/builds/179805569#L567

In summary, this should eliminate most of the Toolchain rebuilds on Travis,
and therefore make Travis build blazingly fast! :^)
This commit is contained in:
Ben Wiederhake 2020-08-15 01:11:58 +02:00 committed by Andreas Kling
parent 0df9ddf604
commit 5f724b6ca1
2 changed files with 33 additions and 90 deletions

View file

@ -19,10 +19,10 @@ MAKE="make"
MD5SUM="md5sum"
NPROC="nproc"
# Each cache entry is about 70 MiB, so 10 entries are about 700 MiB.
# Each cache entry is about 70 MiB, so 5 entries are about 350 MiB.
# It seems that Travis starts having trouble around a total
# cache size of 9 GiB, so I think this is a good amount.
KEEP_CACHE_COUNT=10
KEEP_CACHE_COUNT=5
if command -v ginstall &>/dev/null; then
INSTALL=ginstall
@ -76,19 +76,27 @@ GCC_BASE_URL="http://ftp.gnu.org/gnu/gcc"
pushd "$DIR"
if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then
echo "Checking cached toolchain:"
DEPS_CONFIG="
# TODO: This is still overly pessimistic.
DEPS_CONFIG="\
uname=$(uname),TARGET=${TARGET},
BuildItHash=$($MD5SUM "$(basename "$0")"),
MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC},
CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS},
BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM},
GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}"
echo "Config is:${DEPS_CONFIG}"
if ! DEPS_HASH=$("$DIR/ComputeDependenciesHash.sh" "$MD5SUM" <<<"${DEPS_CONFIG}"); then
# Make it stand out more
echo
echo
echo
echo
echo "Dependency hashing failed"
echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT."
echo "Someone should look into this, but for now it'll work, albeit inefficient."
echo
echo
echo
echo
# Should be empty anyway, but just to make sure:
DEPS_HASH=""
elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then
@ -238,12 +246,10 @@ pushd "$DIR/Build/"
echo "XXX install gcc and libgcc"
"$MAKE" install-gcc install-target-libgcc || exit 1
echo "XXX serenity libc and libm"
echo "XXX serenity libc and libm headers"
mkdir -p "$BUILD"
pushd "$BUILD"
CXXFLAGS="-DBUILDING_SERENITY_TOOLCHAIN" cmake ..
cmake --build . --target LibC
"$INSTALL" -D Libraries/LibC/libc.a Libraries/LibM/libm.a Root/usr/lib/
mkdir -p Root/usr/include/
SRC_ROOT=$(realpath "$DIR"/..)
FILES=$(find "$SRC_ROOT"/Libraries/LibC "$SRC_ROOT"/Libraries/LibM -name '*.h' -print)
for header in $FILES; do

View file

@ -1,5 +1,5 @@
#!/usr/bin/env bash
set -eu
set -euo pipefail
# This file will need to be run in bash, for now.
if [ $# -lt 1 ] ; then
@ -23,89 +23,26 @@ function finish {
}
trap finish EXIT
# libstdc++ depends on libc and libm, so we pessimistically assume it depends
# on *all* of their implementation and recursive dependencies.
# Scan all files for potential dependencies.
# Thinking in graphs, this computes the edge list:
cat <(find AK/ Libraries/ Services/ Kernel/ -name '*.h') \
<(find Libraries/LibC/ Libraries/LibM/ -name '*.cpp' ! -name 'Test*.cpp' ) | \
xargs grep -F '#include ' | \
sed -r \
-e 's,^(.*/)([^/]+:)#include "(.*)",\1\2\1\3,' \
-e 's^#include <(Kernel/.*)>^\1^' \
-e 's^#include <(AK/.*)>^\1^' \
-e 's^#include <(Lib[A-Za-z]+/.*)>^Libraries/\1^' \
-e 's^#include <((bits|netinet|sys|arpa|net)/.*)>^Libraries/LibC/\1^' \
-e 's^#include <fd_set.h>^Libraries/LibC/fd_set.h^' \
-e 's^#include <([a-z]{3,10}(_numbers)?\.h)>^Libraries/LibC/\1^' \
-e 's^#include <([A-Z][a-z]+Server/.*)>^Services/\1^' \
-e 's^#include <(.*)>^UNRESOLVED_I/\1^' \
-e 's^#include "(.*)"^UNRESOLVED_L/\1^' > "${DEPLIST_FILE}"
# Some #include's cannot be resolved, like <chrono>. However, these are only
# a problem if they turn up as a transitive dependency of libc and libm.
# We will check for that when the time comes.
# First, capture the caller's input.
echo "$0: Configuration:" >&2
cat /dev/stdin | tee /dev/stderr > "${DEPLIST_FILE}"
# "$@" is the md5sum invocation.
"$@" Toolchain/ComputeDependenciesHash.sh | tee /dev/stderr >> "${DEPLIST_FILE}"
# The initial guess is pessimistic: *all* of libc and libm.
FILE_LIST=$(find Libraries/LibC/ Libraries/LibM/ \( -name '*.cpp' -o -name '*.c' -o -name '*.h' \) ! -name 'Test*')
echo "$0: Exploring dependencies of libstdc++" >&2
FILE_LIST_COMPLETE="n"
# In each iteration, we extend FILE_LIST by the dependencies not listed yet in
# FILE_LIST. Note that the results are always semantically the same,
# but the order depends on the initial `find` runs.
for _ in $(seq 10) ; do
FILE_REGEX=$(echo "${FILE_LIST}" | sed -zr -e 's,\n$,,' -e 's,\.,\\.,g' -e 's,\n,|,g')
FURTHER_FILE_LIST=$(grep -P "^(${FILE_REGEX}):" "${DEPLIST_FILE}" | grep -Pv ":(${FILE_REGEX})\$" | sed -re 's,^.*:(.*)$,\1,' | sort -u)
if [ -n "${FURTHER_FILE_LIST}" ] ; then
# FILE_LIST should grow to a maximum of "number of all .cpp and .c and .h files",
# i.e. roughly 700 lines. This should be manageable, even as the project grows.
FILE_LIST="${FILE_LIST}
${FURTHER_FILE_LIST}"
else
FILE_LIST_COMPLETE="y"
break
fi
done
FURTHER_FILE_LIST=""
FILE_REGEX=""
if [ "${FILE_LIST_COMPLETE}" != "y" ] ; then
# Dependency chains might grow very long. Also, if for some reason we fail
# to filter out the already listed files, the FILE_LIST would grow
# exponentially. Both of these unpleasant cases are handled by capping the
# iteration count to 10 and giving up:
echo "$0: Dependencies don't seem to converge, giving up." >&2
exit 1
fi
# libstdc++ depends on the *headers* of libc, so we pessimistically assume it depends
# on *all* of them.
# This list of files can be cut down considerably:
# strace -ff -e trace=file "make install-target-libstdc++-v3" 2>&1 >/dev/null | perl -ne 's/^[^"]+"(([^\\"]|\\[\\"nt])*)".*/$1/ && print' | sort -u | grep -P 'serenity/Build/Root/usr/include/.*\.h$'
# However, we don't want to risk breaking the build when we upgrade gcc in the future.
#
# If you want to further cut down the Toolchain rebuilds on Travis,
# one way would be to reduce this list somehow.
cd Libraries/LibC/
find -name '*.h' | sort | xargs "$@" | tee /dev/stderr >> "${DEPLIST_FILE}"
# Sort for reproducibility.
FILE_LIST=$(echo "${FILE_LIST}" | LC_ALL=C sort -u)
if grep -F 'UNRESOLVED' <<EOLIST >&2 ; then
${FILE_LIST}
EOLIST
echo "$0: Unresolved dependency, giving up."
exit 1
fi
echo "$0: Computing hashes" >&2
# "$@" is the md5sum invocation. The piping might hide non-zero exit-codes,
# The piping might hide non-zero exit-codes,
# but thankfully only the first command can reasonably fail.
# Also, abuse the deplist file as a temporary buffer.
cat /dev/stdin > "${DEPLIST_FILE}"
HASHES=$(xargs "$@" <<EOLIST
${FILE_LIST}
Toolchain/ComputeDependenciesHash.sh
${DEPLIST_FILE}
EOLIST
)
# Caller (probably BuildIt.sh) should inject its own hash via stdin.
# Mask the temporary (= non-reproducible) name of the DEPLIST_FILE:
HASHES=$(echo "${HASHES}" | sed -re 's,/tmp/serenity_deps_........\.lst,CONFIG,')
echo "$0: Hashes are:" >&2
echo "${HASHES}" >&2
echo "$0: Toolchain hash:" >&2
cat <<EOHASH | "$@" - | cut -f1 -d' ' | tee /dev/stderr
${HASHES}
EOHASH
"$@" "${DEPLIST_FILE}" | cut -f1 -d' ' | tee /dev/stderr
echo "$0: Great success!" >&2