diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 4e50e73a11b..ef770d7747c 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -191,7 +191,8 @@ Performance options ------------------- Configuring Python using ``--enable-optimizations --with-lto`` (PGO + LTO) is -recommended for best performance. +recommended for best performance. The experimental ``--enable-bolt`` flag can +also be used to improve performance. .. cmdoption:: --enable-optimizations @@ -231,6 +232,24 @@ recommended for best performance. .. versionadded:: 3.11 To use ThinLTO feature, use ``--with-lto=thin`` on Clang. +.. cmdoption:: --enable-bolt + + Enable usage of the `BOLT post-link binary optimizer + <https://github.com/llvm/llvm-project/tree/main/bolt>`_ (disabled by + default). + + BOLT is part of the LLVM project but is not always included in their binary + distributions. This flag requires that ``llvm-bolt`` and ``merge-fdata`` + are available. + + BOLT is still a fairly new project so this flag should be considered + experimental for now. Because this tool operates on machine code its success + is dependent on a combination of the build environment, the other + optimization configure arguments, and the CPU architecture, and not all + combinations are supported. + + .. versionadded:: 3.12 + .. cmdoption:: --with-computed-gotos Enable computed gotos in evaluation loop (enabled by default on supported diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 9689d9df9df..f9fa8ac3123 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -133,6 +133,10 @@ Optimizations It reduces object size by 8 or 16 bytes on 64bit platform. (:pep:`623`) (Contributed by Inada Naoki in :gh:`92536`.) +* Added experimental support for using the BOLT binary optimizer in the build + process, which improves performance by 1-5%. + (Contributed by Kevin Modzelewski in :gh:`90536`.) 
+
 CPython bytecode changes
 ========================
 
diff --git a/Makefile.pre.in b/Makefile.pre.in
index c647853c223..ae7735cf69a 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -640,6 +640,22 @@ profile-opt: profile-run-stamp
 	-rm -f profile-clean-stamp
 	$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"
 
+# Post-link optimize $(BUILDPYTHON) with BOLT (default rule of --enable-bolt builds):
+#   1. write an instrumented copy that emits one .fdata profile per process,
+#   2. run $(PROFILE_TASK) with that copy (a failing task is tolerated: "|| true"),
+#   3. merge the per-process profiles into $(BUILDPYTHON).fdata,
+#   4. rewrite the binary from the merged profile and move it over the original.
+.PHONY: bolt-opt
+bolt-opt: @PREBOLT_RULE@
+	rm -f *.fdata
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst
+	./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
+	@MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot
+	rm -f *.fdata
+	rm -f $(BUILDPYTHON).bolt_inst
+	mv $(BUILDPYTHON).bolt $(BUILDPYTHON)
+
 # Compile and run with gcov
 .PHONY=coverage coverage-lcov coverage-report
 coverage:
diff --git a/Misc/ACKS b/Misc/ACKS
index c1f570acaaf..16a482e40a5 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1212,6 +1212,7 @@ Gideon Mitchell
 Tim Mitchell
 Zubin Mithra
 Florian Mladitsch
+Kevin Modzelewski
 Doug Moen
 Jakub Molinski
 Juliette Monsel
diff --git a/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst b/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst
new file mode 100644
index 00000000000..4605e03915e
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst
@@ -0,0 +1,2 @@
+Use the BOLT post-link optimizer to improve performance, particularly on
+medium-to-large applications.
diff --git a/configure b/configure index 82b55a3745d..fb3a3c3fc8f 100755 --- a/configure +++ b/configure @@ -887,6 +887,9 @@ LLVM_PROF_FILE LLVM_PROF_MERGER PGO_PROF_USE_FLAG PGO_PROF_GEN_FLAG +MERGE_FDATA +LLVM_BOLT +PREBOLT_RULE LLVM_AR_FOUND LLVM_AR PROFILE_TASK @@ -1049,6 +1052,7 @@ enable_pystats with_assertions enable_optimizations with_lto +enable_bolt with_address_sanitizer with_memory_sanitizer with_undefined_behavior_sanitizer @@ -1774,6 +1778,8 @@ Optional Features: --enable-pystats enable internal statistics gathering (default is no) --enable-optimizations enable expensive, stable optimizations (PGO, etc.) (default is no) + --enable-bolt enable usage of the llvm-bolt post-link optimizer + (default is no) --enable-loadable-sqlite-extensions support loadable extensions in the sqlite3 module, see Doc/library/sqlite3.rst (default is no) @@ -7878,6 +7884,261 @@ $as_echo "$as_me: llvm-ar found via xcrun: ${LLVM_AR}" >&6;} LDFLAGS_NODIST="$LDFLAGS_NODIST $LTOFLAGS" fi +# Enable bolt flags +Py_BOLT='false' +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-bolt" >&5 +$as_echo_n "checking for --enable-bolt... " >&6; } +# Check whether --enable-bolt was given. 
+if test "${enable_bolt+set}" = set; then : + enableval=$enable_bolt; +if test "$enableval" != no +then + Py_BOLT='true' + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; }; +else + Py_BOLT='false' + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; }; +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + +if test "$Py_BOLT" = 'true' ; then + PREBOLT_RULE="${DEF_MAKE_ALL_RULE}" + DEF_MAKE_ALL_RULE="bolt-opt" + DEF_MAKE_RULE="build_all" + + # These flags are required for bolt to work: + CFLAGS_NODIST="$CFLAGS_NODIST -fno-reorder-blocks-and-partition" + LDFLAGS_NODIST="$LDFLAGS_NODIST -Wl,--emit-relocs" + + # These flags are required to get good performance from bolt: + CFLAGS_NODIST="$CFLAGS_NODIST -fno-pie" + # We want to add these no-pie flags to linking executables but not shared libraries: + LINKCC="$LINKCC -fno-pie -no-pie" + # Designate the DWARF version into 4 since the LLVM-BOLT does not support DWARF5 yet. + CFLAGS="$CFLAGS -gdwarf-4" + LDFLAGS="$LDFLAGS -gdwarf-4" + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}llvm-bolt", so it can be a program name with args. +set dummy ${ac_tool_prefix}llvm-bolt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_LLVM_BOLT+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $LLVM_BOLT in + [\\/]* | ?:[\\/]*) + ac_cv_path_LLVM_BOLT="$LLVM_BOLT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in ${llvm_path} +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +LLVM_BOLT=$ac_cv_path_LLVM_BOLT +if test -n "$LLVM_BOLT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LLVM_BOLT" >&5 +$as_echo "$LLVM_BOLT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_LLVM_BOLT"; then + ac_pt_LLVM_BOLT=$LLVM_BOLT + # Extract the first word of "llvm-bolt", so it can be a program name with args. +set dummy llvm-bolt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_LLVM_BOLT+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_LLVM_BOLT in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_LLVM_BOLT="$ac_pt_LLVM_BOLT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in ${llvm_path} +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_LLVM_BOLT=$ac_cv_path_ac_pt_LLVM_BOLT +if test -n "$ac_pt_LLVM_BOLT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_LLVM_BOLT" >&5 +$as_echo "$ac_pt_LLVM_BOLT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_LLVM_BOLT" = x; then + LLVM_BOLT="''" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LLVM_BOLT=$ac_pt_LLVM_BOLT + fi +else + LLVM_BOLT="$ac_cv_path_LLVM_BOLT" +fi + + if test -n "${LLVM_BOLT}" -a -x "${LLVM_BOLT}" + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found llvm-bolt\"" >&5 +$as_echo "\"Found llvm-bolt\"" >&6; } + else + as_fn_error $? "llvm-bolt is required for a --enable-bolt build but could not be found." "$LINENO" 5 + fi + + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}merge-fdata", so it can be a program name with args. +set dummy ${ac_tool_prefix}merge-fdata; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_MERGE_FDATA+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MERGE_FDATA in + [\\/]* | ?:[\\/]*) + ac_cv_path_MERGE_FDATA="$MERGE_FDATA" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in ${llvm_path} +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +MERGE_FDATA=$ac_cv_path_MERGE_FDATA +if test -n "$MERGE_FDATA"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MERGE_FDATA" >&5 +$as_echo "$MERGE_FDATA" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_MERGE_FDATA"; then + ac_pt_MERGE_FDATA=$MERGE_FDATA + # Extract the first word of "merge-fdata", so it can be a program name with args. +set dummy merge-fdata; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_MERGE_FDATA+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_MERGE_FDATA in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_MERGE_FDATA="$ac_pt_MERGE_FDATA" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in ${llvm_path} +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_MERGE_FDATA=$ac_cv_path_ac_pt_MERGE_FDATA +if test -n "$ac_pt_MERGE_FDATA"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_MERGE_FDATA" >&5 +$as_echo "$ac_pt_MERGE_FDATA" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_MERGE_FDATA" = x; then + MERGE_FDATA="''" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MERGE_FDATA=$ac_pt_MERGE_FDATA + fi +else + MERGE_FDATA="$ac_cv_path_MERGE_FDATA" +fi + + if test -n "${MERGE_FDATA}" -a -x "${MERGE_FDATA}" + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found merge-fdata\"" >&5 +$as_echo "\"Found merge-fdata\"" >&6; } + else + as_fn_error $? "merge-fdata is required for a --enable-bolt build but could not be found." "$LINENO" 5 + fi +fi + # Enable PGO flags. 
diff --git a/configure.ac b/configure.ac
index 85d9e801183..bab405e6ed3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1917,6 +1917,67 @@ if test "$Py_LTO" = 'true' ; then
   LDFLAGS_NODIST="$LDFLAGS_NODIST $LTOFLAGS"
 fi
 
+# Enable bolt flags
+dnl --enable-bolt is an opt-in switch for the llvm-bolt post-link optimizer.
+dnl Py_BOLT ends up 'true' for any --enable-bolt value other than "no".
+Py_BOLT='false'
+AC_MSG_CHECKING([for --enable-bolt])
+AC_ARG_ENABLE([bolt],
+  [AS_HELP_STRING(
+    [--enable-bolt],
+    [enable usage of the llvm-bolt post-link optimizer (default is no)])],
+[
+if test "$enableval" != no
+then
+  Py_BOLT='true'
+  AC_MSG_RESULT([yes]);
+else
+  Py_BOLT='false'
+  AC_MSG_RESULT([no]);
+fi],
+[AC_MSG_RESULT([no])])
+
+dnl With BOLT enabled, bolt-opt becomes the default "all" rule; the rule it
+dnl replaces is saved in PREBOLT_RULE, which Makefile.pre.in substitutes as
+dnl the prerequisite of bolt-opt.
+AC_SUBST([PREBOLT_RULE])
+if test "$Py_BOLT" = 'true' ; then
+  PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
+  DEF_MAKE_ALL_RULE="bolt-opt"
+  DEF_MAKE_RULE="build_all"
+
+  # These flags are required for bolt to work:
+  CFLAGS_NODIST="$CFLAGS_NODIST -fno-reorder-blocks-and-partition"
+  LDFLAGS_NODIST="$LDFLAGS_NODIST -Wl,--emit-relocs"
+
+  # These flags are required to get good performance from bolt:
+  CFLAGS_NODIST="$CFLAGS_NODIST -fno-pie"
+  # We want to add these no-pie flags to linking executables but not shared libraries:
+  LINKCC="$LINKCC -fno-pie -no-pie"
+  # Designate the DWARF version into 4 since the LLVM-BOLT does not support DWARF5 yet.
+  CFLAGS="$CFLAGS -gdwarf-4"
+  LDFLAGS="$LDFLAGS -gdwarf-4"
+dnl Both tools are mandatory: each is searched for in ${llvm_path} and
+dnl configure aborts when the result is not an executable file.
+  AC_SUBST([LLVM_BOLT])
+  AC_PATH_TOOL([LLVM_BOLT], [llvm-bolt], [''], [${llvm_path}])
+  if test -n "${LLVM_BOLT}" -a -x "${LLVM_BOLT}"
+  then
+    AC_MSG_RESULT(["Found llvm-bolt"])
+  else
+    AC_MSG_ERROR([llvm-bolt is required for a --enable-bolt build but could not be found.])
+  fi
+
+  AC_SUBST([MERGE_FDATA])
+  AC_PATH_TOOL([MERGE_FDATA], [merge-fdata], [''], [${llvm_path}])
+  if test -n "${MERGE_FDATA}" -a -x "${MERGE_FDATA}"
+  then
+    AC_MSG_RESULT(["Found merge-fdata"])
+  else
+    AC_MSG_ERROR([merge-fdata is required for a --enable-bolt build but could not be found.])
+  fi
+fi
+
 # Enable PGO flags.
 AC_SUBST(PGO_PROF_GEN_FLAG)
 AC_SUBST(PGO_PROF_USE_FLAG)