The SSE2 detection and enabling could potentially cause problems
for binary distributions of Python in situations where the build
machine has SSE2 but the target machine does not.

Therefore, don't enable SSE2 instructions automatically on x86.
This commit is contained in:
Mark Dickinson 2009-04-18 20:17:52 +00:00
parent 153c70f6d7
commit 7abf8d4066
7 changed files with 47 additions and 280 deletions

View file

@ -469,7 +469,7 @@ extern "C" {
the FPU is using 53-bit precision. Here are macros that force this. See
Python/pystrtod.c for an example of their use. */
#ifdef USING_X87_FPU
#ifdef HAVE_GCC_ASM_FOR_X87
#define _Py_SET_53BIT_PRECISION_HEADER \
unsigned short old_387controlword, new_387controlword
#define _Py_SET_53BIT_PRECISION_START \

View file

@ -31,13 +31,6 @@ Core and Builtins
value: str(1e11 + 0.5). (This minor issue has existed in 2.x for a
long time.)
- On x86, SSE2 instructions for floating-point are automatically
detected and, where possible, enabled on platforms using the gcc
compiler. As a consequence, some arithmetic operations may have
different (more accurate!) results on some platforms, and
cross-platform consistency of Python arithmetic should be improved.
This applies particularly to Linux/x86.
- Issue #1580: On most platforms, use a 'short' float repr: for a
finite float x, repr(x) now outputs a string based on the shortest
sequence of decimal digits that rounds to x. Previous behaviour was

View file

@ -15,6 +15,11 @@
/* Two-argument maximum / minimum.  Each argument appears twice in the
   expansion, so avoid passing expressions that have side effects. */
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* ascii character tests (as opposed to locale tests) */
/* ISSPACE(c): nonzero when c is one of ' ', '\f', '\n', '\r', '\t' or
   '\v'.  The comparisons are against fixed character constants, so the
   result does not depend on the current locale.  The argument may be
   evaluated more than once. */
#define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
(c) == '\r' || (c) == '\t' || (c) == '\v')
/* ISDIGIT(c): nonzero when c lies in the range '0'..'9'.  The argument
   may be evaluated twice. */
#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
#ifdef HAVE_IEEEFP_H
#include <ieeefp.h>
#endif
@ -188,7 +193,7 @@ PyFloat_FromString(PyObject *v)
}
last = s + len;
while (*s && isspace(Py_CHARMASK(*s)))
while (*s && ISSPACE(Py_CHARMASK(*s)))
s++;
if (*s == '\0') {
PyErr_SetString(PyExc_ValueError, "empty string for float()");
@ -245,7 +250,7 @@ PyFloat_FromString(PyObject *v)
}
/* Since end != s, the platform made *some* kind of sense out
of the input. Trust it. */
while (*end && isspace(Py_CHARMASK(*end)))
while (*end && ISSPACE(Py_CHARMASK(*end)))
end++;
if (*end != '\0') {
PyOS_snprintf(buffer, sizeof(buffer),
@ -1275,7 +1280,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
********************/
/* leading whitespace and optional sign */
while (isspace(Py_CHARMASK(*s)))
while (ISSPACE(Py_CHARMASK(*s)))
s++;
if (*s == '-') {
s++;
@ -1299,6 +1304,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
s_store = s;
if (*s == '0') {
s++;
if (*s == 'x' || *s == 'X')
if (tolower(*s) == (int)'x')
s++;
else
@ -1345,7 +1351,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
exp = 0;
/* optional trailing whitespace leading to the end of the string */
while (isspace(Py_CHARMASK(*s)))
while (ISSPACE(Py_CHARMASK(*s)))
s++;
if (s != s_end)
goto parse_error;

View file

@ -13,8 +13,7 @@ double _Py_force_double(double x)
}
#endif
#ifdef USING_X87_FPU
# ifdef HAVE_GCC_ASM_FOR_X87
#ifdef HAVE_GCC_ASM_FOR_X87
/* inline assembly for getting and setting the 387 FPU control word on
gcc/x86 */
@ -29,9 +28,6 @@ void _Py_set_387controlword(unsigned short cw) {
__asm__ __volatile__ ("fldcw %0" : : "m" (cw));
}
# else
# error "Unable to get and set x87 control word"
# endif
#endif

187
configure vendored
View file

@ -1,5 +1,5 @@
#! /bin/sh
# From configure.in Revision: 71663 .
# From configure.in Revision: 71704 .
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.61 for python 3.1.
#
@ -21827,174 +21827,21 @@ _ACEOF
fi
# David Gay's code in Python/dtoa.c requires that the FPU uses 53-bit
# The short float repr introduced in Python 3.1 requires the
# correctly-rounded string <-> double conversion functions in
# Python/dtoa.c, which in turn require that the FPU uses 53-bit
# rounding; this is a particular problem on x86, where the x87 FPU has
# a default rounding precision of 64 bits. For gcc/x86, we try to fix
# this by:
#
# (1) using the SSE2 instruction set when available (it usually is
# on modern machines)
# (2) using inline assembler to get and set the x87 FPU control word
# otherwise.
#
# On AMD64 (aka x86-64), gcc automatically enables use of SSE2
# instructions, so we don't bother trying to detect.
# this by using inline assembler to get and set the x87 FPU control
# word.
if test "$GCC" = yes && test -n "`$CC -dM -E - </dev/null | grep i386`"
then
# determine whether we're already using the SSE2 instruction set for math
# (e.g., this is true by default on OS X/x86)
{ echo "$as_me:$LINENO: checking whether SSE2 instructions are already enabled for math" >&5
echo $ECHO_N "checking whether SSE2 instructions are already enabled for math... $ECHO_C" >&6; }
if test -n "`$CC -dM -E - </dev/null | grep __SSE2_MATH__`"
then
ac_sse2_enabled=yes
else
ac_sse2_enabled=no
fi
{ echo "$as_me:$LINENO: result: $ac_sse2_enabled" >&5
echo "${ECHO_T}$ac_sse2_enabled" >&6; }
# if we're not using SSE2 already, we need to either enable it
# (when available), or use inline assembler to get and set the
# 387 control word.
if test $ac_sse2_enabled = no
then
# Check cpuid for SSE2 availability. Bits 25 and 26 of edx tell
# us about SSE and SSE2 respectively.
{ echo "$as_me:$LINENO: checking whether SSE2 instructions are available on this CPU" >&5
echo $ECHO_N "checking whether SSE2 instructions are available on this CPU... $ECHO_C" >&6; }
if test "$cross_compiling" = yes; then
ac_cv_cpu_has_sse2=no
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
int main() {
unsigned int ax, bx, cx, dx, func;
func = 1U;
__asm__ __volatile__ (
"pushl %%ebx\n\t" /* don't clobber ebx */
"cpuid\n\t"
"movl %%ebx, %1\n\t"
"popl %%ebx"
: "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx)
: "a" (func)
: "cc" );
if ((dx & (1U << 25)) && (dx & (1U << 26)))
return 0;
else
return 1;
}
_ACEOF
rm -f conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_link") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && { ac_try='./conftest$ac_exeext'
{ (case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_try") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_cpu_has_sse2=yes
else
echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
( exit $ac_status )
ac_cv_cpu_has_sse2=no
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_cpu_has_sse2" >&5
echo "${ECHO_T}$ac_cv_cpu_has_sse2" >&6; }
# determine whether gcc accepts options to turn on SSE2
{ echo "$as_me:$LINENO: checking whether $CC accepts -msse2 -mfpmath=sse" >&5
echo $ECHO_N "checking whether $CC accepts -msse2 -mfpmath=sse... $ECHO_C" >&6; }
ac_save_cc="$CC"
CC="$CC -msse2 -mfpmath=sse"
if test "$cross_compiling" = yes; then
ac_cv_msse2_ok=no
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
int main() { return 0; }
_ACEOF
rm -f conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_link") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && { ac_try='./conftest$ac_exeext'
{ (case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_try") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_msse2_ok=yes
else
echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
( exit $ac_status )
ac_cv_msse2_ok=no
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi
CC="$ac_save_cc"
{ echo "$as_me:$LINENO: result: $ac_cv_msse2_ok" >&5
echo "${ECHO_T}$ac_cv_msse2_ok" >&6; }
if test $ac_cv_cpu_has_sse2 = yes && test $ac_cv_msse2_ok = yes
then
BASECFLAGS="$BASECFLAGS -msse2 -mfpmath=sse"
else
# SSE2 doesn't appear to be available. Check that it's okay
# to use gcc inline assembler to get and set x87 control word
cat >>confdefs.h <<\_ACEOF
#define USING_X87_FPU 1
_ACEOF
{ echo "$as_me:$LINENO: checking whether we can use gcc inline assembler to get and set x87 control word" >&5
# Check that it's okay to use gcc inline assembler to get and set
# x87 control word
{ echo "$as_me:$LINENO: checking whether we can use gcc inline assembler to get and set x87 control word" >&5
echo $ECHO_N "checking whether we can use gcc inline assembler to get and set x87 control word... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@ -22005,9 +21852,9 @@ int
main ()
{
unsigned short cw;
__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
__asm__ __volatile__ ("fldcw %0" : : "m" (cw));
unsigned short cw;
__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
__asm__ __volatile__ ("fldcw %0" : : "m" (cw));
;
return 0;
@ -22039,17 +21886,15 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_asm_for_x87" >&5
{ echo "$as_me:$LINENO: result: $have_gcc_asm_for_x87" >&5
echo "${ECHO_T}$have_gcc_asm_for_x87" >&6; }
if test "$have_gcc_asm_for_x87" = yes
then
if test "$have_gcc_asm_for_x87" = yes
then
cat >>confdefs.h <<\_ACEOF
#define HAVE_GCC_ASM_FOR_X87 1
_ACEOF
fi
fi
fi
fi

View file

@ -3142,97 +3142,28 @@ then
in ARM mixed-endian order (byte order 45670123)])
fi
# David Gay's code in Python/dtoa.c requires that the FPU uses 53-bit
# rounding; this is a particular problem on x86, where the x87 FPU has
# a default rounding precision of 64 bits. For gcc/x86, we try to fix
# this by:
#
# (1) using the SSE2 instruction set when available (it usually is
# on modern machines)
# (2) using inline assembler to get and set the x87 FPU control word
# otherwise.
#
# On AMD64 (aka x86-64), gcc automatically enables use of SSE2
# instructions, so we don't bother trying to detect.
# The short float repr introduced in Python 3.1 requires the
# correctly-rounded string <-> double conversion functions from
# Python/dtoa.c, which in turn require that the FPU uses 53-bit
# rounding; this is a problem on x86, where the x87 FPU has a default
# rounding precision of 64 bits. For gcc/x86, we try to fix this by
# using inline assembler to get and set the x87 FPU control word.
if test "$GCC" = yes && test -n "`$CC -dM -E - </dev/null | grep i386`"
then
# determine whether we're already using the SSE2 instruction set for math
# (e.g., this is true by default on OS X/x86)
AC_MSG_CHECKING(whether SSE2 instructions are already enabled for math)
if test -n "`$CC -dM -E - </dev/null | grep __SSE2_MATH__`"
# Check that it's okay to use gcc inline assembler to get and set
# x87 control word. It should be, but you never know...
AC_MSG_CHECKING(whether we can use gcc inline assembler to get and set x87 control word)
AC_TRY_COMPILE([], [
unsigned short cw;
__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
__asm__ __volatile__ ("fldcw %0" : : "m" (cw));
],
[have_gcc_asm_for_x87=yes], [have_gcc_asm_for_x87=no])
AC_MSG_RESULT($have_gcc_asm_for_x87)
if test "$have_gcc_asm_for_x87" = yes
then
ac_sse2_enabled=yes
else
ac_sse2_enabled=no
fi
AC_MSG_RESULT($ac_sse2_enabled)
# if we're not using SSE2 already, we need to either enable it
# (when available), or use inline assembler to get and set the
# 387 control word.
if test $ac_sse2_enabled = no
then
# Check cpuid for SSE2 availability. Bits 25 and 26 of edx tell
# us about SSE and SSE2 respectively.
AC_MSG_CHECKING(whether SSE2 instructions are available on this CPU)
AC_TRY_RUN([
int main() {
unsigned int ax, bx, cx, dx, func;
func = 1U;
__asm__ __volatile__ (
"pushl %%ebx\n\t" /* don't clobber ebx */
"cpuid\n\t"
"movl %%ebx, %1\n\t"
"popl %%ebx"
: "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx)
: "a" (func)
: "cc" );
if ((dx & (1U << 25)) && (dx & (1U << 26)))
return 0;
else
return 1;
}
],
ac_cv_cpu_has_sse2=yes,
ac_cv_cpu_has_sse2=no,
ac_cv_cpu_has_sse2=no)
AC_MSG_RESULT($ac_cv_cpu_has_sse2)
# determine whether gcc accepts options to turn on SSE2
AC_MSG_CHECKING(whether $CC accepts -msse2 -mfpmath=sse)
ac_save_cc="$CC"
CC="$CC -msse2 -mfpmath=sse"
AC_TRY_RUN([int main() { return 0; }],
ac_cv_msse2_ok=yes,
ac_cv_msse2_ok=no,
ac_cv_msse2_ok=no)
CC="$ac_save_cc"
AC_MSG_RESULT($ac_cv_msse2_ok)
if test $ac_cv_cpu_has_sse2 = yes && test $ac_cv_msse2_ok = yes
then
BASECFLAGS="$BASECFLAGS -msse2 -mfpmath=sse"
else
# SSE2 doesn't appear to be available. Check that it's okay
# to use gcc inline assembler to get and set x87 control word
AC_DEFINE(USING_X87_FPU, 1,
[Define on x86 hardware if the x87 FPU is being used
for floating-point arithmetic])
AC_MSG_CHECKING(whether we can use gcc inline assembler to get and set x87 control word)
AC_TRY_COMPILE([], [
unsigned short cw;
__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
__asm__ __volatile__ ("fldcw %0" : : "m" (cw));
],
[have_gcc_asm_for_x87=yes], [have_gcc_asm_for_x87=no])
AC_MSG_RESULT($have_gcc_asm_for_x87)
if test "$have_gcc_asm_for_x87" = yes
then
AC_DEFINE(HAVE_GCC_ASM_FOR_X87, 1,
[Define if we can use gcc inline assembler to get and set x87 control word])
fi
fi
AC_DEFINE(HAVE_GCC_ASM_FOR_X87, 1,
[Define if we can use gcc inline assembler to get and set x87 control word])
fi
fi

View file

@ -989,10 +989,6 @@
/* Define if you want to use computed gotos in ceval.c. */
#undef USE_COMPUTED_GOTOS
/* Define on x86 hardware if the x87 FPU is being used for floating-point
arithmetic */
#undef USING_X87_FPU
/* Define if a va_list is an array of some kind */
#undef VA_LIST_IS_ARRAY