mirror of
https://github.com/python/cpython
synced 2024-09-16 00:37:36 +00:00
bpo-38256: Fix binascii.crc32() when inputs are 4+GiB (GH-32000)
When compiled with `USE_ZLIB_CRC32` defined (`configure` sets this on POSIX systems), `binascii.crc32(...)` failed to compute the correct value when the input data was >= 4GiB. Because the zlib crc32 API is limited to a 32-bit length. This lines it up with the `zlib.crc32(...)` implementation that doesn't have that flaw. **Performance:** This also adopts the same GIL releasing for larger inputs logic that `zlib.crc32` has, and causes the Windows build to always use zlib's crc32 instead of our slow C code as zlib is a required build dependency on Windows.
This commit is contained in:
parent
3ae975f1ac
commit
9d1c4d69db
|
@ -4,7 +4,7 @@
|
||||||
import binascii
|
import binascii
|
||||||
import array
|
import array
|
||||||
import re
|
import re
|
||||||
from test.support import warnings_helper
|
from test.support import bigmemtest, _1G, _4G, warnings_helper
|
||||||
|
|
||||||
|
|
||||||
# Note: "*_hex" functions are aliases for "(un)hexlify"
|
# Note: "*_hex" functions are aliases for "(un)hexlify"
|
||||||
|
@ -441,6 +441,14 @@ class BytearrayBinASCIITest(BinASCIITest):
|
||||||
class MemoryviewBinASCIITest(BinASCIITest):
|
class MemoryviewBinASCIITest(BinASCIITest):
|
||||||
type2test = memoryview
|
type2test = memoryview
|
||||||
|
|
||||||
|
class ChecksumBigBufferTestCase(unittest.TestCase):
|
||||||
|
"""bpo-38256 - check that inputs >=4 GiB are handled correctly."""
|
||||||
|
|
||||||
|
@bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
|
||||||
|
def test_big_buffer(self, size):
|
||||||
|
data = b"nyan" * (_1G + 1)
|
||||||
|
self.assertEqual(binascii.crc32(data), 1044521549)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
Fix :func:`binascii.crc32` when it is compiled to use zlib'c crc32 to
|
||||||
|
work properly on inputs 4+GiB in length instead of returning the wrong
|
||||||
|
result. The workaround prior to this was to always feed the function
|
||||||
|
data in increments smaller than 4GiB or to just call the zlib module
|
||||||
|
function.
|
||||||
|
|
||||||
|
We also have :func:`binascii.crc32` release the GIL when computing
|
||||||
|
on larger inputs as :func:`zlib.crc32` and :mod:`hashlib` do.
|
||||||
|
|
||||||
|
This also boosts performance on Windows as it now uses the zlib crc32
|
||||||
|
implementation for :func:`binascii.crc32` for a 2-3x speedup.
|
||||||
|
|
||||||
|
That the stdlib has a crc32 API in two modules is a known historical
|
||||||
|
oddity. This moves us closer to a single implementation behind them.
|
|
@ -737,6 +737,21 @@ static const unsigned int crc_32_tab[256] = {
|
||||||
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
|
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
|
||||||
0x2d02ef8dU
|
0x2d02ef8dU
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static unsigned int
|
||||||
|
internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
|
||||||
|
{ /* By Jim Ahlstrom; All rights transferred to CNRI */
|
||||||
|
unsigned int result;
|
||||||
|
|
||||||
|
crc = ~ crc;
|
||||||
|
while (len-- > 0) {
|
||||||
|
crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
|
||||||
|
/* Note: (crc >> 8) MUST zero fill on left */
|
||||||
|
}
|
||||||
|
|
||||||
|
result = (crc ^ 0xFFFFFFFF);
|
||||||
|
return result & 0xffffffff;
|
||||||
|
}
|
||||||
#endif /* USE_ZLIB_CRC32 */
|
#endif /* USE_ZLIB_CRC32 */
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -754,34 +769,46 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
|
||||||
/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
|
/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
|
||||||
|
|
||||||
#ifdef USE_ZLIB_CRC32
|
#ifdef USE_ZLIB_CRC32
|
||||||
/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
|
/* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
|
||||||
|
* modules for historical reasons. */
|
||||||
{
|
{
|
||||||
const Byte *buf;
|
/* Releasing the GIL for very small buffers is inefficient
|
||||||
Py_ssize_t len;
|
and may lower performance */
|
||||||
int signed_val;
|
if (data->len > 1024*5) {
|
||||||
|
unsigned char *buf = data->buf;
|
||||||
|
Py_ssize_t len = data->len;
|
||||||
|
|
||||||
buf = (Byte*)data->buf;
|
Py_BEGIN_ALLOW_THREADS
|
||||||
len = data->len;
|
/* Avoid truncation of length for very large buffers. crc32() takes
|
||||||
signed_val = crc32(crc, buf, len);
|
length as an unsigned int, which may be narrower than Py_ssize_t. */
|
||||||
return (unsigned int)signed_val & 0xffffffffU;
|
while ((size_t)len > UINT_MAX) {
|
||||||
|
crc = crc32(crc, buf, UINT_MAX);
|
||||||
|
buf += (size_t) UINT_MAX;
|
||||||
|
len -= (size_t) UINT_MAX;
|
||||||
|
}
|
||||||
|
crc = crc32(crc, buf, (unsigned int)len);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
} else {
|
||||||
|
crc = crc32(crc, data->buf, (unsigned int)data->len);
|
||||||
|
}
|
||||||
|
return crc & 0xffffffff;
|
||||||
}
|
}
|
||||||
#else /* USE_ZLIB_CRC32 */
|
#else /* USE_ZLIB_CRC32 */
|
||||||
{ /* By Jim Ahlstrom; All rights transferred to CNRI */
|
{
|
||||||
const unsigned char *bin_data;
|
const unsigned char *bin_data = data->buf;
|
||||||
Py_ssize_t len;
|
Py_ssize_t len = data->len;
|
||||||
unsigned int result;
|
|
||||||
|
|
||||||
bin_data = data->buf;
|
/* Releasing the GIL for very small buffers is inefficient
|
||||||
len = data->len;
|
and may lower performance */
|
||||||
|
if (len > 1024*5) {
|
||||||
crc = ~ crc;
|
unsigned int result;
|
||||||
while (len-- > 0) {
|
Py_BEGIN_ALLOW_THREADS
|
||||||
crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
|
result = internal_crc32(bin_data, len, crc);
|
||||||
/* Note: (crc >> 8) MUST zero fill on left */
|
Py_END_ALLOW_THREADS
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
return internal_crc32(bin_data, len, crc);
|
||||||
}
|
}
|
||||||
|
|
||||||
result = (crc ^ 0xFFFFFFFF);
|
|
||||||
return result & 0xffffffff;
|
|
||||||
}
|
}
|
||||||
#endif /* USE_ZLIB_CRC32 */
|
#endif /* USE_ZLIB_CRC32 */
|
||||||
|
|
||||||
|
|
11
Modules/clinic/zlibmodule.c.h
generated
11
Modules/clinic/zlibmodule.c.h
generated
|
@ -753,7 +753,7 @@ PyDoc_STRVAR(zlib_crc32__doc__,
|
||||||
#define ZLIB_CRC32_METHODDEF \
|
#define ZLIB_CRC32_METHODDEF \
|
||||||
{"crc32", (PyCFunction)(void(*)(void))zlib_crc32, METH_FASTCALL, zlib_crc32__doc__},
|
{"crc32", (PyCFunction)(void(*)(void))zlib_crc32, METH_FASTCALL, zlib_crc32__doc__},
|
||||||
|
|
||||||
static PyObject *
|
static unsigned int
|
||||||
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value);
|
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -762,6 +762,7 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||||
PyObject *return_value = NULL;
|
PyObject *return_value = NULL;
|
||||||
Py_buffer data = {NULL, NULL};
|
Py_buffer data = {NULL, NULL};
|
||||||
unsigned int value = 0;
|
unsigned int value = 0;
|
||||||
|
unsigned int _return_value;
|
||||||
|
|
||||||
if (!_PyArg_CheckPositional("crc32", nargs, 1, 2)) {
|
if (!_PyArg_CheckPositional("crc32", nargs, 1, 2)) {
|
||||||
goto exit;
|
goto exit;
|
||||||
|
@ -781,7 +782,11 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
skip_optional:
|
skip_optional:
|
||||||
return_value = zlib_crc32_impl(module, &data, value);
|
_return_value = zlib_crc32_impl(module, &data, value);
|
||||||
|
if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
/* Cleanup for data */
|
/* Cleanup for data */
|
||||||
|
@ -815,4 +820,4 @@ exit:
|
||||||
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||||
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||||
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
|
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
|
||||||
/*[clinic end generated code: output=e3e8a6142ea045a7 input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=f009d194565416d1 input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -1420,7 +1420,7 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
zlib.crc32
|
zlib.crc32 -> unsigned_int
|
||||||
|
|
||||||
data: Py_buffer
|
data: Py_buffer
|
||||||
value: unsigned_int(bitwise=True) = 0
|
value: unsigned_int(bitwise=True) = 0
|
||||||
|
@ -1432,9 +1432,9 @@ Compute a CRC-32 checksum of data.
|
||||||
The returned checksum is an integer.
|
The returned checksum is an integer.
|
||||||
[clinic start generated code]*/
|
[clinic start generated code]*/
|
||||||
|
|
||||||
static PyObject *
|
static unsigned int
|
||||||
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
||||||
/*[clinic end generated code: output=63499fa20af7ea25 input=26c3ed430fa00b4c]*/
|
/*[clinic end generated code: output=b217562e4fe6d6a6 input=1229cb2fb5ea948a]*/
|
||||||
{
|
{
|
||||||
/* Releasing the GIL for very small buffers is inefficient
|
/* Releasing the GIL for very small buffers is inefficient
|
||||||
and may lower performance */
|
and may lower performance */
|
||||||
|
@ -1455,7 +1455,7 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
||||||
} else {
|
} else {
|
||||||
value = crc32(value, data->buf, (unsigned int)data->len);
|
value = crc32(value, data->buf, (unsigned int)data->len);
|
||||||
}
|
}
|
||||||
return PyLong_FromUnsignedLong(value & 0xffffffffU);
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -366,7 +366,9 @@
|
||||||
<ClCompile Include="..\Modules\arraymodule.c" />
|
<ClCompile Include="..\Modules\arraymodule.c" />
|
||||||
<ClCompile Include="..\Modules\atexitmodule.c" />
|
<ClCompile Include="..\Modules\atexitmodule.c" />
|
||||||
<ClCompile Include="..\Modules\audioop.c" />
|
<ClCompile Include="..\Modules\audioop.c" />
|
||||||
<ClCompile Include="..\Modules\binascii.c" />
|
<ClCompile Include="..\Modules\binascii.c">
|
||||||
|
<PreprocessorDefinitions>USE_ZLIB_CRC32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="..\Modules\cmathmodule.c" />
|
<ClCompile Include="..\Modules\cmathmodule.c" />
|
||||||
<ClCompile Include="..\Modules\_datetimemodule.c" />
|
<ClCompile Include="..\Modules\_datetimemodule.c" />
|
||||||
<ClCompile Include="..\Modules\errnomodule.c" />
|
<ClCompile Include="..\Modules\errnomodule.c" />
|
||||||
|
|
Loading…
Reference in a new issue