From eae7dad40255bad42e4abce53ff8143dcbc66af5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Oct 2022 04:10:58 +0200 Subject: [PATCH] gh-95534: Improve gzip reading speed by 10% (#97664) Change summary: + There is now a `gzip.READ_BUFFER_SIZE` constant that is 128KB. Other programs that read in 128KB chunks: pigz and cat. So this seems best practice among good programs. Also it is faster than 8 kb chunks. + a zlib._ZlibDecompressor was added. This is the _bz2.BZ2Decompressor ported to zlib. Since the zlib.Decompress object is better for in-memory decompression, the _ZlibDecompressor is hidden. It only makes sense in file decompression, and that is already implemented now in the gzip library. No need to bother the users with this. + The ZlibDecompressor uses the older Cpython arrange_output_buffer functions, as those are faster and more appropriate for the use case. + GzipFile.read has been optimized. There is no longer a `unconsumed_tail` member to write back to padded file. This is instead handled by the ZlibDecompressor itself, which has an internal buffer. `_add_read_data` has been inlined, as it was just two calls. EDIT: While I am adding improvements anyway, I figured I could add another one-liner optimization now to the python -m gzip application. That read chunks in io.DEFAULT_BUFFER_SIZE previously, but has been updated now to use READ_BUFFER_SIZE chunks. --- Lib/gzip.py | 24 +- Lib/test/test_zlib.py | 167 +++++ ...2-09-30-09-22-37.gh-issue-95534.ndEfPj.rst | 1 + Modules/clinic/zlibmodule.c.h | 100 ++- Modules/zlibmodule.c | 638 ++++++++++++++++-- 5 files changed, 850 insertions(+), 80 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-09-30-09-22-37.gh-issue-95534.ndEfPj.rst diff --git a/Lib/gzip.py b/Lib/gzip.py index 8edcda4493c..75c6ddc3f2c 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -21,6 +21,8 @@ _COMPRESS_LEVEL_TRADEOFF = 6 _COMPRESS_LEVEL_BEST = 9 +READ_BUFFER_SIZE = 128 * 1024 + def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, encoding=None, errors=None, newline=None): @@ -446,7 +448,7 @@ def _read_gzip_header(fp): class _GzipReader(_compression.DecompressReader): def __init__(self, fp): - super().__init__(_PaddedFile(fp), zlib.decompressobj, + super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor, wbits=-zlib.MAX_WBITS) # Set flag indicating start of a new member self._new_member = True @@ -494,12 +496,13 @@ def read(self, size=-1): self._new_member = False # Read a chunk of data from the file - buf = self._fp.read(io.DEFAULT_BUFFER_SIZE) + if self._decompressor.needs_input: + buf = self._fp.read(READ_BUFFER_SIZE) + uncompress = self._decompressor.decompress(buf, size) + else: + uncompress = self._decompressor.decompress(b"", size) - uncompress = self._decompressor.decompress(buf, size) - if self._decompressor.unconsumed_tail != b"": - self._fp.prepend(self._decompressor.unconsumed_tail) - elif self._decompressor.unused_data != b"": + if self._decompressor.unused_data != b"": # Prepend the already read bytes to the fileobj so they can # be seen by _read_eof() and _read_gzip_header() self._fp.prepend(self._decompressor.unused_data) @@ -510,14 +513,11 @@ def read(self, size=-1): raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") - self._add_read_data( uncompress ) + self._crc = zlib.crc32(uncompress, self._crc) + self._stream_size += len(uncompress) self._pos += len(uncompress) return uncompress - def _add_read_data(self, data): - self._crc = zlib.crc32(data, self._crc) - self._stream_size = self._stream_size + len(data) - def _read_eof(self): # We've read to the end of the file # We check that the computed CRC and size of the @@ -647,7 +647,7 @@ def main(): f = builtins.open(arg, "rb") g = open(arg + ".gz", "wb") while True: - chunk = f.read(io.DEFAULT_BUFFER_SIZE) + chunk = f.read(READ_BUFFER_SIZE) if not chunk: break g.write(chunk) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index f20aad051da..ae54f6c4689 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -944,6 +944,173 @@ def choose_lines(source, number, seed=None, generator=random): """ +class ZlibDecompressorTest(): + # Test adopted from test_bz2.py + TEXT = HAMLET_SCENE + DATA = zlib.compress(HAMLET_SCENE) + BAD_DATA = b"Not a valid deflate block" + def test_Constructor(self): + self.assertRaises(TypeError, zlib._ZlibDecompressor, 42) + + def testDecompress(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(TypeError, zlibd.decompress) + text = zlibd.decompress(self.DATA) + self.assertEqual(text, self.TEXT) + + def testDecompressChunks10(self): + zlibd = zlib._ZlibDecompressor() + text = b'' + n = 0 + while True: + str = self.DATA[n*10:(n+1)*10] + if not str: + break + text += zlibd.decompress(str) + n += 1 + self.assertEqual(text, self.TEXT) + + def testDecompressUnusedData(self): + zlibd = zlib._ZlibDecompressor() + unused_data = b"this is unused data" + text = zlibd.decompress(self.DATA+unused_data) + self.assertEqual(text, self.TEXT) + self.assertEqual(zlibd.unused_data, unused_data) + + def testEOFError(self): + zlibd = zlib._ZlibDecompressor() + text = zlibd.decompress(self.DATA) + self.assertRaises(EOFError, zlibd.decompress, b"anything") + self.assertRaises(EOFError, zlibd.decompress, b"") + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=3.3) + def testDecompress4G(self, size): + # "Test zlib._ZlibDecompressor.decompress() with >4GiB input" + blocksize = 10 * 1024 * 1024 + block = random.randbytes(blocksize) + try: + data = block * (size // blocksize + 1) + compressed = zlib.compress(data) + zlibd = zlib._ZlibDecompressor() + decompressed = zlibd.decompress(compressed) + self.assertTrue(decompressed == data) + finally: + data = None + compressed = None + decompressed = None + + def testPickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises(TypeError): + pickle.dumps(zlib._ZlibDecompressor(), proto) + + def testDecompressorChunksMaxsize(self): + zlibd = zlib._ZlibDecompressor() + max_length = 100 + out = [] + + # Feed some input + len_ = len(self.BIG_DATA) - 64 + out.append(zlibd.decompress(self.BIG_DATA[:len_], + max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(zlibd.decompress(self.BIG_DATA[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while not zlibd.eof: + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, self.BIG_TEXT) + self.assertEqual(zlibd.unused_data, b"") + + def test_decompressor_inputbuf_1(self): + # Test reusing input buffer after moving existing + # contents to beginning + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and fill it + self.assertEqual(zlibd.decompress(self.DATA[:100], + max_length=0), b'') + + # Retrieve some results, freeing capacity at beginning + # of input buffer + out.append(zlibd.decompress(b'', 2)) + + # Add more data that fits into input buffer after + # moving existing data to beginning + out.append(zlibd.decompress(self.DATA[100:105], 15)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[105:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_decompressor_inputbuf_2(self): + # Test reusing input buffer by appending data at the + # end right away + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and empty it + self.assertEqual(zlibd.decompress(self.DATA[:200], + max_length=0), b'') + out.append(zlibd.decompress(b'')) + + # Fill buffer with new data + out.append(zlibd.decompress(self.DATA[200:280], 2)) + + # Append some more data, not enough to require resize + out.append(zlibd.decompress(self.DATA[280:300], 2)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[300:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_decompressor_inputbuf_3(self): + # Test reusing input buffer after extending it + + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create almost full input buffer + out.append(zlibd.decompress(self.DATA[:200], 5)) + + # Add even more data to it, requiring resize + out.append(zlibd.decompress(self.DATA[200:300], 5)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[300:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_failure(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30) + # Previously, a second call could crash due to internal inconsistency + self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30) + + @support.refcount_test + def test_refleaks_in___init__(self): + gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') + zlibd = zlib._ZlibDecompressor() + refs_before = gettotalrefcount() + for i in range(100): + zlibd.__init__() + self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10) + + class CustomInt: def __index__(self): return 100 diff --git a/Misc/NEWS.d/next/Library/2022-09-30-09-22-37.gh-issue-95534.ndEfPj.rst b/Misc/NEWS.d/next/Library/2022-09-30-09-22-37.gh-issue-95534.ndEfPj.rst new file mode 100644 index 00000000000..131d4ed6999 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-09-30-09-22-37.gh-issue-95534.ndEfPj.rst @@ -0,0 +1 @@ +:meth:`gzip.GzipFile.read` reads 10% faster. diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index a04b954a57f..65412b2435a 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -897,6 +897,104 @@ exit: return return_value; } +PyDoc_STRVAR(zlib_ZlibDecompressor_decompress__doc__, +"decompress($self, /, data, max_length=-1)\n" +"--\n" +"\n" +"Decompress *data*, returning uncompressed data as bytes.\n" +"\n" +"If *max_length* is nonnegative, returns at most *max_length* bytes of\n" +"decompressed data. If this limit is reached and further output can be\n" +"produced, *self.needs_input* will be set to ``False``. In this case, the next\n" +"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n" +"\n" +"If all of the input data was decompressed and returned (either because this\n" +"was less than *max_length* bytes, or because *max_length* was negative),\n" +"*self.needs_input* will be set to True.\n" +"\n" +"Attempting to decompress data after the end of stream is reached raises an\n" +"EOFError. Any data found after the end of the stream is ignored and saved in\n" +"the unused_data attribute."); + +#define ZLIB_ZLIBDECOMPRESSOR_DECOMPRESS_METHODDEF \ + {"decompress", _PyCFunction_CAST(zlib_ZlibDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, zlib_ZlibDecompressor_decompress__doc__}, + +static PyObject * +zlib_ZlibDecompressor_decompress_impl(ZlibDecompressor *self, + Py_buffer *data, Py_ssize_t max_length); + +static PyObject * +zlib_ZlibDecompressor_decompress(ZlibDecompressor *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(data), &_Py_ID(max_length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", "max_length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "decompress", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + Py_ssize_t max_length = -1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&data, 'C')) { + _PyArg_BadArgument("decompress", "argument 'data'", "contiguous buffer", args[0]); + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_length = ival; + } +skip_optional_pos: + return_value = zlib_ZlibDecompressor_decompress_impl(self, &data, max_length); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + PyDoc_STRVAR(zlib_adler32__doc__, "adler32($module, data, value=1, /)\n" "--\n" @@ -1031,4 +1129,4 @@ exit: #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=9e5f9911d0c273e1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=57ff7b511ab23132 input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 2fc39a3bd56..88d2dd55635 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -8,6 +8,11 @@ #include "Python.h" #include "structmember.h" // PyMemberDef #include "zlib.h" +#include "stdbool.h" + +#if defined(ZLIB_VERNUM) && ZLIB_VERNUM < 0x1221 +#error "At least zlib version 1.2.2.1 is required" +#endif // Blocks output buffer wrappers #include "pycore_blocks_output_buffer.h" @@ -171,9 +176,6 @@ OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window) } } while (0) #define LEAVE_ZLIB(obj) PyThread_release_lock((obj)->lock); -#if defined(ZLIB_VERNUM) && ZLIB_VERNUM >= 0x1221 -# define AT_LEAST_ZLIB_1_2_2_1 -#endif /* The following parameters are copied from zutil.h, version 0.95 */ #define DEFLATED 8 @@ -185,12 +187,14 @@ OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window) /* Initial buffer size. */ #define DEF_BUF_SIZE (16*1024) +#define DEF_MAX_INITIAL_BUF_SIZE (16 * 1024 * 1024) static PyModuleDef zlibmodule; typedef struct { PyTypeObject *Comptype; PyTypeObject *Decomptype; + PyTypeObject *ZlibDecompressorType; PyObject *ZlibError; } zlibstate; @@ -209,7 +213,7 @@ typedef struct PyObject *unused_data; PyObject *unconsumed_tail; char eof; - int is_initialised; + bool is_initialised; PyObject *zdict; PyThread_type_lock lock; } compobject; @@ -320,7 +324,7 @@ static PyObject * zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) /*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/ { - PyObject *RetVal; + PyObject *return_value; int flush; z_stream zst; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -387,11 +391,11 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) err = deflateEnd(&zst); if (err == Z_OK) { - RetVal = OutputBuffer_Finish(&buffer, zst.avail_out); - if (RetVal == NULL) { + return_value = OutputBuffer_Finish(&buffer, zst.avail_out); + if (return_value == NULL) { goto error; } - return RetVal; + return return_value; } else zlib_error(state, zst, err, "while finishing compression"); @@ -419,7 +423,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, Py_ssize_t bufsize) /*[clinic end generated code: output=77c7e35111dc8c42 input=a9ac17beff1f893f]*/ { - PyObject *RetVal; + PyObject *return_value; Byte *ibuf; Py_ssize_t ibuflen; int err, flush; @@ -514,9 +518,9 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, goto error; } - RetVal = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out); - if (RetVal != NULL) { - return RetVal; + return_value = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out); + if (return_value != NULL) { + return return_value; } error: @@ -675,18 +679,10 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict) case Z_OK: self->is_initialised = 1; if (self->zdict != NULL && wbits < 0) { -#ifdef AT_LEAST_ZLIB_1_2_2_1 if (set_inflate_zdict(state, self) < 0) { Py_DECREF(self); return NULL; } -#else - PyErr_Format(state->ZlibError, - "zlib version %s does not allow raw inflate with dictionary", - ZLIB_VERSION); - Py_DECREF(self); - return NULL; -#endif } return (PyObject *)self; case Z_STREAM_ERROR: @@ -753,7 +749,7 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, Py_buffer *data) /*[clinic end generated code: output=6731b3f0ff357ca6 input=04d00f65ab01d260]*/ { - PyObject *RetVal; + PyObject *return_value; int err; _BlocksOutputBuffer buffer = {.list = NULL}; zlibstate *state = PyType_GetModuleState(cls); @@ -791,17 +787,17 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, } while (ibuflen != 0); - RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (RetVal != NULL) { + return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); + if (return_value != NULL) { goto success; } error: OutputBuffer_OnError(&buffer); - RetVal = NULL; + return_value = NULL; success: LEAVE_ZLIB(self); - return RetVal; + return return_value; } /* Helper for objdecompress() and flush(). Saves any unconsumed input data in @@ -875,7 +871,7 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, { int err = Z_OK; Py_ssize_t ibuflen; - PyObject *RetVal; + PyObject *return_value; _BlocksOutputBuffer buffer = {.list = NULL}; PyObject *module = PyType_GetModule(cls); @@ -953,17 +949,17 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, goto abort; } - RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (RetVal != NULL) { + return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); + if (return_value != NULL) { goto success; } abort: OutputBuffer_OnError(&buffer); - RetVal = NULL; + return_value = NULL; success: LEAVE_ZLIB(self); - return RetVal; + return return_value; } /*[clinic input] @@ -985,7 +981,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) /*[clinic end generated code: output=c7efd13efd62add2 input=286146e29442eb6c]*/ { int err; - PyObject *RetVal; + PyObject *return_value; _BlocksOutputBuffer buffer = {.list = NULL}; zlibstate *state = PyType_GetModuleState(cls); @@ -1042,17 +1038,17 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) goto error; } - RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (RetVal != NULL) { + return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); + if (return_value != NULL) { goto success; } error: OutputBuffer_OnError(&buffer); - RetVal = NULL; + return_value = NULL; success: LEAVE_ZLIB(self); - return RetVal; + return return_value; } #ifdef HAVE_ZLIB_COPY @@ -1071,14 +1067,14 @@ zlib_Compress_copy_impl(compobject *self, PyTypeObject *cls) { zlibstate *state = PyType_GetModuleState(cls); - compobject *retval = newcompobject(state->Comptype); - if (!retval) return NULL; + compobject *return_value = newcompobject(state->Comptype); + if (!return_value) return NULL; /* Copy the zstream state * We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe */ ENTER_ZLIB(self); - int err = deflateCopy(&retval->zst, &self->zst); + int err = deflateCopy(&return_value->zst, &self->zst); switch (err) { case Z_OK: break; @@ -1094,22 +1090,22 @@ zlib_Compress_copy_impl(compobject *self, PyTypeObject *cls) goto error; } Py_INCREF(self->unused_data); - Py_XSETREF(retval->unused_data, self->unused_data); + Py_XSETREF(return_value->unused_data, self->unused_data); Py_INCREF(self->unconsumed_tail); - Py_XSETREF(retval->unconsumed_tail, self->unconsumed_tail); + Py_XSETREF(return_value->unconsumed_tail, self->unconsumed_tail); Py_XINCREF(self->zdict); - Py_XSETREF(retval->zdict, self->zdict); - retval->eof = self->eof; + Py_XSETREF(return_value->zdict, self->zdict); + return_value->eof = self->eof; /* Mark it as being initialized */ - retval->is_initialised = 1; + return_value->is_initialised = 1; LEAVE_ZLIB(self); - return (PyObject *)retval; + return (PyObject *)return_value; error: LEAVE_ZLIB(self); - Py_XDECREF(retval); + Py_XDECREF(return_value); return NULL; } @@ -1158,14 +1154,14 @@ zlib_Decompress_copy_impl(compobject *self, PyTypeObject *cls) { zlibstate *state = PyType_GetModuleState(cls); - compobject *retval = newcompobject(state->Decomptype); - if (!retval) return NULL; + compobject *return_value = newcompobject(state->Decomptype); + if (!return_value) return NULL; /* Copy the zstream state * We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe */ ENTER_ZLIB(self); - int err = inflateCopy(&retval->zst, &self->zst); + int err = inflateCopy(&return_value->zst, &self->zst); switch (err) { case Z_OK: break; @@ -1182,22 +1178,22 @@ zlib_Decompress_copy_impl(compobject *self, PyTypeObject *cls) } Py_INCREF(self->unused_data); - Py_XSETREF(retval->unused_data, self->unused_data); + Py_XSETREF(return_value->unused_data, self->unused_data); Py_INCREF(self->unconsumed_tail); - Py_XSETREF(retval->unconsumed_tail, self->unconsumed_tail); + Py_XSETREF(return_value->unconsumed_tail, self->unconsumed_tail); Py_XINCREF(self->zdict); - Py_XSETREF(retval->zdict, self->zdict); - retval->eof = self->eof; + Py_XSETREF(return_value->zdict, self->zdict); + return_value->eof = self->eof; /* Mark it as being initialized */ - retval->is_initialised = 1; + return_value->is_initialised = 1; LEAVE_ZLIB(self); - return (PyObject *)retval; + return (PyObject *)return_value; error: LEAVE_ZLIB(self); - Py_XDECREF(retval); + Py_XDECREF(return_value); return NULL; } @@ -1252,7 +1248,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, { int err, flush; Py_buffer data; - PyObject *RetVal; + PyObject *return_value; Py_ssize_t ibuflen; _BlocksOutputBuffer buffer = {.list = NULL}; _Uint32Window window; // output buffer's UINT32_MAX sliding window @@ -1306,13 +1302,6 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, case Z_STREAM_END: break; default: - if (err == Z_NEED_DICT && self->zdict != NULL) { - if (set_inflate_zdict(state, self) < 0) { - goto abort; - } - else - break; - } goto save; } @@ -1336,18 +1325,475 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, } } - RetVal = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out); - if (RetVal != NULL) { + return_value = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out); + if (return_value != NULL) { goto success; } abort: OutputBuffer_WindowOnError(&buffer, &window); - RetVal = NULL; + return_value = NULL; success: PyBuffer_Release(&data); LEAVE_ZLIB(self); - return RetVal; + return return_value; +} + + +typedef struct { + PyObject_HEAD + z_stream zst; + PyObject *zdict; + PyThread_type_lock lock; + PyObject *unused_data; + uint8_t *input_buffer; + Py_ssize_t input_buffer_size; + /* zst>avail_in is only 32 bit, so we store the true length + separately. Conversion and looping is encapsulated in + decompress_buf() */ + Py_ssize_t avail_in_real; + bool is_initialised; + char eof; /* T_BOOL expects a char */ + char needs_input; +} ZlibDecompressor; + +/*[clinic input] +class zlib.ZlibDecompressor "ZlibDecompressor *" "&ZlibDecompressorType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=0658178ab94645df]*/ + +static void +ZlibDecompressor_dealloc(ZlibDecompressor *self) +{ + PyObject *type = (PyObject *)Py_TYPE(self); + PyThread_free_lock(self->lock); + if (self->is_initialised) { + inflateEnd(&self->zst); + } + PyMem_Free(self->input_buffer); + Py_CLEAR(self->unused_data); + Py_CLEAR(self->zdict); + PyObject_Free(self); + Py_DECREF(type); +} + +static int +set_inflate_zdict_ZlibDecompressor(zlibstate *state, ZlibDecompressor *self) +{ + Py_buffer zdict_buf; + if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) { + return -1; + } + if ((size_t)zdict_buf.len > UINT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "zdict length does not fit in an unsigned int"); + PyBuffer_Release(&zdict_buf); + return -1; + } + int err; + err = inflateSetDictionary(&self->zst, + zdict_buf.buf, (unsigned int)zdict_buf.len); + PyBuffer_Release(&zdict_buf); + if (err != Z_OK) { + zlib_error(state, self->zst, err, "while setting zdict"); + return -1; + } + return 0; +} + +static Py_ssize_t +arrange_output_buffer_with_maximum(uint32_t *avail_out, + uint8_t **next_out, + PyObject **buffer, + Py_ssize_t length, + Py_ssize_t max_length) +{ + Py_ssize_t occupied; + + if (*buffer == NULL) { + if (!(*buffer = PyBytes_FromStringAndSize(NULL, length))) + return -1; + occupied = 0; + } + else { + occupied = *next_out - (uint8_t *)PyBytes_AS_STRING(*buffer); + + if (length == occupied) { + Py_ssize_t new_length; + assert(length <= max_length); + /* can not scale the buffer over max_length */ + if (length == max_length) + return -2; + if (length <= (max_length >> 1)) + new_length = length << 1; + else + new_length = max_length; + if (_PyBytes_Resize(buffer, new_length) < 0) + return -1; + length = new_length; + } + } + + *avail_out = (uint32_t)Py_MIN((size_t)(length - occupied), UINT32_MAX); + *next_out = (uint8_t *)PyBytes_AS_STRING(*buffer) + occupied; + + return length; +} + +static inline Py_ssize_t +arrange_output_buffer(uint32_t *avail_out, + uint8_t **next_out, + PyObject **buffer, + Py_ssize_t length) +{ + Py_ssize_t ret; + + ret = arrange_output_buffer_with_maximum(avail_out, next_out, buffer, + length, + PY_SSIZE_T_MAX); + if (ret == -2) + PyErr_NoMemory(); + return ret; +} + +/* Decompress data of length self->avail_in_real in self->state.next_in. The + output buffer is allocated dynamically and returned. If the max_length is + of sufficiently low size, max_length is allocated immediately. At most + max_length bytes are returned, so some of the input may not be consumed. + self->state.next_in and self->avail_in_real are updated to reflect the + consumed input. */ +static PyObject* +decompress_buf(ZlibDecompressor *self, Py_ssize_t max_length) +{ + /* data_size is strictly positive, but because we repeatedly have to + compare against max_length and PyBytes_GET_SIZE we declare it as + signed */ + PyObject *return_value = NULL; + Py_ssize_t hard_limit; + Py_ssize_t obuflen; + zlibstate *state = PyType_GetModuleState(Py_TYPE(self)); + + int err = Z_OK; + + /* When sys.maxsize is passed as default use DEF_BUF_SIZE as start buffer. + In this particular case the data may not necessarily be very big, so + it is better to grow dynamically.*/ + if ((max_length < 0) || max_length == PY_SSIZE_T_MAX) { + hard_limit = PY_SSIZE_T_MAX; + obuflen = DEF_BUF_SIZE; + } else { + /* Assume that decompressor is used in file decompression with a fixed + block size of max_length. In that case we will reach max_length almost + always (except at the end of the file). So it makes sense to allocate + max_length. */ + hard_limit = max_length; + obuflen = max_length; + if (obuflen > DEF_MAX_INITIAL_BUF_SIZE){ + // Safeguard against memory overflow. + obuflen = DEF_MAX_INITIAL_BUF_SIZE; + } + } + + do { + arrange_input_buffer(&(self->zst), &(self->avail_in_real)); + + do { + obuflen = arrange_output_buffer_with_maximum(&(self->zst.avail_out), + &(self->zst.next_out), + &return_value, + obuflen, + hard_limit); + if (obuflen == -1){ + PyErr_SetString(PyExc_MemoryError, + "Insufficient memory for buffer allocation"); + goto error; + } + else if (obuflen == -2) { + break; + } + Py_BEGIN_ALLOW_THREADS + err = inflate(&self->zst, Z_SYNC_FLUSH); + Py_END_ALLOW_THREADS + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + default: + if (err == Z_NEED_DICT) { + goto error; + } + else { + break; + } + } + } while (self->zst.avail_out == 0); + } while(err != Z_STREAM_END && self->avail_in_real != 0); + + if (err == Z_STREAM_END) { + self->eof = 1; + self->is_initialised = 0; + /* Unlike the Decompress object we call inflateEnd here as there are no + backwards compatibility issues */ + err = inflateEnd(&self->zst); + if (err != Z_OK) { + zlib_error(state, self->zst, err, "while finishing decompression"); + goto error; + } + } else if (err != Z_OK && err != Z_BUF_ERROR) { + zlib_error(state, self->zst, err, "while decompressing data"); + } + + self->avail_in_real += self->zst.avail_in; + + if (_PyBytes_Resize(&return_value, self->zst.next_out - + (uint8_t *)PyBytes_AS_STRING(return_value)) != 0) { + goto error; + } + + goto success; +error: + Py_CLEAR(return_value); +success: + return return_value; +} + + +static PyObject * +decompress(ZlibDecompressor *self, uint8_t *data, + size_t len, Py_ssize_t max_length) +{ + bool input_buffer_in_use; + PyObject *result; + + /* Prepend unconsumed input if necessary */ + if (self->zst.next_in != NULL) { + size_t avail_now, avail_total; + + /* Number of bytes we can append to input buffer */ + avail_now = (self->input_buffer + self->input_buffer_size) + - (self->zst.next_in + self->avail_in_real); + + /* Number of bytes we can append if we move existing + contents to beginning of buffer (overwriting + consumed input) */ + avail_total = self->input_buffer_size - self->avail_in_real; + + if (avail_total < len) { + size_t offset = self->zst.next_in - self->input_buffer; + uint8_t *tmp; + size_t new_size = self->input_buffer_size + len - avail_now; + + /* Assign to temporary variable first, so we don't + lose address of allocated buffer if realloc fails */ + tmp = PyMem_Realloc(self->input_buffer, new_size); + if (tmp == NULL) { + PyErr_SetNone(PyExc_MemoryError); + return NULL; + } + self->input_buffer = tmp; + self->input_buffer_size = new_size; + + self->zst.next_in = self->input_buffer + offset; + } + else if (avail_now < len) { + memmove(self->input_buffer, self->zst.next_in, + self->avail_in_real); + self->zst.next_in = self->input_buffer; + } + memcpy((void*)(self->zst.next_in + self->avail_in_real), data, len); + self->avail_in_real += len; + input_buffer_in_use = 1; + } + else { + self->zst.next_in = data; + self->avail_in_real = len; + input_buffer_in_use = 0; + } + + result = decompress_buf(self, max_length); + if(result == NULL) { + self->zst.next_in = NULL; + return NULL; + } + + if (self->eof) { + self->needs_input = 0; + + if (self->avail_in_real > 0) { + PyObject *unused_data = PyBytes_FromStringAndSize( + (char *)self->zst.next_in, self->avail_in_real); + if (unused_data == NULL) { + goto error; + } + Py_XSETREF(self->unused_data, unused_data); + } + } + else if (self->avail_in_real == 0) { + self->zst.next_in = NULL; + self->needs_input = 1; + } + else { + self->needs_input = 0; + + /* If we did not use the input buffer, we now have + to copy the tail from the caller's buffer into the + input buffer */ + if (!input_buffer_in_use) { + + /* Discard buffer if it's too small + (resizing it may needlessly copy the current contents) */ + if (self->input_buffer != NULL && + self->input_buffer_size < self->avail_in_real) { + PyMem_Free(self->input_buffer); + self->input_buffer = NULL; + } + + /* Allocate if necessary */ + if (self->input_buffer == NULL) { + self->input_buffer = PyMem_Malloc(self->avail_in_real); + if (self->input_buffer == NULL) { + PyErr_SetNone(PyExc_MemoryError); + goto error; + } + self->input_buffer_size = self->avail_in_real; + } + + /* Copy tail */ + memcpy(self->input_buffer, self->zst.next_in, self->avail_in_real); + self->zst.next_in = self->input_buffer; + } + } + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +/*[clinic input] +zlib.ZlibDecompressor.decompress + + data: Py_buffer + max_length: Py_ssize_t=-1 + +Decompress *data*, returning uncompressed data as bytes. + +If *max_length* is nonnegative, returns at most *max_length* bytes of +decompressed data. If this limit is reached and further output can be +produced, *self.needs_input* will be set to ``False``. In this case, the next +call to *decompress()* may provide *data* as b'' to obtain more of the output. + +If all of the input data was decompressed and returned (either because this +was less than *max_length* bytes, or because *max_length* was negative), +*self.needs_input* will be set to True. + +Attempting to decompress data after the end of stream is reached raises an +EOFError. Any data found after the end of the stream is ignored and saved in +the unused_data attribute. +[clinic start generated code]*/ + +static PyObject * +zlib_ZlibDecompressor_decompress_impl(ZlibDecompressor *self, + Py_buffer *data, Py_ssize_t max_length) +/*[clinic end generated code: output=990d32787b775f85 input=0b29d99715250b96]*/ + +{ + PyObject *result = NULL; + + ENTER_ZLIB(self); + if (self->eof) { + PyErr_SetString(PyExc_EOFError, "End of stream already reached"); + } + else { + result = decompress(self, data->buf, data->len, max_length); + } + LEAVE_ZLIB(self); + return result; +} + +PyDoc_STRVAR(ZlibDecompressor__new____doc__, +"_ZlibDecompressor(wbits=15, zdict=b\'\')\n" +"--\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +" wbits = 15\n" +" zdict\n" +" The predefined compression dictionary. This is a sequence of bytes\n" +" (such as a bytes object) containing subsequences that are expected\n" +" to occur frequently in the data that is to be compressed. Those\n" +" subsequences that are expected to be most common should come at the\n" +" end of the dictionary. This must be the same dictionary as used by the\n" +" compressor that produced the input data.\n" +"\n"); + +static PyObject * +ZlibDecompressor__new__(PyTypeObject *cls, + PyObject *args, + PyObject *kwargs) +{ + static char *keywords[] = {"wbits", "zdict", NULL}; + static char *format = "|iO:_ZlibDecompressor"; + int wbits = MAX_WBITS; + PyObject *zdict = NULL; + zlibstate *state = PyType_GetModuleState(cls); + + if (!PyArg_ParseTupleAndKeywords( + args, kwargs, format, keywords, &wbits, &zdict)) { + return NULL; + } + ZlibDecompressor *self = PyObject_New(ZlibDecompressor, cls); + self->eof = 0; + self->needs_input = 1; + self->avail_in_real = 0; + self->input_buffer = NULL; + self->input_buffer_size = 0; + if (zdict != NULL) { + Py_INCREF(zdict); + } + self->zdict = zdict; + self->zst.opaque = NULL; + self->zst.zalloc = PyZlib_Malloc; + self->zst.zfree = PyZlib_Free; + self->zst.next_in = NULL; + self->zst.avail_in = 0; + self->unused_data = PyBytes_FromStringAndSize(NULL, 0); + if (self->unused_data == NULL) { + Py_CLEAR(self); + return NULL; + } + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + Py_DECREF(self); + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return NULL; + } + int err = inflateInit2(&(self->zst), wbits); + switch (err) { + case Z_OK: + self->is_initialised = 1; + if (self->zdict != NULL && wbits < 0) { + if (set_inflate_zdict_ZlibDecompressor(state, self) < 0) { + Py_DECREF(self); + return NULL; + } + } + return (PyObject *)self; + case Z_STREAM_ERROR: + Py_DECREF(self); + PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); + return NULL; + case Z_MEM_ERROR: + Py_DECREF(self); + PyErr_SetString(PyExc_MemoryError, + "Can't allocate memory for decompression object"); + return NULL; + default: + zlib_error(state, self->zst, err, "while creating decompression object"); + Py_DECREF(self); + return NULL; + } } #include "clinic/zlibmodule.c.h" @@ -1372,6 +1818,11 @@ static PyMethodDef Decomp_methods[] = {NULL, NULL} }; +static PyMethodDef ZlibDecompressor_methods[] = { + ZLIB_ZLIBDECOMPRESSOR_DECOMPRESS_METHODDEF + {NULL} +}; + #define COMP_OFF(x) offsetof(compobject, x) static PyMemberDef Decomp_members[] = { {"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY}, @@ -1380,6 +1831,26 @@ static PyMemberDef Decomp_members[] = { {NULL}, }; +PyDoc_STRVAR(ZlibDecompressor_eof__doc__, +"True if the end-of-stream marker has been reached."); + +PyDoc_STRVAR(ZlibDecompressor_unused_data__doc__, +"Data found after the end of the compressed stream."); + +PyDoc_STRVAR(ZlibDecompressor_needs_input_doc, +"True if more input is needed before more decompressed data can be produced."); + +static PyMemberDef ZlibDecompressor_members[] = { + {"eof", T_BOOL, offsetof(ZlibDecompressor, eof), + READONLY, ZlibDecompressor_eof__doc__}, + {"unused_data", T_OBJECT_EX, offsetof(ZlibDecompressor, unused_data), + READONLY, ZlibDecompressor_unused_data__doc__}, + {"needs_input", T_BOOL, offsetof(ZlibDecompressor, needs_input), READONLY, + ZlibDecompressor_needs_input_doc}, + {NULL}, +}; + + /*[clinic input] zlib.adler32 @@ -1497,6 +1968,25 @@ static PyType_Spec Decomptype_spec = { .slots = Decomptype_slots, }; +static PyType_Slot ZlibDecompressor_type_slots[] = { + {Py_tp_dealloc, ZlibDecompressor_dealloc}, + {Py_tp_members, ZlibDecompressor_members}, + {Py_tp_new, ZlibDecompressor__new__}, + {Py_tp_doc, (char *)ZlibDecompressor__new____doc__}, + {Py_tp_methods, ZlibDecompressor_methods}, + {0, 0}, +}; + +static PyType_Spec ZlibDecompressor_type_spec = { + .name = "zlib._ZlibDecompressor", + .basicsize = sizeof(ZlibDecompressor), + // Calling PyType_GetModuleState() on a subclass is not safe. + // ZlibDecompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag + // which prevents to create a subclass. + // So calling PyType_GetModuleState() in this file is always safe. + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), + .slots = ZlibDecompressor_type_slots, +}; PyDoc_STRVAR(zlib_module_documentation, "The functions in this module allow compression and decompression using the\n" "zlib library, which is based on GNU zip.\n" @@ -1518,6 +2008,7 @@ zlib_clear(PyObject *mod) zlibstate *state = get_zlib_state(mod); Py_CLEAR(state->Comptype); Py_CLEAR(state->Decomptype); + Py_CLEAR(state->ZlibDecompressorType); Py_CLEAR(state->ZlibError); return 0; } @@ -1528,6 +2019,7 @@ zlib_traverse(PyObject *mod, visitproc visit, void *arg) zlibstate *state = get_zlib_state(mod); Py_VISIT(state->Comptype); Py_VISIT(state->Decomptype); + Py_VISIT(state->ZlibDecompressorType); Py_VISIT(state->ZlibError); return 0; } @@ -1555,6 +2047,12 @@ zlib_exec(PyObject *mod) return -1; } + state->ZlibDecompressorType = (PyTypeObject *)PyType_FromModuleAndSpec( + mod, &ZlibDecompressor_type_spec, NULL); + if (state->ZlibDecompressorType == NULL) { + return -1; + } + state->ZlibError = PyErr_NewException("zlib.error", NULL, NULL); if (state->ZlibError == NULL) { return -1; @@ -1565,6 +2063,12 @@ zlib_exec(PyObject *mod) Py_DECREF(state->ZlibError); return -1; } + Py_INCREF(state->ZlibDecompressorType); + if (PyModule_AddObject(mod, "_ZlibDecompressor", + (PyObject *)state->ZlibDecompressorType) < 0) { + Py_DECREF(state->ZlibDecompressorType); + return -1; + } #define ZLIB_ADD_INT_MACRO(c) \ do { \