mirror of
https://github.com/python/cpython
synced 2024-11-02 12:55:22 +00:00
181 lines
8.4 KiB
TeX
181 lines
8.4 KiB
TeX
\section{\module{zlib} ---
|
|
Compression compatible with \program{gzip}}
|
|
|
|
\declaremodule{builtin}{zlib}
|
|
\modulesynopsis{Low-level interface to compression and decompression
|
|
routines compatible with \program{gzip}.}
|
|
|
|
|
|
For applications that require data compression, the functions in this
|
|
module allow compression and decompression, using the zlib library.
|
|
The zlib library has its own home page at \url{http://www.zlib.net}.
|
|
There are known incompatibilities between the Python module and
|
|
versions of the zlib library earlier than 1.1.3; 1.1.3 has a security
|
|
vulnerability, so we recommend using 1.1.4 or later.
|
|
|
|
zlib's functions have many options and often need to be used in a
|
|
particular order. This documentation doesn't attempt to cover all of
|
|
the permutations; consult the zlib manual at
|
|
\url{http://www.zlib.net/manual.html} for authoritative information.
|
|
|
|
The available exception and functions in this module are:
|
|
|
|
\begin{excdesc}{error}
|
|
Exception raised on compression and decompression errors.
|
|
\end{excdesc}
|
|
|
|
|
|
\begin{funcdesc}{adler32}{string\optional{, value}}
|
|
Computes an Adler-32 checksum of \var{string}. (An Adler-32
|
|
checksum is almost as reliable as a CRC32 but can be computed much
|
|
more quickly.) If \var{value} is present, it is used as the
|
|
starting value of the checksum; otherwise, a fixed default value is
|
|
used. This allows computing a running checksum over the
|
|
concatenation of several input strings. The algorithm is not
|
|
cryptographically strong, and should not be used for
|
|
authentication or digital signatures. Since the algorithm is
|
|
designed for use as a checksum algorithm, it is not suitable for
|
|
use as a general hash algorithm.
|
|
\end{funcdesc}
|
|
|
|
\begin{funcdesc}{compress}{string\optional{, level}}
|
|
Compresses the data in \var{string}, returning a string containing
|
|
compressed data. \var{level} is an integer from \code{1} to
|
|
\code{9} controlling the level of compression; \code{1} is fastest
|
|
and produces the least compression, \code{9} is slowest and produces
|
|
the most. The default value is \code{6}. Raises the
|
|
\exception{error} exception if any error occurs.
|
|
\end{funcdesc}
|
|
|
|
\begin{funcdesc}{compressobj}{\optional{level}}
|
|
Returns a compression object, to be used for compressing data streams
|
|
that won't fit into memory at once. \var{level} is an integer from
|
|
\code{1} to \code{9} controlling the level of compression; \code{1} is
|
|
fastest and produces the least compression, \code{9} is slowest and
|
|
produces the most. The default value is \code{6}.
|
|
\end{funcdesc}
|
|
|
|
\begin{funcdesc}{crc32}{string\optional{, value}}
|
|
Computes a CRC (Cyclic Redundancy Check)%
|
|
\index{Cyclic Redundancy Check}
|
|
\index{checksum!Cyclic Redundancy Check}
|
|
checksum of \var{string}. If
|
|
\var{value} is present, it is used as the starting value of the
|
|
checksum; otherwise, a fixed default value is used. This allows
|
|
computing a running checksum over the concatenation of several
|
|
input strings. The algorithm is not cryptographically strong, and
|
|
should not be used for authentication or digital signatures. Since
|
|
the algorithm is designed for use as a checksum algorithm, it is not
|
|
suitable for use as a general hash algorithm.
|
|
\end{funcdesc}
|
|
|
|
\begin{funcdesc}{decompress}{string\optional{, wbits\optional{, bufsize}}}
|
|
Decompresses the data in \var{string}, returning a string containing
|
|
the uncompressed data. The \var{wbits} parameter controls the size of
|
|
the window buffer. If \var{bufsize} is given, it is used as the
|
|
initial size of the output buffer. Raises the \exception{error}
|
|
exception if any error occurs.
|
|
|
|
The absolute value of \var{wbits} is the base two logarithm of the
|
|
size of the history buffer (the ``window size'') used when compressing
|
|
data. Its absolute value should be between 8 and 15 for the most
|
|
recent versions of the zlib library, larger values resulting in better
|
|
compression at the expense of greater memory usage. The default value
|
|
is 15. When \var{wbits} is negative, the standard
|
|
\program{gzip} header is suppressed; this is an undocumented feature
|
|
of the zlib library, used for compatibility with \program{unzip}'s
|
|
compression file format.
|
|
|
|
\var{bufsize} is the initial size of the buffer used to hold
|
|
decompressed data. If more space is required, the buffer size will be
|
|
increased as needed, so you don't have to get this value exactly
|
|
right; tuning it will only save a few calls to \cfunction{malloc()}. The
|
|
default size is 16384.
|
|
|
|
\end{funcdesc}
|
|
|
|
\begin{funcdesc}{decompressobj}{\optional{wbits}}
|
|
Returns a decompression object, to be used for decompressing data
|
|
streams that won't fit into memory at once. The \var{wbits}
|
|
parameter controls the size of the window buffer.
|
|
\end{funcdesc}
|
|
|
|
Compression objects support the following methods:
|
|
|
|
\begin{methoddesc}[Compress]{compress}{string}
|
|
Compress \var{string}, returning a string containing compressed data
|
|
for at least part of the data in \var{string}. This data should be
|
|
concatenated to the output produced by any preceding calls to the
|
|
\method{compress()} method. Some input may be kept in internal buffers
|
|
for later processing.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}[Compress]{flush}{\optional{mode}}
|
|
All pending input is processed, and a string containing the remaining
|
|
compressed output is returned. \var{mode} can be selected from the
|
|
constants \constant{Z_SYNC_FLUSH}, \constant{Z_FULL_FLUSH}, or
|
|
\constant{Z_FINISH}, defaulting to \constant{Z_FINISH}. \constant{Z_SYNC_FLUSH} and
|
|
\constant{Z_FULL_FLUSH} allow compressing further strings of data, while
|
|
\constant{Z_FINISH} finishes the compressed stream and
|
|
prevents compressing any more data. After calling
|
|
\method{flush()} with \var{mode} set to \constant{Z_FINISH}, the
|
|
\method{compress()} method cannot be called again; the only realistic
|
|
action is to delete the object.
|
|
\end{methoddesc}
|
|
|
|
Decompression objects support the following methods, and two attributes:
|
|
|
|
\begin{memberdesc}{unused_data}
|
|
A string which contains any bytes past the end of the compressed data.
|
|
That is, this remains \code{""} until the last byte that contains
|
|
compression data is available. If the whole string turned out to
|
|
contain compressed data, this is \code{""}, the empty string.
|
|
|
|
The only way to determine where a string of compressed data ends is by
|
|
actually decompressing it. This means that when compressed data is
|
|
contained as part of a larger file, you can only find the end of it by
|
|
reading data and feeding it followed by some non-empty string into a
|
|
decompression object's \method{decompress()} method until the
|
|
\member{unused_data} attribute is no longer the empty string.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{unconsumed_tail}
|
|
A string that contains any data that was not consumed by the last
|
|
\method{decompress()} call because it exceeded the limit for the
|
|
uncompressed data buffer. This data has not yet been seen by the zlib
|
|
machinery, so you must feed it (possibly with further data
|
|
concatenated to it) back to a subsequent \method{decompress()} method
|
|
call in order to get correct output.
|
|
\end{memberdesc}
|
|
|
|
|
|
\begin{methoddesc}[Decompress]{decompress}{string\optional{, max_length}}
|
|
Decompress \var{string}, returning a string containing the
|
|
uncompressed data corresponding to at least part of the data in
|
|
\var{string}. This data should be concatenated to the output produced
|
|
by any preceding calls to the
|
|
\method{decompress()} method. Some of the input data may be preserved
|
|
in internal buffers for later processing.
|
|
|
|
If the optional parameter \var{max_length} is supplied then the return value
|
|
will be no longer than \var{max_length}. This may mean that not all of the
|
|
compressed input can be processed, and any unconsumed data will be stored
|
|
in the attribute \member{unconsumed_tail}. This string must be passed
|
|
to a subsequent call to \method{decompress()} if decompression is to
|
|
continue. If \var{max_length} is not supplied then the whole input is
|
|
decompressed, and \member{unconsumed_tail} is an empty string.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}[Decompress]{flush}{}
|
|
All pending input is processed, and a string containing the remaining
|
|
uncompressed output is returned. After calling \method{flush()}, the
|
|
\method{decompress()} method cannot be called again; the only realistic
|
|
action is to delete the object.
|
|
\end{methoddesc}
|
|
|
|
\begin{seealso}
|
|
\seemodule{gzip}{Reading and writing \program{gzip}-format files.}
|
|
\seeurl{http://www.zlib.net}{The zlib library home page.}
|
|
\seeurl{http://www.zlib.net/manual.html}{The zlib manual explains
|
|
the semantics and usage of the library's many functions.}
|
|
\end{seealso}
|