Fix gzip.py: Use bytes where 8-bit strings were formerly used.

(The filename is now written in UTF-8 encoded form, which is probably
not correct.)

Fix the test.
This commit is contained in:
Walter Dörwald 2007-06-06 16:43:59 +00:00
parent 3a77c7ab16
commit 5b1284d0b7
2 changed files with 31 additions and 29 deletions

View file

@ -104,7 +104,7 @@ def __init__(self, filename=None, mode=None,
self.mode = READ
# Set flag indicating start of a new member
self._new_member = True
self.extrabuf = ""
self.extrabuf = b""
self.extrasize = 0
self.name = filename
# Starts small, scales exponentially
@ -147,20 +147,21 @@ def _init_write(self, filename):
self.bufsize = 0
def _write_gzip_header(self):
self.fileobj.write('\037\213') # magic header
self.fileobj.write('\010') # compression method
self.fileobj.write(b'\037\213') # magic header
self.fileobj.write(b'\010') # compression method
fname = self.name
if fname.endswith(".gz"):
fname = fname[:-3]
flags = 0
if fname:
flags = FNAME
self.fileobj.write(chr(flags))
self.fileobj.write(chr(flags).encode('latin-1'))
write32u(self.fileobj, int(time.time()))
self.fileobj.write('\002')
self.fileobj.write('\377')
self.fileobj.write(b'\002')
self.fileobj.write(b'\377')
if fname:
self.fileobj.write(fname + '\000')
# XXX: Is utf-8 the correct encoding?
self.fileobj.write(fname.encode('utf-8') + b'\000')
def _init_read(self):
self.crc = zlib.crc32("")
@ -168,7 +169,7 @@ def _init_read(self):
def _read_gzip_header(self):
magic = self.fileobj.read(2)
if magic != '\037\213':
if magic != b'\037\213':
raise IOError, 'Not a gzipped file'
method = ord( self.fileobj.read(1) )
if method != 8:
@ -188,13 +189,13 @@ def _read_gzip_header(self):
# Read and discard a null-terminated string containing the filename
while True:
s = self.fileobj.read(1)
if not s or s=='\000':
if not s or s==b'\000':
break
if flag & FCOMMENT:
# Read and discard a null-terminated string containing a comment
while True:
s = self.fileobj.read(1)
if not s or s=='\000':
if not s or s==b'\000':
break
if flag & FHCRC:
self.fileobj.read(2) # Read & discard the 16-bit header CRC
@ -219,7 +220,7 @@ def read(self, size=-1):
raise IOError(errno.EBADF, "read() on write-only GzipFile object")
if self.extrasize <= 0 and self.fileobj is None:
return ''
return b''
readsize = 1024
if size < 0: # get the whole thing
@ -278,7 +279,7 @@ def _read(self, size=1024):
# If the EOF has been reached, flush the decompression object
# and mark this object as finished.
if buf == "":
if buf == b"":
uncompress = self.decompress.flush()
self._read_eof()
self._add_read_data( uncompress )
@ -287,7 +288,7 @@ def _read(self, size=1024):
uncompress = self.decompress.decompress(buf)
self._add_read_data( uncompress )
if self.decompress.unused_data != "":
if self.decompress.unused_data != b"":
# Ending case: we've come to the end of a member in the file,
# so seek back to the start of the unused data, finish up
# this member, and read a new gzip header.
@ -375,7 +376,7 @@ def rewind(self):
raise IOError("Can't rewind in write mode")
self.fileobj.seek(0)
self._new_member = True
self.extrabuf = ""
self.extrabuf = b""
self.extrasize = 0
self.offset = 0
@ -389,9 +390,10 @@ def seek(self, offset, whence=0):
if offset < self.offset:
raise IOError('Negative seek in write mode')
count = offset - self.offset
chunk = bytes(1024)
for i in range(count // 1024):
self.write(1024 * '\0')
self.write((count % 1024) * '\0')
self.write(chunk)
self.write(bytes(count % 1024))
elif self.mode == READ:
if offset < self.offset:
# for negative seek, rewind and do positive seek
@ -410,7 +412,7 @@ def readline(self, size=-1):
bufs = []
while size != 0:
c = self.read(readsize)
i = c.find('\n')
i = c.find(b'\n')
# We set i=size to break out of the loop under two
# conditions: 1) there's no newline, and the chunk is
@ -419,7 +421,7 @@ def readline(self, size=-1):
if (size <= i) or (i == -1 and len(c) > size):
i = size - 1
if i >= 0 or c == '':
if i >= 0 or c == b'':
bufs.append(c[:i + 1]) # Add portion of last chunk
self._unread(c[i + 1:]) # Push back rest of chunk
break
@ -430,7 +432,7 @@ def readline(self, size=-1):
readsize = min(size, readsize * 2)
if readsize > self.min_readsize:
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
return ''.join(bufs) # Return resulting line
return b''.join(bufs) # Return resulting line
def readlines(self, sizehint=0):
# Negative numbers result in reading all the lines
@ -439,7 +441,7 @@ def readlines(self, sizehint=0):
L = []
while sizehint > 0:
line = self.readline()
if line == "":
if line == b"":
break
L.append(line)
sizehint = sizehint - len(line)

View file

@ -8,14 +8,14 @@
import gzip
data1 = """ int length=DEFAULTALLOC, err = Z_OK;
data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
PyObject *RetVal;
int flushmode = Z_FINISH;
unsigned long start_total_out;
"""
data2 = """/* zlibmodule.c -- gzip-compatible data compression */
data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
/* See http://www.gzip.org/zlib/
/* See http://www.winimage.com/zLibDll for Windows */
"""
@ -63,22 +63,22 @@ def test_many_append(self):
# many, many members. Create such a file and verify that reading it
# works.
f = gzip.open(self.filename, 'wb', 9)
f.write('a')
f.write(b'a')
f.close()
for i in range(0,200):
for i in range(0, 200):
f = gzip.open(self.filename, "ab", 9) # append
f.write('a')
f.write(b'a')
f.close()
# Try reading the file
zgfile = gzip.open(self.filename, "rb")
contents = ""
contents = b""
while 1:
ztxt = zgfile.read(8192)
contents += ztxt
if not ztxt: break
zgfile.close()
self.assertEquals(contents, 'a'*201)
self.assertEquals(contents, b'a'*201)
def test_readline(self):
@ -89,7 +89,7 @@ def test_readline(self):
line_length = 0
while 1:
L = f.readline(line_length)
if L == "" and line_length != 0: break
if not L and line_length != 0: break
self.assert_(len(L) <= line_length)
line_length = (line_length + 1) % 50
f.close()
@ -144,7 +144,7 @@ def test_seek_write(self):
f = gzip.GzipFile(self.filename, 'w')
for pos in range(0, 256, 16):
f.seek(pos)
f.write('GZ\n')
f.write(b'GZ\n')
f.close()
def test_mode(self):