Fix some string encoding issues with entity bodies in HTTP requests.

RFC 2616 says that iso-8859-1 is the default charset for HTTP entity
bodies, but we encoded strings using ascii.  See
http://bugs.python.org/issue5314.  Changed docs and code to use
iso-8859-1.

Also fix some brokenness with passing a file as the body instead of a
string.

Add tests to show that some of this behavior actually works.
This commit is contained in:
Jeremy Hylton 2009-03-27 20:24:34 +00:00
parent 98eb6c2838
commit 236654b82d
3 changed files with 107 additions and 18 deletions

View file

@ -351,14 +351,22 @@ HTTPConnection Objects
.. method:: HTTPConnection.request(method, url[, body[, headers]])
This will send a request to the server using the HTTP request method *method*
and the selector *url*. If the *body* argument is present, it should be a
string of data to send after the headers are finished. Alternatively, it may
be an open file object, in which case the contents of the file is sent; this
file object should support ``fileno()`` and ``read()`` methods. The header
Content-Length is automatically set to the correct value. The *headers*
argument should be a mapping of extra HTTP headers to send with the request.
This will send a request to the server using the HTTP request
method *method* and the selector *url*. If the *body* argument is
present, it should be a string or bytes object of data to send after
the headers are finished. Strings are encoded as ISO-8859-1, the
default charset for HTTP. To use other encodings, pass a bytes
object. The Content-Length header is set to the length of the
string.
The *body* may also be an open file object, in which case the
contents of the file are sent; this file object should support
``fileno()`` and ``read()`` methods. The header Content-Length is
automatically set to the length of the file as reported by
stat.
The *headers* argument should be a mapping of extra HTTP
headers to send with the request.
.. method:: HTTPConnection.getresponse()

View file

@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
if line in (b'\r\n', b'\n', b''):
break
hstring = b''.join(headers).decode('iso-8859-1')
return email.parser.Parser(_class=_class).parsestr(hstring)
class HTTPResponse(io.RawIOBase):
@ -675,13 +674,22 @@ def send(self, str):
if self.debuglevel > 0:
print("send:", repr(str))
try:
blocksize=8192
if hasattr(str,'read') :
if self.debuglevel > 0: print("sendIng a read()able")
data=str.read(blocksize)
while data:
blocksize = 8192
if hasattr(str, "read") :
if self.debuglevel > 0:
print("sendIng a read()able")
encode = False
if "b" not in str.mode:
encode = True
if self.debuglevel > 0:
print("encoding file using iso-8859-1")
while 1:
data = str.read(blocksize)
if not data:
break
if encode:
data = data.encode("iso-8859-1")
self.sock.sendall(data)
data=str.read(blocksize)
else:
self.sock.sendall(str)
except socket.error as v:
@ -713,8 +721,8 @@ def _send_output(self, message_body=None):
message_body = None
self.send(msg)
if message_body is not None:
#message_body was not a string (i.e. it is a file) and
#we must run the risk of Nagle
# message_body was not a string (i.e. it is a file), and
# we must run the risk of Nagle.
self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@ -904,7 +912,9 @@ def _send_request(self, method, url, body, headers):
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
body = body.encode('ascii')
# RFC 2616 Section 3.7.1 says that text media types have a
# default charset of iso-8859-1.
body = body.encode('iso-8859-1')
self.endheaders(body)
def getresponse(self):

View file

@ -272,9 +272,80 @@ def test_attributes(self):
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
self.assertEqual(h.timeout, 30)
class RequestBodyTest(TestCase):
    """Test cases where a request includes a message body."""

    def setUp(self):
        # Attach a fake socket so request() has something to write to
        # without opening a real network connection.
        self.conn = httplib.HTTPConnection('example.com')
        self.sock = FakeSocket("")
        self.conn.sock = self.sock

    def tearDown(self):
        # The file-body tests write support.TESTFN; remove it so the
        # scratch file does not leak into other tests.  support.unlink()
        # is expected to tolerate a missing file (see test.support), so
        # this is safe for the tests that never create it.
        support.unlink(support.TESTFN)

    def get_headers_and_fp(self):
        """Parse what was sent on the fake socket.

        Returns a ``(message, fp)`` pair: the parsed request headers and
        a bytes stream positioned at the start of the request body.
        """
        f = io.BytesIO(self.sock.data)
        f.readline()  # read the request line
        message = httplib.parse_headers(f)
        return message, f

    def test_manual_content_length(self):
        # Set an incorrect content-length so that we can verify that
        # it will not be over-ridden by the library.
        self.conn.request("PUT", "/url", "body",
                          {"Content-Length": "42"})
        message, f = self.get_headers_and_fp()
        self.assertEqual("42", message.get("content-length"))
        self.assertEqual(4, len(f.read()))

    def test_ascii_body(self):
        """A pure-ASCII str body is sent unchanged."""
        self.conn.request("PUT", "/url", "body")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_latin1_body(self):
        """A str body with a non-ASCII Latin-1 character is one byte per char."""
        self.conn.request("PUT", "/url", "body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_bytes_body(self):
        """A bytes body is sent as given."""
        self.conn.request("PUT", "/url", b"body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_file_body(self):
        """A file opened in text mode can be passed as the body."""
        with open(support.TESTFN, "w") as f:
            f.write("body")
        # Keep the file open only for the duration of the request, and
        # use a separate name for the parsed stream so the file handle
        # is not silently dropped while still open.
        with open(support.TESTFN) as f:
            self.conn.request("PUT", "/url", f)
        message, fp = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', fp.read())

    def test_binary_file_body(self):
        """A file opened in binary mode is sent without re-encoding."""
        with open(support.TESTFN, "wb") as f:
            f.write(b"body\xc1")
        with open(support.TESTFN, "rb") as f:
            self.conn.request("PUT", "/url", f)
        message, fp = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', fp.read())
def test_main(verbose=None):
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
HTTPSTimeoutTest)
HTTPSTimeoutTest, RequestBodyTest)
if __name__ == '__main__':
test_main()