mirror of
https://github.com/python/cpython
synced 2024-09-19 23:51:46 +00:00
Fix some string encoding issues with entity bodies in HTTP requests.
RFC 2616 says that iso-8859-1 is the default charset for HTTP entity bodies, but we encoded strings using ascii. See http://bugs.python.org/issue5314. Changed docs and code to use iso-8859-1. Also fix some brokenness with passing a file as the body instead of a string. Add tests to show that some of this behavior actually works.
This commit is contained in:
parent
98eb6c2838
commit
236654b82d
|
@ -351,14 +351,22 @@ HTTPConnection Objects
|
|||
|
||||
.. method:: HTTPConnection.request(method, url[, body[, headers]])
|
||||
|
||||
This will send a request to the server using the HTTP request method *method*
|
||||
and the selector *url*. If the *body* argument is present, it should be a
|
||||
string of data to send after the headers are finished. Alternatively, it may
|
||||
be an open file object, in which case the contents of the file is sent; this
|
||||
file object should support ``fileno()`` and ``read()`` methods. The header
|
||||
Content-Length is automatically set to the correct value. The *headers*
|
||||
argument should be a mapping of extra HTTP headers to send with the request.
|
||||
This will send a request to the server using the HTTP request
|
||||
method *method* and the selector *url*. If the *body* argument is
|
||||
present, it should be a string or bytes object of data to send after
|
||||
the headers are finished. Strings are encoded as ISO-8859-1, the
|
||||
default charset for HTTP. To use other encodings, pass a bytes
|
||||
object. The Content-Length header is set to the length of the
|
||||
string.
|
||||
|
||||
The *body* may also be an open file object, in which case the
|
||||
contents of the file are sent; this file object should support
|
||||
``fileno()`` and ``read()`` methods. The header Content-Length is
|
||||
automatically set to the length of the file as reported by
|
||||
stat.
|
||||
|
||||
The *headers* argument should be a mapping of extra HTTP
|
||||
headers to send with the request.
|
||||
|
||||
.. method:: HTTPConnection.getresponse()
|
||||
|
||||
|
|
|
@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
|
|||
if line in (b'\r\n', b'\n', b''):
|
||||
break
|
||||
hstring = b''.join(headers).decode('iso-8859-1')
|
||||
|
||||
return email.parser.Parser(_class=_class).parsestr(hstring)
|
||||
|
||||
class HTTPResponse(io.RawIOBase):
|
||||
|
@ -675,13 +674,22 @@ def send(self, str):
|
|||
if self.debuglevel > 0:
|
||||
print("send:", repr(str))
|
||||
try:
|
||||
blocksize=8192
|
||||
if hasattr(str,'read') :
|
||||
if self.debuglevel > 0: print("sendIng a read()able")
|
||||
data=str.read(blocksize)
|
||||
while data:
|
||||
blocksize = 8192
|
||||
if hasattr(str, "read") :
|
||||
if self.debuglevel > 0:
|
||||
print("sendIng a read()able")
|
||||
encode = False
|
||||
if "b" not in str.mode:
|
||||
encode = True
|
||||
if self.debuglevel > 0:
|
||||
print("encoding file using iso-8859-1")
|
||||
while 1:
|
||||
data = str.read(blocksize)
|
||||
if not data:
|
||||
break
|
||||
if encode:
|
||||
data = data.encode("iso-8859-1")
|
||||
self.sock.sendall(data)
|
||||
data=str.read(blocksize)
|
||||
else:
|
||||
self.sock.sendall(str)
|
||||
except socket.error as v:
|
||||
|
@ -713,8 +721,8 @@ def _send_output(self, message_body=None):
|
|||
message_body = None
|
||||
self.send(msg)
|
||||
if message_body is not None:
|
||||
#message_body was not a string (i.e. it is a file) and
|
||||
#we must run the risk of Nagle
|
||||
# message_body was not a string (i.e. it is a file), and
|
||||
# we must run the risk of Nagle.
|
||||
self.send(message_body)
|
||||
|
||||
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
|
||||
|
@ -904,7 +912,9 @@ def _send_request(self, method, url, body, headers):
|
|||
for hdr, value in headers.items():
|
||||
self.putheader(hdr, value)
|
||||
if isinstance(body, str):
|
||||
body = body.encode('ascii')
|
||||
# RFC 2616 Section 3.7.1 says that text media types have a
|
||||
# default charset of iso-8859-1.
|
||||
body = body.encode('iso-8859-1')
|
||||
self.endheaders(body)
|
||||
|
||||
def getresponse(self):
|
||||
|
|
|
@ -272,9 +272,80 @@ def test_attributes(self):
|
|||
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
|
||||
self.assertEqual(h.timeout, 30)
|
||||
|
||||
class RequestBodyTest(TestCase):
    """Test cases where a request includes a message body.

    Each test issues a PUT request through an HTTPConnection whose socket
    has been replaced by a FakeSocket, then re-parses the data recorded on
    that socket to check the headers and the body that were written.
    """

    def setUp(self):
        self.conn = httplib.HTTPConnection('example.com')
        self.sock = FakeSocket("")
        self.conn.sock = self.sock

    def tearDown(self):
        # The file-body tests create support.TESTFN; remove it so no
        # scratch file is left behind for later tests.
        support.unlink(support.TESTFN)

    def get_headers_and_fp(self):
        # Returns (headers, fp): the parsed header block and the stream
        # it was read from, so that a following fp.read() yields the
        # request body.
        f = io.BytesIO(self.sock.data)
        f.readline()  # read the request line
        message = httplib.parse_headers(f)
        return message, f

    def test_manual_content_length(self):
        # Set an incorrect content-length so that we can verify that
        # it will not be overridden by the library.
        self.conn.request("PUT", "/url", "body",
                          {"Content-Length": "42"})
        message, f = self.get_headers_and_fp()
        self.assertEqual("42", message.get("content-length"))
        self.assertEqual(4, len(f.read()))

    def test_ascii_body(self):
        # A pure-ASCII str body is sent unchanged.
        self.conn.request("PUT", "/url", "body")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_latin1_body(self):
        # A str body with a non-ASCII Latin-1 character is encoded as
        # iso-8859-1: one byte per character.
        self.conn.request("PUT", "/url", "body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_bytes_body(self):
        # A bytes body is passed through without any encoding step.
        self.conn.request("PUT", "/url", b"body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_file_body(self):
        # A file opened in text mode is accepted as the body.
        with open(support.TESTFN, "w") as f:
            f.write("body")
        # Keep the file open only while the request is sent, so the
        # handle is always closed; the parsed stream gets its own name
        # instead of rebinding (and thereby leaking) the file object.
        with open(support.TESTFN) as f:
            self.conn.request("PUT", "/url", f)
        message, fp = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', fp.read())

    def test_binary_file_body(self):
        # A file opened in binary mode is sent byte-for-byte.
        with open(support.TESTFN, "wb") as f:
            f.write(b"body\xc1")
        with open(support.TESTFN, "rb") as f:
            self.conn.request("PUT", "/url", f)
        message, fp = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', fp.read())
|
||||
|
||||
def test_main(verbose=None):
|
||||
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
|
||||
HTTPSTimeoutTest)
|
||||
HTTPSTimeoutTest, RequestBodyTest)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_main()
|
||||
|
|
Loading…
Reference in a new issue