diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 01f4dc423e0..33f90f48ca6 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -50,7 +50,7 @@ def test_trivial(self): f = urllib.request.urlopen(file_url) - buf = f.read() + f.read() f.close() def test_parse_http_list(self): @@ -68,180 +68,158 @@ def test_URLError_reasonstr(self): err = urllib.error.URLError('reason') self.assertIn(err.reason, str(err)) -def test_request_headers_dict(): - """ - The Request.headers dictionary is not a documented interface. It should - stay that way, because the complete set of headers are only accessible - through the .get_header(), .has_header(), .header_items() interface. - However, .headers pre-dates those methods, and so real code will be using - the dictionary. +class RequestHdrsTests(unittest.TestCase): - The introduction in 2.4 of those methods was a mistake for the same reason: - code that previously saw all (urllib2 user)-provided headers in .headers - now sees only a subset (and the function interface is ugly and incomplete). - A better change would have been to replace .headers dict with a dict - subclass (or UserDict.DictMixin instance?) that preserved the .headers - interface and also provided access to the "unredirected" headers. It's - probably too late to fix that, though. + def test_request_headers_dict(self): + """ + The Request.headers dictionary is not a documented interface. It + should stay that way, because the complete set of headers are only + accessible through the .get_header(), .has_header(), .header_items() + interface. However, .headers pre-dates those methods, and so real code + will be using the dictionary. + + The introduction in 2.4 of those methods was a mistake for the same + reason: code that previously saw all (urllib2 user)-provided headers in + .headers now sees only a subset. + + """ + url = "http://example.com" + self.assertEqual(Request(url, + headers={"Spam-eggs": "blah"} + ).headers["Spam-eggs"], "blah") + self.assertEqual(Request(url, + headers={"spam-EggS": "blah"} + ).headers["Spam-eggs"], "blah") + + def test_request_headers_methods(self): + """ + Note the case normalization of header names here, to + .capitalize()-case. This should be preserved for + backwards-compatibility. (In the HTTP case, normalization to + .title()-case is done by urllib2 before sending headers to + http.client). + + Note that e.g. r.has_header("spam-EggS") is currently False, and + r.get_header("spam-EggS") returns None, but that could be changed in + future. + + Method r.remove_header should remove items both from r.headers and + r.unredirected_hdrs dictionaries + """ + url = "http://example.com" + req = Request(url, headers={"Spam-eggs": "blah"}) + self.assertTrue(req.has_header("Spam-eggs")) + self.assertEqual(req.header_items(), [('Spam-eggs', 'blah')]) + + req.add_header("Foo-Bar", "baz") + self.assertEqual(sorted(req.header_items()), + [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]) + self.assertFalse(req.has_header("Not-there")) + self.assertIsNone(req.get_header("Not-there")) + self.assertEqual(req.get_header("Not-there", "default"), "default") - Check .capitalize() case normalization: + def test_password_manager(self): + mgr = urllib.request.HTTPPasswordMgr() + add = mgr.add_password + find_user_pass = mgr.find_user_password + add("Some Realm", "http://example.com/", "joe", "password") + add("Some Realm", "http://example.com/ni", "ni", "ni") + add("c", "http://example.com/foo", "foo", "ni") + add("c", "http://example.com/bar", "bar", "nini") + add("b", "http://example.com/", "first", "blah") + add("b", "http://example.com/", "second", "spam") + add("a", "http://example.com", "1", "a") + add("Some Realm", "http://c.example.com:3128", "3", "c") + add("Some Realm", "d.example.com", "4", "d") + add("Some Realm", "e.example.com:3128", "5", "e") - >>> url = "http://example.com" - >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"] - 'blah' - >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"] - 'blah' + self.assertEqual(find_user_pass("Some Realm", "example.com"), + ('joe', 'password')) - Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError, - but that could be changed in future. + #self.assertEqual(find_user_pass("Some Realm", "http://example.com/ni"), + # ('ni', 'ni')) - """ + self.assertEqual(find_user_pass("Some Realm", "http://example.com"), + ('joe', 'password')) + self.assertEqual(find_user_pass("Some Realm", "http://example.com/"), + ('joe', 'password')) + self.assertEqual( + find_user_pass("Some Realm", "http://example.com/spam"), + ('joe', 'password')) + self.assertEqual( + find_user_pass("Some Realm", "http://example.com/spam/spam"), + ('joe', 'password')) + self.assertEqual(find_user_pass("c", "http://example.com/foo"), + ('foo', 'ni')) + self.assertEqual(find_user_pass("c", "http://example.com/bar"), + ('bar', 'nini')) + self.assertEqual(find_user_pass("b", "http://example.com/"), + ('second', 'spam')) -def test_request_headers_methods(): - """ - Note the case normalization of header names here, to .capitalize()-case. - This should be preserved for backwards-compatibility. (In the HTTP case, - normalization to .title()-case is done by urllib2 before sending headers to - http.client). + # No special relationship between a.example.com and example.com: - >>> url = "http://example.com" - >>> r = Request(url, headers={"Spam-eggs": "blah"}) - >>> r.has_header("Spam-eggs") - True - >>> r.header_items() - [('Spam-eggs', 'blah')] - >>> r.add_header("Foo-Bar", "baz") - >>> items = sorted(r.header_items()) - >>> items - [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')] + self.assertEqual(find_user_pass("a", "http://example.com/"), + ('1', 'a')) + self.assertEqual(find_user_pass("a", "http://a.example.com/"), + (None, None)) - Note that e.g. r.has_header("spam-EggS") is currently False, and - r.get_header("spam-EggS") returns None, but that could be changed in - future. + # Ports: - >>> r.has_header("Not-there") - False - >>> print(r.get_header("Not-there")) - None - >>> r.get_header("Not-there", "default") - 'default' + self.assertEqual(find_user_pass("Some Realm", "c.example.com"), + (None, None)) + self.assertEqual(find_user_pass("Some Realm", "c.example.com:3128"), + ('3', 'c')) + self.assertEqual( + find_user_pass("Some Realm", "http://c.example.com:3128"), + ('3', 'c')) + self.assertEqual(find_user_pass("Some Realm", "d.example.com"), + ('4', 'd')) + self.assertEqual(find_user_pass("Some Realm", "e.example.com:3128"), + ('5', 'e')) - """ + def test_password_manager_default_port(self): + """ + The point to note here is that we can't guess the default port if + there's no scheme. This applies to both add_password and + find_user_password. + """ + mgr = urllib.request.HTTPPasswordMgr() + add = mgr.add_password + find_user_pass = mgr.find_user_password + add("f", "http://g.example.com:80", "10", "j") + add("g", "http://h.example.com", "11", "k") + add("h", "i.example.com:80", "12", "l") + add("i", "j.example.com", "13", "m") + self.assertEqual(find_user_pass("f", "g.example.com:100"), + (None, None)) + self.assertEqual(find_user_pass("f", "g.example.com:80"), + ('10', 'j')) + self.assertEqual(find_user_pass("f", "g.example.com"), + (None, None)) + self.assertEqual(find_user_pass("f", "http://g.example.com:100"), + (None, None)) + self.assertEqual(find_user_pass("f", "http://g.example.com:80"), + ('10', 'j')) + self.assertEqual(find_user_pass("f", "http://g.example.com"), + ('10', 'j')) + self.assertEqual(find_user_pass("g", "h.example.com"), ('11', 'k')) + self.assertEqual(find_user_pass("g", "h.example.com:80"), ('11', 'k')) + self.assertEqual(find_user_pass("g", "http://h.example.com:80"), + ('11', 'k')) + self.assertEqual(find_user_pass("h", "i.example.com"), (None, None)) + self.assertEqual(find_user_pass("h", "i.example.com:80"), ('12', 'l')) + self.assertEqual(find_user_pass("h", "http://i.example.com:80"), + ('12', 'l')) + self.assertEqual(find_user_pass("i", "j.example.com"), ('13', 'm')) + self.assertEqual(find_user_pass("i", "j.example.com:80"), + (None, None)) + self.assertEqual(find_user_pass("i", "http://j.example.com"), + ('13', 'm')) + self.assertEqual(find_user_pass("i", "http://j.example.com:80"), + (None, None)) -def test_password_manager(self): - """ - >>> mgr = urllib.request.HTTPPasswordMgr() - >>> add = mgr.add_password - >>> add("Some Realm", "http://example.com/", "joe", "password") - >>> add("Some Realm", "http://example.com/ni", "ni", "ni") - >>> add("c", "http://example.com/foo", "foo", "ni") - >>> add("c", "http://example.com/bar", "bar", "nini") - >>> add("b", "http://example.com/", "first", "blah") - >>> add("b", "http://example.com/", "second", "spam") - >>> add("a", "http://example.com", "1", "a") - >>> add("Some Realm", "http://c.example.com:3128", "3", "c") - >>> add("Some Realm", "d.example.com", "4", "d") - >>> add("Some Realm", "e.example.com:3128", "5", "e") - - >>> mgr.find_user_password("Some Realm", "example.com") - ('joe', 'password') - >>> mgr.find_user_password("Some Realm", "http://example.com") - ('joe', 'password') - >>> mgr.find_user_password("Some Realm", "http://example.com/") - ('joe', 'password') - >>> mgr.find_user_password("Some Realm", "http://example.com/spam") - ('joe', 'password') - >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam") - ('joe', 'password') - >>> mgr.find_user_password("c", "http://example.com/foo") - ('foo', 'ni') - >>> mgr.find_user_password("c", "http://example.com/bar") - ('bar', 'nini') - - Actually, this is really undefined ATM -## Currently, we use the highest-level path where more than one match: - -## >>> mgr.find_user_password("Some Realm", "http://example.com/ni") -## ('joe', 'password') - - Use latest add_password() in case of conflict: - - >>> mgr.find_user_password("b", "http://example.com/") - ('second', 'spam') - - No special relationship between a.example.com and example.com: - - >>> mgr.find_user_password("a", "http://example.com/") - ('1', 'a') - >>> mgr.find_user_password("a", "http://a.example.com/") - (None, None) - - Ports: - - >>> mgr.find_user_password("Some Realm", "c.example.com") - (None, None) - >>> mgr.find_user_password("Some Realm", "c.example.com:3128") - ('3', 'c') - >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128") - ('3', 'c') - >>> mgr.find_user_password("Some Realm", "d.example.com") - ('4', 'd') - >>> mgr.find_user_password("Some Realm", "e.example.com:3128") - ('5', 'e') - - """ - pass - - -def test_password_manager_default_port(self): - """ - >>> mgr = urllib.request.HTTPPasswordMgr() - >>> add = mgr.add_password - - The point to note here is that we can't guess the default port if there's - no scheme. This applies to both add_password and find_user_password. - - >>> add("f", "http://g.example.com:80", "10", "j") - >>> add("g", "http://h.example.com", "11", "k") - >>> add("h", "i.example.com:80", "12", "l") - >>> add("i", "j.example.com", "13", "m") - >>> mgr.find_user_password("f", "g.example.com:100") - (None, None) - >>> mgr.find_user_password("f", "g.example.com:80") - ('10', 'j') - >>> mgr.find_user_password("f", "g.example.com") - (None, None) - >>> mgr.find_user_password("f", "http://g.example.com:100") - (None, None) - >>> mgr.find_user_password("f", "http://g.example.com:80") - ('10', 'j') - >>> mgr.find_user_password("f", "http://g.example.com") - ('10', 'j') - >>> mgr.find_user_password("g", "h.example.com") - ('11', 'k') - >>> mgr.find_user_password("g", "h.example.com:80") - ('11', 'k') - >>> mgr.find_user_password("g", "http://h.example.com:80") - ('11', 'k') - >>> mgr.find_user_password("h", "i.example.com") - (None, None) - >>> mgr.find_user_password("h", "i.example.com:80") - ('12', 'l') - >>> mgr.find_user_password("h", "http://i.example.com:80") - ('12', 'l') - >>> mgr.find_user_password("i", "j.example.com") - ('13', 'm') - >>> mgr.find_user_password("i", "j.example.com:80") - (None, None) - >>> mgr.find_user_password("i", "http://j.example.com") - ('13', 'm') - >>> mgr.find_user_password("i", "http://j.example.com:80") - (None, None) - - """ - class MockOpener: addheaders = [] def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): @@ -434,7 +412,6 @@ def reset(self): self.requests = [] def http_open(self, req): import email, http.client, copy - from io import StringIO self.requests.append(copy.deepcopy(req)) if self._count == 0: self._count = self._count + 1 @@ -494,7 +471,7 @@ def test_badly_named_methods(self): [("do_open", "return self"), ("proxy_open", "return self")], [("redirect_request", "return self")], ] - handlers = add_ordered_mock_handlers(o, meth_spec) + add_ordered_mock_handlers(o, meth_spec) o.add_handler(urllib.request.UnknownHandler()) for scheme in "do", "proxy", "redirect": self.assertRaises(URLError, o.open, scheme+"://example.com/") @@ -538,7 +515,7 @@ class MockHandlerSubclass(MockHandler): pass handlers.append(h) o.add_handler(h) - r = o.open("http://example.com/") + o.open("http://example.com/") # handlers called in reverse order, thanks to their sort order self.assertEqual(o.calls[0][0], handlers[1]) self.assertEqual(o.calls[1][0], handlers[0]) @@ -573,7 +550,7 @@ class Unknown: def __eq__(self, other): return True req = Request("http://example.com/") - r = o.open(req) + o.open(req) assert len(o.calls) == 2 calls = [(handlers[0], "http_open", (req,)), (handlers[2], "http_error_302", @@ -596,7 +573,7 @@ def test_processors(self): handlers = add_ordered_mock_handlers(o, meth_spec) req = Request("http://example.com/") - r = o.open(req) + o.open(req) # processor methods are called on *all* handlers that define them, # not just the first handler that handles the request calls = [ @@ -674,7 +651,7 @@ def connect_ftp(self, user, passwd, host, port, dirs, import ftplib data = "rheum rhaponicum" h = NullFTPHandler(data) - o = h.parent = MockOpener() + h.parent = MockOpener() for url, host, port, user, passwd, type_, dirs, filename, mimetype in [ ("ftp://localhost/foo/bar/baz.html", @@ -917,7 +894,7 @@ def test_http_doubleslash(self): # break anything. Previously, a double slash directly after the host # could cause incorrect parsing. h = urllib.request.AbstractHTTPHandler() - o = h.parent = MockOpener() + h.parent = MockOpener() data = b"" ds_urls = [ @@ -944,7 +921,7 @@ def test_fixpath_in_weirdurls(self): # start with'/' h = urllib.request.AbstractHTTPHandler() - o = h.parent = MockOpener() + h.parent = MockOpener() weird_url = 'http://www.python.org?getspam' req = Request(weird_url) @@ -987,7 +964,7 @@ def test_errors(self): def test_cookies(self): cj = MockCookieJar() h = urllib.request.HTTPCookieProcessor(cj) - o = h.parent = MockOpener() + h.parent = MockOpener() req = Request("http://example.com/") r = MockResponse(200, "OK", {}, "") @@ -1144,7 +1121,7 @@ def test_proxy(self): req = Request("http://acme.example.com/") self.assertEqual(req.host, "acme.example.com") - r = o.open(req) + o.open(req) self.assertEqual(req.host, "proxy.example.com:3128") self.assertEqual([(handlers[0], "http_open")], @@ -1157,11 +1134,11 @@ def test_proxy_no_proxy(self): o.add_handler(ph) req = Request("http://www.perl.org/") self.assertEqual(req.host, "www.perl.org") - r = o.open(req) + o.open(req) self.assertEqual(req.host, "proxy.example.com") req = Request("http://www.python.org") self.assertEqual(req.host, "www.python.org") - r = o.open(req) + o.open(req) self.assertEqual(req.host, "www.python.org") del os.environ['no_proxy'] @@ -1172,7 +1149,7 @@ def test_proxy_no_proxy_all(self): o.add_handler(ph) req = Request("http://www.python.org") self.assertEqual(req.host, "www.python.org") - r = o.open(req) + o.open(req) self.assertEqual(req.host, "www.python.org") del os.environ['no_proxy'] @@ -1188,7 +1165,7 @@ def test_proxy_https(self): req = Request("https://www.example.com/") self.assertEqual(req.host, "www.example.com") - r = o.open(req) + o.open(req) self.assertEqual(req.host, "proxy.example.com:3128") self.assertEqual([(handlers[0], "https_open")], [tup[0:2] for tup in o.calls]) @@ -1204,7 +1181,7 @@ def test_proxy_https_proxy_authorization(self): req.add_header("User-Agent","Grail") self.assertEqual(req.host, "www.example.com") self.assertIsNone(req._tunnel_host) - r = o.open(req) + o.open(req) # Verify Proxy-Authorization gets tunneled to request. # httpsconn req_headers do not have the Proxy-Authorization header but # the req will have. @@ -1370,7 +1347,7 @@ def _test_basic_auth(self, opener, auth_handler, auth_header, self.assertEqual(user, password_manager.user) self.assertEqual(password, password_manager.password) - r = opener.open(request_url) + opener.open(request_url) # should have asked the password manager for the username/password self.assertEqual(password_manager.target_realm, realm) @@ -1390,13 +1367,17 @@ def _test_basic_auth(self, opener, auth_handler, auth_header, # handle the HTTP auth error password_manager.user = password_manager.password = None http_handler.reset() - r = opener.open(request_url) + opener.open(request_url) self.assertEqual(len(http_handler.requests), 1) self.assertFalse(http_handler.requests[0].has_header(auth_header)) class MiscTests(unittest.TestCase): + def opener_has_handler(self, opener, handler_class): + self.assertTrue(any(h.__class__ == handler_class + for h in opener.handlers)) + def test_build_opener(self): class MyHTTPHandler(urllib.request.HTTPHandler): pass class FooHandler(urllib.request.BaseHandler): @@ -1434,9 +1415,22 @@ class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass self.opener_has_handler(o, MyHTTPHandler) self.opener_has_handler(o, MyOtherHTTPHandler) - def opener_has_handler(self, opener, handler_class): - self.assertTrue(any(h.__class__ == handler_class - for h in opener.handlers)) + def test_HTTPError_interface(self): + """ + Issue 13211 reveals that HTTPError didn't implement the URLError + interface even though HTTPError is a subclass of URLError. + """ + msg = 'something bad happened' + url = code = fp = None + hdrs = 'Content-Length: 42' + err = urllib.error.HTTPError(url, code, msg, hdrs, fp) + self.assertTrue(hasattr(err, 'reason')) + self.assertEqual(err.reason, 'something bad happened') + self.assertTrue(hasattr(err, 'hdrs')) + self.assertEqual(err.hdrs, 'Content-Length: 42') + expected_errmsg = 'HTTP Error %s: %s' % (err.code, err.msg) + self.assertEqual(str(err), expected_errmsg) + class RequestTests(unittest.TestCase): @@ -1498,25 +1492,6 @@ def test_url_fragment(self): req = Request(url) self.assertEqual(req.get_full_url(), url) - def test_HTTPError_interface(self): - """ - Issue 13211 reveals that HTTPError didn't implement the URLError - interface even though HTTPError is a subclass of URLError. - - >>> msg = 'something bad happened' - >>> url = code = fp = None - >>> hdrs = 'Content-Length: 42' - >>> err = urllib.error.HTTPError(url, code, msg, hdrs, fp) - >>> assert hasattr(err, 'reason') - >>> err.reason - 'something bad happened' - >>> assert hasattr(err, 'hdrs') - >>> err.hdrs - 'Content-Length: 42' - >>> expected_errmsg = 'HTTP Error %s: %s' % (err.code, err.msg) - >>> assert str(err) == expected_errmsg - """ - def test_HTTPError_interface_call(self): """ Issue 15701 - HTTPError interface has info method available from URLError @@ -1541,7 +1516,8 @@ def test_main(verbose=None): OpenerDirectorTests, HandlerTests, MiscTests, - RequestTests) + RequestTests, + RequestHdrsTests) support.run_unittest(*tests) if __name__ == "__main__":