(original) (raw)

changeset: 101372:841a9a3f3cf6 parent: 101367:8c973a2f4f50 parent: 101371:cb09fdef19f5 user: Martin Panter vadmium+py@gmail.com date: Mon May 16 07:45:28 2016 +0000 files: Lib/test/test_urllib2.py Lib/urllib/request.py Misc/NEWS description: Issue #14132, Issue #17214: Merge two redirect handling fixes from 3.5 diff -r 8c973a2f4f50 -r 841a9a3f3cf6 Lib/test/test_urllib.py --- a/Lib/test/test_urllib.py Mon May 16 10:12:15 2016 +0300 +++ b/Lib/test/test_urllib.py Mon May 16 07:45:28 2016 +0000 @@ -1,4 +1,4 @@ -"""Regresssion tests for urllib""" +"""Regresssion tests for what was in Python 2's "urllib" module""" import urllib.parse import urllib.request @@ -86,10 +86,11 @@ # buffer to store data for verification in urlopen tests. buf = None - fakesock = FakeSocket(fakedata) def connect(self): - self.sock = self.fakesock + self.sock = FakeSocket(self.fakedata) + type(self).fakesock = self.sock + FakeHTTPConnection.fakedata = fakedata return FakeHTTPConnection diff -r 8c973a2f4f50 -r 841a9a3f3cf6 Lib/test/test_urllib2.py --- a/Lib/test/test_urllib2.py Mon May 16 10:12:15 2016 +0300 +++ b/Lib/test/test_urllib2.py Mon May 16 07:45:28 2016 +0000 @@ -1208,6 +1208,57 @@ fp = o.open('http://www.example.com') self.assertEqual(fp.geturl(), redirected_url.strip()) + def test_redirect_no_path(self): + # Issue 14132: Relative redirect strips original path + real_class = http.client.HTTPConnection + response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n" + http.client.HTTPConnection = test_urllib.fakehttp(response1) + self.addCleanup(setattr, http.client, "HTTPConnection", real_class) + urls = iter(("/path", "/path?query")) + def request(conn, method, url, *pos, **kw): + self.assertEqual(url, next(urls)) + real_class.request(conn, method, url, *pos, **kw) + # Change response for subsequent connection + conn.__class__.fakedata = b"HTTP/1.1 200 OK\r\n\r\nHello!" 
+ http.client.HTTPConnection.request = request + fp = urllib.request.urlopen("http://python.org/path") + self.assertEqual(fp.geturl(), "http://python.org/path?query") + + def test_redirect_encoding(self): + # Some characters in the redirect target may need special handling, + # but most ASCII characters should be treated as already encoded + class Handler(urllib.request.HTTPHandler): + def http_open(self, req): + result = self.do_open(self.connection, req) + self.last_buf = self.connection.buf + # Set up a normal response for the next request + self.connection = test_urllib.fakehttp( + b'HTTP/1.1 200 OK\r\n' + b'Content-Length: 3\r\n' + b'\r\n' + b'123' + ) + return result + handler = Handler() + opener = urllib.request.build_opener(handler) + tests = ( + (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'), + (b'/spaced%20path/', b'/spaced%20path/'), + (b'/spaced path/', b'/spaced%20path/'), + (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'), + ) + for [location, result] in tests: + with self.subTest(repr(location)): + handler.connection = test_urllib.fakehttp( + b'HTTP/1.1 302 Redirect\r\n' + b'Location: ' + location + b'\r\n' + b'\r\n' + ) + response = opener.open('http://example.com/') + expected = b'GET ' + result + b' ' + request = handler.last_buf + self.assertTrue(request.startswith(expected), repr(request)) + def test_proxy(self): o = OpenerDirector() ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")) diff -r 8c973a2f4f50 -r 841a9a3f3cf6 Lib/urllib/request.py --- a/Lib/urllib/request.py Mon May 16 10:12:15 2016 +0300 +++ b/Lib/urllib/request.py Mon May 16 07:45:28 2016 +0000 @@ -91,6 +91,7 @@ import posixpath import re import socket +import string import sys import time import collections @@ -676,8 +677,12 @@ # from the user (of urllib.request, in this case). In practice, # essentially all clients do redirect in this case, so we do # the same. - # be conciliant with URIs containing a space + + # Be conciliant with URIs containing a space. 
This is mainly + # redundant with the more complete encoding done in http_error_302(), + # but it is kept for compatibility with other callers. newurl = newurl.replace(' ', '%20') + CONTENT_HEADERS = ("content-length", "content-type") newheaders = dict((k, v) for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS) @@ -712,11 +717,16 @@ "%s - Redirection to url '%s' is not allowed" % (msg, newurl), headers, fp) - if not urlparts.path: + if not urlparts.path and urlparts.netloc: urlparts = list(urlparts) urlparts[2] = "/" newurl = urlunparse(urlparts) + # http.client.parse_headers() decodes as ISO-8859-1. Recover the + # original bytes and percent-encode non-ASCII bytes, and any special + # characters such as the space. + newurl = quote( + newurl, encoding="iso-8859-1", safe=string.punctuation) newurl = urljoin(req.full_url, newurl) # XXX Probably want to forget about the state of the current diff -r 8c973a2f4f50 -r 841a9a3f3cf6 Misc/NEWS --- a/Misc/NEWS Mon May 16 10:12:15 2016 +0300 +++ b/Misc/NEWS Mon May 16 07:45:28 2016 +0000 @@ -277,6 +277,15 @@ Library ------- +- Issue #14132: Fix urllib.request redirect handling when the target only has + a query string. Original fix by Ján Janech. + +- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII + bytes found in redirect target URLs. Some servers send Location header + fields with non-ASCII bytes, but "http.client" requires the request target + to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on + patch by Christian Heimes. + - Issue #27033: The default value of the decode_data parameter for smtpd.SMTPChannel and smtpd.SMTPServer constructors is changed to False.