cpython: 841a9a3f3cf6 (original) (raw)
Mercurial > cpython
changeset 101372:841a9a3f3cf6
Issue #14132, Issue #17214: Merge two redirect handling fixes from 3.5 [#14132]
Martin Panter vadmium+py@gmail.com | |
---|---|
date | Mon, 16 May 2016 07:45:28 +0000 |
parents | 8c973a2f4f50 (current diff), cb09fdef19f5 (diff) |
children | 0fe913de4702 |
files | Lib/test/test_urllib2.py Lib/urllib/request.py Misc/NEWS |
diffstat | 4 files changed, 76 insertions(+), 5 deletions(-): Lib/test/test_urllib.py 7, Lib/test/test_urllib2.py 51, Lib/urllib/request.py 14, Misc/NEWS 9 |
line wrap: on
line diff
--- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1,4 +1,4 @@ -"""Regresssion tests for urllib""" +"""Regresssion tests for what was in Python 2's "urllib" module""" import urllib.parse import urllib.request @@ -86,10 +86,11 @@ def fakehttp(fakedata): # buffer to store data for verification in urlopen tests. buf = None
fakesock = FakeSocket(fakedata)[](#l1.13)
self.sock = self.fakesock[](#l1.16)
self.sock = FakeSocket(self.fakedata)[](#l1.17)
type(self).fakesock = self.sock[](#l1.18)
- FakeHTTPConnection.fakedata = fakedata
--- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1208,6 +1208,57 @@ class HandlerTests(unittest.TestCase): fp = o.open('http://www.example.com')[](#l2.4) self.assertEqual(fp.geturl(), redirected_url.strip())
- def test_redirect_no_path(self):
# Issue 14132: Relative redirect strips original path[](#l2.8)
real_class = http.client.HTTPConnection[](#l2.9)
response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n"[](#l2.10)
http.client.HTTPConnection = test_urllib.fakehttp(response1)[](#l2.11)
self.addCleanup(setattr, http.client, "HTTPConnection", real_class)[](#l2.12)
urls = iter(("/path", "/path?query"))[](#l2.13)
def request(conn, method, url, *pos, **kw):[](#l2.14)
self.assertEqual(url, next(urls))[](#l2.15)
real_class.request(conn, method, url, *pos, **kw)[](#l2.16)
# Change response for subsequent connection[](#l2.17)
conn.__class__.fakedata = b"HTTP/1.1 200 OK\r\n\r\nHello!"[](#l2.18)
http.client.HTTPConnection.request = request[](#l2.19)
fp = urllib.request.urlopen("http://python.org/path")[](#l2.20)
self.assertEqual(fp.geturl(), "http://python.org/path?query")[](#l2.21)
- def test_redirect_encoding(self):
# Some characters in the redirect target may need special handling,[](#l2.24)
# but most ASCII characters should be treated as already encoded[](#l2.25)
class Handler(urllib.request.HTTPHandler):[](#l2.26)
def http_open(self, req):[](#l2.27)
result = self.do_open(self.connection, req)[](#l2.28)
self.last_buf = self.connection.buf[](#l2.29)
# Set up a normal response for the next request[](#l2.30)
self.connection = test_urllib.fakehttp([](#l2.31)
b'HTTP/1.1 200 OK\r\n'[](#l2.32)
b'Content-Length: 3\r\n'[](#l2.33)
b'\r\n'[](#l2.34)
b'123'[](#l2.35)
)[](#l2.36)
return result[](#l2.37)
handler = Handler()[](#l2.38)
opener = urllib.request.build_opener(handler)[](#l2.39)
tests = ([](#l2.40)
(b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),[](#l2.41)
(b'/spaced%20path/', b'/spaced%20path/'),[](#l2.42)
(b'/spaced path/', b'/spaced%20path/'),[](#l2.43)
(b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),[](#l2.44)
)[](#l2.45)
for [location, result] in tests:[](#l2.46)
with self.subTest(repr(location)):[](#l2.47)
handler.connection = test_urllib.fakehttp([](#l2.48)
b'HTTP/1.1 302 Redirect\r\n'[](#l2.49)
b'Location: ' + location + b'\r\n'[](#l2.50)
b'\r\n'[](#l2.51)
)[](#l2.52)
response = opener.open('http://example.com/')[](#l2.53)
expected = b'GET ' + result + b' '[](#l2.54)
request = handler.last_buf[](#l2.55)
self.assertTrue(request.startswith(expected), repr(request))[](#l2.56)
+ def test_proxy(self): o = OpenerDirector() ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
--- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -91,6 +91,7 @@ import os import posixpath import re import socket +import string import sys import time import collections @@ -676,8 +677,12 @@ class HTTPRedirectHandler(BaseHandler): # from the user (of urllib.request, in this case). In practice, # essentially all clients do redirect in this case, so we do # the same.
# be conciliant with URIs containing a space[](#l3.15)
# Be conciliant with URIs containing a space. This is mainly[](#l3.17)
# redundant with the more complete encoding done in http_error_302(),[](#l3.18)
# but it is kept for compatibility with other callers.[](#l3.19) newurl = newurl.replace(' ', '%20')[](#l3.20)
+ CONTENT_HEADERS = ("content-length", "content-type") newheaders = dict((k, v) for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS) @@ -712,11 +717,16 @@ class HTTPRedirectHandler(BaseHandler): "%s - Redirection to url '%s' is not allowed" % (msg, newurl), headers, fp)
if not urlparts.path:[](#l3.29)
if not urlparts.path and urlparts.netloc:[](#l3.30) urlparts = list(urlparts)[](#l3.31) urlparts[2] = "/"[](#l3.32) newurl = urlunparse(urlparts)[](#l3.33)
# http.client.parse_headers() decodes as ISO-8859-1. Recover the[](#l3.35)
# original bytes and percent-encode non-ASCII bytes, and any special[](#l3.36)
# characters such as the space.[](#l3.37)
newurl = quote([](#l3.38)
newurl, encoding="iso-8859-1", safe=string.punctuation)[](#l3.39) newurl = urljoin(req.full_url, newurl)[](#l3.40)
# XXX Probably want to forget about the state of the current
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -277,6 +277,15 @@ Core and Builtins Library ------- +- Issue #14132: Fix urllib.request redirect handling when the target only has
- a query string. Original fix by Ján Janech. + +- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
- bytes found in redirect target URLs. Some servers send Location header
- fields with non-ASCII bytes, but "http.client" requires the request target
- to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on
- patch by Christian Heimes. +