cpython: cb09fdef19f5 (original) (raw)
Mercurial > cpython
changeset 101371:cb09fdef19f5 3.5
Issue #17214: Percent-encode non-ASCII bytes in redirect targets. Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable; otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer from this problem because it allows non-ASCII bytes in the HTTP request target. [#17214]
Martin Panter vadmium+py@gmail.com | |
---|---|
date | Mon, 16 May 2016 01:14:20 +0000 |
parents | 52a7f580580c |
children | 841a9a3f3cf6 d921932fe02f |
files | Lib/test/test_urllib2.py Lib/urllib/request.py Misc/NEWS |
diffstat | 3 files changed, 52 insertions(+), 1 deletions(-)[+] [-] Lib/test/test_urllib2.py 35 Lib/urllib/request.py 12 Misc/NEWS 6 |
line wrap: on
line diff
--- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1224,6 +1224,41 @@ class HandlerTests(unittest.TestCase): fp = urllib.request.urlopen("http://python.org/path")[](#l1.4) self.assertEqual(fp.geturl(), "http://python.org/path?query")[](#l1.5)
def test_redirect_encoding(self):
    # Some characters in the redirect target may need special handling,
    # but most ASCII characters should be treated as already encoded.
    class Handler(urllib.request.HTTPHandler):
        def http_open(self, req):
            result = self.do_open(self.connection, req)
            # Remember the buffer of the connection that just handled
            # the request; the assertion below inspects it as the
            # request that was sent.
            self.last_buf = self.connection.buf
            # Set up a normal response for the next request
            self.connection = test_urllib.fakehttp(
                b'HTTP/1.1 200 OK\r\n'
                b'Content-Length: 3\r\n'
                b'\r\n'
                b'123'
            )
            return result

    handler = Handler()
    opener = urllib.request.build_opener(handler)
    # Each entry is (raw Location header value, expected request target).
    tests = (
        (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
        (b'/spaced%20path/', b'/spaced%20path/'),
        (b'/spaced path/', b'/spaced%20path/'),
        (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
    )
    for location, result in tests:
        with self.subTest(repr(location)):
            # The first response seen by the opener is a redirect whose
            # Location header carries the bytes under test.
            handler.connection = test_urllib.fakehttp(
                b'HTTP/1.1 302 Redirect\r\n'
                b'Location: ' + location + b'\r\n'
                b'\r\n'
            )
            response = opener.open('http://example.com/')
            # Only the request line is checked; close the response so
            # that it is not leaked between sub-tests.
            response.close()
            expected = b'GET ' + result + b' '
            request = handler.last_buf
            self.assertTrue(request.startswith(expected), repr(request))
+ def test_proxy(self): o = OpenerDirector() ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
--- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -91,6 +91,7 @@ import os import posixpath import re import socket +import string import sys import time import collections @@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler): # from the user (of urllib.request, in this case). In practice, # essentially all clients do redirect in this case, so we do # the same.
# be conciliant with URIs containing a space[](#l2.15)
# Be conciliant with URIs containing a space. This is mainly[](#l2.17)
# redundant with the more complete encoding done in http_error_302(),[](#l2.18)
# but it is kept for compatibility with other callers.[](#l2.19) newurl = newurl.replace(' ', '%20')[](#l2.20)
+ CONTENT_HEADERS = ("content-length", "content-type") newheaders = dict((k, v) for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS) @@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler): urlparts[2] = "/" newurl = urlunparse(urlparts)
# http.client.parse_headers() decodes as ISO-8859-1. Recover the[](#l2.29)
# original bytes and percent-encode non-ASCII bytes, and any special[](#l2.30)
# characters such as the space.[](#l2.31)
newurl = quote([](#l2.32)
newurl, encoding="iso-8859-1", safe=string.punctuation)[](#l2.33) newurl = urljoin(req.full_url, newurl)[](#l2.34)
# XXX Probably want to forget about the state of the current
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -121,6 +121,12 @@ Library
- Issue #14132: Fix urllib.request redirect handling when the target only has a query string. Original fix by Ján Janech. +- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII