fix: HTTP 308 Permanent Redirect status code handling (#2389) · RDFLib/rdflib@e0b3152 (original) (raw)
``
1
`+
from __future__ import annotations
`
``
2
+
``
3
`+
import string
`
``
4
`+
import sys
`
``
5
`+
from typing import Dict
`
``
6
`+
from urllib.error import HTTPError
`
``
7
`+
from urllib.parse import quote as urlquote
`
``
8
`+
from urllib.parse import urljoin, urlsplit
`
``
9
`+
from urllib.request import HTTPRedirectHandler, Request, urlopen
`
``
10
`+
from urllib.response import addinfourl
`
``
11
+
``
12
+
``
13
`+
def _make_redirect_request(request: Request, http_error: HTTPError) -> Request:
`
``
14
`+
"""
`
``
15
`+
Create a new request object for a redirected request.
`
``
16
+
``
17
`` +
The logic is based on urllib.request.HTTPRedirectHandler
from this commit <https://github.com/python/cpython/blob/b58bc8c2a9a316891a5ea1a0487aebfc86c2793a/Lib/urllib/request.py#L641-L751>_
.
``
``
18
+
``
19
`+
:param request: The original request that resulted in the redirect.
`
``
20
`+
:param http_error: The response to the original request that indicates a
`
``
21
`+
redirect should occur and contains the new location.
`
``
22
`+
:return: A new request object to the location indicated by the response.
`
``
23
:raises HTTPError: the supplied ``http_error`` if the redirect request
``
24
`+
cannot be created.
`
``
25
`` +
:raises ValueError: If the response code is None
.
``
``
26
:raises ValueError: If the response does not contain a ``Location`` header
``
27
or the ``Location`` header is not a string.
``
28
:raises HTTPError: If the scheme of the new location is not ``http``,
``
29
``https``, or ``ftp``.
``
30
`+
:raises HTTPError: If there are too many redirects or a redirect loop.
`
``
31
`+
"""
`
``
32
`+
new_url = http_error.headers.get("Location")
`
``
33
`+
if new_url is None:
`
``
34
`+
raise http_error
`
``
35
`+
if not isinstance(new_url, str):
`
``
36
`+
raise ValueError(f"Location header {new_url!r} is not a string")
`
``
37
+
``
38
`+
new_url_parts = urlsplit(new_url)
`
``
39
+
``
40
`+
For security reasons don't allow redirection to anything other than http,
`
``
41
`+
https or ftp.
`
``
42
`+
if new_url_parts.scheme not in ("http", "https", "ftp", ""):
`
``
43
`+
raise HTTPError(
`
``
44
`+
new_url,
`
``
45
`+
http_error.code,
`
``
46
`+
f"{http_error.reason} - Redirection to url {new_url!r} is not allowed",
`
``
47
`+
http_error.headers,
`
``
48
`+
http_error.fp,
`
``
49
`+
)
`
``
50
+
``
51
`+
http.client.parse_headers() decodes as ISO-8859-1. Recover the original
`
``
52
`+
bytes and percent-encode non-ASCII bytes, and any special characters such
`
``
53
`+
as the space.
`
``
54
`+
new_url = urlquote(new_url, encoding="iso-8859-1", safe=string.punctuation)
`
``
55
`+
new_url = urljoin(request.full_url, new_url)
`
``
56
+
``
57
`+
XXX Probably want to forget about the state of the current
`
``
58
`+
request, although that might interact poorly with other
`
``
59
`+
handlers that also use handler-specific request attributes
`
``
60
`+
content_headers = ("content-length", "content-type")
`
``
61
`+
newheaders = {
`
``
62
`+
k: v for k, v in request.headers.items() if k.lower() not in content_headers
`
``
63
`+
}
`
``
64
`+
new_request = Request(
`
``
65
`+
new_url,
`
``
66
`+
headers=newheaders,
`
``
67
`+
origin_req_host=request.origin_req_host,
`
``
68
`+
unverifiable=True,
`
``
69
`+
)
`
``
70
+
``
71
`+
visited: Dict[str, int]
`
``
72
`+
if hasattr(request, "redirect_dict"):
`
``
73
`+
visited = request.redirect_dict
`
``
74
`+
if (
`
``
75
`+
visited.get(new_url, 0) >= HTTPRedirectHandler.max_repeats
`
``
76
`+
or len(visited) >= HTTPRedirectHandler.max_redirections
`
``
77
`+
):
`
``
78
`+
raise HTTPError(
`
``
79
`+
request.full_url,
`
``
80
`+
http_error.code,
`
``
81
`+
HTTPRedirectHandler.inf_msg + http_error.reason,
`
``
82
`+
http_error.headers,
`
``
83
`+
http_error.fp,
`
``
84
`+
)
`
``
85
`+
else:
`
``
86
`+
visited = {}
`
``
87
`+
setattr(request, "redirect_dict", visited)
`
``
88
+
``
89
`+
setattr(new_request, "redirect_dict", visited)
`
``
90
`+
visited[new_url] = visited.get(new_url, 0) + 1
`
``
91
`+
return new_request
`
``
92
+
``
93
+
``
94
`+
def _urlopen(request: Request) -> addinfourl:
`
``
95
`+
"""
`
``
96
`` +
This is a shim for urlopen
that handles HTTP redirects with status code
``
``
97
`+
308 (Permanent Redirect).
`
``
98
+
``
99
`+
This function should be removed once all supported versions of Python
`
``
100
`+
handles the 308 HTTP status code.
`
``
101
+
``
102
`+
:param request: The request to open.
`
``
103
`+
:return: The response to the request.
`
``
104
`+
"""
`
``
105
`+
try:
`
``
106
`+
return urlopen(request)
`
``
107
`+
except HTTPError as error:
`
``
108
`+
if error.code == 308 and sys.version_info < (3, 11):
`
``
109
`+
HTTP response code 308 (Permanent Redirect) is not supported by python
`
``
110
`+
versions older than 3.11. See https://bugs.python.org/issue40321 and
`
``
111
`+
https://github.com/python/cpython/issues/84501 for more details.
`
``
112
`+
This custom error handling should be removed once all supported
`
``
113
`+
versions of Python handles 308.
`
``
114
`+
new_request = _make_redirect_request(request, error)
`
``
115
`+
return _urlopen(new_request)
`
``
116
`+
else:
`
``
117
`+
raise
`