fix: HTTP 308 Permanent Redirect status code handling (#2389) · RDFLib/rdflib@e0b3152 (original) (raw)

``

1

`+

from __future__ import annotations

`

``

2

+

``

3

`+

import string

`

``

4

`+

import sys

`

``

5

`+

from typing import Dict

`

``

6

`+

from urllib.error import HTTPError

`

``

7

`+

from urllib.parse import quote as urlquote

`

``

8

`+

from urllib.parse import urljoin, urlsplit

`

``

9

`+

from urllib.request import HTTPRedirectHandler, Request, urlopen

`

``

10

`+

from urllib.response import addinfourl

`

``

11

+

``

12

+

``

13

`+

def _make_redirect_request(request: Request, http_error: HTTPError) -> Request:
    """
    Create a new request object for a redirected request.

    The logic is based on `urllib.request.HTTPRedirectHandler` from `this commit <https://github.com/python/cpython/blob/b58bc8c2a9a316891a5ea1a0487aebfc86c2793a/Lib/urllib/request.py#L641-L751>`_.

    As a side effect the redirect bookkeeping dictionary (``redirect_dict``,
    mapping each visited URL to the number of times it was visited) is created
    on ``request`` if it does not exist yet, shared with the returned request,
    and updated with the new location.

    :param request: The original request that resulted in the redirect.
    :param http_error: The response to the original request that indicates a
        redirect should occur and contains the new location.
    :return: A new request object to the location indicated by the response.
    :raises HTTPError: The supplied ``http_error`` itself if the response does
        not contain a ``Location`` header.
    :raises ValueError: If the ``Location`` header is not a string.
    :raises HTTPError: If the scheme of the new location is not ``http``,
        ``https``, or ``ftp`` (an empty scheme, i.e. a relative location, is
        accepted).
    :raises HTTPError: If there are too many redirects or a redirect loop.
    """
    new_url = http_error.headers.get("Location")
    if new_url is None:
        # Without a target there is nothing to follow; surface the original
        # response to the caller unchanged.
        raise http_error
    if not isinstance(new_url, str):
        raise ValueError(f"Location header {new_url!r} is not a string")

    new_url_parts = urlsplit(new_url)

    # For security reasons don't allow redirection to anything other than http,
    # https or ftp.
    if new_url_parts.scheme not in ("http", "https", "ftp", ""):
        raise HTTPError(
            new_url,
            http_error.code,
            f"{http_error.reason} - Redirection to url {new_url!r} is not allowed",
            http_error.headers,
            http_error.fp,
        )

    # http.client.parse_headers() decodes as ISO-8859-1. Recover the original
    # bytes and percent-encode non-ASCII bytes, and any special characters such
    # as the space.
    new_url = urlquote(new_url, encoding="iso-8859-1", safe=string.punctuation)
    # Resolve relative locations against the URL that was originally requested.
    new_url = urljoin(request.full_url, new_url)

    # XXX Probably want to forget about the state of the current
    # request, although that might interact poorly with other
    # handlers that also use handler-specific request attributes
    content_headers = ("content-length", "content-type")
    newheaders = {
        k: v for k, v in request.headers.items() if k.lower() not in content_headers
    }
    # NOTE(review): no ``data`` or ``method`` is passed on, so the follow-up
    # request carries no body; the content headers are dropped to match.
    new_request = Request(
        new_url,
        headers=newheaders,
        origin_req_host=request.origin_req_host,
        unverifiable=True,
    )

    # Loop detection: ``redirect_dict`` is shared by every request in a
    # redirect chain and counts how often each URL has been visited.
    visited: Dict[str, int]
    if hasattr(request, "redirect_dict"):
        visited = request.redirect_dict
        if (
            visited.get(new_url, 0) >= HTTPRedirectHandler.max_repeats
            or len(visited) >= HTTPRedirectHandler.max_redirections
        ):
            raise HTTPError(
                request.full_url,
                http_error.code,
                HTTPRedirectHandler.inf_msg + http_error.reason,
                http_error.headers,
                http_error.fp,
            )
    else:
        visited = {}
        setattr(request, "redirect_dict", visited)

    setattr(new_request, "redirect_dict", visited)
    visited[new_url] = visited.get(new_url, 0) + 1
    return new_request

`

``

92

+

``

93

+

``

94

`+

def _urlopen(request: Request) -> addinfourl:
    """
    This is a shim for `urlopen` that handles HTTP redirects with status code
    308 (Permanent Redirect).

    This function should be removed once all supported versions of Python
    handle the 308 HTTP status code.

    :param request: The request to open.
    :return: The response to the request.
    :raises HTTPError: Any HTTP error raised by `urlopen` that is not a 308
        response on a Python version older than 3.11, and any error raised
        while building the follow-up request for a 308 response.
    """
    # Iterate instead of recursing inside the ``except`` block; termination is
    # guaranteed by the redirect limits enforced in ``_make_redirect_request``.
    while True:
        try:
            return urlopen(request)
        except HTTPError as error:
            if error.code != 308 or sys.version_info >= (3, 11):
                raise
            # HTTP response code 308 (Permanent Redirect) is not supported by python
            # versions older than 3.11. See <https://bugs.python.org/issue40321> and
            # <https://github.com/python/cpython/issues/84501> for more details.
            # This custom error handling should be removed once all supported
            # versions of Python handle 308.
            new_request = _make_redirect_request(request, error)
            # The redirect response is no longer needed once the follow-up
            # request exists; close it so the connection is released promptly.
            # It is deliberately not closed earlier because
            # ``_make_redirect_request`` may re-raise it or reuse its ``fp``.
            error.close()
            request = new_request

`