bpo-30458: Disallow control chars in http URLs. (GH-12755) (#13207) · python/cpython@afe3a49 (original) (raw)
`@@ -330,6 +330,61 @@ def test_willclose(self):
`
330
330
`finally:
`
331
331
`self.unfakehttp()
`
332
332
``
``
333
`+
@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_control_char_rejected(self):
    # Each code point in 0x00-0x20, plus 0x7f (DEL), is embedded in the URL
    # path in turn; urllib.request.urlopen() must reject every one of them.
    for char_no in list(range(0, 0x21)) + [0x7f]:
        char = chr(char_no)
        schemeless_url = "//localhost:7777/test{}/".format(char)
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite. They use different url opening codepaths. Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url path safe.
            #
            # repr() may render the character as a backslash escape; double
            # every backslash so the regex matches that escape literally.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            InvalidURL = http.client.InvalidURL
            expected_regex = "contain control.*{}".format(escaped_char_repr)
            with self.assertRaisesRegex(InvalidURL, expected_regex):
                urllib.request.urlopen("http:{}".format(schemeless_url))
            with self.assertRaisesRegex(InvalidURL, expected_regex):
                urllib.request.urlopen("https:{}".format(schemeless_url))
            # This code path quotes the URL so there is no injection.
            resp = urlopen("http:{}".format(schemeless_url))
            self.assertNotIn(char, resp.geturl())
        finally:
            # Always undo the fake HTTP connection installed for this
            # iteration, even when an assertion above fails.
            self.unfakehttp()
`
``
361
+
``
362
`+
@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_newline_header_injection_rejected(self):
    # A URL carrying CR/LF sequences that try to smuggle extra request
    # headers must be rejected by urllib.request.urlopen().
    self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
    host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
    schemeless_url = "//" + host + ":8080/test/?test=a"
    try:
        # We explicitly test urllib.request.urlopen() instead of the top
        # level 'def urlopen()' function defined in this... (quite ugly)
        # test suite. They use different url opening codepaths. Plain
        # urlopen uses FancyURLOpener which goes via a codepath that
        # calls urllib.parse.quote() on the URL which makes all of the
        # above attempts at injection within the url path safe.
        InvalidURL = http.client.InvalidURL
        # The patterns use a doubled backslash so they match the escaped
        # text "\r" / "\n" in the error message, not a raw CR or LF.
        # NOTE(review): assumes the message embeds the URL via repr();
        # confirm against http.client.
        with self.assertRaisesRegex(
                InvalidURL,
                r"contain control.*\\r.*(found at least . .)"):
            urllib.request.urlopen("http:{}".format(schemeless_url))
        with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
            urllib.request.urlopen("https:{}".format(schemeless_url))
        # This code path quotes the URL so there is no injection.
        resp = urlopen("http:{}".format(schemeless_url))
        self.assertNotIn(' ', resp.geturl())
        self.assertNotIn('\r', resp.geturl())
        self.assertNotIn('\n', resp.geturl())
    finally:
        # Always undo the fake HTTP connection, even when an assertion
        # above fails.
        self.unfakehttp()
`
``
387
+
333
388
`def test_read_0_9(self):
`
334
389
`# "0.9" response accepted (but not "simple responses" without
`
335
390
`# a status line)
`