bpo-30458: Disallow control chars in http URLs. (GH-12755) (GH-13155) · python/cpython@c50d437 (original) (raw)
`@@ -329,6 +329,59 @@ def test_willclose(self):
`
329
329
`finally:
`
330
330
`self.unfakehttp()
`
331
331
``
``
332
`+
@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_control_char_rejected(self):
    # Check that a URL whose path contains a control character is refused
    # with http.client.InvalidURL by urllib.request.urlopen(), for both the
    # http and https schemes, and that the legacy urlopen() helper never
    # lets the raw character through.
    #
    # Covers every code point from 0x00 through 0x20 (space included)
    # plus DEL (0x7f).
    for char_no in list(range(0, 0x21)) + [0x7f]:
        char = chr(char_no)
        schemeless_url = f"//localhost:7777/test{char}/"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url path safe.

            # repr() renders control characters as backslash escapes; the
            # backslashes are doubled so that the text can be embedded in a
            # regular expression and still match a literal backslash.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, f"contain control.*{escaped_char_repr}"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(
                InvalidURL, f"contain control.*{escaped_char_repr}"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(char, resp.geturl())
        finally:
            # Always restore the real connection class, even when an
            # assertion above fails part-way through the loop.
            self.unfakehttp()
`
``
358
+
``
359
`+
@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_newline_header_injection_rejected(self):
    # Check that a URL whose host part smuggles CR/LF sequences (an attempt
    # to inject extra request headers) is refused with
    # http.client.InvalidURL by urllib.request.urlopen(), and that the
    # legacy urlopen() helper yields a URL free of space, CR and LF.
    self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
    host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
    schemeless_url = "//" + host + ":8080/test/?test=a"
    try:
        # We explicitly test urllib.request.urlopen() instead of the top
        # level 'def urlopen()' function defined in this... (quite ugly)
        # test suite.  They use different url opening codepaths.  Plain
        # urlopen uses FancyURLOpener which goes via a codepath that
        # calls urllib.parse.quote() on the URL which makes all of the
        # above attempts at injection within the url path safe.
        InvalidURL = http.client.InvalidURL
        # The doubled backslashes make the patterns look for the escaped
        # text (a backslash followed by 'r' or 'n') rather than for a raw
        # CR or LF character.
        # NOTE(review): assumes the error message shows the offending
        # characters in repr() form -- confirm against http.client.
        with self.assertRaisesRegex(
            InvalidURL, r"contain control.*\\r.*(found at least . .)"):
            urllib.request.urlopen(f"http:{schemeless_url}")
        with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
            urllib.request.urlopen(f"https:{schemeless_url}")
        # This code path quotes the URL so there is no injection.
        resp = urlopen(f"http:{schemeless_url}")
        self.assertNotIn(' ', resp.geturl())
        self.assertNotIn('\r', resp.geturl())
        self.assertNotIn('\n', resp.geturl())
    finally:
        # Always restore the real connection class, even on failure.
        self.unfakehttp()
`
``
384
+
332
385
`def test_read_0_9(self):
`
333
386
`# "0.9" response accepted (but not "simple responses" without
`
334
387
`# a status line)
`