bpo-30458: Disallow control chars in http URLs. (GH-12755) (GH-13155) · python/cpython@c50d437 (original) (raw)

`@@ -329,6 +329,59 @@ def test_willclose(self):

`

329

329

`finally:

`

330

330

`self.unfakehttp()

`

331

331

``

``

332

`+

@unittest.skipUnless(ssl, "ssl module required")

`

``

333

`+

def test_url_with_control_char_rejected(self):

`

``

334

`+

for char_no in list(range(0, 0x21)) + [0x7f]:

`

``

335

`+

char = chr(char_no)

`

``

336

`+

schemeless_url = f"//localhost:7777/test{char}/"

`

``

337

`+

self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")

`

``

338

`+

try:

`

``

339

`+

# We explicitly test urllib.request.urlopen() instead of the top

`

``

340

`+

# level 'def urlopen()' function defined in this... (quite ugly)

`

``

341

`+

# test suite. They use different url opening codepaths. Plain

`

``

342

`+

# urlopen uses FancyURLOpener which goes via a codepath that

`

``

343

`+

# calls urllib.parse.quote() on the URL which makes all of the

`

``

344

`+

# above attempts at injection within the url path safe.

`

``

345

`+

escaped_char_repr = repr(char).replace('\\', r'\\')

`

``

346

`+

InvalidURL = http.client.InvalidURL

`

``

347

`+

with self.assertRaisesRegex(

`

``

348

`+

InvalidURL, f"contain control.*{escaped_char_repr}"):

`

``

349

`+

urllib.request.urlopen(f"http:{schemeless_url}")

`

``

350

`+

with self.assertRaisesRegex(

`

``

351

`+

InvalidURL, f"contain control.*{escaped_char_repr}"):

`

``

352

`+

urllib.request.urlopen(f"https:{schemeless_url}")

`

``

353

`+

# This code path quotes the URL so there is no injection.

`

``

354

`+

resp = urlopen(f"http:{schemeless_url}")

`

``

355

`+

self.assertNotIn(char, resp.geturl())

`

``

356

`+

finally:

`

``

357

`+

self.unfakehttp()

`

``

358

+

``

359

`+

@unittest.skipUnless(ssl, "ssl module required")

`

``

360

`+

def test_url_with_newline_header_injection_rejected(self):

`

``

361

`+

self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")

`

``

362

`+

host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"

`

``

363

`+

schemeless_url = "//" + host + ":8080/test/?test=a"

`

``

364

`+

try:

`

``

365

`+

# We explicitly test urllib.request.urlopen() instead of the top

`

``

366

`+

# level 'def urlopen()' function defined in this... (quite ugly)

`

``

367

`+

# test suite. They use different url opening codepaths. Plain

`

``

368

`+

# urlopen uses FancyURLOpener which goes via a codepath that

`

``

369

`+

# calls urllib.parse.quote() on the URL which makes all of the

`

``

370

`+

# above attempts at injection within the url path safe.

`

``

371

`+

InvalidURL = http.client.InvalidURL

`

``

372

`+

with self.assertRaisesRegex(

`

``

373

`+

InvalidURL, r"contain control.*\\r.*(found at least . .)"):

`

``

374

`+

urllib.request.urlopen(f"http:{schemeless_url}")

`

``

375

`+

with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):

`

``

376

`+

urllib.request.urlopen(f"https:{schemeless_url}")

`

``

377

`+

# This code path quotes the URL so there is no injection.

`

``

378

`+

resp = urlopen(f"http:{schemeless_url}")

`

``

379

`+

self.assertNotIn(' ', resp.geturl())

`

``

380

`+

self.assertNotIn('\r', resp.geturl())

`

``

381

`+

self.assertNotIn('\n', resp.geturl())

`

``

382

`+

finally:

`

``

383

`+

self.unfakehttp()

`

``

384

+

332

385

`def test_read_0_9(self):

`

333

386

`# "0.9" response accepted (but not "simple responses" without

`

334

387

`# a status line)

`