bpo-30458: Disallow control chars in http URLs. (GH-12755) (GH-13155) · python/cpython@c50d437 (original) (raw)

`@@ -329,6 +329,59 @@ def test_willclose(self):

329

`finally:

330

`self.unfakehttp()

331

332

@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_control_char_rejected(self):
    # Every code point from 0x00 through 0x20 (space), plus 0x7f (DEL),
    # is embedded in the URL path in turn.
    InvalidURL = http.client.InvalidURL
    for char_no in list(range(0, 0x21)) + [0x7f]:
        char = chr(char_no)
        schemeless_url = f"//localhost:7777/test{char}/"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url path safe.
            #
            # repr() renders the character with backslash escapes; each
            # backslash is doubled so the regex matches it literally.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            expected_message = f"contain control.*{escaped_char_repr}"
            with self.assertRaisesRegex(InvalidURL, expected_message):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, expected_message):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(char, resp.geturl())
        finally:
            # Always undo the fake HTTP connection installed above, even
            # when an assertion fails mid-iteration.
            self.unfakehttp()

358

+

359

@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_newline_header_injection_rejected(self):
    self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
    # The "host" carries an embedded request line terminator followed by
    # extra header lines: the shape of a header-injection attempt.
    host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
    schemeless_url = "//" + host + ":8080/test/?test=a"
    try:
        # We explicitly test urllib.request.urlopen() instead of the top
        # level 'def urlopen()' function defined in this... (quite ugly)
        # test suite.  They use different url opening codepaths.  Plain
        # urlopen uses FancyURLOpener which goes via a codepath that
        # calls urllib.parse.quote() on the URL which makes all of the
        # above attempts at injection within the url path safe.
        InvalidURL = http.client.InvalidURL
        # The patterns use an escaped backslash (\\r, \\n) so they match the
        # two-character escape sequences, not real CR/LF characters.
        # NOTE(review): presumably the message shows the URL via repr();
        # confirm against http.client.
        with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
            urllib.request.urlopen(f"http:{schemeless_url}")
        with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
            urllib.request.urlopen(f"https:{schemeless_url}")
        # This code path quotes the URL so there is no injection.
        resp = urlopen(f"http:{schemeless_url}")
        final_url = resp.geturl()
        self.assertNotIn(' ', final_url)
        self.assertNotIn('\r', final_url)
        self.assertNotIn('\n', final_url)
    finally:
        # Always undo the fake HTTP connection installed at the top.
        self.unfakehttp()

384

+

332

385

`def test_read_0_9(self):

333

386

`# "0.9" response accepted (but not "simple responses" without

334

387

`# a status line)