bpo-30458: Disallow control chars in http URLs. (GH-12755) (#13207) · python/cpython@afe3a49 (original) (raw)

`@@ -330,6 +330,61 @@ def test_willclose(self):

`

330

330

`finally:

`

331

331

`self.unfakehttp()

`

332

332

``

``

333

`+

@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_control_char_rejected(self):
    # Embed every code point from 0x00 through 0x20 (space), plus 0x7f
    # (DEL), in the URL path and check that urllib.request.urlopen()
    # refuses it with http.client.InvalidURL for both http: and https:.
    for char_no in list(range(0, 0x21)) + [0x7f]:
        char = chr(char_no)
        schemeless_url = "//localhost:7777/test{}/".format(char)
        # NOTE(review): fakehttp()/unfakehttp() are defined outside this
        # view; presumably they install/remove a canned HTTP response.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url path safe.
            #
            # The expected message is matched against repr(char); any
            # backslash in that repr must be doubled so the regex engine
            # treats it as a literal backslash rather than an escape.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL,
                    "contain control.*{}".format(escaped_char_repr)):
                urllib.request.urlopen("http:{}".format(schemeless_url))
            with self.assertRaisesRegex(
                    InvalidURL,
                    "contain control.*{}".format(escaped_char_repr)):
                urllib.request.urlopen("https:{}".format(schemeless_url))
            # This code path quotes the URL so there is no injection.
            resp = urlopen("http:{}".format(schemeless_url))
            self.assertNotIn(char, resp.geturl())
        finally:
            # Always undo the fake so a failure for one character does not
            # leak the stub into the next iteration or other tests.
            self.unfakehttp()

`

``

361

+

``

362

`+

@unittest.skipUnless(ssl, "ssl module required")
def test_url_with_newline_header_injection_rejected(self):
    # Build a URL whose host part carries a space plus CR/LF sequences
    # followed by extra header-looking text, and check that
    # urllib.request.urlopen() rejects it with http.client.InvalidURL.
    # NOTE(review): fakehttp()/unfakehttp() are defined outside this view;
    # presumably they install/remove a canned HTTP response.
    self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
    host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
    schemeless_url = "//" + host + ":8080/test/?test=a"
    try:
        # We explicitly test urllib.request.urlopen() instead of the top
        # level 'def urlopen()' function defined in this... (quite ugly)
        # test suite.  They use different url opening codepaths.  Plain
        # urlopen uses FancyURLOpener which goes via a codepath that
        # calls urllib.parse.quote() on the URL which makes all of the
        # above attempts at injection within the url path safe.
        InvalidURL = http.client.InvalidURL
        # In these raw-string patterns "\\r" and "\\n" match a literal
        # backslash followed by 'r' / 'n' (the escaped form of CR / LF),
        # not the control characters themselves.
        with self.assertRaisesRegex(
                InvalidURL,
                r"contain control.*\\r.*(found at least . .)"):
            urllib.request.urlopen("http:{}".format(schemeless_url))
        with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
            urllib.request.urlopen("https:{}".format(schemeless_url))
        # This code path quotes the URL so there is no injection.
        resp = urlopen("http:{}".format(schemeless_url))
        self.assertNotIn(' ', resp.geturl())
        self.assertNotIn('\r', resp.geturl())
        self.assertNotIn('\n', resp.geturl())
    finally:
        # Always undo the fake, even when an assertion above fails.
        self.unfakehttp()

`

``

387

+

333

388

`def test_read_0_9(self):

`

334

389

`# "0.9" response accepted (but not "simple responses" without

`

335

390

`# a status line)

`