bpo-36216: Add check for characters in netloc that normalize to separators · python/cpython@c0d9511 (original) (raw)

`@@ -327,6 +327,21 @@ def _splitnetloc(url, start=0):

`

327

327

`delim = min(delim, wdelim) # use earliest delim position

`

328

328

`return url[start:delim], url[delim:] # return (domain, rest)

`

329

329

``

``

330

`+

def _checknetloc(netloc):

`

``

331

`+

if not netloc or not any(ord(c) > 127 for c in netloc):

`

``

332

`+

return

`

``

333

`+

looking for characters like \u2100 that expand to 'a/c'

`

``

334

`+

IDNA uses NFKC equivalence, so normalize for this check

`

``

335

`+

import unicodedata

`

``

336

`+

netloc2 = unicodedata.normalize('NFKC', netloc)

`

``

337

`+

if netloc == netloc2:

`

``

338

`+

return

`

``

339

`+

_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay

`

``

340

`+

for c in '/?#@:':

`

``

341

`+

if c in netloc2:

`

``

342

`+

raise ValueError("netloc '" + netloc2 + "' contains invalid " +

`

``

343

`+

"characters under NFKC normalization")

`

``

344

+

330

345

`def urlsplit(url, scheme='', allow_fragments=True):

`

331

346

`"""Parse a URL into 5 components:

`

332

347

` <scheme>://<netloc>/<path>?<query>#<fragment>

`

`@@ -356,6 +371,7 @@ def urlsplit(url, scheme='', allow_fragments=True):

`

356

371

`url, fragment = url.split('#', 1)

`

357

372

`if '?' in url:

`

358

373

`url, query = url.split('?', 1)

`

``

374

`+

_checknetloc(netloc)

`

359

375

`v = SplitResult(scheme, netloc, url, query, fragment)

`

360

376

`_parse_cache[key] = v

`

361

377

`return _coerce_result(v)

`

`@@ -379,6 +395,7 @@ def urlsplit(url, scheme='', allow_fragments=True):

`

379

395

`url, fragment = url.split('#', 1)

`

380

396

`if '?' in url:

`

381

397

`url, query = url.split('?', 1)

`

``

398

`+

_checknetloc(netloc)

`

382

399

`v = SplitResult(scheme, netloc, url, query, fragment)

`

383

400

`_parse_cache[key] = v

`

384

401

`return _coerce_result(v)

`