bpo-36216: Add check for characters in netloc that normalize to separators · python/cpython@c0d9511 (original) (raw)
`@@ -327,6 +327,21 @@ def _splitnetloc(url, start=0):
`
327
327
`delim = min(delim, wdelim) # use earliest delim position
`
328
328
`return url[start:delim], url[delim:] # return (domain, rest)
`
329
329
``
``
330
`+
def _checknetloc(netloc):
`
``
331
`+
if not netloc or not any(ord(c) > 127 for c in netloc):
`
``
332
`+
return
`
``
333
`+
looking for characters like \u2100 that expand to 'a/c'
`
``
334
`+
IDNA uses NFKC equivalence, so normalize for this check
`
``
335
`+
import unicodedata
`
``
336
`+
netloc2 = unicodedata.normalize('NFKC', netloc)
`
``
337
`+
if netloc == netloc2:
`
``
338
`+
return
`
``
339
`+
_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay
`
``
340
`+
for c in '/?#@:':
`
``
341
`+
if c in netloc2:
`
``
342
`+
raise ValueError("netloc '" + netloc2 + "' contains invalid " +
`
``
343
`+
"characters under NFKC normalization")
`
``
344
+
330
345
`def urlsplit(url, scheme='', allow_fragments=True):
`
331
346
`"""Parse a URL into 5 components:
`
332
347
` :///
`
`@@ -356,6 +371,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
`
356
371
`url, fragment = url.split('#', 1)
`
357
372
`if '?' in url:
`
358
373
`url, query = url.split('?', 1)
`
``
374
`+
_checknetloc(netloc)
`
359
375
`v = SplitResult(scheme, netloc, url, query, fragment)
`
360
376
`_parse_cache[key] = v
`
361
377
`return _coerce_result(v)
`
`@@ -379,6 +395,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
`
379
395
`url, fragment = url.split('#', 1)
`
380
396
`if '?' in url:
`
381
397
`url, query = url.split('?', 1)
`
``
398
`+
_checknetloc(netloc)
`
382
399
`v = SplitResult(scheme, netloc, url, query, fragment)
`
383
400
`_parse_cache[key] = v
`
384
401
`return _coerce_result(v)
`