bpo-36742: Fixes handling of pre-normalization characters in urlsplit() · python/cpython@d537ab0 (original) (raw)
File tree
3 files changed
lines changed
- Misc/NEWS.d/next/Security
3 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1011,6 +1011,12 @@ def test_urlsplit_normalization(self): | ||
1011 | 1011 | self.assertIn('\u2100', denorm_chars) |
1012 | 1012 | self.assertIn('\uFF03', denorm_chars) |
1013 | 1013 | |
1014 | +# bpo-36742: Verify port separators are ignored when they | |
1015 | +# existed prior to decomposition | |
1016 | +urllib.parse.urlsplit('http://\u30d5\u309a:80') | |
1017 | +with self.assertRaises(ValueError): | |
1018 | +urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380') | |
1019 | + | |
1014 | 1020 | for scheme in ["http", "https", "ftp"]: |
1015 | 1021 | for c in denorm_chars: |
1016 | 1022 | url = "{}://netloc{}false.netloc/path".format(scheme, c) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -402,13 +402,16 @@ def _checknetloc(netloc): | ||
402 | 402 | # looking for characters like \u2100 that expand to 'a/c' |
403 | 403 | # IDNA uses NFKC equivalence, so normalize for this check |
404 | 404 | import unicodedata |
405 | -netloc2 = unicodedata.normalize('NFKC', netloc) | |
406 | -if netloc == netloc2: | |
405 | +n = netloc.rpartition('@')[2] # ignore anything to the left of '@' | |
406 | +n = n.replace(':', '') # ignore characters already included | |
407 | +n = n.replace('#', '') # but not the surrounding text | |
408 | +n = n.replace('?', '') | |
409 | +netloc2 = unicodedata.normalize('NFKC', n) | |
410 | +if n == netloc2: | |
407 | 411 | return |
408 | -_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay | |
409 | 412 | for c in '/?#@:': |
410 | 413 | if c in netloc2: |
411 | -raise ValueError("netloc '" + netloc2 + "' contains invalid " + | |
414 | +raise ValueError("netloc '" + netloc + "' contains invalid " + | |
412 | 415 | "characters under NFKC normalization") |
413 | 416 | |
414 | 417 | def urlsplit(url, scheme='', allow_fragments=True): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1 | +Fixes mishandling of pre-normalization characters in urlsplit(). |