[3.8] bpo-43882 - urllib.parse should sanitize urls containing ASCII … · python/cpython@515a7bc (original) (raw)
`@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self):
`
612
612
`with self.assertRaisesRegex(ValueError, "out of range"):
`
613
613
`p.port
`
614
614
``
``
615
`+
def test_urlsplit_remove_unsafe_bytes(self):
`
``
616
`+
# Remove ASCII tabs and newlines from input, for http common case scenario.
`
``
617
`+
url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
`
``
618
`+
p = urllib.parse.urlsplit(url)
`
``
619
`+
self.assertEqual(p.scheme, "http")
`
``
620
`+
self.assertEqual(p.netloc, "www.python.org")
`
``
621
`+
self.assertEqual(p.path, "/javascript:alert('msg')/")
`
``
622
`+
self.assertEqual(p.query, "query=something")
`
``
623
`+
self.assertEqual(p.fragment, "fragment")
`
``
624
`+
self.assertEqual(p.username, None)
`
``
625
`+
self.assertEqual(p.password, None)
`
``
626
`+
self.assertEqual(p.hostname, "www.python.org")
`
``
627
`+
self.assertEqual(p.port, None)
`
``
628
`+
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
`
``
629
+
``
630
`+
# Remove ASCII tabs and newlines from input as bytes, for http common case scenario.
`
``
631
`+
url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
`
``
632
`+
p = urllib.parse.urlsplit(url)
`
``
633
`+
self.assertEqual(p.scheme, b"http")
`
``
634
`+
self.assertEqual(p.netloc, b"www.python.org")
`
``
635
`+
self.assertEqual(p.path, b"/javascript:alert('msg')/")
`
``
636
`+
self.assertEqual(p.query, b"query=something")
`
``
637
`+
self.assertEqual(p.fragment, b"fragment")
`
``
638
`+
self.assertEqual(p.username, None)
`
``
639
`+
self.assertEqual(p.password, None)
`
``
640
`+
self.assertEqual(p.hostname, b"www.python.org")
`
``
641
`+
self.assertEqual(p.port, None)
`
``
642
`+
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
`
``
643
+
``
644
`+
# Remove ASCII tabs and newlines from input, any scheme.
`
``
645
`+
url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
`
``
646
`+
p = urllib.parse.urlsplit(url)
`
``
647
`+
self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
`
``
648
+
``
649
`+
# Remove ASCII tabs and newlines from input as bytes, any scheme.
`
``
650
`+
url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
`
``
651
`+
p = urllib.parse.urlsplit(url)
`
``
652
`+
self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
`
``
653
+
``
654
`+
# Unsafe bytes are not returned from the urlparse cache.
`
``
655
`+
# The scheme is stored after parsing; sending a scheme with unsafe bytes will not return an unsafe scheme.
`
``
656
`+
url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
`
``
657
`+
scheme = "htt\nps"
`
``
658
`+
for _ in range(2):
`
``
659
`+
p = urllib.parse.urlsplit(url, scheme=scheme)
`
``
660
`+
self.assertEqual(p.scheme, "https")
`
``
661
`+
self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")
`
``
662
+
615
663
`def test_attributes_bad_port(self):
`
616
664
`"""Check handling of invalid ports."""
`
617
665
`for bytes in (False, True):
`