[3.8] bpo-43882 - urllib.parse should sanitize urls containing ASCII … · python/cpython@515a7bc (original) (raw)

`@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self):

`

612

612

`with self.assertRaisesRegex(ValueError, "out of range"):

`

613

613

`p.port

`

614

614

``

``

615

`+

def test_urlsplit_remove_unsafe_bytes(self):

`

``

616

`+

# Remove ASCII tabs and newlines from input, for the common http case.

`

``

617

`+

url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"

`

``

618

`+

p = urllib.parse.urlsplit(url)

`

``

619

`+

self.assertEqual(p.scheme, "http")

`

``

620

`+

self.assertEqual(p.netloc, "www.python.org")

`

``

621

`+

self.assertEqual(p.path, "/javascript:alert('msg')/")

`

``

622

`+

self.assertEqual(p.query, "query=something")

`

``

623

`+

self.assertEqual(p.fragment, "fragment")

`

``

624

`+

self.assertEqual(p.username, None)

`

``

625

`+

self.assertEqual(p.password, None)

`

``

626

`+

self.assertEqual(p.hostname, "www.python.org")

`

``

627

`+

self.assertEqual(p.port, None)

`

``

628

`+

self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")

`

``

629

+

``

630

`+

# Remove ASCII tabs and newlines from input given as bytes, for the common http case.

`

``

631

`+

url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"

`

``

632

`+

p = urllib.parse.urlsplit(url)

`

``

633

`+

self.assertEqual(p.scheme, b"http")

`

``

634

`+

self.assertEqual(p.netloc, b"www.python.org")

`

``

635

`+

self.assertEqual(p.path, b"/javascript:alert('msg')/")

`

``

636

`+

self.assertEqual(p.query, b"query=something")

`

``

637

`+

self.assertEqual(p.fragment, b"fragment")

`

``

638

`+

self.assertEqual(p.username, None)

`

``

639

`+

self.assertEqual(p.password, None)

`

``

640

`+

self.assertEqual(p.hostname, b"www.python.org")

`

``

641

`+

self.assertEqual(p.port, None)

`

``

642

`+

self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")

`

``

643

+

``

644

`+

# Remove ASCII tabs and newlines from input, for any scheme.

`

``

645

`+

url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"

`

``

646

`+

p = urllib.parse.urlsplit(url)

`

``

647

`+

self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")

`

``

648

+

``

649

`+

# Remove ASCII tabs and newlines from input given as bytes, for any scheme.

`

``

650

`+

url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"

`

``

651

`+

p = urllib.parse.urlsplit(url)

`

``

652

`+

self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")

`

``

653

+

``

654

`+

# Unsafe bytes are not returned from the urlparse cache.

`

``

655

`+

# The scheme is stored after parsing; sending a scheme with unsafe bytes will not return an unsafe scheme.

`

``

656

`+

url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"

`

``

657

`+

scheme = "htt\nps"

`

``

658

`+

for _ in range(2):

`

``

659

`+

p = urllib.parse.urlsplit(url, scheme=scheme)

`

``

660

`+

self.assertEqual(p.scheme, "https")

`

``

661

`+

self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")

`

``

662

+

615

663

`def test_attributes_bad_port(self):

`

616

664

`"""Check handling of invalid ports."""

`

617

665

`for bytes in (False, True):

`