(original) (raw)

changeset: 77045:a9d43e21f7d8
branch: 3.2
parent: 77042:895246f1a06a
user: Senthil Kumaran senthil@uthcode.com
date: Sat May 19 08:12:00 2012 +0800
files: Lib/test/test_urlparse.py Lib/urllib/parse.py Misc/NEWS
description:
Issue9374 - Generic parsing of query and fragment portion of urls for any scheme

diff -r 895246f1a06a -r a9d43e21f7d8 Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py Fri May 18 21:51:11 2012 +0300
+++ b/Lib/test/test_urlparse.py Sat May 19 08:12:00 2012 +0800
@@ -636,11 +636,20 @@
                          ('s3', 'foo.com', '/stuff', '', '', ''))
         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                          ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
+        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
+                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
+        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
+                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
+
         # And for bytes...
         self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
                          (b's3', b'foo.com', b'/stuff', b'', b'', b''))
         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
+        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
+        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
 
     def test_mixed_types_rejected(self):
         # Several functions that process either strings or ASCII encoded bytes
diff -r 895246f1a06a -r a9d43e21f7d8 Lib/urllib/parse.py
--- a/Lib/urllib/parse.py Fri May 18 21:51:11 2012 +0300
+++ b/Lib/urllib/parse.py Sat May 19 08:12:00 2012 +0800
@@ -44,16 +44,9 @@
                'imap', 'wais', 'file', 'mms', 'https', 'shttp',
                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
                'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
-                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
 uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
                'mms', '', 'sftp']
-uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
-              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
-uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
-                 'nntp', 'wais', 'https', 'shttp', 'snews',
-                 'file', 'prospero', '']
 
 # Characters valid in scheme names
 scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
@@ -357,9 +350,9 @@
         if (('[' in netloc and ']' not in netloc) or
                 (']' in netloc and '[' not in netloc)):
             raise ValueError("Invalid IPv6 URL")
-    if allow_fragments and scheme in uses_fragment and '#' in url:
+    if allow_fragments and '#' in url:
         url, fragment = url.split('#', 1)
-    if scheme in uses_query and '?' in url:
+    if '?' in url:
         url, query = url.split('?', 1)
     v = SplitResult(scheme, netloc, url, query, fragment)
     _parse_cache[key] = v
diff -r 895246f1a06a -r a9d43e21f7d8 Misc/NEWS
--- a/Misc/NEWS Fri May 18 21:51:11 2012 +0300
+++ b/Misc/NEWS Sat May 19 08:12:00 2012 +0800
@@ -63,6 +63,9 @@
 Library
 -------
 
+- Issue #9374: Generic parsing of query and fragment portions of url for any
+  scheme. Supported both by RFC3986 and RFC2396.
+
 - Issue #14798: Fix the functions in pyclbr to raise an ImportError
   when the first part of a dotted name is not a package. Patch by Xavier de Gaye.
 
/senthil@uthcode.com