cpython: bcbad715c2ce (original) (raw)
Mercurial > cpython
changeset 83973:bcbad715c2ce 2.7
#17403: urllib.parse.robotparser normalizes the URLs before adding them to the ruleline. This helps in handling certain types of invalid URLs in a conservative manner. [#17403]
Senthil Kumaran senthil@uthcode.com | |
---|---|
date | Wed, 29 May 2013 05:58:47 -0700 |
parents | ca24bc6a5a4b |
children | cfdb4598c935 |
files | Lib/robotparser.py Lib/test/test_robotparser.py Misc/NEWS |
diffstat | 3 files changed, 17 insertions(+), 0 deletions(-)[+] [-] Lib/robotparser.py 1 Lib/test/test_robotparser.py 12 Misc/NEWS 4 |
line wrap: on
line diff
--- a/Lib/robotparser.py +++ b/Lib/robotparser.py @@ -160,6 +160,7 @@ class RuleLine: if path == '' and not allowance: # an empty value means allow all allowance = True
+ path = urlparse.urlunparse(urlparse.urlparse(path)) self.path = urllib.quote(path) self.allowance = allowance
--- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -228,6 +228,18 @@ bad = ['/some/path'] RobotTest(15, doc, good, bad) +# 16. Empty query (issue #17403). Normalizing the url first. +doc = """ +User-agent: * +Allow: /some/path? +Disallow: /another/path? +""" + +good = ['/some/path?'] +bad = ['/another/path?'] + +RobotTest(16, doc, good, bad) + class NetworkTestCase(unittest.TestCase):
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -15,6 +15,10 @@ Core and Builtins Library ------- +- Issue #17403: urllib.parse.robotparser normalizes the urls before adding to