cpython: 3cb07925fcb9 (original) (raw)

Mercurial > cpython

changeset 82659:3cb07925fcb9 3.2

Issue #1285086: Get rid of the refcounting hack and speed up urllib.parse.unquote() and urllib.parse.unquote_to_bytes(). [#1285086]

author Serhiy Storchaka <storchaka@gmail.com>
date Thu, 14 Mar 2013 21:31:37 +0200
parents 9b45873e5a68
children 209a9c2de9bd 4b28a6a3eda6
files Lib/urllib/parse.py Misc/NEWS
diffstat 2 files changed, 30 insertions(+), 36 deletions(-)
  Lib/urllib/parse.py | 63
  Misc/NEWS           |  3

line wrap: on

line diff

--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -27,6 +27,7 @@ parsing quirks from older RFCs are retai
 test_urlparse.py provides a good indicator of parsing behavior.
 """
 
+import re
 import sys
 import collections
 
@@ -470,6 +471,10 @@ def urldefrag(url):
         defrag = url
     return _coerce_result(DefragResult(defrag, frag))
 
+_hexdig = '0123456789ABCDEFabcdef'
+_hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])

+
 def unquote_to_bytes(string):
     """unquote_to_bytes('abc%20def') -> b'abc def'."""
     # Note: strings are encoded as UTF-8. This is only an issue if it contains
@@ -480,16 +485,21 @@ def unquote_to_bytes(string):
         return b''
     if isinstance(string, str):
         string = string.encode('utf-8')

+
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(string, encoding='utf-8', errors='replace'):
     """Replace %xx escapes by their single-character equivalent. The optional
@@ -501,39 +511,20 @@ def unquote(string, encoding='utf-8', er
     unquote('abc%20def') -> 'abc def'.
     """

def parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace'):

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -233,6 +233,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #1285086: Get rid of the refcounting hack and speed up