[issue4136] merge json library with simplejson 2.0.3 - Code Review (original) (raw)
OLD
NEW
1 """Implementation of JSONDecoder
1 """Implementation of JSONDecoder
2 """
2 """
3
3
4 import re
4 import re
5 import sys
5 import sys
6 import struct
6
7
7 from json.scanner import Scanner, pattern
8 from json.scanner import make_scanner
8 try:
9 try:
9 from _json import scanstring as c_scanstring
10 from _json import scanstring as c_scanstring
10 except ImportError:
11 except ImportError:
11 c_scanstring = None
12 c_scanstring = None
12
13
13 __all__ = ['JSONDecoder']
14 __all__ = ['JSONDecoder']
14
15
15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16
17
17 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
18 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
(...skipping 15 matching lines...)
33 return fmt.format(msg, lineno, colno, pos)
34 return fmt.format(msg, lineno, colno, pos)
34 endlineno, endcolno = linecol(doc, end)
35 endlineno, endcolno = linecol(doc, end)
35 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
36 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
36 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
37 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
37
38
38
39
39 _CONSTANTS = {
40 _CONSTANTS = {
40 '-Infinity': NegInf,
41 '-Infinity': NegInf,
41 'Infinity': PosInf,
42 'Infinity': PosInf,
42 'NaN': NaN,
43 'NaN': NaN,
43 'true': True,
44 'false': False,
45 'null': None,
46 }
44 }
47
45
48
49 def JSONConstant(match, context, c=_CONSTANTS):
50 s = match.group(0)
51 fn = getattr(context, 'parse_constant', None)
52 if fn is None:
53 rval = c[s]
54 else:
55 rval = fn(s)
56 return rval, None
57 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
58
59
60 def JSONNumber(match, context):
61 match = JSONNumber.regex.match(match.string, *match.span())
62 integer, frac, exp = match.groups()
63 if frac or exp:
64 fn = getattr(context, 'parse_float', None) or float
65 res = fn(integer + (frac or '') + (exp or ''))
66 else:
67 fn = getattr(context, 'parse_int', None) or int
68 res = fn(integer)
69 return res, None
70 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
71
72
73 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
46 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
74 BACKSLASH = {
47 BACKSLASH = {
75 '"': u'"', '\\': u'\\', '/': u'/',
48 '"': u'"', '\\': u'\\', '/': u'/',
76 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
49 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
77 }
50 }
78
51
79 DEFAULT_ENCODING = "utf-8"
52 DEFAULT_ENCODING = "utf-8"
80
53
81
54
82 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
55 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
83 if encoding is None:
56 if encoding is None:
84 encoding = DEFAULT_ENCODING
57 encoding = DEFAULT_ENCODING
85 chunks = []
58 chunks = []
86 _append = chunks.append
59 _append = chunks.append
87 begin = end - 1
60 begin = end - 1
88 while 1:
61 while 1:
89 chunk = _m(s, end)
62 chunk = _m(s, end)
90 if chunk is None:
63 if chunk is None:
91 raise ValueError(
64 raise ValueError(
92 errmsg("Unterminated string starting at", s, begin))
65 errmsg("Unterminated string starting at", s, begin))
93 end = chunk.end()
66 end = chunk.end()
94 content, terminator = chunk.groups()
67 content, terminator = chunk.groups()
95 if content:
68 if content:
96 if not isinstance(content, unicode):
69 if not isinstance(content, unicode):
97 content = unicode(content, encoding)
70 content = unicode(content, encoding)
98 _append(content)
71 _append(content)
99 if terminator == '"':
72 if terminator == '"':
100 break
73 break
101 elif terminator != '\\':
74 elif terminator != '\\':
102 if strict:
75 if strict:
103 msg = "Invalid control character {0!r} at".format(terminator)
76 msg = "Invalid control character {0!r} at".format(esc)
104 raise ValueError(errmsg(msg, s, end))
77 raise ValueError(errmsg(msg, s, end))
105 else:
78 else:
106 _append(terminator)
79 _append(terminator)
107 continue
80 continue
108 try:
81 try:
109 esc = s[end]
82 esc = s[end]
110 except IndexError:
83 except IndexError:
111 raise ValueError(
84 raise ValueError(
112 errmsg("Unterminated string starting at", s, begin))
85 errmsg("Unterminated string starting at", s, begin))
113 if esc != 'u':
86 if esc != 'u':
114 try:
87 try:
115 m = _b[esc]
88 m = _b[esc]
116 except KeyError:
89 except KeyError:
117 msg = "Invalid \\escape: {0!r}".format(esc)
90 raise ValueError(
118 raise ValueError(errmsg(msg, s, end))
91 errmsg("Invalid \\escape: {0!r}".format(esc), s, end))
119 end += 1
92 end += 1
120 else:
93 else:
121 esc = s[end + 1:end + 5]
94 esc = s[end + 1:end + 5]
122 next_end = end + 5
95 next_end = end + 5
123 msg = "Invalid \\uXXXX escape"
96 msg = "Invalid \\uXXXX escape"
124 try:
97 try:
125 if len(esc) != 4:
98 if len(esc) != 4:
126 raise ValueError
99 raise ValueError
127 uni = int(esc, 16)
100 uni = int(esc, 16)
128 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
101 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
129 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
102 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
130 if not s[end + 5:end + 7] == '\\u':
103 if not s[end + 5:end + 7] == '\\u':
131 raise ValueError
104 raise ValueError
132 esc2 = s[end + 7:end + 11]
105 esc2 = s[end + 7:end + 11]
133 if len(esc2) != 4:
106 if len(esc2) != 4:
134 raise ValueError
107 raise ValueError
135 uni2 = int(esc2, 16)
108 uni2 = int(esc2, 16)
136 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
109 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
137 next_end += 6
110 next_end += 6
138 m = unichr(uni)
111 m = unichr(uni)
139 except ValueError:
112 except ValueError:
140 raise ValueError(errmsg(msg, s, end))
113 raise ValueError(errmsg(msg, s, end))
141 end = next_end
114 end = next_end
142 _append(m)
115 _append(m)
143 return u''.join(chunks), end
116 return u''.join(chunks), end
144
117
145
118
146 # Use speedup
119 # Use speedup if available
147 if c_scanstring is not None:
120 scanstring = c_scanstring or py_scanstring
148 scanstring = c_scanstring
149 else:
150 scanstring = py_scanstring
151
121
152 def JSONString(match, context):
122 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
153 encoding = getattr(context, 'encoding', None)
123 WHITESPACE_STR = ' \t\n\r'
154 strict = getattr(context, 'strict', True)
155 return scanstring(match.string, match.end(), encoding, strict)
156 pattern(r'"')(JSONString)
157
124
158
125 def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
159 WHITESPACE = re.compile(r'\s*', FLAGS)
160
161
162 def JSONObject(match, context, _w=WHITESPACE.match):
163 pairs = {}
126 pairs = {}
164 s = match.string
165 end = _w(s, match.end()).end()
166 nextchar = s[end:end + 1]
127 nextchar = s[end:end + 1]
167 # Trivial empty object
128 # Normally we expect nextchar == '"'
168 if nextchar == '}':
169 return pairs, end + 1
170 if nextchar != '"':
129 if nextchar != '"':
171 raise ValueError(errmsg("Expecting property name", s, end))
130 if nextchar in _ws:
131 end = _w(s, end).end()
132 nextchar = s[end:end + 1]
133 # Trivial empty object
134 if nextchar == '}':
135 return pairs, end + 1
136 elif nextchar != '"':
137 raise ValueError(errmsg("Expecting property name", s, end))
172 end += 1
138 end += 1
173 encoding = getattr(context, 'encoding', None)
174 strict = getattr(context, 'strict', True)
175 iterscan = JSONScanner.iterscan
176 while True:
139 while True:
177 key, end = scanstring(s, end, encoding, strict)
140 key, end = scanstring(s, end, encoding, strict)
178 end = _w(s, end).end()
141
142 # To skip some function call overhead we optimize the fast paths where
143 # the JSON key separator is ": " or just ":".
179 if s[end:end + 1] != ':':
144 if s[end:end + 1] != ':':
180 raise ValueError(errmsg("Expecting : delimiter", s, end))
145 end = _w(s, end).end()
181 end = _w(s, end + 1).end()
146 if s[end:end + 1] != ':':
147 raise ValueError(errmsg("Expecting : delimiter", s, end))
148
149 end += 1
150
182 try:
151 try:
183 value, end = iterscan(s, idx=end, context=context).next()
152 if s[end] in _ws:
153 end += 1
154 if s[end] in _ws:
155 end = _w(s, end + 1).end()
156 except IndexError:
157 pass
158
159 try:
160 value, end = scan_once(s, end)
184 except StopIteration:
161 except StopIteration:
185 raise ValueError(errmsg("Expecting object", s, end))
162 raise ValueError(errmsg("Expecting object", s, end))
186 pairs[key] = value
163 pairs[key] = value
187 end = _w(s, end).end()
164
188 nextchar = s[end:end + 1]
165 try:
166 nextchar = s[end]
167 if nextchar in _ws:
168 end = _w(s, end + 1).end()
169 nextchar = s[end]
170 except IndexError:
171 nextchar = ''
189 end += 1
172 end += 1
173
190 if nextchar == '}':
174 if nextchar == '}':
191 break
175 break
192 if nextchar != ',':
176 elif nextchar != ',':
193 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
177 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
194 end = _w(s, end).end()
178
195 nextchar = s[end:end + 1]
179 try:
180 nextchar = s[end]
181 if nextchar in _ws:
182 end += 1
183 nextchar = s[end]
184 if nextchar in _ws:
185 end = _w(s, end + 1).end()
186 nextchar = s[end]
187 except IndexError:
188 nextchar = ''
189
196 end += 1
190 end += 1
197 if nextchar != '"':
191 if nextchar != '"':
198 raise ValueError(errmsg("Expecting property name", s, end - 1))
192 raise ValueError(errmsg("Expecting property name", s, end - 1))
199 object_hook = getattr(context, 'object_hook', None)
193
200 if object_hook is not None:
194 if object_hook is not None:
201 pairs = object_hook(pairs)
195 pairs = object_hook(pairs)
202 return pairs, end
196 return pairs, end
203 pattern(r'{')(JSONObject)
204
197
205
198 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
206 def JSONArray(match, context, _w=WHITESPACE.match):
207 values = []
199 values = []
208 s = match.string
200 nextchar = s[end:end + 1]
209 end = _w(s, match.end()).end()
201 if nextchar in _ws:
202 end = _w(s, end + 1).end()
203 nextchar = s[end:end + 1]
210 # Look-ahead for trivial empty array
204 # Look-ahead for trivial empty array
211 nextchar = s[end:end + 1]
212 if nextchar == ']':
205 if nextchar == ']':
213 return values, end + 1
206 return values, end + 1
214 iterscan = JSONScanner.iterscan
207 _append = values.append
215 while True:
208 while True:
216 try:
209 try:
217 value, end = iterscan(s, idx=end, context=context).next()
210 value, end = scan_once(s, end)
218 except StopIteration:
211 except StopIteration:
219 raise ValueError(errmsg("Expecting object", s, end))
212 raise ValueError(errmsg("Expecting object", s, end))
220 values.append(value)
213 _append(value)
221 end = _w(s, end).end()
222 nextchar = s[end:end + 1]
214 nextchar = s[end:end + 1]
215 if nextchar in _ws:
216 end = _w(s, end + 1).end()
217 nextchar = s[end:end + 1]
223 end += 1
218 end += 1
224 if nextchar == ']':
219 if nextchar == ']':
225 break
220 break
226 if nextchar != ',':
221 elif nextchar != ',':
227 raise ValueError(errmsg("Expecting , delimiter", s, end))
222 raise ValueError(errmsg("Expecting , delimiter", s, end))
228 end = _w(s, end).end()
223
224 try:
225 if s[end] in _ws:
226 end += 1
227 if s[end] in _ws:
228 end = _w(s, end + 1).end()
229 except IndexError:
230 pass
231
229 return values, end
232 return values, end
230 pattern(r'\[')(JSONArray)
231
232
233 ANYTHING = [
234 JSONObject,
235 JSONArray,
236 JSONString,
237 JSONConstant,
238 JSONNumber,
239 ]
240
241 JSONScanner = Scanner(ANYTHING)
242
243
233
244 class JSONDecoder(object):
234 class JSONDecoder(object):
245 """Simple JSON http://json.org decoder
235 """Simple JSON http://json.org decoder
246
236
247 Performs the following translations in decoding by default:
237 Performs the following translations in decoding by default:
248
238
249 +---------------+-------------------+
239 +---------------+-------------------+
250 | JSON | Python |
240 | JSON | Python |
251 +===============+===================+
241 +===============+===================+
252 | object | dict |
242 | object | dict |
(...skipping 10 matching lines...)
263 +---------------+-------------------+
253 +---------------+-------------------+
264 | false | False |
254 | false | False |
265 +---------------+-------------------+
255 +---------------+-------------------+
266 | null | None |
256 | null | None |
267 +---------------+-------------------+
257 +---------------+-------------------+
268
258
269 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
259 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
270 their corresponding ``float`` values, which is outside the JSON spec.
260 their corresponding ``float`` values, which is outside the JSON spec.
271 """
261 """
272
262
273 _scanner = Scanner(ANYTHING)
274 __all__ = ['__init__', 'decode', 'raw_decode']
263 __all__ = ['__init__', 'decode', 'raw_decode']
275
264
276 def __init__(self, encoding=None, object_hook=None, parse_float=None,
265 def __init__(self, encoding=None, object_hook=None, parse_float=None,
277 parse_int=None, parse_constant=None, strict=True):
266 parse_int=None, parse_constant=None, strict=True):
278 """``encoding`` determines the encoding used to interpret any ``str``
267 """``encoding`` determines the encoding used to interpret any ``str``
279 objects decoded by this instance (utf-8 by default). It has no
268 objects decoded by this instance (utf-8 by default). It has no
280 effect when decoding ``unicode`` objects.
269 effect when decoding ``unicode`` objects.
281
270
282 Note that currently only encodings that are a superset of ASCII work,
271 Note that currently only encodings that are a superset of ASCII work,
283 strings of other encodings should be passed in as ``unicode``.
272 strings of other encodings should be passed in as ``unicode``.
284
273
285 ``object_hook``, if specified, will be called with the result of
274 ``object_hook``, if specified, will be called with the result
286 every JSON object decoded and its return value will be used in
275 of every JSON object decoded and its return value will be used in
287 place of the given ``dict``. This can be used to provide custom
276 place of the given ``dict``. This can be used to provide custom
288 deserializations (e.g. to support JSON-RPC class hinting).
277 deserializations (e.g. to support JSON-RPC class hinting).
289
278
290 ``parse_float``, if specified, will be called with the string
279 ``parse_float``, if specified, will be called with the string
291 of every JSON float to be decoded. By default this is equivalent to
280 of every JSON float to be decoded. By default this is equivalent to
292 float(num_str). This can be used to use another datatype or parser
281 float(num_str). This can be used to use another datatype or parser
293 for JSON floats (e.g. decimal.Decimal).
282 for JSON floats (e.g. decimal.Decimal).
294
283
295 ``parse_int``, if specified, will be called with the string
284 ``parse_int``, if specified, will be called with the string
296 of every JSON int to be decoded. By default this is equivalent to
285 of every JSON int to be decoded. By default this is equivalent to
297 int(num_str). This can be used to use another datatype or parser
286 int(num_str). This can be used to use another datatype or parser
298 for JSON integers (e.g. float).
287 for JSON integers (e.g. float).
299
288
300 ``parse_constant``, if specified, will be called with one of the
289 ``parse_constant``, if specified, will be called with one of the
301 following strings: -Infinity, Infinity, NaN, null, true, false.
290 following strings: -Infinity, Infinity, NaN.
302 This can be used to raise an exception if invalid JSON numbers
291 This can be used to raise an exception if invalid JSON numbers
303 are encountered.
292 are encountered.
304
293
305 """
294 """
306 self.encoding = encoding
295 self.encoding = encoding
307 self.object_hook = object_hook
296 self.object_hook = object_hook
308 self.parse_float = parse_float
297 self.parse_float = parse_float or float
309 self.parse_int = parse_int
298 self.parse_int = parse_int or int
310 self.parse_constant = parse_constant
299 self.parse_constant = parse_constant or _CONSTANTS.__getitem__
311 self.strict = strict
300 self.strict = strict
301 self.parse_object = JSONObject
302 self.parse_array = JSONArray
303 self.parse_string = scanstring
304 self.scan_once = make_scanner(self)
312
305
313 def decode(self, s, _w=WHITESPACE.match):
306 def decode(self, s, _w=WHITESPACE.match):
314 """
307 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
315 Return the Python representation of ``s`` (a ``str`` or ``unicode``
316 instance containing a JSON document)
308 instance containing a JSON document)
317
309
318 """
310 """
319 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
311 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
320 end = _w(s, end).end()
312 end = _w(s, end).end()
321 if end != len(s):
313 if end != len(s):
322 raise ValueError(errmsg("Extra data", s, end, len(s)))
314 raise ValueError(errmsg("Extra data", s, end, len(s)))
323 return obj
315 return obj
324
316
325 def raw_decode(self, s, **kw):
317 def raw_decode(self, s, idx=0):
326 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
318 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
327 with a JSON document) and return a 2-tuple of the Python
319 with a JSON document) and return a 2-tuple of the Python
328 representation and the index in ``s`` where the document ended.
320 representation and the index in ``s`` where the document ended.
329
321
330 This can be used to decode a JSON document from a string that may
322 This can be used to decode a JSON document from a string that may
331 have extraneous data at the end.
323 have extraneous data at the end.
332
324
333 """
325 """
334 kw.setdefault('context', self)
335 try:
326 try:
336 obj, end = self._scanner.iterscan(s, **kw).next()
327 obj, end = self.scan_once(s, idx)
337 except StopIteration:
328 except StopIteration:
338 raise ValueError("No JSON object could be decoded")
329 raise ValueError("No JSON object could be decoded")
339 return obj, end
330 return obj, end
OLD
NEW