[issue4136] merge json library with simplejson 2.0.3 - Code Review (original) (raw)

OLD

NEW

1 """Implementation of JSONDecoder

1 """Implementation of JSONDecoder

2 """

2 """

3

3

4 import re

4 import re

5 import sys

5 import sys

6 import struct

6

7

7 from json.scanner import Scanner, pattern

8 from json.scanner import make_scanner

8 try:

9 try:

9 from _json import scanstring as c_scanstring

10 from _json import scanstring as c_scanstring

10 except ImportError:

11 except ImportError:

11 c_scanstring = None

12 c_scanstring = None

12

13

13 __all__ = ['JSONDecoder']

14 __all__ = ['JSONDecoder']

14

15

15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

16 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

16

17

17 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')

18 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')

(...skipping 15 matching lines...) Expand all Loading...

33 return fmt.format(msg, lineno, colno, pos)

34 return fmt.format(msg, lineno, colno, pos)

34 endlineno, endcolno = linecol(doc, end)

35 endlineno, endcolno = linecol(doc, end)

35 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'

36 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'

36 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)

37 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)

37

38

38

39

39 _CONSTANTS = {

40 _CONSTANTS = {

40 '-Infinity': NegInf,

41 '-Infinity': NegInf,

41 'Infinity': PosInf,

42 'Infinity': PosInf,

42 'NaN': NaN,

43 'NaN': NaN,

43 'true': True,

44 'false': False,

45 'null': None,

46 }

44 }

47

45

48

49 def JSONConstant(match, context, c=_CONSTANTS):

50 s = match.group(0)

51 fn = getattr(context, 'parse_constant', None)

52 if fn is None:

53 rval = c[s]

54 else:

55 rval = fn(s)

56 return rval, None

57 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)

58

59

60 def JSONNumber(match, context):

61 match = JSONNumber.regex.match(match.string, *match.span())

62 integer, frac, exp = match.groups()

63 if frac or exp:

64 fn = getattr(context, 'parse_float', None) or float

65 res = fn(integer + (frac or '') + (exp or ''))

66 else:

67 fn = getattr(context, 'parse_int', None) or int

68 res = fn(integer)

69 return res, None

70 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)

71

72

73 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

46 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

74 BACKSLASH = {

47 BACKSLASH = {

75 '"': u'"', '\\': u'\\', '/': u'/',

48 '"': u'"', '\\': u'\\', '/': u'/',

76 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',

49 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',

77 }

50 }

78

51

79 DEFAULT_ENCODING = "utf-8"

52 DEFAULT_ENCODING = "utf-8"

80

53

81

54

82 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):

55 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):

83 if encoding is None:

56 if encoding is None:

84 encoding = DEFAULT_ENCODING

57 encoding = DEFAULT_ENCODING

85 chunks = []

58 chunks = []

86 _append = chunks.append

59 _append = chunks.append

87 begin = end - 1

60 begin = end - 1

88 while 1:

61 while 1:

89 chunk = _m(s, end)

62 chunk = _m(s, end)

90 if chunk is None:

63 if chunk is None:

91 raise ValueError(

64 raise ValueError(

92 errmsg("Unterminated string starting at", s, begin))

65 errmsg("Unterminated string starting at", s, begin))

93 end = chunk.end()

66 end = chunk.end()

94 content, terminator = chunk.groups()

67 content, terminator = chunk.groups()

95 if content:

68 if content:

96 if not isinstance(content, unicode):

69 if not isinstance(content, unicode):

97 content = unicode(content, encoding)

70 content = unicode(content, encoding)

98 _append(content)

71 _append(content)

99 if terminator == '"':

72 if terminator == '"':

100 break

73 break

101 elif terminator != '\\':

74 elif terminator != '\\':

102 if strict:

75 if strict:

103 msg = "Invalid control character {0!r} at".format(terminator)

76 msg = "Invalid control character {0!r} at".format(esc)

104 raise ValueError(errmsg(msg, s, end))

77 raise ValueError(errmsg(msg, s, end))

105 else:

78 else:

106 _append(terminator)

79 _append(terminator)

107 continue

80 continue

108 try:

81 try:

109 esc = s[end]

82 esc = s[end]

110 except IndexError:

83 except IndexError:

111 raise ValueError(

84 raise ValueError(

112 errmsg("Unterminated string starting at", s, begin))

85 errmsg("Unterminated string starting at", s, begin))

113 if esc != 'u':

86 if esc != 'u':

114 try:

87 try:

115 m = _b[esc]

88 m = _b[esc]

116 except KeyError:

89 except KeyError:

117 msg = "Invalid \\escape: {0!r}".format(esc)

90 raise ValueError(

118 raise ValueError(errmsg(msg, s, end))

91 errmsg("Invalid \\escape: {0!r}".format(esc), s, end))

119 end += 1

92 end += 1

120 else:

93 else:

121 esc = s[end + 1:end + 5]

94 esc = s[end + 1:end + 5]

122 next_end = end + 5

95 next_end = end + 5

123 msg = "Invalid \\uXXXX escape"

96 msg = "Invalid \\uXXXX escape"

124 try:

97 try:

125 if len(esc) != 4:

98 if len(esc) != 4:

126 raise ValueError

99 raise ValueError

127 uni = int(esc, 16)

100 uni = int(esc, 16)

128 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:

101 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:

129 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"

102 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"

130 if not s[end + 5:end + 7] == '\\u':

103 if not s[end + 5:end + 7] == '\\u':

131 raise ValueError

104 raise ValueError

132 esc2 = s[end + 7:end + 11]

105 esc2 = s[end + 7:end + 11]

133 if len(esc2) != 4:

106 if len(esc2) != 4:

134 raise ValueError

107 raise ValueError

135 uni2 = int(esc2, 16)

108 uni2 = int(esc2, 16)

136 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))

109 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))

137 next_end += 6

110 next_end += 6

138 m = unichr(uni)

111 m = unichr(uni)

139 except ValueError:

112 except ValueError:

140 raise ValueError(errmsg(msg, s, end))

113 raise ValueError(errmsg(msg, s, end))

141 end = next_end

114 end = next_end

142 _append(m)

115 _append(m)

143 return u''.join(chunks), end

116 return u''.join(chunks), end

144

117

145

118

146 # Use speedup

119 # Use speedup if available

147 if c_scanstring is not None:

120 scanstring = c_scanstring or py_scanstring

148 scanstring = c_scanstring

149 else:

150 scanstring = py_scanstring

151

121

152 def JSONString(match, context):

122 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

153 encoding = getattr(context, 'encoding', None)

123 WHITESPACE_STR = ' \t\n\r'

154 strict = getattr(context, 'strict', True)

155 return scanstring(match.string, match.end(), encoding, strict)

156 pattern(r'"')(JSONString)

157

124

158

125 def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):

159 WHITESPACE = re.compile(r'\s*', FLAGS)

160

161

162 def JSONObject(match, context, _w=WHITESPACE.match):

163 pairs = {}

126 pairs = {}

164 s = match.string

165 end = _w(s, match.end()).end()

166 nextchar = s[end:end + 1]

127 nextchar = s[end:end + 1]

167 # Trivial empty object

128 # Normally we expect nextchar == '"'

168 if nextchar == '}':

169 return pairs, end + 1

170 if nextchar != '"':

129 if nextchar != '"':

171 raise ValueError(errmsg("Expecting property name", s, end))

130 if nextchar in _ws:

131 end = _w(s, end).end()

132 nextchar = s[end:end + 1]

133 # Trivial empty object

134 if nextchar == '}':

135 return pairs, end + 1

136 elif nextchar != '"':

137 raise ValueError(errmsg("Expecting property name", s, end))

172 end += 1

138 end += 1

173 encoding = getattr(context, 'encoding', None)

174 strict = getattr(context, 'strict', True)

175 iterscan = JSONScanner.iterscan

176 while True:

139 while True:

177 key, end = scanstring(s, end, encoding, strict)

140 key, end = scanstring(s, end, encoding, strict)

178 end = _w(s, end).end()

141

142 # To skip some function call overhead we optimize the fast paths where

143 # the JSON key separator is ": " or just ":".

179 if s[end:end + 1] != ':':

144 if s[end:end + 1] != ':':

180 raise ValueError(errmsg("Expecting : delimiter", s, end))

145 end = _w(s, end).end()

181 end = _w(s, end + 1).end()

146 if s[end:end + 1] != ':':

147 raise ValueError(errmsg("Expecting : delimiter", s, end))

148

149 end += 1

150

182 try:

151 try:

183 value, end = iterscan(s, idx=end, context=context).next()

152 if s[end] in _ws:

153 end += 1

154 if s[end] in _ws:

155 end = _w(s, end + 1).end()

156 except IndexError:

157 pass

158

159 try:

160 value, end = scan_once(s, end)

184 except StopIteration:

161 except StopIteration:

185 raise ValueError(errmsg("Expecting object", s, end))

162 raise ValueError(errmsg("Expecting object", s, end))

186 pairs[key] = value

163 pairs[key] = value

187 end = _w(s, end).end()

164

188 nextchar = s[end:end + 1]

165 try:

166 nextchar = s[end]

167 if nextchar in _ws:

168 end = _w(s, end + 1).end()

169 nextchar = s[end]

170 except IndexError:

171 nextchar = ''

189 end += 1

172 end += 1

173

190 if nextchar == '}':

174 if nextchar == '}':

191 break

175 break

192 if nextchar != ',':

176 elif nextchar != ',':

193 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

177 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

194 end = _w(s, end).end()

178

195 nextchar = s[end:end + 1]

179 try:

180 nextchar = s[end]

181 if nextchar in _ws:

182 end += 1

183 nextchar = s[end]

184 if nextchar in _ws:

185 end = _w(s, end + 1).end()

186 nextchar = s[end]

187 except IndexError:

188 nextchar = ''

189

196 end += 1

190 end += 1

197 if nextchar != '"':

191 if nextchar != '"':

198 raise ValueError(errmsg("Expecting property name", s, end - 1))

192 raise ValueError(errmsg("Expecting property name", s, end - 1))

199 object_hook = getattr(context, 'object_hook', None)

193

200 if object_hook is not None:

194 if object_hook is not None:

201 pairs = object_hook(pairs)

195 pairs = object_hook(pairs)

202 return pairs, end

196 return pairs, end

203 pattern(r'{')(JSONObject)

204

197

205

198 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):

206 def JSONArray(match, context, _w=WHITESPACE.match):

207 values = []

199 values = []

208 s = match.string

200 nextchar = s[end:end + 1]

209 end = _w(s, match.end()).end()

201 if nextchar in _ws:

202 end = _w(s, end + 1).end()

203 nextchar = s[end:end + 1]

210 # Look-ahead for trivial empty array

204 # Look-ahead for trivial empty array

211 nextchar = s[end:end + 1]

212 if nextchar == ']':

205 if nextchar == ']':

213 return values, end + 1

206 return values, end + 1

214 iterscan = JSONScanner.iterscan

207 _append = values.append

215 while True:

208 while True:

216 try:

209 try:

217 value, end = iterscan(s, idx=end, context=context).next()

210 value, end = scan_once(s, end)

218 except StopIteration:

211 except StopIteration:

219 raise ValueError(errmsg("Expecting object", s, end))

212 raise ValueError(errmsg("Expecting object", s, end))

220 values.append(value)

213 _append(value)

221 end = _w(s, end).end()

222 nextchar = s[end:end + 1]

214 nextchar = s[end:end + 1]

215 if nextchar in _ws:

216 end = _w(s, end + 1).end()

217 nextchar = s[end:end + 1]

223 end += 1

218 end += 1

224 if nextchar == ']':

219 if nextchar == ']':

225 break

220 break

226 if nextchar != ',':

221 elif nextchar != ',':

227 raise ValueError(errmsg("Expecting , delimiter", s, end))

222 raise ValueError(errmsg("Expecting , delimiter", s, end))

228 end = _w(s, end).end()

223

224 try:

225 if s[end] in _ws:

226 end += 1

227 if s[end] in _ws:

228 end = _w(s, end + 1).end()

229 except IndexError:

230 pass

231

229 return values, end

232 return values, end

230 pattern(r'\[')(JSONArray)

231

232

233 ANYTHING = [

234 JSONObject,

235 JSONArray,

236 JSONString,

237 JSONConstant,

238 JSONNumber,

239 ]

240

241 JSONScanner = Scanner(ANYTHING)

242

243

233

244 class JSONDecoder(object):

234 class JSONDecoder(object):

245 """Simple JSON http://json.org decoder

235 """Simple JSON http://json.org decoder

246

236

247 Performs the following translations in decoding by default:

237 Performs the following translations in decoding by default:

248

238

249 +---------------+-------------------+

239 +---------------+-------------------+

250 | JSON | Python |

240 | JSON | Python |

251 +===============+===================+

241 +===============+===================+

252 | object | dict |

242 | object | dict |

(...skipping 10 matching lines...) Expand all Loading...

263 +---------------+-------------------+

253 +---------------+-------------------+

264 | false | False |

254 | false | False |

265 +---------------+-------------------+

255 +---------------+-------------------+

266 | null | None |

256 | null | None |

267 +---------------+-------------------+

257 +---------------+-------------------+

268

258

269 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as

259 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as

270 their corresponding ``float`` values, which is outside the JSON spec.

260 their corresponding ``float`` values, which is outside the JSON spec.

271 """

261 """

272

262

273 _scanner = Scanner(ANYTHING)

274 __all__ = ['__init__', 'decode', 'raw_decode']

263 __all__ = ['__init__', 'decode', 'raw_decode']

275

264

276 def __init__(self, encoding=None, object_hook=None, parse_float=None,

265 def __init__(self, encoding=None, object_hook=None, parse_float=None,

277 parse_int=None, parse_constant=None, strict=True):

266 parse_int=None, parse_constant=None, strict=True):

278 """``encoding`` determines the encoding used to interpret any ``str``

267 """``encoding`` determines the encoding used to interpret any ``str``

279 objects decoded by this instance (utf-8 by default). It has no

268 objects decoded by this instance (utf-8 by default). It has no

280 effect when decoding ``unicode`` objects.

269 effect when decoding ``unicode`` objects.

281

270

282 Note that currently only encodings that are a superset of ASCII work,

271 Note that currently only encodings that are a superset of ASCII work,

283 strings of other encodings should be passed in as ``unicode``.

272 strings of other encodings should be passed in as ``unicode``.

284

273

285 ``object_hook``, if specified, will be called with the result of

274 ``object_hook``, if specified, will be called with the result

286 every JSON object decoded and its return value will be used in

275 of every JSON object decoded and its return value will be used in

287 place of the given ``dict``. This can be used to provide custom

276 place of the given ``dict``. This can be used to provide custom

288 deserializations (e.g. to support JSON-RPC class hinting).

277 deserializations (e.g. to support JSON-RPC class hinting).

289

278

290 ``parse_float``, if specified, will be called with the string

279 ``parse_float``, if specified, will be called with the string

291 of every JSON float to be decoded. By default this is equivalent to

280 of every JSON float to be decoded. By default this is equivalent to

292 float(num_str). This can be used to use another datatype or parser

281 float(num_str). This can be used to use another datatype or parser

293 for JSON floats (e.g. decimal.Decimal).

282 for JSON floats (e.g. decimal.Decimal).

294

283

295 ``parse_int``, if specified, will be called with the string

284 ``parse_int``, if specified, will be called with the string

296 of every JSON int to be decoded. By default this is equivalent to

285 of every JSON int to be decoded. By default this is equivalent to

297 int(num_str). This can be used to use another datatype or parser

286 int(num_str). This can be used to use another datatype or parser

298 for JSON integers (e.g. float).

287 for JSON integers (e.g. float).

299

288

300 ``parse_constant``, if specified, will be called with one of the

289 ``parse_constant``, if specified, will be called with one of the

301 following strings: -Infinity, Infinity, NaN, null, true, false.

290 following strings: -Infinity, Infinity, NaN.

302 This can be used to raise an exception if invalid JSON numbers

291 This can be used to raise an exception if invalid JSON numbers

303 are encountered.

292 are encountered.

304

293

305 """

294 """

306 self.encoding = encoding

295 self.encoding = encoding

307 self.object_hook = object_hook

296 self.object_hook = object_hook

308 self.parse_float = parse_float

297 self.parse_float = parse_float or float

309 self.parse_int = parse_int

298 self.parse_int = parse_int or int

310 self.parse_constant = parse_constant

299 self.parse_constant = parse_constant or _CONSTANTS.__getitem__

311 self.strict = strict

300 self.strict = strict

301 self.parse_object = JSONObject

302 self.parse_array = JSONArray

303 self.parse_string = scanstring

304 self.scan_once = make_scanner(self)

312

305

313 def decode(self, s, _w=WHITESPACE.match):

306 def decode(self, s, _w=WHITESPACE.match):

314 """

307 """Return the Python representation of ``s`` (a ``str`` or ``unicode``

315 Return the Python representation of ``s`` (a ``str`` or ``unicode``

316 instance containing a JSON document)

308 instance containing a JSON document)

317

309

318 """

310 """

319 obj, end = self.raw_decode(s, idx=_w(s, 0).end())

311 obj, end = self.raw_decode(s, idx=_w(s, 0).end())

320 end = _w(s, end).end()

312 end = _w(s, end).end()

321 if end != len(s):

313 if end != len(s):

322 raise ValueError(errmsg("Extra data", s, end, len(s)))

314 raise ValueError(errmsg("Extra data", s, end, len(s)))

323 return obj

315 return obj

324

316

325 def raw_decode(self, s, **kw):

317 def raw_decode(self, s, idx=0):

326 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning

318 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning

327 with a JSON document) and return a 2-tuple of the Python

319 with a JSON document) and return a 2-tuple of the Python

328 representation and the index in ``s`` where the document ended.

320 representation and the index in ``s`` where the document ended.

329

321

330 This can be used to decode a JSON document from a string that may

322 This can be used to decode a JSON document from a string that may

331 have extraneous data at the end.

323 have extraneous data at the end.

332

324

333 """

325 """

334 kw.setdefault('context', self)

335 try:

326 try:

336 obj, end = self._scanner.iterscan(s, **kw).next()

327 obj, end = self.scan_once(s, idx)

337 except StopIteration:

328 except StopIteration:

338 raise ValueError("No JSON object could be decoded")

329 raise ValueError("No JSON object could be decoded")

339 return obj, end

330 return obj, end

OLD

NEW