[issue4136] merge json library with simplejson 2.0.3 - Code Review

OLD

NEW

1 #include "Python.h"

1 #include "Python.h"

2 #include "structmember.h"

2

3

3 #define DEFAULT_ENCODING "utf-8"

4 #define DEFAULT_ENCODING "utf-8"

5 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)

6 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)

7 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)

8 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

9

10 static PyTypeObject PyScannerType;

11 static PyTypeObject PyEncoderType;

12

13 typedef struct _PyScannerObject {

14 PyObject_HEAD

15 PyObject *encoding;

16 PyObject *strict;

17 PyObject *object_hook;

18 PyObject *parse_float;

19 PyObject *parse_int;

20 PyObject *parse_constant;

21 } PyScannerObject;

22

23 static PyMemberDef scanner_members[] = {

24 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},

25 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},

26 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},

27 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},

28 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},

29 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},

30 {NULL}

31 };

32

33 typedef struct _PyEncoderObject {

34 PyObject_HEAD

35 PyObject *markers;

36 PyObject *defaultfn;

37 PyObject *encoder;

38 PyObject *indent;

39 PyObject *key_separator;

40 PyObject *item_separator;

41 PyObject *sort_keys;

42 PyObject *skipkeys;

43 int fast_encode;

44 int allow_nan;

45 } PyEncoderObject;

46

47 static PyMemberDef encoder_members[] = {

48 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},

49 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},

50 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},

51 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},

52 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},

53 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},

54 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},

55 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},

56 {NULL}

57 };

58

59 static Py_ssize_t

60 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);

61 static PyObject *

62 ascii_escape_unicode(PyObject *pystr);

63 static PyObject *

64 ascii_escape_str(PyObject *pystr);

65 static PyObject *

66 py_encode_basestring_ascii(PyObject* self, PyObject *pystr);

67 void init_json(void);

68 static PyObject *

69 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);

70 static PyObject *

71 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);

72 static PyObject *

73 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);

74 static int

75 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);

76 static void

77 scanner_dealloc(PyObject *self);

78 static int

79 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);

80 static void

81 encoder_dealloc(PyObject *self);

82 static int

83 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);

84 static int

85 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);

86 static int

87 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);

88 static PyObject *

89 _encoded_const(PyObject *const);

90 static void

91 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);

92 static PyObject *

93 encoder_encode_string(PyEncoderObject *s, PyObject *obj);

94 static int

95 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);

96 static PyObject *

97 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);

98 static PyObject *

99 encoder_encode_float(PyEncoderObject *s, PyObject *obj);

100

4 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')

101 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')

102 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

103

5 #define MIN_EXPANSION 6

104 #define MIN_EXPANSION 6

6

105

7 #ifdef Py_UNICODE_WIDE

106 #ifdef Py_UNICODE_WIDE

8 #define MAX_EXPANSION (2 * MIN_EXPANSION)

107 #define MAX_EXPANSION (2 * MIN_EXPANSION)

9 #else

108 #else

10 #define MAX_EXPANSION MIN_EXPANSION

109 #define MAX_EXPANSION MIN_EXPANSION

11 #endif

110 #endif

12

111

112 static int

113 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)

114 {

115 *size_ptr = PyInt_AsSsize_t(o);

116 if (*size_ptr == -1 && PyErr_Occurred());

117 return 1;

118 return 0;

119 }

120

121 static PyObject *

122 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)

123 {

124 return PyInt_FromSsize_t(*size_ptr);

125 }

126

13 static Py_ssize_t

127 static Py_ssize_t

14 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)

128 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)

15 {

129 {

16 Py_UNICODE x;

17 output[chars++] = '\\';

130 output[chars++] = '\\';

18 switch (c) {

131 switch (c) {

19 case '\\': output[chars++] = (char)c; break;

132 case '\\': output[chars++] = (char)c; break;

20 case '"': output[chars++] = (char)c; break;

133 case '"': output[chars++] = (char)c; break;

21 case '\b': output[chars++] = 'b'; break;

134 case '\b': output[chars++] = 'b'; break;

22 case '\f': output[chars++] = 'f'; break;

135 case '\f': output[chars++] = 'f'; break;

23 case '\n': output[chars++] = 'n'; break;

136 case '\n': output[chars++] = 'n'; break;

24 case '\r': output[chars++] = 'r'; break;

137 case '\r': output[chars++] = 'r'; break;

25 case '\t': output[chars++] = 't'; break;

138 case '\t': output[chars++] = 't'; break;

26 default:

139 default:

27 #ifdef Py_UNICODE_WIDE

140 #ifdef Py_UNICODE_WIDE

28 if (c >= 0x10000) {

141 if (c >= 0x10000) {

29 /* UTF-16 surrogate pair */

142 /* UTF-16 surrogate pair */

30 Py_UNICODE v = c - 0x10000;

143 Py_UNICODE v = c - 0x10000;

31 c = 0xd800 | ((v >> 10) & 0x3ff);

144 c = 0xd800 | ((v >> 10) & 0x3ff);

32 output[chars++] = 'u';

145 output[chars++] = 'u';

33 x = (c & 0xf000) >> 12;

146 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];

34 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

147 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];

35 x = (c & 0x0f00) >> 8;

148 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];

36 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

149 output[chars++] = "0123456789abcdef"[(c ) & 0xf];

37 x = (c & 0x00f0) >> 4;

38 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

39 x = (c & 0x000f);

40 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

41 c = 0xdc00 | (v & 0x3ff);

150 c = 0xdc00 | (v & 0x3ff);

42 output[chars++] = '\\';

151 output[chars++] = '\\';

43 }

152 }

44 #endif

153 #endif

45 output[chars++] = 'u';

154 output[chars++] = 'u';

46 x = (c & 0xf000) >> 12;

155 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];

47 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

156 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];

48 x = (c & 0x0f00) >> 8;

157 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];

49 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

158 output[chars++] = "0123456789abcdef"[(c ) & 0xf];

50 x = (c & 0x00f0) >> 4;

51 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

52 x = (c & 0x000f);

53 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);

54 }

159 }

55 return chars;

160 return chars;

56 }

161 }

57

162

58 static PyObject *

163 static PyObject *

59 ascii_escape_unicode(PyObject *pystr)

164 ascii_escape_unicode(PyObject *pystr)

60 {

165 {

61 Py_ssize_t i;

166 Py_ssize_t i;

62 Py_ssize_t input_chars;

167 Py_ssize_t input_chars;

63 Py_ssize_t output_size;

168 Py_ssize_t output_size;

64 Py_ssize_t chars;

169 Py_ssize_t chars;

65 PyObject *rval;

170 PyObject *rval;

66 char *output;

171 char *output;

67 Py_UNICODE *input_unicode;

172 Py_UNICODE *input_unicode;

68

173

69 input_chars = PyUnicode_GET_SIZE(pystr);

174 input_chars = PyUnicode_GET_SIZE(pystr);

70 input_unicode = PyUnicode_AS_UNICODE(pystr);

175 input_unicode = PyUnicode_AS_UNICODE(pystr);

176

71 /* One char input can be up to 6 chars output, estimate 4 of these */

177 /* One char input can be up to 6 chars output, estimate 4 of these */

72 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;

178 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;

73 rval = PyString_FromStringAndSize(NULL, output_size);

179 rval = PyString_FromStringAndSize(NULL, output_size);

74 if (rval == NULL) {

180 if (rval == NULL) {

75 return NULL;

181 return NULL;

76 }

182 }

77 output = PyString_AS_STRING(rval);

183 output = PyString_AS_STRING(rval);

78 chars = 0;

184 chars = 0;

79 output[chars++] = '"';

185 output[chars++] = '"';

80 for (i = 0; i < input_chars; i++) {

186 for (i = 0; i < input_chars; i++) {

81 Py_UNICODE c = input_unicode[i];

187 Py_UNICODE c = input_unicode[i];

82 if (S_CHAR(c)) {

188 if (S_CHAR(c)) {

83 output[chars++] = (char)c;

189 output[chars++] = (char)c;

84 }

190 }

85 » else {

191 else {

86 chars = ascii_escape_char(c, output, chars);

192 chars = ascii_escape_char(c, output, chars);

87 }

193 }

88 if (output_size - chars < (1 + MAX_EXPANSION)) {

194 if (output_size - chars < (1 + MAX_EXPANSION)) {

89 /* There's more than four, so let's resize by a lot */

195 /* There's more than four, so let's resize by a lot */

90 output_size *= 2;

196 output_size *= 2;

91 /* This is an upper bound */

197 /* This is an upper bound */

92 if (output_size > 2 + (input_chars * MAX_EXPANSION)) {

198 if (output_size > 2 + (input_chars * MAX_EXPANSION)) {

93 output_size = 2 + (input_chars * MAX_EXPANSION);

199 output_size = 2 + (input_chars * MAX_EXPANSION);

94 }

200 }

95 if (_PyString_Resize(&rval, output_size) == -1) {

201 if (_PyString_Resize(&rval, output_size) == -1) {

96 return NULL;

202 return NULL;

97 }

203 }

98 output = PyString_AS_STRING(rval);

204 output = PyString_AS_STRING(rval);

99 }

205 }

100 }

206 }

101 output[chars++] = '"';

207 output[chars++] = '"';

102 if (_PyString_Resize(&rval, chars) == -1) {

208 if (_PyString_Resize(&rval, chars) == -1) {

103 return NULL;

209 return NULL;

104 }

210 }

105 return rval;

211 return rval;

106 }

212 }

107

213

108 static PyObject *

214 static PyObject *

109 ascii_escape_str(PyObject *pystr)

215 ascii_escape_str(PyObject *pystr)

110 {

216 {

111 Py_ssize_t i;

217 Py_ssize_t i;

112 Py_ssize_t input_chars;

218 Py_ssize_t input_chars;

113 Py_ssize_t output_size;

219 Py_ssize_t output_size;

114 Py_ssize_t chars;

220 Py_ssize_t chars;

115 PyObject *rval;

221 PyObject *rval;

116 char *output;

222 char *output;

117 char *input_str;

223 char *input_str;

118

224

119 input_chars = PyString_GET_SIZE(pystr);

225 input_chars = PyString_GET_SIZE(pystr);

120 input_str = PyString_AS_STRING(pystr);

226 input_str = PyString_AS_STRING(pystr);

121 /* One char input can be up to 6 chars output, estimate 4 of these */

227

122 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;

228 /* Fast path for a string that's already ASCII */

229 for (i = 0; i < input_chars; i++) {

230 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];

231 if (!S_CHAR(c)) {

232 /* If we have to escape something, scan the string for unicode */

233 Py_ssize_t j;

234 for (j = i; j < input_chars; j++) {

235 c = (Py_UNICODE)(unsigned char)input_str[j];

236 if (c > 0x7f) {

237 /* We hit a non-ASCII character, bail to unicode mode */

238 PyObject *uni;

239 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");

240 if (uni == NULL) {

241 return NULL;

242 }

243 rval = ascii_escape_unicode(uni);

244 Py_DECREF(uni);

245 return rval;

246 }

247 }

248 break;

249 }

250 }

251

252 if (i == input_chars) {

253 /* Input is already ASCII */

254 output_size = 2 + input_chars;

255 }

256 else {

257 /* One char input can be up to 6 chars output, estimate 4 of these */

258 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;

259 }

123 rval = PyString_FromStringAndSize(NULL, output_size);

260 rval = PyString_FromStringAndSize(NULL, output_size);

124 if (rval == NULL) {

261 if (rval == NULL) {

125 return NULL;

262 return NULL;

126 }

263 }

127 output = PyString_AS_STRING(rval);

264 output = PyString_AS_STRING(rval);

128 chars = 0;

265 output[0] = '"';

129 output[chars++] = '"';

266 ····

130 for (i = 0; i < input_chars; i++) {

267 /* We know that everything up to i is ASCII already */

131 Py_UNICODE c = (Py_UNICODE)input_str[i];

268 chars = i + 1;

269 memcpy(&output[1], input_str, i);

270

271 for (; i < input_chars; i++) {

272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];

132 if (S_CHAR(c)) {

273 if (S_CHAR(c)) {

133 output[chars++] = (char)c;

274 output[chars++] = (char)c;

134 }

275 }

135 » else if (c > 0x7F) {

276 else {

136 /* We hit a non-ASCII character, bail to unicode mode */

137 PyObject *uni;

138 Py_DECREF(rval);

139 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");

140 if (uni == NULL) {

141 return NULL;

142 }

143 rval = ascii_escape_unicode(uni);

144 Py_DECREF(uni);

145 return rval;

146 }

147 » else {

148 chars = ascii_escape_char(c, output, chars);

277 chars = ascii_escape_char(c, output, chars);

149 }

278 }

150 /* An ASCII char can't possibly expand to a surrogate! */

279 /* An ASCII char can't possibly expand to a surrogate! */

151 if (output_size - chars < (1 + MIN_EXPANSION)) {

280 if (output_size - chars < (1 + MIN_EXPANSION)) {

152 /* There's more than four, so let's resize by a lot */

281 /* There's more than four, so let's resize by a lot */

153 output_size *= 2;

282 output_size *= 2;

154 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {

283 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {

155 output_size = 2 + (input_chars * MIN_EXPANSION);

284 output_size = 2 + (input_chars * MIN_EXPANSION);

156 }

285 }

157 if (_PyString_Resize(&rval, output_size) == -1) {

286 if (_PyString_Resize(&rval, output_size) == -1) {

158 return NULL;

287 return NULL;

159 }

288 }

160 output = PyString_AS_STRING(rval);

289 output = PyString_AS_STRING(rval);

161 }

290 }

162 }

291 }

163 output[chars++] = '"';

292 output[chars++] = '"';

164 if (_PyString_Resize(&rval, chars) == -1) {

293 if (_PyString_Resize(&rval, chars) == -1) {

165 return NULL;

294 return NULL;

166 }

295 }

167 return rval;

296 return rval;

168 }

297 }

169

298

170 void

299 static void

171 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)

300 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)

172 {

301 {

173 static PyObject *errmsg_fn = NULL;

302 static PyObject *errmsg_fn = NULL;

174 PyObject *pymsg;

303 PyObject *pymsg;

175 if (errmsg_fn == NULL) {

304 if (errmsg_fn == NULL) {

176 PyObject *decoder = PyImport_ImportModule("json.decoder");

305 PyObject *decoder = PyImport_ImportModule("json.decoder");

177 if (decoder == NULL)

306 if (decoder == NULL)

178 return;

307 return;

179 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");

308 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");

309 Py_DECREF(decoder);

180 if (errmsg_fn == NULL)

310 if (errmsg_fn == NULL)

181 return;

311 return;

182 Py_DECREF(decoder);

183 }

312 }

184 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);

313 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);

185 if (pymsg) {

314 if (pymsg) {

186 PyErr_SetObject(PyExc_ValueError, pymsg);

315 PyErr_SetObject(PyExc_ValueError, pymsg);

187 Py_DECREF(pymsg);

316 Py_DECREF(pymsg);

188 }

317 }

189 /*

190

191 def linecol(doc, pos):

192 lineno = doc.count('\n', 0, pos) + 1

193 if lineno == 1:

194 colno = pos

195 else:

196 colno = pos - doc.rindex('\n', 0, pos)

197 return lineno, colno

198

199 def errmsg(msg, doc, pos, end=None):

200 lineno, colno = linecol(doc, pos)

201 if end is None:

202 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)

203 endlineno, endcolno = linecol(doc, end)

204 return '%s: line %d column %d - line %d column %d (char %d - %d)' % (

205 msg, lineno, colno, endlineno, endcolno, pos, end)

206

207 */

208 }

318 }

209

319

210 static PyObject *

320 static PyObject *

211 join_list_unicode(PyObject *lst)

321 join_list_string(PyObject *lst)

212 {

322 {

213 static PyObject *ustr = NULL;

323 static PyObject *joinfn = NULL;

214 static PyObject *joinstr = NULL;

324 if (joinfn == NULL) {

215 if (ustr == NULL) {

325 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);

216 Py_UNICODE c = 0;

326 if (ustr == NULL)

217 ustr = PyUnicode_FromUnicode(&c, 0);

327 return NULL;

328 ········

329 joinfn = PyObject_GetAttrString(ustr, "join");

330 Py_DECREF(ustr);

331 if (joinfn == NULL)

332 return NULL;

218 }

333 }

219 if (joinstr == NULL) {

334 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);

220 joinstr = PyString_InternFromString("join");

221 }

222 if (joinstr == NULL || ustr == NULL) {

223 return NULL;

224 }

225 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);

226 }

335 }

227

336

228 static PyObject *

337 static PyObject *

229 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict)

338 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {

339 PyObject *tpl;

340 PyObject *pyidx;

341 /*

342 steal a reference to rval, returns (rval, idx)

343 */

344 if (rval == NULL) {

345 return NULL;

346 }

347 pyidx = PyInt_FromSsize_t(idx);

348 if (pyidx == NULL) {

349 Py_DECREF(rval);

350 return NULL;

351 }

352 tpl = PyTuple_New(2);

353 if (tpl == NULL) {

354 Py_DECREF(pyidx);

355 Py_DECREF(rval);

356 return NULL;

357 }

358 PyTuple_SET_ITEM(tpl, 0, rval);

359 PyTuple_SET_ITEM(tpl, 1, pyidx);

360 return tpl;

361 }

362

363 static PyObject *

364 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)

230 {

365 {

231 PyObject *rval;

366 PyObject *rval;

232 Py_ssize_t len = PyString_GET_SIZE(pystr);

367 Py_ssize_t len = PyString_GET_SIZE(pystr);

233 Py_ssize_t begin = end - 1;

368 Py_ssize_t begin = end - 1;

234 Py_ssize_t next = begin;

369 Py_ssize_t next = begin;

370 int has_unicode = 0;

235 char *buf = PyString_AS_STRING(pystr);

371 char *buf = PyString_AS_STRING(pystr);

236 PyObject *chunks = PyList_New(0);

372 PyObject *chunks = PyList_New(0);

237 if (chunks == NULL) {

373 if (chunks == NULL) {

238 goto bail;

374 goto bail;

239 }

375 }

240 if (end < 0 || len <= end) {

376 if (end < 0 || len <= end) {

241 PyErr_SetString(PyExc_ValueError, "end is out of bounds");

377 PyErr_SetString(PyExc_ValueError, "end is out of bounds");

242 goto bail;

378 goto bail;

243 }

379 }

244 while (1) {

380 while (1) {

245 /* Find the end of the string or the next escape */

381 /* Find the end of the string or the next escape */

246 Py_UNICODE c = 0;

382 Py_UNICODE c = 0;

247 PyObject *chunk = NULL;

383 PyObject *chunk = NULL;

248 for (next = end; next < len; next++) {

384 for (next = end; next < len; next++) {

249 c = buf[next];

385 c = (unsigned char)buf[next];

250 if (c == '"' || c == '\\') {

386 if (c == '"' || c == '\\') {

251 break;

387 break;

252 }

388 }

253 else if (strict && c <= 0x1f) {

389 else if (strict && c <= 0x1f) {

254 raise_errmsg("Invalid control character at", pystr, next);

390 raise_errmsg("Invalid control character at", pystr, next);

255 goto bail;

391 goto bail;

256 }

392 }

393 else if (c > 0x7f) {

394 has_unicode = 1;

395 }

257 }

396 }

258 if (!(c == '"' || c == '\\')) {

397 if (!(c == '"' || c == '\\')) {

259 raise_errmsg("Unterminated string starting at", pystr, begin);

398 raise_errmsg("Unterminated string starting at", pystr, begin);

260 goto bail;

399 goto bail;

261 }

400 }

262 /* Pick up this chunk if it's not zero length */

401 /* Pick up this chunk if it's not zero length */

263 if (next != end) {

402 if (next != end) {

264 PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);

403 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);

265 if (strchunk == NULL) {

404 if (strchunk == NULL) {

266 goto bail;

405 goto bail;

267 }

406 }

268 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);

407 if (has_unicode) {

269 Py_DECREF(strchunk);

408 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);

270 if (chunk == NULL) {

409 Py_DECREF(strchunk);

271 goto bail;

410 if (chunk == NULL) {

411 goto bail;

412 }

413 }

414 else {

415 chunk = strchunk;

272 }

416 }

273 if (PyList_Append(chunks, chunk)) {

417 if (PyList_Append(chunks, chunk)) {

274 Py_DECREF(chunk);

418 Py_DECREF(chunk);

275 goto bail;

419 goto bail;

276 }

420 }

277 Py_DECREF(chunk);

421 Py_DECREF(chunk);

278 }

422 }

279 next++;

423 next++;

280 if (c == '"') {

424 if (c == '"') {

281 end = next;

425 end = next;

(...skipping 26 matching lines...)

308 else {

452 else {

309 c = 0;

453 c = 0;

310 next++;

454 next++;

311 end = next + 4;

455 end = next + 4;

312 if (end >= len) {

456 if (end >= len) {

313 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);

457 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);

314 goto bail;

458 goto bail;

315 }

459 }

316 /* Decode 4 hex digits */

460 /* Decode 4 hex digits */

317 for (; next < end; next++) {

461 for (; next < end; next++) {

318 Py_ssize_t shl = (end - next - 1) << 2;

319 Py_UNICODE digit = buf[next];

462 Py_UNICODE digit = buf[next];

463 c <<= 4;

320 switch (digit) {

464 switch (digit) {

321 case '0': case '1': case '2': case '3': case '4':

465 case '0': case '1': case '2': case '3': case '4':

322 case '5': case '6': case '7': case '8': case '9':

466 case '5': case '6': case '7': case '8': case '9':

323 c |= (digit - '0') << shl; break;

467 c |= (digit - '0'); break;

324 case 'a': case 'b': case 'c': case 'd': case 'e':

468 case 'a': case 'b': case 'c': case 'd': case 'e':

325 case 'f':

469 case 'f':

326 c |= (digit - 'a' + 10) << shl; break;

470 c |= (digit - 'a' + 10); break;

327 case 'A': case 'B': case 'C': case 'D': case 'E':

471 case 'A': case 'B': case 'C': case 'D': case 'E':

328 case 'F':

472 case 'F':

329 c |= (digit - 'A' + 10) << shl; break;

473 c |= (digit - 'A' + 10); break;

330 default:

474 default:

331 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

475 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

332 goto bail;

476 goto bail;

333 }

477 }

334 }

478 }

335 #ifdef Py_UNICODE_WIDE

479 #ifdef Py_UNICODE_WIDE

336 /* Surrogate pair */

480 /* Surrogate pair */

337 if (c >= 0xd800 && c <= 0xdbff) {

481 if ((c & 0xfc00) == 0xd800) {

338 Py_UNICODE c2 = 0;

482 Py_UNICODE c2 = 0;

339 if (end + 6 >= len) {

483 if (end + 6 >= len) {

340 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,

484 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

341 end - 5);

485 goto bail;

342 }

486 }

343 if (buf[next++] != '\\' || buf[next++] != 'u') {

487 if (buf[next++] != '\\' || buf[next++] != 'u') {

344 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,

488 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

345 end - 5);

489 goto bail;

346 }

490 }

347 end += 6;

491 end += 6;

348 /* Decode 4 hex digits */

492 /* Decode 4 hex digits */

349 for (; next < end; next++) {

493 for (; next < end; next++) {

350 Py_ssize_t shl = (end - next - 1) << 2;

494 c2 <<= 4;

351 Py_UNICODE digit = buf[next];

495 Py_UNICODE digit = buf[next];

352 switch (digit) {

496 switch (digit) {

353 case '0': case '1': case '2': case '3': case '4':

497 case '0': case '1': case '2': case '3': case '4':

354 case '5': case '6': case '7': case '8': case '9':

498 case '5': case '6': case '7': case '8': case '9':

355 c2 |= (digit - '0') << shl; break;

499 c2 |= (digit - '0'); break;

356 case 'a': case 'b': case 'c': case 'd': case 'e':

500 case 'a': case 'b': case 'c': case 'd': case 'e':

357 case 'f':

501 case 'f':

358 c2 |= (digit - 'a' + 10) << shl; break;

502 c2 |= (digit - 'a' + 10); break;

359 case 'A': case 'B': case 'C': case 'D': case 'E':

503 case 'A': case 'B': case 'C': case 'D': case 'E':

360 case 'F':

504 case 'F':

361 c2 |= (digit - 'A' + 10) << shl; break;

505 c2 |= (digit - 'A' + 10); break;

362 default:

506 default:

363 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

507 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

364 goto bail;

508 goto bail;

365 }

509 }

366 }

510 }

511 if ((c2 & 0xfc00) != 0xdc00) {

512 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

513 goto bail;

514 }

367 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));

515 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));

368 }

516 }

517 else if ((c & 0xfc00) == 0xdc00) {

518 raise_errmsg("Unpaired low surrogate", pystr, end - 5);

519 goto bail;

520 }

369 #endif

521 #endif

370 }

522 }

371 chunk = PyUnicode_FromUnicode(&c, 1);

523 if (c > 0x7f) {

372 if (chunk == NULL) {

524 has_unicode = 1;

373 goto bail;

525 }

526 if (has_unicode) {

527 chunk = PyUnicode_FromUnicode(&c, 1);

528 if (chunk == NULL) {

529 goto bail;

530 }

531 }

532 else {

533 char c_char = Py_CHARMASK(c);

534 chunk = PyString_FromStringAndSize(&c_char, 1);

535 if (chunk == NULL) {

536 goto bail;

537 }

374 }

538 }

375 if (PyList_Append(chunks, chunk)) {

539 if (PyList_Append(chunks, chunk)) {

376 Py_DECREF(chunk);

540 Py_DECREF(chunk);

377 goto bail;

541 goto bail;

378 }

542 }

379 Py_DECREF(chunk);

543 Py_DECREF(chunk);

380 }

544 }

381

545

382 rval = join_list_unicode(chunks);

546 rval = join_list_string(chunks);

383 if (rval == NULL) {

547 if (rval == NULL) {

384 goto bail;

548 goto bail;

385 }

549 }

386 Py_CLEAR(chunks);

550 Py_CLEAR(chunks);

387 return Py_BuildValue("(Nn)", rval, end);

551 *next_end_ptr = end;

552 return rval;

388 bail:

553 bail:

554 *next_end_ptr = -1;

389 Py_XDECREF(chunks);

555 Py_XDECREF(chunks);

390 return NULL;

556 return NULL;

391 }

557 }

392

558

393

559

394 static PyObject *

560 static PyObject *

395 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)

561 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)

396 {

562 {

397 PyObject *rval;

563 PyObject *rval;

398 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);

564 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);

399 Py_ssize_t begin = end - 1;

565 Py_ssize_t begin = end - 1;

400 Py_ssize_t next = begin;

566 Py_ssize_t next = begin;

401 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);

567 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);

402 PyObject *chunks = PyList_New(0);

568 PyObject *chunks = PyList_New(0);

403 if (chunks == NULL) {

569 if (chunks == NULL) {

404 goto bail;

570 goto bail;

405 }

571 }

(...skipping 63 matching lines...)

469 else {

635 else {

470 c = 0;

636 c = 0;

471 next++;

637 next++;

472 end = next + 4;

638 end = next + 4;

473 if (end >= len) {

639 if (end >= len) {

474 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);

640 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);

475 goto bail;

641 goto bail;

476 }

642 }

477 /* Decode 4 hex digits */

643 /* Decode 4 hex digits */

478 for (; next < end; next++) {

644 for (; next < end; next++) {

479 Py_ssize_t shl = (end - next - 1) << 2;

480 Py_UNICODE digit = buf[next];

645 Py_UNICODE digit = buf[next];

646 c <<= 4;

481 switch (digit) {

647 switch (digit) {

482 case '0': case '1': case '2': case '3': case '4':

648 case '0': case '1': case '2': case '3': case '4':

483 case '5': case '6': case '7': case '8': case '9':

649 case '5': case '6': case '7': case '8': case '9':

484 c |= (digit - '0') << shl; break;

650 c |= (digit - '0'); break;

485 case 'a': case 'b': case 'c': case 'd': case 'e':

651 case 'a': case 'b': case 'c': case 'd': case 'e':

486 case 'f':

652 case 'f':

487 c |= (digit - 'a' + 10) << shl; break;

653 c |= (digit - 'a' + 10); break;

488 case 'A': case 'B': case 'C': case 'D': case 'E':

654 case 'A': case 'B': case 'C': case 'D': case 'E':

489 case 'F':

655 case 'F':

490 c |= (digit - 'A' + 10) << shl; break;

656 c |= (digit - 'A' + 10); break;

491 default:

657 default:

492 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

658 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

493 goto bail;

659 goto bail;

494 }

660 }

495 }

661 }

496 #ifdef Py_UNICODE_WIDE

662 #ifdef Py_UNICODE_WIDE

497 /* Surrogate pair */

663 /* Surrogate pair */

498 if (c >= 0xd800 && c <= 0xdbff) {

664 if ((c & 0xfc00) == 0xd800) {

499 Py_UNICODE c2 = 0;

665 Py_UNICODE c2 = 0;

500 if (end + 6 >= len) {

666 if (end + 6 >= len) {

501 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,

667 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

502 end - 5);

668 goto bail;

503 }

669 }

504 if (buf[next++] != '\\' || buf[next++] != 'u') {

670 if (buf[next++] != '\\' || buf[next++] != 'u') {

505 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,

671 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

506 end - 5);

672 goto bail;

507 }

673 }

508 end += 6;

674 end += 6;

509 /* Decode 4 hex digits */

675 /* Decode 4 hex digits */

510 for (; next < end; next++) {

676 for (; next < end; next++) {

511 Py_ssize_t shl = (end - next - 1) << 2;

677 c2 <<= 4;

512 Py_UNICODE digit = buf[next];

678 Py_UNICODE digit = buf[next];

513 switch (digit) {

679 switch (digit) {

514 case '0': case '1': case '2': case '3': case '4':

680 case '0': case '1': case '2': case '3': case '4':

515 case '5': case '6': case '7': case '8': case '9':

681 case '5': case '6': case '7': case '8': case '9':

516 c2 |= (digit - '0') << shl; break;

682 c2 |= (digit - '0'); break;

517 case 'a': case 'b': case 'c': case 'd': case 'e':

683 case 'a': case 'b': case 'c': case 'd': case 'e':

518 case 'f':

684 case 'f':

519 c2 |= (digit - 'a' + 10) << shl; break;

685 c2 |= (digit - 'a' + 10); break;

520 case 'A': case 'B': case 'C': case 'D': case 'E':

686 case 'A': case 'B': case 'C': case 'D': case 'E':

521 case 'F':

687 case 'F':

522 c2 |= (digit - 'A' + 10) << shl; break;

688 c2 |= (digit - 'A' + 10); break;

523 default:

689 default:

524 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

690 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);

525 goto bail;

691 goto bail;

526 }

692 }

527 }

693 }

694 if ((c2 & 0xfc00) != 0xdc00) {

695 raise_errmsg("Unpaired high surrogate", pystr, end - 5);

696 goto bail;

697 }

528 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));

698 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));

529 }

699 }

700 else if ((c & 0xfc00) == 0xdc00) {

701 raise_errmsg("Unpaired low surrogate", pystr, end - 5);

702 goto bail;

703 }

530 #endif

704 #endif

531 }

705 }

532 chunk = PyUnicode_FromUnicode(&c, 1);

706 chunk = PyUnicode_FromUnicode(&c, 1);

533 if (chunk == NULL) {

707 if (chunk == NULL) {

534 goto bail;

708 goto bail;

535 }

709 }

536 if (PyList_Append(chunks, chunk)) {

710 if (PyList_Append(chunks, chunk)) {

537 Py_DECREF(chunk);

711 Py_DECREF(chunk);

538 goto bail;

712 goto bail;

539 }

713 }

540 Py_DECREF(chunk);

714 Py_DECREF(chunk);

541 }

715 }

542

716

543 rval = join_list_unicode(chunks);

717 rval = join_list_string(chunks);

544 if (rval == NULL) {

718 if (rval == NULL) {

545 goto bail;

719 goto bail;

546 }

720 }

547 Py_CLEAR(chunks);

721 Py_DECREF(chunks);

548 return Py_BuildValue("(Nn)", rval, end);

722 *next_end_ptr = end;

723 return rval;

549 bail:

724 bail:

725 *next_end_ptr = -1;

550 Py_XDECREF(chunks);

726 Py_XDECREF(chunks);

551 return NULL;

727 return NULL;

552 }

728 }

553

729

554 PyDoc_STRVAR(pydoc_scanstring,

730 PyDoc_STRVAR(pydoc_scanstring,

555 "scanstring(basestring, end, encoding) -> (str, end)\n");

731 "scanstring(basestring, end, encoding) -> (str, end)\n"

732 "\n"

733 "..."

734 );

556

735

557 static PyObject *

736 static PyObject *

558 py_scanstring(PyObject* self, PyObject *args)

737 py_scanstring(PyObject* self, PyObject *args)

559 {

738 {

560 PyObject *pystr;

739 PyObject *pystr;

740 PyObject *rval;

561 Py_ssize_t end;

741 Py_ssize_t end;

742 Py_ssize_t next_end = -1;

562 char *encoding = NULL;

743 char *encoding = NULL;

563 int strict = 0;

744 int strict = 0;

564 if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &st rict)) {

745 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsi ze_t, &end, &encoding, &strict)) {

565 return NULL;

746 return NULL;

566 }

747 }

567 if (encoding == NULL) {

748 if (encoding == NULL) {

568 encoding = DEFAULT_ENCODING;

749 encoding = DEFAULT_ENCODING;

569 }

750 }

570 if (PyString_Check(pystr)) {

751 if (PyString_Check(pystr)) {

571 return scanstring_str(pystr, end, encoding, strict);

752 rval = scanstring_str(pystr, end, encoding, strict, &next_end);

572 }

753 }

573 else if (PyUnicode_Check(pystr)) {

754 else if (PyUnicode_Check(pystr)) {

574 return scanstring_unicode(pystr, end, strict);

755 rval = scanstring_unicode(pystr, end, strict, &next_end);

575 }

756 }

576 else {

757 else {

577 PyErr_Format(PyExc_TypeError,

758 PyErr_Format(PyExc_TypeError,

578 "first argument must be a string or unicode, not %.80s",

759 "first argument must be a string, not %.80s",

579 Py_TYPE(pystr)->tp_name);

760 Py_TYPE(pystr)->tp_name);

580 return NULL;

761 return NULL;

581 }

762 }

763 return _build_rval_index_tuple(rval, next_end);

582 }

764 }

583

765

584 PyDoc_STRVAR(pydoc_encode_basestring_ascii,

766 PyDoc_STRVAR(pydoc_encode_basestring_ascii,

585 "encode_basestring_ascii(basestring) -> str\n");

767 "encode_basestring_ascii(basestring) -> str\n"

768 "\n"

769 "..."

770 );

586

771

587 static PyObject *

772 static PyObject *

588 py_encode_basestring_ascii(PyObject* self, PyObject *pystr)

773 py_encode_basestring_ascii(PyObject* self, PyObject *pystr)

589 {

774 {

590 /* METH_O */

775 /* METH_O */

591 if (PyString_Check(pystr)) {

776 if (PyString_Check(pystr)) {

592 return ascii_escape_str(pystr);

777 return ascii_escape_str(pystr);

593 }

778 }

594 else if (PyUnicode_Check(pystr)) {

779 else if (PyUnicode_Check(pystr)) {

595 return ascii_escape_unicode(pystr);

780 return ascii_escape_unicode(pystr);

596 }

781 }

597 else {

782 else {

598 PyErr_Format(PyExc_TypeError,

783 PyErr_Format(PyExc_TypeError,

599 "first argument must be a string or unicode, not %.80s",

784 "first argument must be a string, not %.80s",

600 Py_TYPE(pystr)->tp_name);

785 Py_TYPE(pystr)->tp_name);

601 return NULL;

786 return NULL;

602 }

787 }

603 }

788 }

604

789

790 static void

791 scanner_dealloc(PyObject *self)

792 {

793 PyScannerObject *s;

794 assert(PyScanner_Check(self));

795 s = (PyScannerObject *)self;

796 Py_CLEAR(s->encoding);

797 Py_CLEAR(s->strict);

798 Py_CLEAR(s->object_hook);

799 Py_CLEAR(s->parse_float);

800 Py_CLEAR(s->parse_int);

801 Py_CLEAR(s->parse_constant);

802 self->ob_type->tp_free(self);

803 }

804

805 static PyObject *

806 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ t *next_idx_ptr) {

807 char *str = PyString_AS_STRING(pystr);

808 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;

809 PyObject *rval = PyDict_New();

810 PyObject *key = NULL;

811 PyObject *val = NULL;

812 char *encoding = PyString_AS_STRING(s->encoding);

813 int strict = PyObject_IsTrue(s->strict);

814 Py_ssize_t next_idx;

815 if (rval == NULL)

816 return NULL;

817

818 /* skip whitespace after { */

819 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

820

821 /* only loop if the object is non-empty */

822 if (idx <= end_idx && str[idx] != '}') {

823 while (idx <= end_idx) {

824 /* read key */

825 if (str[idx] != '"') {

826 raise_errmsg("Expecting property name", pystr, idx);

827 goto bail;

828 }

829 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);

830 if (key == NULL)

831 goto bail;

832 idx = next_idx;

833 ············

834 /* skip whitespace between key and : delimiter, read :, skip whitesp ace */

835 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

836 if (idx > end_idx || str[idx] != ':') {

837 raise_errmsg("Expecting : delimiter", pystr, idx);

838 goto bail;

839 }

840 idx++;

841 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

842 ············

843 /* read any JSON data type */

844 val = scan_once_str(s, pystr, idx, &next_idx);

845 if (val == NULL)

846 goto bail;

847

848 if (PyDict_SetItem(rval, key, val) == -1)

849 goto bail;

850

851 Py_CLEAR(key);

852 Py_CLEAR(val);

853 idx = next_idx;

854 ············

855 /* skip whitespace before } or , */

856 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

857

858 /* bail if the object is closed or we didn't get the , delimiter */

859 if (idx > end_idx) break;

860 if (str[idx] == '}') {

861 break;

862 }

863 else if (str[idx] != ',') {

864 raise_errmsg("Expecting , delimiter", pystr, idx);

865 goto bail;

866 }

867 idx++;

868

869 /* skip whitespace after , delimiter */

870 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

871 }

872 }

873 /* verify that idx < end_idx, str[idx] should be '}' */

874 if (idx > end_idx || str[idx] != '}') {

875 raise_errmsg("Expecting object", pystr, end_idx);

876 goto bail;

877 }

878 /* if object_hook is not None: rval = object_hook(rval) */

879 if (s->object_hook != Py_None) {

880 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);

881 if (val == NULL)

882 goto bail;

883 Py_DECREF(rval);

884 rval = val;

885 val = NULL;

886 }

887 *next_idx_ptr = idx + 1;

888 return rval;

889 bail:

890 Py_XDECREF(key);

891 Py_XDECREF(val);

892 Py_DECREF(rval);

893 return NULL;····

894 }

895

896 static PyObject *

897 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss ize_t *next_idx_ptr) {

898 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);

899 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;

900 PyObject *val = NULL;

901 PyObject *rval = PyDict_New();

902 PyObject *key = NULL;

903 int strict = PyObject_IsTrue(s->strict);

904 Py_ssize_t next_idx;

905 if (rval == NULL)

906 return NULL;

907 ····

908 /* skip whitespace after { */

909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

910

911 /* only loop if the object is non-empty */

912 if (idx <= end_idx && str[idx] != '}') {

913 while (idx <= end_idx) {

914 /* read key */

915 if (str[idx] != '"') {

916 raise_errmsg("Expecting property name", pystr, idx);

917 goto bail;

918 }

919 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);

920 if (key == NULL)

921 goto bail;

922 idx = next_idx;

923

924 /* skip whitespace between key and : delimiter, read :, skip whitesp ace */

925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

926 if (idx > end_idx || str[idx] != ':') {

927 raise_errmsg("Expecting : delimiter", pystr, idx);

928 goto bail;

929 }

930 idx++;

931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

932 ············

933 /* read any JSON term */

934 val = scan_once_unicode(s, pystr, idx, &next_idx);

935 if (val == NULL)

936 goto bail;

937

938 if (PyDict_SetItem(rval, key, val) == -1)

939 goto bail;

940

941 Py_CLEAR(key);

942 Py_CLEAR(val);

943 idx = next_idx;

944

945 /* skip whitespace before } or , */

946 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

947

948 /* bail if the object is closed or we didn't get the , delimiter */

949 if (idx > end_idx) break;

950 if (str[idx] == '}') {

951 break;

952 }

953 else if (str[idx] != ',') {

954 raise_errmsg("Expecting , delimiter", pystr, idx);

955 goto bail;

956 }

957 idx++;

958

959 /* skip whitespace after , delimiter */

960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

961 }

962 }

963

964 /* verify that idx < end_idx, str[idx] should be '}' */

965 if (idx > end_idx || str[idx] != '}') {

966 raise_errmsg("Expecting object", pystr, end_idx);

967 goto bail;

968 }

969

970 /* if object_hook is not None: rval = object_hook(rval) */

971 if (s->object_hook != Py_None) {

972 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);

973 if (val == NULL)

974 goto bail;

975 Py_DECREF(rval);

976 rval = val;

977 val = NULL;

978 }

979 *next_idx_ptr = idx + 1;

980 return rval;

981 bail:

982 Py_XDECREF(key);

983 Py_XDECREF(val);

984 Py_DECREF(rval);

985 return NULL;

986 }

987

988 static PyObject *

989 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {

990 char *str = PyString_AS_STRING(pystr);

991 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;

992 PyObject *val = NULL;

993 PyObject *rval = PyList_New(0);

994 Py_ssize_t next_idx;

995 if (rval == NULL)

996 return NULL;

997

998 /* skip whitespace after [ */

999 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1000

1001 /* only loop if the array is non-empty */

1002 if (idx <= end_idx && str[idx] != ']') {

1003 while (idx <= end_idx) {

1004

1005 /* read any JSON term and de-tuplefy the (rval, idx) */

1006 val = scan_once_str(s, pystr, idx, &next_idx);

1007 if (val == NULL)

1008 goto bail;

1009

1010 if (PyList_Append(rval, val) == -1)

1011 goto bail;

1012

1013 Py_CLEAR(val);

1014 idx = next_idx;

1015 ············

1016 /* skip whitespace between term and , */

1017 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1018

1019 /* bail if the array is closed or we didn't get the , delimiter */

1020 if (idx > end_idx) break;

1021 if (str[idx] == ']') {

1022 break;

1023 }

1024 else if (str[idx] != ',') {

1025 raise_errmsg("Expecting , delimiter", pystr, idx);

1026 goto bail;

1027 }

1028 idx++;

1029 ············

1030 /* skip whitespace after , */

1031 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1032 }

1033 }

1034

1035 /* verify that idx < end_idx, str[idx] should be ']' */

1036 if (idx > end_idx || str[idx] != ']') {

1037 raise_errmsg("Expecting object", pystr, end_idx);

1038 goto bail;

1039 }

1040 *next_idx_ptr = idx + 1;

1041 return rval;

1042 bail:

1043 Py_XDECREF(val);

1044 Py_DECREF(rval);

1045 return NULL;

1046 }

1047

1048 static PyObject *

1049 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi ze_t *next_idx_ptr) {

1050 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);

1051 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;

1052 PyObject *val = NULL;

1053 PyObject *rval = PyList_New(0);

1054 Py_ssize_t next_idx;

1055 if (rval == NULL)

1056 return NULL;

1057

1058 /* skip whitespace after [ */

1059 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1060

1061 /* only loop if the array is non-empty */

1062 if (idx <= end_idx && str[idx] != ']') {

1063 while (idx <= end_idx) {

1064

1065 /* read any JSON term */

1066 val = scan_once_unicode(s, pystr, idx, &next_idx);

1067 if (val == NULL)

1068 goto bail;

1069

1070 if (PyList_Append(rval, val) == -1)

1071 goto bail;

1072

1073 Py_CLEAR(val);

1074 idx = next_idx;

1075

1076 /* skip whitespace between term and , */

1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1078

1079 /* bail if the array is closed or we didn't get the , delimiter */

1080 if (idx > end_idx) break;

1081 if (str[idx] == ']') {

1082 break;

1083 }

1084 else if (str[idx] != ',') {

1085 raise_errmsg("Expecting , delimiter", pystr, idx);

1086 goto bail;

1087 }

1088 idx++;

1089

1090 /* skip whitespace after , */

1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;

1092 }

1093 }

1094

1095 /* verify that idx < end_idx, str[idx] should be ']' */

1096 if (idx > end_idx || str[idx] != ']') {

1097 raise_errmsg("Expecting object", pystr, end_idx);

1098 goto bail;

1099 }

1100 *next_idx_ptr = idx + 1;

1101 return rval;

1102 bail:

1103 Py_XDECREF(val);

1104 Py_DECREF(rval);

1105 return NULL;

1106 }

1107

1108 static PyObject *

1109 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * next_idx_ptr) {

1110 PyObject *cstr;

1111 PyObject *rval;

1112 /* constant is "NaN", "Infinity", or "-Infinity" */

1113 cstr = PyString_InternFromString(constant);

1114 if (cstr == NULL)

1115 return NULL;

1116

1117 /* rval = parse_constant(constant) */

1118 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);

1119 idx += PyString_GET_SIZE(cstr);

1120 Py_DECREF(cstr);

1121 *next_idx_ptr = idx;

1122 return rval;

1123 }

1124

1125 static PyObject *

1126 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz e_t *next_idx_ptr) {

1127 char *str = PyString_AS_STRING(pystr);

1128 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;

1129 Py_ssize_t idx = start;

1130 int is_float = 0;

1131 PyObject *rval;

1132 PyObject *numstr;

1133 ····

1134 /* read a sign if it's there, make sure it's not the end of the string */

1135 if (str[idx] == '-') {

1136 idx++;

1137 if (idx > end_idx) {

1138 PyErr_SetNone(PyExc_StopIteration);

1139 return NULL;

1140 }

1141 }

1142

1143 /* read as many integer digits as we find as long as it doesn't start with 0 */

1144 if (str[idx] >= '1' && str[idx] <= '9') {

1145 idx++;

1146 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1147 }

1148 /* if it starts with 0 we only expect one integer digit */

1149 else if (str[idx] == '0') {

1150 idx++;

1151 }

1152 /* no integer digits, error */

1153 else {

1154 PyErr_SetNone(PyExc_StopIteration);

1155 return NULL;

1156 }

1157 ····

1158 /* if the next char is '.' followed by a digit then read all float digits */

1159 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {

1160 is_float = 1;

1161 idx += 2;

1162 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1163 }

1164

1165 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack ) */

1166 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {

1167

1168 /* save the index of the 'e' or 'E' just in case we need to backtrack */

1169 Py_ssize_t e_start = idx;

1170 idx++;

1171

1172 /* read an exponent sign if present */

1173 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;

1174

1175 /* read all digits */

1176 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1177

1178 /* if we got a digit, then parse as float. if not, backtrack */

1179 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {

1180 is_float = 1;

1181 }

1182 else {

1183 idx = e_start;

1184 }

1185 }

1186 ····

1187 /* copy the section we determined to be a number */

1188 numstr = PyString_FromStringAndSize(&str[start], idx - start);

1189 if (numstr == NULL)

1190 return NULL;

1191 if (is_float) {

1192 /* parse as a float using a fast path if available, otherwise call user defined method */

1193 if (s->parse_float != (PyObject *)&PyFloat_Type) {

1194 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);

1195 }

1196 else {

1197 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr) ));

1198 }

1199 }

1200 else {

1201 /* parse as an int using a fast path if available, otherwise call user d efined method */

1202 if (s->parse_int != (PyObject *)&PyInt_Type) {

1203 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);

1204 }

1205 else {

1206 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);

1207 }

1208 }

1209 Py_DECREF(numstr);

1210 *next_idx_ptr = idx;

1211 return rval;

1212 }

1213

1214 static PyObject *

1215 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ ssize_t *next_idx_ptr) {

1216 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);

1217 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;

1218 Py_ssize_t idx = start;

1219 int is_float = 0;

1220 PyObject *rval;

1221 PyObject *numstr;

1222

1223 /* read a sign if it's there, make sure it's not the end of the string */

1224 if (str[idx] == '-') {

1225 idx++;

1226 if (idx > end_idx) {

1227 PyErr_SetNone(PyExc_StopIteration);

1228 return NULL;

1229 }

1230 }

1231

1232 /* read as many integer digits as we find as long as it doesn't start with 0 */

1233 if (str[idx] >= '1' && str[idx] <= '9') {

1234 idx++;

1235 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1236 }

1237 /* if it starts with 0 we only expect one integer digit */

1238 else if (str[idx] == '0') {

1239 idx++;

1240 }

1241 /* no integer digits, error */

1242 else {

1243 PyErr_SetNone(PyExc_StopIteration);

1244 return NULL;

1245 }

1246

1247 /* if the next char is '.' followed by a digit then read all float digits */

1248 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {

1249 is_float = 1;

1250 idx += 2;

1251 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1252 }

1253

1254 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack ) */

1255 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {

1256 Py_ssize_t e_start = idx;

1257 idx++;

1258

1259 /* read an exponent sign if present */

1260 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;

1261

1262 /* read all digits */

1263 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;

1264

1265 /* if we got a digit, then parse as float. if not, backtrack */

1266 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {

1267 is_float = 1;

1268 }

1269 else {

1270 idx = e_start;

1271 }

1272 }

1273

1274 /* copy the section we determined to be a number */

1275 numstr = PyUnicode_FromUnicode(&str[start], idx - start);

1276 if (numstr == NULL)

1277 return NULL;

1278 if (is_float) {

1279 /* parse as a float using a fast path if available, otherwise call user defined method */

1280 if (s->parse_float != (PyObject *)&PyFloat_Type) {

1281 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);

1282 }

1283 else {

1284 rval = PyFloat_FromString(numstr, NULL);

1285 }

1286 }

1287 else {

1288 /* no fast path for unicode -> int, just call */

1289 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);

1290 }

1291 Py_DECREF(numstr);

1292 *next_idx_ptr = idx;

1293 return rval;

1294 }

1295

1296 static PyObject *

1297 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n ext_idx_ptr)

1298 {

1299 char *str = PyString_AS_STRING(pystr);

1300 Py_ssize_t length = PyString_GET_SIZE(pystr);

1301 if (idx >= length) {

1302 PyErr_SetNone(PyExc_StopIteration);

1303 return NULL;

1304 }

1305 switch (str[idx]) {

1306 case '"':

1307 /* string */

1308 return scanstring_str(pystr, idx + 1,

1309 PyString_AS_STRING(s->encoding),

1310 PyObject_IsTrue(s->strict),

1311 next_idx_ptr);

1312 case '{':

1313 /* object */

1314 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);

1315 case '[':

1316 /* array */

1317 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);

1318 case 'n':

1319 /* null */

1320 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {

1321 Py_INCREF(Py_None);

1322 *next_idx_ptr = idx + 4;

1323 return Py_None;

1324 }

1325 break;

1326 case 't':

1327 /* true */

1328 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {

1329 Py_INCREF(Py_True);

1330 *next_idx_ptr = idx + 4;

1331 return Py_True;

1332 }

1333 break;

1334 case 'f':

1335 /* false */

1336 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {

1337 Py_INCREF(Py_False);

1338 *next_idx_ptr = idx + 5;

1339 return Py_False;

1340 }

1341 break;

1342 case 'N':

1343 /* NaN */

1344 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N' ) {

1345 return _parse_constant(s, "NaN", idx, next_idx_ptr);

1346 }

1347 break;

1348 case 'I':

1349 /* Infinity */

1350 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {

1351 return _parse_constant(s, "Infinity", idx, next_idx_ptr);

1352 }

1353 break;

1354 case '-':

1355 /* -Infinity */

1356 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {

1357 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);

1358 }

1359 break;

1360 }

1361 /* Didn't find a string, object, array, or named constant. Look for a number . */

1362 return _match_number_str(s, pystr, idx, next_idx_ptr);

1363 }

1364

1365 static PyObject *

1366 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ t *next_idx_ptr)

1367 {

1368 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);

1369 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);

1370 if (idx >= length) {

1371 PyErr_SetNone(PyExc_StopIteration);

1372 return NULL;

1373 }

1374 switch (str[idx]) {

1375 case '"':

1376 /* string */

1377 return scanstring_unicode(pystr, idx + 1,

1378 PyObject_IsTrue(s->strict),

1379 next_idx_ptr);

1380 case '{':

1381 /* object */

1382 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);

1383 case '[':

1384 /* array */

1385 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);

1386 case 'n':

1387 /* null */

1388 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {

1389 Py_INCREF(Py_None);

1390 *next_idx_ptr = idx + 4;

1391 return Py_None;

1392 }

1393 break;

1394 case 't':

1395 /* true */

1396 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {

1397 Py_INCREF(Py_True);

1398 *next_idx_ptr = idx + 4;

1399 return Py_True;

1400 }

1401 break;

1402 case 'f':

1403 /* false */

1404 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {

1405 Py_INCREF(Py_False);

1406 *next_idx_ptr = idx + 5;

1407 return Py_False;

1408 }

1409 break;

1410 case 'N':

1411 /* NaN */

1412 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N' ) {

1413 return _parse_constant(s, "NaN", idx, next_idx_ptr);

1414 }

1415 break;

1416 case 'I':

1417 /* Infinity */

1418 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {

1419 return _parse_constant(s, "Infinity", idx, next_idx_ptr);

1420 }

1421 break;

1422 case '-':

1423 /* -Infinity */

1424 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {

1425 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);

1426 }

1427 break;

1428 }

1429 /* Didn't find a string, object, array, or named constant. Look for a number . */

1430 return _match_number_unicode(s, pystr, idx, next_idx_ptr);

1431 }

1432

1433 static PyObject *

1434 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)

1435 {

1436 PyObject *pystr;

1437 PyObject *rval;

1438 Py_ssize_t idx;

1439 Py_ssize_t next_idx = -1;

1440 static char *kwlist[] = {"string", "idx", NULL};

1441 PyScannerObject *s;

1442 assert(PyScanner_Check(self));

1443 s = (PyScannerObject *)self;

1444 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr , _convertPyInt_AsSsize_t, &idx))

1445 return NULL;

1446

1447 if (PyString_Check(pystr)) {

1448 rval = scan_once_str(s, pystr, idx, &next_idx);

1449 }

1450 else if (PyUnicode_Check(pystr)) {

1451 rval = scan_once_unicode(s, pystr, idx, &next_idx);

1452 }

1453 else {

1454 PyErr_Format(PyExc_TypeError,

1455 "first argument must be a string, not %.80s",

1456 Py_TYPE(pystr)->tp_name);

1457 return NULL;

1458 }

1459 return _build_rval_index_tuple(rval, next_idx);

1460 }

1461

1462 static int

1463 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)

1464 {

1465 PyObject *ctx;

1466 static char *kwlist[] = {"context", NULL};

1467 PyScannerObject *s;

1468

1469 assert(PyScanner_Check(self));

1470 s = (PyScannerObject *)self;

1471

1472 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx) )

1473 return -1;

1474

1475 s->encoding = NULL;

1476 s->strict = NULL;

1477 s->object_hook = NULL;

1478 s->parse_float = NULL;

1479 s->parse_int = NULL;

1480 s->parse_constant = NULL;

1481

1482 /* PyString_AS_STRING is used on encoding */

1483 s->encoding = PyObject_GetAttrString(ctx, "encoding");

1484 if (s->encoding == Py_None) {

1485 Py_DECREF(Py_None);

1486 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);

1487 }

1488 else if (PyUnicode_Check(s->encoding)) {

1489 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);

1490 Py_DECREF(s->encoding);

1491 s->encoding = tmp;

1492 }

1493 if (s->encoding == NULL || !PyString_Check(s->encoding))

1494 goto bail;

1495 ····

1496 /* All of these will fail "gracefully" so we don't need to verify them */

1497 s->strict = PyObject_GetAttrString(ctx, "strict");

1498 if (s->strict == NULL)

1499 goto bail;

1500 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");

1501 if (s->object_hook == NULL)

1502 goto bail;

1503 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");

1504 if (s->parse_float == NULL)

1505 goto bail;

1506 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");

1507 if (s->parse_int == NULL)

1508 goto bail;

1509 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");

1510 if (s->parse_constant == NULL)

1511 goto bail;

1512 ····

1513 return 0;

1514

1515 bail:

1516 Py_CLEAR(s->encoding);

1517 Py_CLEAR(s->strict);

1518 Py_CLEAR(s->object_hook);

1519 Py_CLEAR(s->parse_float);

1520 Py_CLEAR(s->parse_int);

1521 Py_CLEAR(s->parse_constant);

1522 return -1;

1523 }

1524

1525 PyDoc_STRVAR(scanner_doc, "JSON scanner object");

1526

1527 static

1528 PyTypeObject PyScannerType = {

1529 PyObject_HEAD_INIT(0)

1530 0, /* tp_internal */

1531 "make_scanner", /* tp_name */

1532 sizeof(PyScannerObject), /* tp_basicsize */

1533 0, /* tp_itemsize */

1534 scanner_dealloc, /* tp_dealloc */

1535 0, /* tp_print */

1536 0, /* tp_getattr */

1537 0, /* tp_setattr */

1538 0, /* tp_compare */

1539 0, /* tp_repr */

1540 0, /* tp_as_number */

1541 0, /* tp_as_sequence */

1542 0, /* tp_as_mapping */

1543 0, /* tp_hash */

1544 scanner_call, /* tp_call */

1545 0, /* tp_str */

1546 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */

1547 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */

1548 0, /* tp_as_buffer */

1549 Py_TPFLAGS_DEFAULT, /* tp_flags */

1550 scanner_doc, /* tp_doc */

1551 0, /* tp_traverse */

1552 0, /* tp_clear */

1553 0, /* tp_richcompare */

1554 0, /* tp_weaklistoffset */

1555 0, /* tp_iter */

1556 0, /* tp_iternext */

1557 0, /* tp_methods */

1558 scanner_members, /* tp_members */

1559 0, /* tp_getset */

1560 0, /* tp_base */

1561 0, /* tp_dict */

1562 0, /* tp_descr_get */

1563 0, /* tp_descr_set */

1564 0, /* tp_dictoffset */

1565 scanner_init, /* tp_init */

1566 0,/* PyType_GenericAlloc, */ /* tp_alloc */

1567 0,/* PyType_GenericNew, */ /* tp_new */

1568 0,/* _PyObject_Del, */ /* tp_free */

1569 };

1570

1571 static int

1572 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)

1573 {

1574 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_sep arator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};

1575

1576 PyEncoderObject *s;

1577 PyObject *allow_nan;

1578

1579 assert(PyEncoder_Check(self));

1580 s = (PyEncoderObject *)self;

1581

1582 s->markers = NULL;

1583 s->defaultfn = NULL;

1584 s->encoder = NULL;

1585 s->indent = NULL;

1586 s->key_separator = NULL;

1587 s->item_separator = NULL;

1588 s->sort_keys = NULL;

1589 s->skipkeys = NULL;

1590 ····

1591 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlis t,

1592 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))

1593 return -1;

1594 ····

1595 Py_INCREF(s->markers);

1596 Py_INCREF(s->defaultfn);

1597 Py_INCREF(s->encoder);

1598 Py_INCREF(s->indent);

1599 Py_INCREF(s->key_separator);

1600 Py_INCREF(s->item_separator);

1601 Py_INCREF(s->sort_keys);

1602 Py_INCREF(s->skipkeys);

1603 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s ->encoder) == (PyCFunction)py_encode_basestring_ascii);

1604 s->allow_nan = PyObject_IsTrue(allow_nan);

1605 return 0;

1606 }

1607

1608 static PyObject *

1609 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)

1610 {

1611 static char *kwlist[] = {"obj", "_current_indent_level", NULL};

1612 PyObject *obj;

1613 PyObject *rval;

1614 Py_ssize_t indent_level;

1615 PyEncoderObject *s;

1616 assert(PyEncoder_Check(self));

1617 s = (PyEncoderObject *)self;

1618 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,

1619 &obj, _convertPyInt_AsSsize_t, &indent_level))

1620 return NULL;

1621 rval = PyList_New(0);

1622 if (rval == NULL)

1623 return NULL;

1624 if (encoder_listencode_obj(s, rval, obj, indent_level)) {

1625 Py_DECREF(rval);

1626 return NULL;

1627 }

1628 return rval;

1629 }

1630

1631 static PyObject *

1632 _encoded_const(PyObject *obj)

1633 {

1634 if (obj == Py_None) {

1635 static PyObject *s_null = NULL;

1636 if (s_null == NULL) {

1637 s_null = PyString_InternFromString("null");

1638 }

1639 Py_INCREF(s_null);

1640 return s_null;

1641 }

1642 else if (obj == Py_True) {

1643 static PyObject *s_true = NULL;

1644 if (s_true == NULL) {

1645 s_true = PyString_InternFromString("true");

1646 }

1647 Py_INCREF(s_true);

1648 return s_true;

1649 }

1650 else if (obj == Py_False) {

1651 static PyObject *s_false = NULL;

1652 if (s_false == NULL) {

1653 s_false = PyString_InternFromString("false");

1654 }

1655 Py_INCREF(s_false);

1656 return s_false;

1657 }

1658 else {

1659 PyErr_SetString(PyExc_ValueError, "not a const");

1660 return NULL;

1661 }

1662 }

1663

1664 static PyObject *

1665 encoder_encode_float(PyEncoderObject *s, PyObject *obj)

1666 {

1667 double i = PyFloat_AS_DOUBLE(obj);

1668 if (!Py_IS_FINITE(i)) {

1669 if (!s->allow_nan) {

1670 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");

1671 return NULL;

1672 }

1673 if (i > 0) {

1674 return PyString_FromString("Infinity");

1675 }

1676 else if (i < 0) {

1677 return PyString_FromString("-Infinity");

1678 }

1679 else {

1680 return PyString_FromString("NaN");

1681 }

1682 }

1683 /* Use a better float format here? */

1684 return PyObject_Repr(obj);

1685 }

1686

1687 static PyObject *

1688 encoder_encode_string(PyEncoderObject *s, PyObject *obj)

1689 {

1690 if (s->fast_encode)

1691 return py_encode_basestring_ascii(NULL, obj);

1692 else

1693 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);

1694 }

1695

1696 static int

1697 _steal_list_append(PyObject *lst, PyObject *stolen)

1698 {

1699 int rval = PyList_Append(lst, stolen);

1700 Py_DECREF(stolen);

1701 return rval;

1702 }

1703

1704 static int

1705 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi ze_t indent_level)

1706 {

1707 PyObject *newobj;

1708 int rv;

1709 ····

1710 if (obj == Py_None || obj == Py_True || obj == Py_False) {

1711 PyObject *cstr = _encoded_const(obj);

1712 if (cstr == NULL)

1713 return -1;

1714 return _steal_list_append(rval, cstr);

1715 }

1716 else if (PyString_Check(obj) || PyUnicode_Check(obj))

1717 {

1718 PyObject *encoded = encoder_encode_string(s, obj);

1719 if (encoded == NULL)

1720 return -1;

1721 return _steal_list_append(rval, encoded);

1722 }

1723 else if (PyInt_Check(obj) || PyLong_Check(obj)) {

1724 PyObject *encoded = PyObject_Str(obj);

1725 if (encoded == NULL)

1726 return -1;

1727 return _steal_list_append(rval, encoded);

1728 }

1729 else if (PyFloat_Check(obj)) {

1730 PyObject *encoded = encoder_encode_float(s, obj);

1731 if (encoded == NULL)

1732 return -1;

1733 return _steal_list_append(rval, encoded);

1734 }

1735 else if (PyList_Check(obj) || PyTuple_Check(obj)) {

1736 return encoder_listencode_list(s, rval, obj, indent_level);

1737 }

1738 else if (PyDict_Check(obj)) {

1739 return encoder_listencode_dict(s, rval, obj, indent_level);

1740 }

1741 else {

1742 PyObject *ident = NULL;

1743 if (s->markers != Py_None) {

1744 int has_key;

1745 ident = PyLong_FromVoidPtr(obj);

1746 if (ident == NULL)

1747 return -1;

1748 has_key = PyDict_Contains(s->markers, ident);

1749 if (has_key) {

1750 if (has_key != -1)

1751 PyErr_SetString(PyExc_ValueError, "Circular reference detect ed");

1752 Py_DECREF(ident);

1753 return -1;

1754 }

1755 if (PyDict_SetItem(s->markers, ident, obj)) {

1756 Py_DECREF(ident);

1757 return -1;

1758 }

1759 }

1760 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);

1761 if (newobj == NULL) {

1762 Py_DECREF(ident);

1763 return -1;

1764 }

1765 rv = encoder_listencode_obj(s, rval, newobj, indent_level);

1766 Py_DECREF(newobj);

1767 if (rv) {

1768 Py_DECREF(ident);

1769 return -1;

1770 }

1771 if (ident != NULL) {

1772 if (PyDict_DelItem(s->markers, ident)) {

1773 Py_DECREF(ident);

1774 return -1;

1775 }

1776 Py_DECREF(ident);

1777 }

1778 return rv;

1779 }

1780 }

1781

1782 static int

1783 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss ize_t indent_level)

1784 {

1785 static PyObject *open_dict = NULL;

1786 static PyObject *close_dict = NULL;

1787 static PyObject *empty_dict = NULL;

1788 PyObject *kstr = NULL;

1789 PyObject *ident = NULL;

1790 PyObject *key, *value;

1791 Py_ssize_t pos;

1792 int skipkeys;

1793 Py_ssize_t idx;

1794 ····

1795 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {

1796 open_dict = PyString_InternFromString("{");

1797 close_dict = PyString_InternFromString("}");

1798 empty_dict = PyString_InternFromString("{}");

1799 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)

1800 return -1;

1801 }

1802 if (PyDict_Size(dct) == 0)

1803 return PyList_Append(rval, empty_dict);

1804 ····

1805 if (s->markers != Py_None) {

1806 int has_key;

1807 ident = PyLong_FromVoidPtr(dct);

1808 if (ident == NULL)

1809 goto bail;

1810 has_key = PyDict_Contains(s->markers, ident);

1811 if (has_key) {

1812 if (has_key != -1)

1813 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ;

1814 goto bail;

1815 }

1816 if (PyDict_SetItem(s->markers, ident, dct)) {

1817 goto bail;

1818 }

1819 }

1820

1821 if (PyList_Append(rval, open_dict))

1822 goto bail;

1823

1824 if (s->indent != Py_None) {

1825 /* TODO: DOES NOT RUN */

1826 indent_level += 1;

1827 /*

1828 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))

1829 separator = _item_separator + newline_indent

1830 buf += newline_indent

1831 */

1832 }

1833

1834 /* TODO: C speedup not implemented for sort_keys */

1835

1836 pos = 0;

1837 skipkeys = PyObject_IsTrue(s->skipkeys);

1838 idx = 0;

1839 while (PyDict_Next(dct, &pos, &key, &value)) {

1840 PyObject *encoded;

1841

1842 if (PyString_Check(key) || PyUnicode_Check(key)) {

1843 Py_INCREF(key);

1844 kstr = key;

1845 }

1846 else if (PyFloat_Check(key)) {

1847 kstr = encoder_encode_float(s, key);

1848 if (kstr == NULL)

1849 goto bail;

1850 }

1851 else if (PyInt_Check(key) || PyLong_Check(key)) {

1852 kstr = PyObject_Str(key);

1853 if (kstr == NULL)

1854 goto bail;

1855 }

1856 else if (key == Py_True || key == Py_False || key == Py_None) {

1857 kstr = _encoded_const(key);

1858 if (kstr == NULL)

1859 goto bail;

1860 }

1861 else if (skipkeys) {

1862 continue;

1863 }

1864 else {

1865 /* TODO: include repr of key */

1866 PyErr_SetString(PyExc_ValueError, "keys must be a string");

1867 goto bail;

1868 }

1869 ········

1870 if (idx) {

1871 if (PyList_Append(rval, s->item_separator))

1872 goto bail;

1873 }

1874 ········

1875 encoded = encoder_encode_string(s, kstr);

1876 Py_CLEAR(kstr);

1877 if (encoded == NULL)

1878 goto bail;

1879 if (PyList_Append(rval, encoded)) {

1880 Py_DECREF(encoded);

1881 goto bail;

1882 }

1883 Py_DECREF(encoded);

1884 if (PyList_Append(rval, s->key_separator))

1885 goto bail;

1886 if (encoder_listencode_obj(s, rval, value, indent_level))

1887 goto bail;

1888 idx += 1;

1889 }

1890 if (ident != NULL) {

1891 if (PyDict_DelItem(s->markers, ident))

1892 goto bail;

1893 Py_CLEAR(ident);

1894 }

1895 if (s->indent != Py_None) {

1896 /* TODO: DOES NOT RUN */

1897 indent_level -= 1;

1898 /*

1899 yield '\n' + (' ' * (_indent * _current_indent_level))

1900 */

1901 }

1902 if (PyList_Append(rval, close_dict))

1903 goto bail;

1904 return 0;

1905 ····

1906 bail:

1907 Py_XDECREF(kstr);

1908 Py_XDECREF(ident);

1909 return -1;

1910 }

1911

1912

1913 static int

1914 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss ize_t indent_level)

1915 {

1916 static PyObject *open_array = NULL;

1917 static PyObject *close_array = NULL;

1918 static PyObject *empty_array = NULL;

1919 PyObject *ident = NULL;

1920 PyObject *s_fast = NULL;

1921 Py_ssize_t num_items;

1922 PyObject **seq_items;

1923 Py_ssize_t i;

1924 ····

1925 if (open_array == NULL || close_array == NULL || empty_array == NULL) {

1926 open_array = PyString_InternFromString("[");

1927 close_array = PyString_InternFromString("]");

1928 empty_array = PyString_InternFromString("[]");

1929 if (open_array == NULL || close_array == NULL || empty_array == NULL)

1930 return -1;

1931 }

1932 ident = NULL;

1933 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");

1934 if (s_fast == NULL)

1935 return -1;

1936 num_items = PySequence_Fast_GET_SIZE(s_fast);

1937 if (num_items == 0) {

1938 Py_DECREF(s_fast);

1939 return PyList_Append(rval, empty_array);

1940 }

1941 ····

1942 if (s->markers != Py_None) {

1943 int has_key;

1944 ident = PyLong_FromVoidPtr(seq);

1945 if (ident == NULL)

1946 goto bail;

1947 has_key = PyDict_Contains(s->markers, ident);

1948 if (has_key) {

1949 if (has_key != -1)

1950 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ;

1951 goto bail;

1952 }

1953 if (PyDict_SetItem(s->markers, ident, seq)) {

1954 goto bail;

1955 }

1956 }

1957 ····

1958 seq_items = PySequence_Fast_ITEMS(s_fast);

1959 if (PyList_Append(rval, open_array))

1960 goto bail;

1961 if (s->indent != Py_None) {

1962 /* TODO: DOES NOT RUN */

1963 indent_level += 1;

1964 /*

1965 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))

1966 separator = _item_separator + newline_indent

1967 buf += newline_indent

1968 */

1969 }

1970 for (i = 0; i < num_items; i++) {

1971 PyObject *obj = seq_items[i];

1972 if (i) {

1973 if (PyList_Append(rval, s->item_separator))

1974 goto bail;

1975 }

1976 if (encoder_listencode_obj(s, rval, obj, indent_level))

1977 goto bail;

1978 }

1979 if (ident != NULL) {

1980 if (PyDict_DelItem(s->markers, ident))

1981 goto bail;

1982 Py_CLEAR(ident);

1983 }

1984 if (s->indent != Py_None) {

1985 /* TODO: DOES NOT RUN */

1986 indent_level -= 1;

1987 /*

1988 yield '\n' + (' ' * (_indent * _current_indent_level))

1989 */

1990 }

1991 if (PyList_Append(rval, close_array))

1992 goto bail;

1993 Py_DECREF(s_fast);

1994 return 0;

1995 ····

1996 bail:

1997 Py_XDECREF(ident);

1998 Py_DECREF(s_fast);

1999 return -1;

2000 }

2001

2002 static void

2003 encoder_dealloc(PyObject *self)

2004 {

2005 PyEncoderObject *s;

2006 assert(PyEncoder_Check(self));

2007 s = (PyEncoderObject *)self;

2008 Py_CLEAR(s->markers);

2009 Py_CLEAR(s->defaultfn);

2010 Py_CLEAR(s->encoder);

2011 Py_CLEAR(s->indent);

2012 Py_CLEAR(s->key_separator);

2013 Py_CLEAR(s->item_separator);

2014 Py_CLEAR(s->sort_keys);

2015 Py_CLEAR(s->skipkeys);

2016 self->ob_type->tp_free(self);

2017 }

2018

2019 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") ;

2020

2021 static

2022 PyTypeObject PyEncoderType = {

2023 PyObject_HEAD_INIT(0)

2024 0, /* tp_internal */

2025 "make_encoder", /* tp_name */

2026 sizeof(PyEncoderObject), /* tp_basicsize */

2027 0, /* tp_itemsize */

2028 encoder_dealloc, /* tp_dealloc */

2029 0, /* tp_print */

2030 0, /* tp_getattr */

2031 0, /* tp_setattr */

2032 0, /* tp_compare */

2033 0, /* tp_repr */

2034 0, /* tp_as_number */

2035 0, /* tp_as_sequence */

2036 0, /* tp_as_mapping */

2037 0, /* tp_hash */

2038 encoder_call, /* tp_call */

2039 0, /* tp_str */

2040 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */

2041 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */

2042 0, /* tp_as_buffer */

2043 Py_TPFLAGS_DEFAULT, /* tp_flags */

2044 encoder_doc, /* tp_doc */

2045 0, /* tp_traverse */

2046 0, /* tp_clear */

2047 0, /* tp_richcompare */

2048 0, /* tp_weaklistoffset */

2049 0, /* tp_iter */

2050 0, /* tp_iternext */

2051 0, /* tp_methods */

2052 encoder_members, /* tp_members */

2053 0, /* tp_getset */

2054 0, /* tp_base */

2055 0, /* tp_dict */

2056 0, /* tp_descr_get */

2057 0, /* tp_descr_set */

2058 0, /* tp_dictoffset */

2059 encoder_init, /* tp_init */

2060 0,/* PyType_GenericAlloc, */ /* tp_alloc */

2061 0,/* PyType_GenericNew, */ /* tp_new */

2062 0,/* _PyObject_Del, */ /* tp_free */

2063 };

2064

605 static PyMethodDef json_methods[] = {

2065 static PyMethodDef json_methods[] = {

606 {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii,

2066 {"encode_basestring_ascii",

607 METH_O, pydoc_encode_basestring_ascii},

2067 (PyCFunction)py_encode_basestring_ascii,

608 {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS,

2068 METH_O,

609 pydoc_scanstring},

2069 pydoc_encode_basestring_ascii},

2070 {"scanstring",

2071 (PyCFunction)py_scanstring,

2072 METH_VARARGS,

2073 pydoc_scanstring},

610 {NULL, NULL, 0, NULL}

2074 {NULL, NULL, 0, NULL}

611 };

2075 };

612

2076

613 PyDoc_STRVAR(module_doc,

2077 PyDoc_STRVAR(module_doc,

614 "json speedups\n");

2078 "json speedups\n");

615

2079

616 void

2080 void

617 init_json(void)

2081 init_json(void)

618 {

2082 {

619 PyObject *m;

2083 PyObject *m;

2084 PyScannerType.tp_getattro = PyObject_GenericGetAttr;

2085 PyScannerType.tp_setattro = PyObject_GenericSetAttr;

2086 PyScannerType.tp_alloc = PyType_GenericAlloc;

2087 PyScannerType.tp_new = PyType_GenericNew;

2088 PyScannerType.tp_free = _PyObject_Del;

2089 if (PyType_Ready(&PyScannerType) < 0)

2090 return;

2091 PyEncoderType.tp_getattro = PyObject_GenericGetAttr;

2092 PyEncoderType.tp_setattro = PyObject_GenericSetAttr;

2093 PyEncoderType.tp_alloc = PyType_GenericAlloc;

2094 PyEncoderType.tp_new = PyType_GenericNew;

2095 PyEncoderType.tp_free = _PyObject_Del;

2096 if (PyType_Ready(&PyEncoderType) < 0)

2097 return;

620 m = Py_InitModule3("_json", json_methods, module_doc);

2098 m = Py_InitModule3("_json", json_methods, module_doc);

2099 Py_INCREF((PyObject*)&PyScannerType);

2100 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);

2101 Py_INCREF((PyObject*)&PyEncoderType);

2102 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);

621 }

2103 }

OLD

NEW