[issue5863] bz2.BZ2File should accept other file-like objects. - Code Review (original) (raw)

OLD

NEW

1 /*

1 /* _bz2 - Low-level Python interface to libbzip2. */

2

2

3 python-bz2 - python bz2 library interface

4

5 Copyright (c) 2002 Gustavo Niemeyer niemeyer@conectiva.com

6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved

7

8 */

9

10 #include "Python.h"

3 #include "Python.h"

11 #include <stdio.h>

4 #include "structmember.h"

12 #include <bzlib.h>

5 #include <bzlib.h>

13 #include "structmember.h"

14

15 #ifdef WITH_THREAD

16 #include "pythread.h"

17 #endif

18

19 static char __author__[] =

20 "The bz2 python module was written by:\n\

21 \n\

22 Gustavo Niemeyer niemeyer@conectiva.com\n\

23 ";

24

25 /* Our very own off_t-like type, 64-bit if possible */

26 /* copied from Objects/fileobject.c */

27 #if !defined(HAVE_LARGEFILE_SUPPORT)

28 typedef off_t Py_off_t;

29 #elif SIZEOF_OFF_T >= 8

30 typedef off_t Py_off_t;

31 #elif SIZEOF_FPOS_T >= 8

32 typedef fpos_t Py_off_t;

33 #else

34 #error "Large file support, but neither off_t nor fpos_t is large enough."

35 #endif

36

37 #define BUF(v) PyBytes_AS_STRING(v)

38

39 #define MODE_CLOSED 0

40 #define MODE_READ 1

41 #define MODE_READ_EOF 2

42 #define MODE_WRITE 3

43

44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)

45

6

46

7

47 #ifdef BZ_CONFIG_ERROR

8 #ifndef BZ_CONFIG_ERROR

48

49 #if SIZEOF_LONG >= 8

50 #define BZS_TOTAL_OUT(bzs) \

51 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)

52 #elif SIZEOF_LONG_LONG >= 8

53 #define BZS_TOTAL_OUT(bzs) \

54 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)

55 #else

56 #define BZS_TOTAL_OUT(bzs) \

57 bzs->total_out_lo32

58 #endif

59

60 #else /* ! BZ_CONFIG_ERROR */

61

62 #define BZ2_bzRead bzRead

63 #define BZ2_bzReadOpen bzReadOpen

64 #define BZ2_bzReadClose bzReadClose

65 #define BZ2_bzWrite bzWrite

66 #define BZ2_bzWriteOpen bzWriteOpen

67 #define BZ2_bzWriteClose bzWriteClose

68 #define BZ2_bzCompress bzCompress

9 #define BZ2_bzCompress bzCompress

69 #define BZ2_bzCompressInit bzCompressInit

10 #define BZ2_bzCompressInit bzCompressInit

70 #define BZ2_bzCompressEnd bzCompressEnd

11 #define BZ2_bzCompressEnd bzCompressEnd

71 #define BZ2_bzDecompress bzDecompress

12 #define BZ2_bzDecompress bzDecompress

72 #define BZ2_bzDecompressInit bzDecompressInit

13 #define BZ2_bzDecompressInit bzDecompressInit

73 #define BZ2_bzDecompressEnd bzDecompressEnd

14 #define BZ2_bzDecompressEnd bzDecompressEnd

15 #endif /* ! BZ_CONFIG_ERROR */

74

16

75 #define BZS_TOTAL_OUT(bzs) bzs->total_out

76

77 #endif /* ! BZ_CONFIG_ERROR */

78

79

80 #ifdef WITH_THREAD

81 #define ACQUIRE_LOCK(obj) do { \

82 if (!PyThread_acquire_lock(obj->lock, 0)) { \

83 Py_BEGIN_ALLOW_THREADS \

84 PyThread_acquire_lock(obj->lock, 1); \

85 Py_END_ALLOW_THREADS \

86 } } while(0)

87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)

88 #else

89 #define ACQUIRE_LOCK(obj)

90 #define RELEASE_LOCK(obj)

91 #endif

92

93 /* Bits in f_newlinetypes */

94 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */

95 #define NEWLINE_CR 1 /* \r newline seen */

96 #define NEWLINE_LF 2 /* \n newline seen */

97 #define NEWLINE_CRLF 4 /* \r\n newline seen */

98

99 /* ===================================================================== */

100 /* Structure definitions. */

101

102 typedef struct {

103 PyObject_HEAD

104 FILE *rawfp;

105

106 char* f_buf; /* Allocated readahead buffer */

107 char* f_bufend; /* Points after last occupied position */

108 char* f_bufptr; /* Current buffer position */

109

110 BZFILE *fp;

111 int mode;

112 Py_off_t pos;

113 Py_off_t size;

114 #ifdef WITH_THREAD

115 PyThread_type_lock lock;

116 #endif

117 } BZ2FileObject;

118

17

119 typedef struct {

18 typedef struct {

120 PyObject_HEAD

19 PyObject_HEAD

121 bz_stream bzs;

20 bz_stream bzs;

122 int running;

21 int flushed;

123 #ifdef WITH_THREAD

22 } Compressor;

124 PyThread_type_lock lock;

125 #endif

126 } BZ2CompObject;

127

23

128 typedef struct {

24 typedef struct {

129 PyObject_HEAD

25 PyObject_HEAD

130 bz_stream bzs;

26 bz_stream bzs;

131 int running;

27 int eof;

132 PyObject *unused_data;

28 PyObject *unused_data;

133 #ifdef WITH_THREAD

29 } Decompressor;

134 PyThread_type_lock lock;

135 #endif

136 } BZ2DecompObject;

137

30

138 /* ===================================================================== */

139 /* Utility functions. */

140

31

141 /* Refuse regular I/O if there's data in the iteration-buffer.

32 /* Helper functions. */

142 * Mixing them would cause data to arrive out of order, as the read*

143 * methods don't use the iteration buffer. */

144 static int

145 check_iterbuffered(BZ2FileObject *f)

146 {

147 if (f->f_buf != NULL &&

148 (f->f_bufend - f->f_bufptr) > 0 &&

149 f->f_buf[0] != '\0') {

150 PyErr_SetString(PyExc_ValueError,

151 "Mixing iteration and read methods would lose data");

152 return -1;

153 }

154 return 0;

155 }

156

33

157 static int

34 static int

158 Util_CatchBZ2Error(int bzerror)

35 catch_bz2_error(int bzerror)

159 {

36 {

160 int ret = 0;

161 switch(bzerror) {

37 switch(bzerror) {

162 case BZ_OK:

38 case BZ_OK:

39 case BZ_RUN_OK:

40 case BZ_FLUSH_OK:

41 case BZ_FINISH_OK:

163 case BZ_STREAM_END:

42 case BZ_STREAM_END:

164 break;

43 return 0;

165

44

166 #ifdef BZ_CONFIG_ERROR

45 #ifdef BZ_CONFIG_ERROR

167 case BZ_CONFIG_ERROR:

46 case BZ_CONFIG_ERROR:

168 PyErr_SetString(PyExc_SystemError,

47 PyErr_SetString(PyExc_SystemError,

169 "the bz2 library was not compiled "

48 "libbzip2 was not compiled correctly");

170 "correctly");

49 return 1;

171 ret = 1;

172 break;

173 #endif

50 #endif

174

175 case BZ_PARAM_ERROR:

51 case BZ_PARAM_ERROR:

176 PyErr_SetString(PyExc_ValueError,

52 PyErr_SetString(PyExc_ValueError,

177 "the bz2 library has received wrong "

53 "Internal error - "

178 "parameters");

54 "invalid parameters passed to libbzip2");

179 ret = 1;

55 return 1;

180 break;

181

182 case BZ_MEM_ERROR:

56 case BZ_MEM_ERROR:

183 PyErr_NoMemory();

57 PyErr_NoMemory();

184 ret = 1;

58 return 1;

185 break;

186

187 case BZ_DATA_ERROR:

59 case BZ_DATA_ERROR:

188 case BZ_DATA_ERROR_MAGIC:

60 case BZ_DATA_ERROR_MAGIC:

189 PyErr_SetString(PyExc_IOError, "invalid data stream");

61 PyErr_SetString(PyExc_IOError, "Invalid data stream");

190 ret = 1;

62 return 1;

191 break;

192

193 case BZ_IO_ERROR:

63 case BZ_IO_ERROR:

194 PyErr_SetString(PyExc_IOError, "unknown IO error");

64 PyErr_SetString(PyExc_IOError, "Unknown I/O error");

195 ret = 1;

65 return 1;

196 break;

197

198 case BZ_UNEXPECTED_EOF:

66 case BZ_UNEXPECTED_EOF:

199 PyErr_SetString(PyExc_EOFError,

67 PyErr_SetString(PyExc_EOFError,

200 "compressed file ended before the "

68 "Compressed file ended before the logical "

201 "logical end-of-stream was detected");

69 "end-of-stream was detected");

202 ret = 1;

70 return 1;

203 break;

204

205 case BZ_SEQUENCE_ERROR:

71 case BZ_SEQUENCE_ERROR:

206 PyErr_SetString(PyExc_RuntimeError,

72 PyErr_SetString(PyExc_RuntimeError,

207 "wrong sequence of bz2 library "

73 "Internal error - "

208 "commands used");

74 "Invalid sequence of commands sent to libbzip2");

209 ret = 1;

75 return 1;

210 break;

76 default:

77 PyErr_Format(PyExc_IOError,

78 "Unrecognized error from libbzip2: %d", bzerror);

79 return 1;

211 }

80 }

212 return ret;

213 }

81 }

214

82

215 #if BUFSIZ < 8192

83 #if BUFSIZ < 8192

216 #define SMALLCHUNK 8192

84 #define SMALLCHUNK 8192

217 #else

85 #else

218 #define SMALLCHUNK BUFSIZ

86 #define SMALLCHUNK BUFSIZ

219 #endif

87 #endif

220

88

221 #if SIZEOF_INT < 4

89 #if SIZEOF_INT < 4

222 #define BIGCHUNK (512 * 32)

90 #define BIGCHUNK (512 * 32)

223 #else

91 #else

224 #define BIGCHUNK (512 * 1024)

92 #define BIGCHUNK (512 * 1024)

225 #endif

93 #endif

226

94

227 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */

95 static int

228 static size_t

96 grow_buffer(PyObject **buf)

229 Util_NewBufferSize(size_t currentsize)

97 {

230 {

98 size_t size = PyBytes_GET_SIZE(*buf);

231 if (currentsize > SMALLCHUNK) {

99 if (size <= SMALLCHUNK)

232 /* Keep doubling until we reach BIGCHUNK;

100 return _PyBytes_Resize(buf, size + SMALLCHUNK);

233 then keep adding BIGCHUNK. */

101 else if (size <= BIGCHUNK)

234 if (currentsize <= BIGCHUNK)

102 return _PyBytes_Resize(buf, size * 2);

235 return currentsize + currentsize;

103 else

236 else

104 return _PyBytes_Resize(buf, size + BIGCHUNK);

237 return currentsize + BIGCHUNK;

105 }

238 }

106

239 return currentsize + SMALLCHUNK;

107

240 }

108 /* Compressor class. */

241

109

242 /* This is a hacked version of Python's fileobject.c:get_line(). */

110 static PyObject *

243 static PyObject *

111 compress(Compressor *c, char *data, size_t len, int action)

244 Util_GetLine(BZ2FileObject *f, int n)

112 {

245 {

113 size_t data_size = 0;

246 char c;

114 PyObject *result;

247 char *buf, *end;

115

248 size_t total_v_size; /* total # of slots in buffer */

116 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);

249 size_t used_v_size; /* # used slots in buffer */

117 if (result == NULL)

250 size_t increment; /* amount to increment the buffer */

118 return NULL;

251 PyObject *v;

119 c->bzs.next_in = data;

120 c->bzs.avail_in = len;

121 c->bzs.next_out = PyBytes_AS_STRING(result);

122 c->bzs.avail_out = PyBytes_GET_SIZE(result);

123 for (;;) {

124 char *this_out;

125 int bzerror;

126

127 Py_BEGIN_ALLOW_THREADS

128 this_out = c->bzs.next_out;

129 bzerror = BZ2_bzCompress(&c->bzs, action);

130 data_size += c->bzs.next_out - this_out;

131 Py_END_ALLOW_THREADS

132 if (catch_bz2_error(bzerror))

133 goto error;

134

135 /* In regular compression mode, stop when input data is exhausted.

136 In flushing mode, stop when all buffered data has been flushed. */

137 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||

138 (action == BZ_FINISH && bzerror == BZ_STREAM_END))

139 break;

140

141 if (c->bzs.avail_out == 0) {

142 if (grow_buffer(&result) < 0)

143 goto error;

144 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;

145 c->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;

146 }

147 }

148 if (data_size != PyBytes_GET_SIZE(result))

149 if (_PyBytes_Resize(&result, data_size) < 0)

150 goto error;

151 return result;

152

153 error:

154 Py_XDECREF(result);

155 return NULL;

156 }

157

158 static PyObject *

159 Compressor_compress(Compressor *self, PyObject *arg)

160 {

161 Py_buffer buffer;

162 PyObject *result;

163

164 if (self->flushed) {

165 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");

166 return NULL;

167 }

168 if (PyObject_GetBuffer(arg, &buffer, PyBUF_SIMPLE) < 0)

169 return NULL;

170 result = compress(self, buffer.buf, buffer.len, BZ_RUN);

171 PyBuffer_Release(&buffer);

172 return result;

173 }

174

175 static PyObject *

176 Compressor_flush(Compressor *self, PyObject *noargs)

177 {

178 if (self->flushed) {

179 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");

180 return NULL;

181 }

182 self->flushed = 1;

183 return compress(self, NULL, 0, BZ_FINISH);

184 }

185

186 static int

187 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)

188 {

189 int compresslevel = 9;

252 int bzerror;

190 int bzerror;

253 int bytes_read;

191

254

192 if (!PyArg_ParseTuple(args, "|i:Compressor", &compresslevel))

255 total_v_size = n > 0 ? n : 100;

193 return -1;

256 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);

194 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);

257 if (v == NULL)

195 return catch_bz2_error(bzerror) ? -1 : 0;

258 return NULL;

196 }

259

197

260 buf = BUF(v);

198 static void

261 end = buf + total_v_size;

199 Compressor_dealloc(Compressor *self)

262

200 {

201 int bzerror = BZ2_bzCompressEnd(&self->bzs);

202 if (catch_bz2_error(bzerror))

203 PyErr_WriteUnraisable((PyObject *)self);

204 Py_TYPE(self)->tp_free((PyObject *)self);

205 }

206

207 static PyMethodDef Compressor_methods[] = {

208 {"compress", (PyCFunction)Compressor_compress, METH_O,

209 "Provide a block of data to the compressor."},

210 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,

211 "Flush the buffers of the compressor."},

212 {NULL}

213 };

214

215 static PyTypeObject Compressor_Type = {

216 PyVarObject_HEAD_INIT(NULL, 0)

217 "_bz2.Compressor", /* tp_name */

218 sizeof(Compressor), /* tp_basicsize */

219 0, /* tp_itemsize */

220 (destructor)Compressor_dealloc, /* tp_dealloc */

221 0, /* tp_print */

222 0, /* tp_getattr */

223 0, /* tp_setattr */

224 0, /* tp_reserved */

225 0, /* tp_repr */

226 0, /* tp_as_number */

227 0, /* tp_as_sequence */

228 0, /* tp_as_mapping */

229 0, /* tp_hash */

230 0, /* tp_call */

231 0, /* tp_str */

232 0, /* tp_getattro */

233 0, /* tp_setattro */

234 0, /* tp_as_buffer */

235 Py_TPFLAGS_DEFAULT, /* tp_flags */

236 "Wrapper for bz2 compression functions.", /* tp_doc */

237 0, /* tp_traverse */

238 0, /* tp_clear */

239 0, /* tp_richcompare */

240 0, /* tp_weaklistoffset */

241 0, /* tp_iter */

242 0, /* tp_iternext */

243 Compressor_methods, /* tp_methods */

244 0, /* tp_members */

245 0, /* tp_getset */

246 0, /* tp_base */

247 0, /* tp_dict */

248 0, /* tp_descr_get */

249 0, /* tp_descr_set */

250 0, /* tp_dictoffset */

251 (initproc)Compressor_init, /* tp_init */

252 0, /* tp_alloc */

253 PyType_GenericNew, /* tp_new */

254 };

255

256

257 /* Decompressor class. */

258

259 static PyObject *

260 decompress(Decompressor *d, char *data, size_t len)

261 {

262 size_t data_size = 0;

263 PyObject *result;

264

265 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);

266 if (result == NULL)

267 return result;

268 d->bzs.next_in = data;

269 d->bzs.avail_in = len;

270 d->bzs.next_out = PyBytes_AS_STRING(result);

271 d->bzs.avail_out = PyBytes_GET_SIZE(result);

263 for (;;) {

272 for (;;) {

273 char *this_out;

274 int bzerror;

275

264 Py_BEGIN_ALLOW_THREADS

276 Py_BEGIN_ALLOW_THREADS

265 do {

277 this_out = d->bzs.next_out;

266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);

278 bzerror = BZ2_bzDecompress(&d->bzs);

267 f->pos++;

279 data_size += d->bzs.next_out - this_out;

268 if (bytes_read == 0)

269 break;

270 *buf++ = c;

271 } while (bzerror == BZ_OK && c != '\n' && buf != end);

272 Py_END_ALLOW_THREADS

280 Py_END_ALLOW_THREADS

281 if (catch_bz2_error(bzerror))

282 goto error;

273 if (bzerror == BZ_STREAM_END) {

283 if (bzerror == BZ_STREAM_END) {

274 f->size = f->pos;

284 d->eof = 1;

275 f->mode = MODE_READ_EOF;

285 if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */

276 break;

286 Py_CLEAR(d->unused_data);

277 } else if (bzerror != BZ_OK) {

287 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in,

278 Util_CatchBZ2Error(bzerror);

288 d->bzs.avail_in);

279 Py_DECREF(v);

289 if (d->unused_data == NULL)

280 return NULL;

290 goto error;

281 }

282 if (c == '\n')

283 break;

284 /* Must be because buf == end */

285 if (n > 0)

286 break;

287 used_v_size = total_v_size;

288 increment = total_v_size >> 2; /* mild exponential growth */

289 total_v_size += increment;

290 if (total_v_size > INT_MAX) {

291 PyErr_SetString(PyExc_OverflowError,

292 "line is longer than a Python string can hold");

293 Py_DECREF(v);

294 return NULL;

295 }

296 if (_PyBytes_Resize(&v, total_v_size) < 0) {

297 return NULL;

298 }

299 buf = BUF(v) + used_v_size;

300 end = BUF(v) + total_v_size;

301 }

302

303 used_v_size = buf - BUF(v);

304 if (used_v_size != total_v_size) {

305 if (_PyBytes_Resize(&v, used_v_size) < 0) {

306 v = NULL;

307 }

308 }

309 return v;

310 }

311

312 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */

313 static void

314 Util_DropReadAhead(BZ2FileObject *f)

315 {

316 if (f->f_buf != NULL) {

317 PyMem_Free(f->f_buf);

318 f->f_buf = NULL;

319 }

320 }

321

322 /* This is a hacked version of Python's fileobject.c:readahead(). */

323 static int

324 Util_ReadAhead(BZ2FileObject *f, int bufsize)

325 {

326 int chunksize;

327 int bzerror;

328

329 if (f->f_buf != NULL) {

330 if((f->f_bufend - f->f_bufptr) >= 1)

331 return 0;

332 else

333 Util_DropReadAhead(f);

334 }

335 if (f->mode == MODE_READ_EOF) {

336 f->f_bufptr = f->f_buf;

337 f->f_bufend = f->f_buf;

338 return 0;

339 }

340 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {

341 PyErr_NoMemory();

342 return -1;

343 }

344 Py_BEGIN_ALLOW_THREADS

345 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);

346 Py_END_ALLOW_THREADS

347 f->pos += chunksize;

348 if (bzerror == BZ_STREAM_END) {

349 f->size = f->pos;

350 f->mode = MODE_READ_EOF;

351 } else if (bzerror != BZ_OK) {

352 Util_CatchBZ2Error(bzerror);

353 Util_DropReadAhead(f);

354 return -1;

355 }

356 f->f_bufptr = f->f_buf;

357 f->f_bufend = f->f_buf + chunksize;

358 return 0;

359 }

360

361 /* This is a hacked version of Python's

362 * fileobject.c:readahead_get_line_skip(). */

363 static PyBytesObject *

364 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)

365 {

366 PyBytesObject* s;

367 char *bufptr;

368 char *buf;

369 int len;

370

371 if (f->f_buf == NULL)

372 if (Util_ReadAhead(f, bufsize) < 0)

373 return NULL;

374

375 len = f->f_bufend - f->f_bufptr;

376 if (len == 0)

377 return (PyBytesObject *)

378 PyBytes_FromStringAndSize(NULL, skip);

379 bufptr = memchr(f->f_bufptr, '\n', len);

380 if (bufptr != NULL) {

381 bufptr++; /* Count the '\n' */

382 len = bufptr - f->f_bufptr;

383 s = (PyBytesObject *)

384 PyBytes_FromStringAndSize(NULL, skip+len);

385 if (s == NULL)

386 return NULL;

387 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);

388 f->f_bufptr = bufptr;

389 if (bufptr == f->f_bufend)

390 Util_DropReadAhead(f);

391 } else {

392 bufptr = f->f_bufptr;

393 buf = f->f_buf;

394 f->f_buf = NULL; /* Force new readahead buffer */

395 s = Util_ReadAheadGetLineSkip(f, skip+len,

396 bufsize + (bufsize>>2));

397 if (s == NULL) {

398 PyMem_Free(buf);

399 return NULL;

400 }

401 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);

402 PyMem_Free(buf);

403 }

404 return s;

405 }

406

407 /* ===================================================================== */

408 /* Methods of BZ2File. */

409

410 PyDoc_STRVAR(BZ2File_read__doc__,

411 "read([size]) -> string\n\

412 \n\

413 Read at most size uncompressed bytes, returned as a string. If the size\n\

414 argument is negative or omitted, read until EOF is reached.\n\

415 ");

416

417 /* This is a hacked version of Python's fileobject.c:file_read(). */

418 static PyObject *

419 BZ2File_read(BZ2FileObject *self, PyObject *args)

420 {

421 long bytesrequested = -1;

422 size_t bytesread, buffersize, chunksize;

423 int bzerror;

424 PyObject *ret = NULL;

425

426 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))

427 return NULL;

428

429 ACQUIRE_LOCK(self);

430 switch (self->mode) {

431 case MODE_READ:

432 break;

433 case MODE_READ_EOF:

434 ret = PyBytes_FromStringAndSize("", 0);

435 goto cleanup;

436 case MODE_CLOSED:

437 PyErr_SetString(PyExc_ValueError,

438 "I/O operation on closed file");

439 goto cleanup;

440 default:

441 PyErr_SetString(PyExc_IOError,

442 "file is not ready for reading");

443 goto cleanup;

444 }

445

446 /* refuse to mix with f.next() */

447 if (check_iterbuffered(self))

448 goto cleanup;

449

450 if (bytesrequested < 0)

451 buffersize = Util_NewBufferSize((size_t)0);

452 else

453 buffersize = bytesrequested;

454 if (buffersize > INT_MAX) {

455 PyErr_SetString(PyExc_OverflowError,

456 "requested number of bytes is "

457 "more than a Python string can hold");

458 goto cleanup;

459 }

460 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);

461 if (ret == NULL || buffersize == 0)

462 goto cleanup;

463 bytesread = 0;

464

465 for (;;) {

466 Py_BEGIN_ALLOW_THREADS

467 chunksize = BZ2_bzRead(&bzerror, self->fp,

468 BUF(ret)+bytesread,

469 buffersize-bytesread);

470 self->pos += chunksize;

471 Py_END_ALLOW_THREADS

472 bytesread += chunksize;

473 if (bzerror == BZ_STREAM_END) {

474 self->size = self->pos;

475 self->mode = MODE_READ_EOF;

476 break;

477 } else if (bzerror != BZ_OK) {

478 Util_CatchBZ2Error(bzerror);

479 Py_DECREF(ret);

480 ret = NULL;

481 goto cleanup;

482 }

483 if (bytesrequested < 0) {

484 buffersize = Util_NewBufferSize(buffersize);

485 if (_PyBytes_Resize(&ret, buffersize) < 0) {

486 ret = NULL;

487 goto cleanup;

488 }

291 }

489 } else {

490 break;

292 break;

491 }

293 }

492 }

294 if (d->bzs.avail_in == 0)

493 if (bytesread != buffersize) {

295 break;

494 if (_PyBytes_Resize(&ret, bytesread) < 0) {

296 if (d->bzs.avail_out == 0) {

495 ret = NULL;

297 if (grow_buffer(&result) < 0)

298 goto error;

299 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;

300 d->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;

496 }

301 }

497 }

302 }

498

303 if (data_size != PyBytes_GET_SIZE(result))

499 cleanup:

304 if (_PyBytes_Resize(&result, data_size) < 0)

500 RELEASE_LOCK(self);

305 goto error;

501 return ret;

306 return result;

502 }

307

503

308 error:

504 PyDoc_STRVAR(BZ2File_readline__doc__,

309 Py_XDECREF(result);

505 "readline([size]) -> string\n\

310 return NULL;

506 \n\

311 }

507 Return the next line from the file, as a string, retaining newline.\n\

312

508 A non-negative size argument will limit the maximum number of bytes to\n\

313 static PyObject *

509 return (an incomplete line may be returned then). Return an empty\n\

314 Decompressor_decompress(Decompressor *self, PyObject *arg)

510 string at EOF.\n\

315 {

511 ");

316 Py_buffer buffer;

512

317 PyObject *result;

513 static PyObject *

318

514 BZ2File_readline(BZ2FileObject *self, PyObject *args)

319 if (self->eof) {

515 {

320 PyErr_SetString(PyExc_EOFError, "End of stream already reached");

516 PyObject *ret = NULL;

321 return NULL;

517 int sizehint = -1;

322 }

518

323 if (PyObject_GetBuffer(arg, &buffer, PyBUF_SIMPLE) < 0)

519 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))

324 return NULL;

520 return NULL;

325 result = decompress(self, buffer.buf, buffer.len);

521

326 PyBuffer_Release(&buffer);

522 ACQUIRE_LOCK(self);

327 return result;

523 switch (self->mode) {

328 }

524 case MODE_READ:

329

525 break;

330 static int

526 case MODE_READ_EOF:

331 Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)

527 ret = PyBytes_FromStringAndSize("", 0);

332 {

528 goto cleanup;

529 case MODE_CLOSED:

530 PyErr_SetString(PyExc_ValueError,

531 "I/O operation on closed file");

532 goto cleanup;

533 default:

534 PyErr_SetString(PyExc_IOError,

535 "file is not ready for reading");

536 goto cleanup;

537 }

538

539 /* refuse to mix with f.next() */

540 if (check_iterbuffered(self))

541 goto cleanup;

542

543 if (sizehint == 0)

544 ret = PyBytes_FromStringAndSize("", 0);

545 else

546 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);

547

548 cleanup:

549 RELEASE_LOCK(self);

550 return ret;

551 }

552

553 PyDoc_STRVAR(BZ2File_readlines__doc__,

554 "readlines([size]) -> list\n\

555 \n\

556 Call readline() repeatedly and return a list of lines read.\n\

557 The optional size argument, if given, is an approximate bound on the\n\

558 total number of bytes in the lines returned.\n\

559 ");

560

561 /* This is a hacked version of Python's fileobject.c:file_readlines(). */

562 static PyObject *

563 BZ2File_readlines(BZ2FileObject *self, PyObject *args)

564 {

565 long sizehint = 0;

566 PyObject *list = NULL;

567 PyObject *line;

568 char small_buffer[SMALLCHUNK];

569 char *buffer = small_buffer;

570 size_t buffersize = SMALLCHUNK;

571 PyObject *big_buffer = NULL;

572 size_t nfilled = 0;

573 size_t nread;

574 size_t totalread = 0;

575 char *p, *q, *end;

576 int err;

577 int shortread = 0;

578 int bzerror;

333 int bzerror;

579

334

580 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))

335 self->unused_data = PyBytes_FromStringAndSize("", 0);

581 return NULL;

336 if (self->unused_data == NULL)

582

337 return -1;

583 ACQUIRE_LOCK(self);

338 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);

584 switch (self->mode) {

339 if (catch_bz2_error(bzerror)) {

585 case MODE_READ:

340 Py_CLEAR(self->unused_data);

586 break;

341 return -1;

587 case MODE_READ_EOF:

342 }

588 list = PyList_New(0);

343 return 0;

589 goto cleanup;

344 }

590 case MODE_CLOSED:

345

591 PyErr_SetString(PyExc_ValueError,

346 static void

592 "I/O operation on closed file");

347 Decompressor_dealloc(Decompressor *self)

593 goto cleanup;

348 {

594 default:

595 PyErr_SetString(PyExc_IOError,

596 "file is not ready for reading");

597 goto cleanup;

598 }

599

600 /* refuse to mix with f.next() */

601 if (check_iterbuffered(self))

602 goto cleanup;

603

604 if ((list = PyList_New(0)) == NULL)

605 goto cleanup;

606

607 for (;;) {

608 Py_BEGIN_ALLOW_THREADS

609 nread = BZ2_bzRead(&bzerror, self->fp,

610 buffer+nfilled, buffersize-nfilled);

611 self->pos += nread;

612 Py_END_ALLOW_THREADS

613 if (bzerror == BZ_STREAM_END) {

614 self->size = self->pos;

615 self->mode = MODE_READ_EOF;

616 if (nread == 0) {

617 sizehint = 0;

618 break;

619 }

620 shortread = 1;

621 } else if (bzerror != BZ_OK) {

622 Util_CatchBZ2Error(bzerror);

623 error:

624 Py_DECREF(list);

625 list = NULL;

626 goto cleanup;

627 }

628 totalread += nread;

629 p = memchr(buffer+nfilled, '\n', nread);

630 if (!shortread && p == NULL) {

631 /* Need a larger buffer to fit this line */

632 nfilled += nread;

633 buffersize *= 2;

634 if (buffersize > INT_MAX) {

635 PyErr_SetString(PyExc_OverflowError,

636 "line is longer than a Python string can hold");

637 goto error;

638 }

639 if (big_buffer == NULL) {

640 /* Create the big buffer */

641 big_buffer = PyBytes_FromStringAndSize(

642 NULL, buffersize);

643 if (big_buffer == NULL)

644 goto error;

645 buffer = PyBytes_AS_STRING(big_buffer);

646 memcpy(buffer, small_buffer, nfilled);

647 }

648 else {

649 /* Grow the big buffer */

650 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){

651 big_buffer = NULL;

652 goto error;

653 }

654 buffer = PyBytes_AS_STRING(big_buffer);

655 }

656 continue;

657 }

658 end = buffer+nfilled+nread;

659 q = buffer;

660 while (p != NULL) {

661 /* Process complete lines */

662 p++;

663 line = PyBytes_FromStringAndSize(q, p-q);

664 if (line == NULL)

665 goto error;

666 err = PyList_Append(list, line);

667 Py_DECREF(line);

668 if (err != 0)

669 goto error;

670 q = p;

671 p = memchr(q, '\n', end-q);

672 }

673 /* Move the remaining incomplete line to the start */

674 nfilled = end-q;

675 memmove(buffer, q, nfilled);

676 if (sizehint > 0)

677 if (totalread >= (size_t)sizehint)

678 break;

679 if (shortread) {

680 sizehint = 0;

681 break;

682 }

683 }

684 if (nfilled != 0) {

685 /* Partial last line */

686 line = PyBytes_FromStringAndSize(buffer, nfilled);

687 if (line == NULL)

688 goto error;

689 if (sizehint > 0) {

690 /* Need to complete the last line */

691 PyObject *rest = Util_GetLine(self, 0);

692 if (rest == NULL) {

693 Py_DECREF(line);

694 goto error;

695 }

696 PyBytes_Concat(&line, rest);

697 Py_DECREF(rest);

698 if (line == NULL)

699 goto error;

700 }

701 err = PyList_Append(list, line);

702 Py_DECREF(line);

703 if (err != 0)

704 goto error;

705 }

706

707 cleanup:

708 RELEASE_LOCK(self);

709 if (big_buffer) {

710 Py_DECREF(big_buffer);

711 }

712 return list;

713 }

714

715 PyDoc_STRVAR(BZ2File_write__doc__,

716 "write(data) -> None\n\

717 \n\

718 Write the 'data' string to file. Note that due to buffering, close() may\n\

719 be needed before the file on disk reflects the data written.\n\

720 ");

721

722 /* This is a hacked version of Python's fileobject.c:file_write(). */

723 static PyObject *

724 BZ2File_write(BZ2FileObject *self, PyObject *args)

725 {

726 PyObject *ret = NULL;

727 Py_buffer pbuf;

728 char *buf;

729 int len;

730 int bzerror;

349 int bzerror;

731

350

732 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))

351 Py_CLEAR(self->unused_data);

733 return NULL;

352 bzerror = BZ2_bzDecompressEnd(&self->bzs);

734 buf = pbuf.buf;

353 if (catch_bz2_error(bzerror))

735 len = pbuf.len;

354 PyErr_WriteUnraisable((PyObject *)self);

736

737 ACQUIRE_LOCK(self);

738 switch (self->mode) {

739 case MODE_WRITE:

740 break;

741

742 case MODE_CLOSED:

743 PyErr_SetString(PyExc_ValueError,

744 "I/O operation on closed file");

745 goto cleanup;

746

747 default:

748 PyErr_SetString(PyExc_IOError,

749 "file is not ready for writing");

750 goto cleanup;

751 }

752

753 Py_BEGIN_ALLOW_THREADS

754 BZ2_bzWrite (&bzerror, self->fp, buf, len);

755 self->pos += len;

756 Py_END_ALLOW_THREADS

757

758 if (bzerror != BZ_OK) {

759 Util_CatchBZ2Error(bzerror);

760 goto cleanup;

761 }

762

763 Py_INCREF(Py_None);

764 ret = Py_None;

765

766 cleanup:

767 PyBuffer_Release(&pbuf);

768 RELEASE_LOCK(self);

769 return ret;

770 }

771

772 PyDoc_STRVAR(BZ2File_writelines__doc__,

773 "writelines(sequence_of_strings) -> None\n\

774 \n\

775 Write the sequence of strings to the file. Note that newlines are not\n\

776 added. The sequence can be any iterable object producing strings. This is\n\

777 equivalent to calling write() for each string.\n\

778 ");

779

780 /* This is a hacked version of Python's fileobject.c:file_writelines(). */

781 static PyObject *

782 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)

783 {

784 #define CHUNKSIZE 1000

785 PyObject *list = NULL;

786 PyObject *iter = NULL;

787 PyObject *ret = NULL;

788 PyObject *line;

789 int i, j, index, len, islist;

790 int bzerror;

791

792 ACQUIRE_LOCK(self);

793 switch (self->mode) {

794 case MODE_WRITE:

795 break;

796

797 case MODE_CLOSED:

798 PyErr_SetString(PyExc_ValueError,

799 "I/O operation on closed file");

800 goto error;

801

802 default:

803 PyErr_SetString(PyExc_IOError,

804 "file is not ready for writing");

805 goto error;

806 }

807

808 islist = PyList_Check(seq);

809 if (!islist) {

810 iter = PyObject_GetIter(seq);

811 if (iter == NULL) {

812 PyErr_SetString(PyExc_TypeError,

813 "writelines() requires an iterable argument");

814 goto error;

815 }

816 list = PyList_New(CHUNKSIZE);

817 if (list == NULL)

818 goto error;

819 }

820

821 /* Strategy: slurp CHUNKSIZE lines into a private list,

822 checking that they are all strings, then write that list

823 without holding the interpreter lock, then come back for more. */

824 for (index = 0; ; index += CHUNKSIZE) {

825 if (islist) {

826 Py_XDECREF(list);

827 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);

828 if (list == NULL)

829 goto error;

830 j = PyList_GET_SIZE(list);

831 }

832 else {

833 for (j = 0; j < CHUNKSIZE; j++) {

834 line = PyIter_Next(iter);

835 if (line == NULL) {

836 if (PyErr_Occurred())

837 goto error;

838 break;

839 }

840 PyList_SetItem(list, j, line);

841 }

842 }

843 if (j == 0)

844 break;

845

846 /* Check that all entries are indeed byte strings. If not,

847 apply the same rules as for file.write() and

848 convert the rets to strings. This is slow, but

849 seems to be the only way since all conversion APIs

850 could potentially execute Python code. */

851 for (i = 0; i < j; i++) {

852 PyObject *v = PyList_GET_ITEM(list, i);

853 if (!PyBytes_Check(v)) {

854 const char *buffer;

855 Py_ssize_t len;

856 if (PyObject_AsCharBuffer(v, &buffer, &len)) {

857 PyErr_SetString(PyExc_TypeError,

858 "writelines() "

859 "argument must be "

860 "a sequence of "

861 "bytes objects");

862 goto error;

863 }

864 line = PyBytes_FromStringAndSize(buffer,

865 len);

866 if (line == NULL)

867 goto error;

868 Py_DECREF(v);

869 PyList_SET_ITEM(list, i, line);

870 }

871 }

872

873 /* Since we are releasing the global lock, the

874 following code may *not* execute Python code. */

875 Py_BEGIN_ALLOW_THREADS

876 for (i = 0; i < j; i++) {

877 line = PyList_GET_ITEM(list, i);

878 len = PyBytes_GET_SIZE(line);

879 BZ2_bzWrite (&bzerror, self->fp,

880 PyBytes_AS_STRING(line), len);

881 if (bzerror != BZ_OK) {

882 Py_BLOCK_THREADS

883 Util_CatchBZ2Error(bzerror);

884 goto error;

885 }

886 }

887 Py_END_ALLOW_THREADS

888

889 if (j < CHUNKSIZE)

890 break;

891 }

892

893 Py_INCREF(Py_None);

894 ret = Py_None;

895

896 error:

897 RELEASE_LOCK(self);

898 Py_XDECREF(list);

899 Py_XDECREF(iter);

900 return ret;

901 #undef CHUNKSIZE

902 }

903

904 PyDoc_STRVAR(BZ2File_seek__doc__,

905 "seek(offset [, whence]) -> None\n\

906 \n\

907 Move to new file position. Argument offset is a byte count. Optional\n\

908 argument whence defaults to 0 (offset from start of file, offset\n\

909 should be >= 0); other values are 1 (move relative to current position,\n\

910 positive or negative), and 2 (move relative to end of file, usually\n\

911 negative, although many platforms allow seeking beyond the end of a file).\n\

912 \n\

913 Note that seeking of bz2 files is emulated, and depending on the parameters\n\

914 the operation may be extremely slow.\n\

915 ");

916

917 static PyObject *

918 BZ2File_seek(BZ2FileObject *self, PyObject *args)

919 {

920 int where = 0;

921 PyObject *offobj;

922 Py_off_t offset;

923 char small_buffer[SMALLCHUNK];

924 char *buffer = small_buffer;

925 size_t buffersize = SMALLCHUNK;

926 Py_off_t bytesread = 0;

927 size_t readsize;

928 int chunksize;

929 int bzerror;

930 PyObject *ret = NULL;

931

932 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))

933 return NULL;

934 #if !defined(HAVE_LARGEFILE_SUPPORT)

935 offset = PyLong_AsLong(offobj);

936 #else

937 offset = PyLong_Check(offobj) ?

938 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);

939 #endif

940 if (PyErr_Occurred())

941 return NULL;

942

943 ACQUIRE_LOCK(self);

944 Util_DropReadAhead(self);

945 switch (self->mode) {

946 case MODE_READ:

947 case MODE_READ_EOF:

948 break;

949

950 case MODE_CLOSED:

951 PyErr_SetString(PyExc_ValueError,

952 "I/O operation on closed file");

953 goto cleanup;

954

955 default:

956 PyErr_SetString(PyExc_IOError,

957 "seek works only while reading");

958 goto cleanup;

959 }

960

961 if (where == 2) {

962 if (self->size == -1) {

963 assert(self->mode != MODE_READ_EOF);

964 for (;;) {

965 Py_BEGIN_ALLOW_THREADS

966 chunksize = BZ2_bzRead(&bzerror, self->fp,

967 buffer, buffersize);

968 self->pos += chunksize;

969 Py_END_ALLOW_THREADS

970

971 bytesread += chunksize;

972 if (bzerror == BZ_STREAM_END) {

973 break;

974 } else if (bzerror != BZ_OK) {

975 Util_CatchBZ2Error(bzerror);

976 goto cleanup;

977 }

978 }

979 self->mode = MODE_READ_EOF;

980 self->size = self->pos;

981 bytesread = 0;

982 }

983 offset = self->size + offset;

984 } else if (where == 1) {

985 offset = self->pos + offset;

986 }

987

988 /* Before getting here, offset must be the absolute position the file

989 * pointer should be set to. */

990

991 if (offset >= self->pos) {

992 /* we can move forward */

993 offset -= self->pos;

994 } else {

995 /* we cannot move back, so rewind the stream */

996 BZ2_bzReadClose(&bzerror, self->fp);

997 if (bzerror != BZ_OK) {

998 Util_CatchBZ2Error(bzerror);

999 goto cleanup;

1000 }

1001 rewind(self->rawfp);

1002 self->pos = 0;

1003 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,

1004 0, 0, NULL, 0);

1005 if (bzerror != BZ_OK) {

1006 Util_CatchBZ2Error(bzerror);

1007 goto cleanup;

1008 }

1009 self->mode = MODE_READ;

1010 }

1011

1012 if (offset <= 0 || self->mode == MODE_READ_EOF)

1013 goto exit;

1014

1015 /* Before getting here, offset must be set to the number of bytes

1016 * to walk forward. */

1017 for (;;) {

1018 if (offset-bytesread > buffersize)

1019 readsize = buffersize;

1020 else

1021 /* offset might be wider that readsize, but the result

1022 * of the subtraction is bound by buffersize (see the

1023 * condition above). buffersize is 8192. */

1024 readsize = (size_t)(offset-bytesread);

1025 Py_BEGIN_ALLOW_THREADS

1026 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);

1027 self->pos += chunksize;

1028 Py_END_ALLOW_THREADS

1029 bytesread += chunksize;

1030 if (bzerror == BZ_STREAM_END) {

1031 self->size = self->pos;

1032 self->mode = MODE_READ_EOF;

1033 break;

1034 } else if (bzerror != BZ_OK) {

1035 Util_CatchBZ2Error(bzerror);

1036 goto cleanup;

1037 }

1038 if (bytesread == offset)

1039 break;

1040 }

1041

1042 exit:

1043 Py_INCREF(Py_None);

1044 ret = Py_None;

1045

1046 cleanup:

1047 RELEASE_LOCK(self);

1048 return ret;

1049 }

1050

1051 PyDoc_STRVAR(BZ2File_tell__doc__,

1052 "tell() -> int\n\

1053 \n\

1054 Return the current file position, an integer (may be a long integer).\n\

1055 ");

1056

1057 static PyObject *

1058 BZ2File_tell(BZ2FileObject *self, PyObject *args)

1059 {

1060 PyObject *ret = NULL;

1061

1062 if (self->mode == MODE_CLOSED) {

1063 PyErr_SetString(PyExc_ValueError,

1064 "I/O operation on closed file");

1065 goto cleanup;

1066 }

1067

1068 #if !defined(HAVE_LARGEFILE_SUPPORT)

1069 ret = PyLong_FromLong(self->pos);

1070 #else

1071 ret = PyLong_FromLongLong(self->pos);

1072 #endif

1073

1074 cleanup:

1075 return ret;

1076 }

1077

1078 PyDoc_STRVAR(BZ2File_close__doc__,

1079 "close() -> None or (perhaps) an integer\n\

1080 \n\

1081 Close the file. Sets data attribute .closed to true. A closed file\n\

1082 cannot be used for further I/O operations. close() may be called more\n\

1083 than once without error.\n\

1084 ");

1085

1086 static PyObject *

1087 BZ2File_close(BZ2FileObject *self)

1088 {

1089 PyObject *ret = NULL;

1090 int bzerror = BZ_OK;

1091

1092 if (self->mode == MODE_CLOSED) {

1093 Py_RETURN_NONE;

1094 }

1095

1096 ACQUIRE_LOCK(self);

1097 switch (self->mode) {

1098 case MODE_READ:

1099 case MODE_READ_EOF:

1100 BZ2_bzReadClose(&bzerror, self->fp);

1101 break;

1102 case MODE_WRITE:

1103 BZ2_bzWriteClose(&bzerror, self->fp,

1104 0, NULL, NULL);

1105 break;

1106 }

1107 self->mode = MODE_CLOSED;

1108 fclose(self->rawfp);

1109 self->rawfp = NULL;

1110 if (bzerror == BZ_OK) {

1111 Py_INCREF(Py_None);

1112 ret = Py_None;

1113 }

1114 else {

1115 Util_CatchBZ2Error(bzerror);

1116 }

1117

1118 RELEASE_LOCK(self);

1119 return ret;

1120 }

1121

1122 PyDoc_STRVAR(BZ2File_enter_doc,

1123 "__enter__() -> self.");

1124

1125 static PyObject *

1126 BZ2File_enter(BZ2FileObject *self)

1127 {

1128 if (self->mode == MODE_CLOSED) {

1129 PyErr_SetString(PyExc_ValueError,

1130 "I/O operation on closed file");

1131 return NULL;

1132 }

1133 Py_INCREF(self);

1134 return (PyObject *) self;

1135 }

1136

1137 PyDoc_STRVAR(BZ2File_exit_doc,

1138 "__exit__(*excinfo) -> None. Closes the file.");

1139

1140 static PyObject *

1141 BZ2File_exit(BZ2FileObject *self, PyObject *args)

1142 {

1143 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);

1144 if (!ret)

1145 /* If error occurred, pass through */

1146 return NULL;

1147 Py_DECREF(ret);

1148 Py_RETURN_NONE;

1149 }

1150

1151

1152 static PyObject *BZ2File_getiter(BZ2FileObject *self);

1153

1154 static PyMethodDef BZ2File_methods[] = {

1155 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},

1156 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__ doc__},

1157 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readline s__doc__},

1158 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},

1159 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__ doc__},

1160 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},

1161 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},

1162 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},

1163 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},

1164 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},

1165 {NULL, NULL} /* sentinel */

1166 };

1167

1168

1169 /* ===================================================================== */

1170 /* Getters and setters of BZ2File. */

1171

1172 static PyObject *

1173 BZ2File_get_closed(BZ2FileObject *self, void *closure)

1174 {

1175 return PyLong_FromLong(self->mode == MODE_CLOSED);

1176 }

1177

1178 static PyGetSetDef BZ2File_getset[] = {

1179 {"closed", (getter)BZ2File_get_closed, NULL,

1180 "True if the file is closed"},

1181 {NULL} /* Sentinel */

1182 };

1183

1184

1185 /* ===================================================================== */

1186 /* Slot definitions for BZ2File_Type. */

1187

1188 static int

1189 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)

1190 {

1191 static char *kwlist[] = {"filename", "mode", "buffering",

1192 "compresslevel", 0};

1193 PyObject *name_obj = NULL;

1194 char *name;

1195 char *mode = "r";

1196 int buffering = -1;

1197 int compresslevel = 9;

1198 int bzerror;

1199 int mode_char = 0;

1200

1201 self->size = -1;

1202

1203 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",

1204 kwlist, PyUnicode_FSConverter, &name_obj,

1205 &mode, &buffering,

1206 &compresslevel))

1207 return -1;

1208

1209 name = PyBytes_AsString(name_obj);

1210 if (compresslevel < 1 || compresslevel > 9) {

1211 PyErr_SetString(PyExc_ValueError,

1212 "compresslevel must be between 1 and 9");

1213 Py_DECREF(name_obj);

1214 return -1;

1215 }

1216

1217 for (;;) {

1218 int error = 0;

1219 switch (*mode) {

1220 case 'r':

1221 case 'w':

1222 if (mode_char)

1223 error = 1;

1224 mode_char = *mode;

1225 break;

1226

1227 case 'b':

1228 break;

1229

1230 default:

1231 error = 1;

1232 break;

1233 }

1234 if (error) {

1235 PyErr_Format(PyExc_ValueError,

1236 "invalid mode char %c", *mode);

1237 Py_DECREF(name_obj);

1238 return -1;

1239 }

1240 mode++;

1241 if (*mode == '\0')

1242 break;

1243 }

1244

1245 if (mode_char == 0) {

1246 mode_char = 'r';

1247 }

1248

1249 mode = (mode_char == 'r') ? "rb" : "wb";

1250

1251 self->rawfp = fopen(name, mode);

1252 Py_DECREF(name_obj);

1253 if (self->rawfp == NULL) {

1254 PyErr_SetFromErrno(PyExc_IOError);

1255 return -1;

1256 }

1257 /* XXX Ignore buffering */

1258

1259 /* From now on, we have stuff to dealloc, so jump to error label

1260 * instead of returning */

1261

1262 #ifdef WITH_THREAD

1263 self->lock = PyThread_allocate_lock();

1264 if (!self->lock) {

1265 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");

1266 goto error;

1267 }

1268 #endif

1269

1270 if (mode_char == 'r')

1271 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,

1272 0, 0, NULL, 0);

1273 else

1274 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,

1275 compresslevel, 0, 0);

1276

1277 if (bzerror != BZ_OK) {

1278 Util_CatchBZ2Error(bzerror);

1279 goto error;

1280 }

1281

1282 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;

1283

1284 return 0;

1285

1286 error:

1287 fclose(self->rawfp);

1288 self->rawfp = NULL;

1289 #ifdef WITH_THREAD

1290 if (self->lock) {

1291 PyThread_free_lock(self->lock);

1292 self->lock = NULL;

1293 }

1294 #endif

1295 return -1;

1296 }

1297

1298 static void

1299 BZ2File_dealloc(BZ2FileObject *self)

1300 {

1301 int bzerror;

1302 #ifdef WITH_THREAD

1303 if (self->lock)

1304 PyThread_free_lock(self->lock);

1305 #endif

1306 switch (self->mode) {

1307 case MODE_READ:

1308 case MODE_READ_EOF:

1309 BZ2_bzReadClose(&bzerror, self->fp);

1310 break;

1311 case MODE_WRITE:

1312 BZ2_bzWriteClose(&bzerror, self->fp,

1313 0, NULL, NULL);

1314 break;

1315 }

1316 Util_DropReadAhead(self);

1317 if (self->rawfp != NULL)

1318 fclose(self->rawfp);

1319 Py_TYPE(self)->tp_free((PyObject *)self);

355 Py_TYPE(self)->tp_free((PyObject *)self);

1320 }

356 }

1321

357

1322 /* This is a hacked version of Python's fileobject.c:file_getiter(). */

358 static PyMethodDef Decompressor_methods[] = {

1323 static PyObject *

359 {"decompress", (PyCFunction)Decompressor_decompress, METH_O,

1324 BZ2File_getiter(BZ2FileObject *self)

360 "Provide a block of data to the decompressor."},

1325 {

361 {NULL}

1326 if (self->mode == MODE_CLOSED) {

362 };

1327 PyErr_SetString(PyExc_ValueError,

363

1328 "I/O operation on closed file");

364 static PyMemberDef Decompressor_members[] = {

1329 return NULL;

365 {"eof", T_INT, offsetof(Decompressor, eof), READONLY,

1330 }

366 "True if the end-of-stream marker has been reached."},

1331 Py_INCREF((PyObject*)self);

367 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,

1332 return (PyObject *)self;

368 "Data found after the end of the compressed stream."},

1333 }

369 {NULL}

1334

370 };

1335 /* This is a hacked version of Python's fileobject.c:file_iternext(). */

371

1336 #define READAHEAD_BUFSIZE 8192

372 static PyTypeObject Decompressor_Type = {

1337 static PyObject *

1338 BZ2File_iternext(BZ2FileObject *self)

1339 {

1340 PyBytesObject* ret;

1341 ACQUIRE_LOCK(self);

1342 if (self->mode == MODE_CLOSED) {

1343 RELEASE_LOCK(self);

1344 PyErr_SetString(PyExc_ValueError,

1345 "I/O operation on closed file");

1346 return NULL;

1347 }

1348 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);

1349 RELEASE_LOCK(self);

1350 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {

1351 Py_XDECREF(ret);

1352 return NULL;

1353 }

1354 return (PyObject *)ret;

1355 }

1356

1357 /* ===================================================================== */

1358 /* BZ2File_Type definition. */

1359

1360 PyDoc_VAR(BZ2File__doc__) =

1361 PyDoc_STR(

1362 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\

1363 \n\

1364 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\

1365 writing. When opened for writing, the file will be created if it doesn't\n\

1366 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\

1367 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\

1368 is given, must be a number between 1 and 9.\n\

1369 Data read is always returned in bytes; data written ought to be bytes.\n\

1370 ");

1371

1372 static PyTypeObject BZ2File_Type = {

1373 PyVarObject_HEAD_INIT(NULL, 0)

373 PyVarObject_HEAD_INIT(NULL, 0)

1374 "bz2.BZ2File", /*tp_name*/

374 "_bz2.Decompressor", /* tp_name */

1375 sizeof(BZ2FileObject), /*tp_basicsize*/

375 sizeof(Decompressor), /* tp_basicsize */

1376 0, /*tp_itemsize*/

376 0, /* tp_itemsize */

1377 (destructor)BZ2File_dealloc, /*tp_dealloc*/

377 (destructor)Decompressor_dealloc, /* tp_dealloc */

1378 0, /*tp_print*/

378 0, /* tp_print */

1379 0, /*tp_getattr*/

379 0, /* tp_getattr */

1380 0, /*tp_setattr*/

380 0, /* tp_setattr */

1381 0, /*tp_reserved*/

381 0, /* tp_reserved */

1382 0, /*tp_repr*/

382 0, /* tp_repr */

1383 0, /*tp_as_number*/

383 0, /* tp_as_number */

1384 0, /*tp_as_sequence*/

384 0, /* tp_as_sequence */

1385 0, /*tp_as_mapping*/

385 0, /* tp_as_mapping */

1386 0, /*tp_hash*/

386 0, /* tp_hash */

1387 0, /*tp_call*/

387 0, /* tp_call */

1388 0, /*tp_str*/

388 0, /* tp_str */

1389 PyObject_GenericGetAttr,/*tp_getattro*/

389 0, /* tp_getattro */

1390 PyObject_GenericSetAttr,/*tp_setattro*/

390 0, /* tp_setattro */

1391 0, /*tp_as_buffer*/

391 0, /* tp_as_buffer */

1392 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/

392 Py_TPFLAGS_DEFAULT, /* tp_flags */

1393 BZ2File__doc__, /*tp_doc*/

393 "Wrapper for bz2 decompression functions.", /* tp_doc */

1394 0, /*tp_traverse*/

394 0, /* tp_traverse */

1395 0, /*tp_clear*/

395 0, /* tp_clear */

1396 0, /*tp_richcompare*/

396 0, /* tp_richcompare */

1397 0, /*tp_weaklistoffset*/

397 0, /* tp_weaklistoffset */

1398 (getiterfunc)BZ2File_getiter, /*tp_iter*/

398 0, /* tp_iter */

1399 (iternextfunc)BZ2File_iternext, /*tp_iternext*/

399 0, /* tp_iternext */

1400 BZ2File_methods, /*tp_methods*/

400 Decompressor_methods, /* tp_methods */

1401 0, /*tp_members*/

401 Decompressor_members, /* tp_members */

1402 BZ2File_getset, /*tp_getset*/

402 0, /* tp_getset */

1403 0, /*tp_base*/

403 0, /* tp_base */

1404 0, /*tp_dict*/

404 0, /* tp_dict */

1405 0, /*tp_descr_get*/

405 0, /* tp_descr_get */

1406 0, /*tp_descr_set*/

406 0, /* tp_descr_set */

1407 0, /*tp_dictoffset*/

407 0, /* tp_dictoffset */

1408 (initproc)BZ2File_init, /*tp_init*/

408 (initproc)Decompressor_init, /* tp_init */

1409 PyType_GenericAlloc, /*tp_alloc*/

409 0, /* tp_alloc */

1410 PyType_GenericNew, /*tp_new*/

410 PyType_GenericNew, /* tp_new */

1411 PyObject_Free, /*tp_free*/

411 };

1412 0, /*tp_is_gc*/

412

1413 };

413

1414

414 /* Module initialization. */

1415

415

1416 /* ===================================================================== */

416 static struct PyModuleDef _bz2module = {

1417 /* Methods of BZ2Comp. */

1418

1419 PyDoc_STRVAR(BZ2Comp_compress__doc__,

1420 "compress(data) -> string\n\

1421 \n\

1422 Provide more data to the compressor object. It will return chunks of\n\

1423 compressed data whenever possible. When you've finished providing data\n\

1424 to compress, call the flush() method to finish the compression process,\n\

1425 and return what is left in the internal buffers.\n\

1426 ");

1427

1428 static PyObject *

1429 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)

1430 {

1431 Py_buffer pdata;

1432 char *data;

1433 int datasize;

1434 int bufsize = SMALLCHUNK;

1435 PY_LONG_LONG totalout;

1436 PyObject *ret = NULL;

1437 bz_stream *bzs = &self->bzs;

1438 int bzerror;

1439

1440 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))

1441 return NULL;

1442 data = pdata.buf;

1443 datasize = pdata.len;

1444

1445 if (datasize == 0) {

1446 PyBuffer_Release(&pdata);

1447 return PyBytes_FromStringAndSize("", 0);

1448 }

1449

1450 ACQUIRE_LOCK(self);

1451 if (!self->running) {

1452 PyErr_SetString(PyExc_ValueError,

1453 "this object was already flushed");

1454 goto error;

1455 }

1456

1457 ret = PyBytes_FromStringAndSize(NULL, bufsize);

1458 if (!ret)

1459 goto error;

1460

1461 bzs->next_in = data;

1462 bzs->avail_in = datasize;

1463 bzs->next_out = BUF(ret);

1464 bzs->avail_out = bufsize;

1465

1466 totalout = BZS_TOTAL_OUT(bzs);

1467

1468 for (;;) {

1469 Py_BEGIN_ALLOW_THREADS

1470 bzerror = BZ2_bzCompress(bzs, BZ_RUN);

1471 Py_END_ALLOW_THREADS

1472 if (bzerror != BZ_RUN_OK) {

1473 Util_CatchBZ2Error(bzerror);

1474 goto error;

1475 }

1476 if (bzs->avail_in == 0)

1477 break; /* no more input data */

1478 if (bzs->avail_out == 0) {

1479 bufsize = Util_NewBufferSize(bufsize);

1480 if (_PyBytes_Resize(&ret, bufsize) < 0) {

1481 BZ2_bzCompressEnd(bzs);

1482 goto error;

1483 }

1484 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)

1485 - totalout);

1486 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));

1487 }

1488 }

1489

1490 if (_PyBytes_Resize(&ret,

1491 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)

1492 goto error;

1493

1494 RELEASE_LOCK(self);

1495 PyBuffer_Release(&pdata);

1496 return ret;

1497

1498 error:

1499 RELEASE_LOCK(self);

1500 PyBuffer_Release(&pdata);

1501 Py_XDECREF(ret);

1502 return NULL;

1503 }

1504

1505 PyDoc_STRVAR(BZ2Comp_flush__doc__,

1506 "flush() -> string\n\

1507 \n\

1508 Finish the compression process and return what is left in internal buffers.\n\

1509 You must not use the compressor object after calling this method.\n\

1510 ");

1511

1512 static PyObject *

1513 BZ2Comp_flush(BZ2CompObject *self)

1514 {

1515 int bufsize = SMALLCHUNK;

1516 PyObject *ret = NULL;

1517 bz_stream *bzs = &self->bzs;

1518 PY_LONG_LONG totalout;

1519 int bzerror;

1520

1521 ACQUIRE_LOCK(self);

1522 if (!self->running) {

1523 PyErr_SetString(PyExc_ValueError, "object was already "

1524 "flushed");

1525 goto error;

1526 }

1527 self->running = 0;

1528

1529 ret = PyBytes_FromStringAndSize(NULL, bufsize);

1530 if (!ret)

1531 goto error;

1532

1533 bzs->next_out = BUF(ret);

1534 bzs->avail_out = bufsize;

1535

1536 totalout = BZS_TOTAL_OUT(bzs);

1537

1538 for (;;) {

1539 Py_BEGIN_ALLOW_THREADS

1540 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);

1541 Py_END_ALLOW_THREADS

1542 if (bzerror == BZ_STREAM_END) {

1543 break;

1544 } else if (bzerror != BZ_FINISH_OK) {

1545 Util_CatchBZ2Error(bzerror);

1546 goto error;

1547 }

1548 if (bzs->avail_out == 0) {

1549 bufsize = Util_NewBufferSize(bufsize);

1550 if (_PyBytes_Resize(&ret, bufsize) < 0)

1551 goto error;

1552 bzs->next_out = BUF(ret);

1553 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)

1554 - totalout);

1555 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));

1556 }

1557 }

1558

1559 if (bzs->avail_out != 0) {

1560 if (_PyBytes_Resize(&ret,

1561 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)

1562 goto error;

1563 }

1564

1565 RELEASE_LOCK(self);

1566 return ret;

1567

1568 error:

1569 RELEASE_LOCK(self);

1570 Py_XDECREF(ret);

1571 return NULL;

1572 }

1573

1574 static PyMethodDef BZ2Comp_methods[] = {

1575 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,

1576 BZ2Comp_compress__doc__},

1577 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,

1578 BZ2Comp_flush__doc__},

1579 {NULL, NULL} /* sentinel */

1580 };

1581

1582

1583 /* ===================================================================== */

1584 /* Slot definitions for BZ2Comp_Type. */

1585

1586 static int

1587 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)

1588 {

1589 int compresslevel = 9;

1590 int bzerror;

1591 static char *kwlist[] = {"compresslevel", 0};

1592

1593 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",

1594 kwlist, &compresslevel))

1595 return -1;

1596

1597 if (compresslevel < 1 || compresslevel > 9) {

1598 PyErr_SetString(PyExc_ValueError,

1599 "compresslevel must be between 1 and 9");

1600 goto error;

1601 }

1602

1603 #ifdef WITH_THREAD

1604 self->lock = PyThread_allocate_lock();

1605 if (!self->lock) {

1606 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");

1607 goto error;

1608 }

1609 #endif

1610

1611 memset(&self->bzs, 0, sizeof(bz_stream));

1612 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);

1613 if (bzerror != BZ_OK) {

1614 Util_CatchBZ2Error(bzerror);

1615 goto error;

1616 }

1617

1618 self->running = 1;

1619

1620 return 0;

1621 error:

1622 #ifdef WITH_THREAD

1623 if (self->lock) {

1624 PyThread_free_lock(self->lock);

1625 self->lock = NULL;

1626 }

1627 #endif

1628 return -1;

1629 }

1630

1631 static void

1632 BZ2Comp_dealloc(BZ2CompObject *self)

1633 {

1634 #ifdef WITH_THREAD

1635 if (self->lock)

1636 PyThread_free_lock(self->lock);

1637 #endif

1638 BZ2_bzCompressEnd(&self->bzs);

1639 Py_TYPE(self)->tp_free((PyObject *)self);

1640 }

1641

1642

1643 /* ===================================================================== */

1644 /* BZ2Comp_Type definition. */

1645

1646 PyDoc_STRVAR(BZ2Comp__doc__,

1647 "BZ2Compressor([compresslevel=9]) -> compressor object\n\

1648 \n\

1649 Create a new compressor object. This object may be used to compress\n\

1650 data sequentially. If you want to compress data in one shot, use the\n\

1651 compress() function instead. The compresslevel parameter, if given,\n\

1652 must be a number between 1 and 9.\n\

1653 ");

1654

1655 static PyTypeObject BZ2Comp_Type = {

1656 PyVarObject_HEAD_INIT(NULL, 0)

1657 "bz2.BZ2Compressor", /*tp_name*/

1658 sizeof(BZ2CompObject), /*tp_basicsize*/

1659 0, /*tp_itemsize*/

1660 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/

1661 0, /*tp_print*/

1662 0, /*tp_getattr*/

1663 0, /*tp_setattr*/

1664 0, /*tp_reserved*/

1665 0, /*tp_repr*/

1666 0, /*tp_as_number*/

1667 0, /*tp_as_sequence*/

1668 0, /*tp_as_mapping*/

1669 0, /*tp_hash*/

1670 0, /*tp_call*/

1671 0, /*tp_str*/

1672 PyObject_GenericGetAttr,/*tp_getattro*/

1673 PyObject_GenericSetAttr,/*tp_setattro*/

1674 0, /*tp_as_buffer*/

1675 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/

1676 BZ2Comp__doc__, /*tp_doc*/

1677 0, /*tp_traverse*/

1678 0, /*tp_clear*/

1679 0, /*tp_richcompare*/

1680 0, /*tp_weaklistoffset*/

1681 0, /*tp_iter*/

1682 0, /*tp_iternext*/

1683 BZ2Comp_methods, /*tp_methods*/

1684 0, /*tp_members*/

1685 0, /*tp_getset*/

1686 0, /*tp_base*/

1687 0, /*tp_dict*/

1688 0, /*tp_descr_get*/

1689 0, /*tp_descr_set*/

1690 0, /*tp_dictoffset*/

1691 (initproc)BZ2Comp_init, /*tp_init*/

1692 PyType_GenericAlloc, /*tp_alloc*/

1693 PyType_GenericNew, /*tp_new*/

1694 PyObject_Free, /*tp_free*/

1695 0, /*tp_is_gc*/

1696 };

1697

1698

1699 /* ===================================================================== */

1700 /* Members of BZ2Decomp. */

1701

1702 #undef OFF

1703 #define OFF(x) offsetof(BZ2DecompObject, x)

1704

1705 static PyMemberDef BZ2Decomp_members[] = {

1706 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},

1707 {NULL} /* Sentinel */

1708 };

1709

1710

1711 /* ===================================================================== */

1712 /* Methods of BZ2Decomp. */

1713

1714 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,

1715 "decompress(data) -> string\n\

1716 \n\

1717 Provide more data to the decompressor object. It will return chunks\n\

1718 of decompressed data whenever possible. If you try to decompress data\n\

1719 after the end of stream is found, EOFError will be raised. If any data\n\

1720 was found after the end of stream, it'll be ignored and saved in\n\

1721 unused_data attribute.\n\

1722 ");

1723

1724 static PyObject *

1725 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)

1726 {

1727 Py_buffer pdata;

1728 char *data;

1729 int datasize;

1730 int bufsize = SMALLCHUNK;

1731 PY_LONG_LONG totalout;

1732 PyObject *ret = NULL;

1733 bz_stream *bzs = &self->bzs;

1734 int bzerror;

1735

1736 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))

1737 return NULL;

1738 data = pdata.buf;

1739 datasize = pdata.len;

1740

1741 ACQUIRE_LOCK(self);

1742 if (!self->running) {

1743 PyErr_SetString(PyExc_EOFError, "end of stream was "

1744 "already found");

1745 goto error;

1746 }

1747

1748 ret = PyBytes_FromStringAndSize(NULL, bufsize);

1749 if (!ret)

1750 goto error;

1751

1752 bzs->next_in = data;

1753 bzs->avail_in = datasize;

1754 bzs->next_out = BUF(ret);

1755 bzs->avail_out = bufsize;

1756

1757 totalout = BZS_TOTAL_OUT(bzs);

1758

1759 for (;;) {

1760 Py_BEGIN_ALLOW_THREADS

1761 bzerror = BZ2_bzDecompress(bzs);

1762 Py_END_ALLOW_THREADS

1763 if (bzerror == BZ_STREAM_END) {

1764 if (bzs->avail_in != 0) {

1765 Py_DECREF(self->unused_data);

1766 self->unused_data =

1767 PyBytes_FromStringAndSize(bzs->next_in,

1768 bzs->avail_in);

1769 }

1770 self->running = 0;

1771 break;

1772 }

1773 if (bzerror != BZ_OK) {

1774 Util_CatchBZ2Error(bzerror);

1775 goto error;

1776 }

1777 if (bzs->avail_in == 0)

1778 break; /* no more input data */

1779 if (bzs->avail_out == 0) {

1780 bufsize = Util_NewBufferSize(bufsize);

1781 if (_PyBytes_Resize(&ret, bufsize) < 0) {

1782 BZ2_bzDecompressEnd(bzs);

1783 goto error;

1784 }

1785 bzs->next_out = BUF(ret);

1786 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)

1787 - totalout);

1788 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));

1789 }

1790 }

1791

1792 if (bzs->avail_out != 0) {

1793 if (_PyBytes_Resize(&ret,

1794 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)

1795 goto error;

1796 }

1797

1798 RELEASE_LOCK(self);

1799 PyBuffer_Release(&pdata);

1800 return ret;

1801

1802 error:

1803 RELEASE_LOCK(self);

1804 PyBuffer_Release(&pdata);

1805 Py_XDECREF(ret);

1806 return NULL;

1807 }

1808

1809 static PyMethodDef BZ2Decomp_methods[] = {

1810 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_de compress__doc__},

1811 {NULL, NULL} /* sentinel */

1812 };

1813

1814

1815 /* ===================================================================== */

1816 /* Slot definitions for BZ2Decomp_Type. */

1817

1818 static int

1819 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)

1820 {

1821 int bzerror;

1822

1823 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))

1824 return -1;

1825

1826 #ifdef WITH_THREAD

1827 self->lock = PyThread_allocate_lock();

1828 if (!self->lock) {

1829 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");

1830 goto error;

1831 }

1832 #endif

1833

1834 self->unused_data = PyBytes_FromStringAndSize("", 0);

1835 if (!self->unused_data)

1836 goto error;

1837

1838 memset(&self->bzs, 0, sizeof(bz_stream));

1839 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);

1840 if (bzerror != BZ_OK) {

1841 Util_CatchBZ2Error(bzerror);

1842 goto error;

1843 }

1844

1845 self->running = 1;

1846

1847 return 0;

1848

1849 error:

1850 #ifdef WITH_THREAD

1851 if (self->lock) {

1852 PyThread_free_lock(self->lock);

1853 self->lock = NULL;

1854 }

1855 #endif

1856 Py_CLEAR(self->unused_data);

1857 return -1;

1858 }

1859

1860 static void

1861 BZ2Decomp_dealloc(BZ2DecompObject *self)

1862 {

1863 #ifdef WITH_THREAD

1864 if (self->lock)

1865 PyThread_free_lock(self->lock);

1866 #endif

1867 Py_XDECREF(self->unused_data);

1868 BZ2_bzDecompressEnd(&self->bzs);

1869 Py_TYPE(self)->tp_free((PyObject *)self);

1870 }

1871

1872

1873 /* ===================================================================== */

1874 /* BZ2Decomp_Type definition. */

1875

1876 PyDoc_STRVAR(BZ2Decomp__doc__,

1877 "BZ2Decompressor() -> decompressor object\n\

1878 \n\

1879 Create a new decompressor object. This object may be used to decompress\n\

1880 data sequentially. If you want to decompress data in one shot, use the\n\

1881 decompress() function instead.\n\

1882 ");

1883

1884 static PyTypeObject BZ2Decomp_Type = {

1885 PyVarObject_HEAD_INIT(NULL, 0)

1886 "bz2.BZ2Decompressor", /*tp_name*/

1887 sizeof(BZ2DecompObject), /*tp_basicsize*/

1888 0, /*tp_itemsize*/

1889 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/

1890 0, /*tp_print*/

1891 0, /*tp_getattr*/

1892 0, /*tp_setattr*/

1893 0, /*tp_reserved*/

1894 0, /*tp_repr*/

1895 0, /*tp_as_number*/

1896 0, /*tp_as_sequence*/

1897 0, /*tp_as_mapping*/

1898 0, /*tp_hash*/

1899 0, /*tp_call*/

1900 0, /*tp_str*/

1901 PyObject_GenericGetAttr,/*tp_getattro*/

1902 PyObject_GenericSetAttr,/*tp_setattro*/

1903 0, /*tp_as_buffer*/

1904 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/

1905 BZ2Decomp__doc__, /*tp_doc*/

1906 0, /*tp_traverse*/

1907 0, /*tp_clear*/

1908 0, /*tp_richcompare*/

1909 0, /*tp_weaklistoffset*/

1910 0, /*tp_iter*/

1911 0, /*tp_iternext*/

1912 BZ2Decomp_methods, /*tp_methods*/

1913 BZ2Decomp_members, /*tp_members*/

1914 0, /*tp_getset*/

1915 0, /*tp_base*/

1916 0, /*tp_dict*/

1917 0, /*tp_descr_get*/

1918 0, /*tp_descr_set*/

1919 0, /*tp_dictoffset*/

1920 (initproc)BZ2Decomp_init, /*tp_init*/

1921 PyType_GenericAlloc, /*tp_alloc*/

1922 PyType_GenericNew, /*tp_new*/

1923 PyObject_Free, /*tp_free*/

1924 0, /*tp_is_gc*/

1925 };

1926

1927

1928 /* ===================================================================== */

1929 /* Module functions. */

1930

1931 PyDoc_STRVAR(bz2_compress__doc__,

1932 "compress(data [, compresslevel=9]) -> string\n\

1933 \n\

1934 Compress data in one shot. If you want to compress data sequentially,\n\

1935 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\

1936 given, must be a number between 1 and 9.\n\

1937 ");

1938

1939 static PyObject *

1940 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)

1941 {

1942 int compresslevel=9;

1943 Py_buffer pdata;

1944 char *data;

1945 int datasize;

1946 int bufsize;

1947 PyObject *ret = NULL;

1948 bz_stream _bzs;

1949 bz_stream *bzs = &_bzs;

1950 int bzerror;

1951 static char *kwlist[] = {"data", "compresslevel", 0};

1952

1953 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",

1954 kwlist, &pdata,

1955 &compresslevel))

1956 return NULL;

1957 data = pdata.buf;

1958 datasize = pdata.len;

1959

1960 if (compresslevel < 1 || compresslevel > 9) {

1961 PyErr_SetString(PyExc_ValueError,

1962 "compresslevel must be between 1 and 9");

1963 PyBuffer_Release(&pdata);

1964 return NULL;

1965 }

1966

1967 /* Conforming to bz2 manual, this is large enough to fit compressed

1968 * data in one shot. We will check it later anyway. */

1969 bufsize = datasize + (datasize/100+1) + 600;

1970

1971 ret = PyBytes_FromStringAndSize(NULL, bufsize);

1972 if (!ret) {

1973 PyBuffer_Release(&pdata);

1974 return NULL;

1975 }

1976

1977 memset(bzs, 0, sizeof(bz_stream));

1978

1979 bzs->next_in = data;

1980 bzs->avail_in = datasize;

1981 bzs->next_out = BUF(ret);

1982 bzs->avail_out = bufsize;

1983

1984 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);

1985 if (bzerror != BZ_OK) {

1986 Util_CatchBZ2Error(bzerror);

1987 PyBuffer_Release(&pdata);

1988 Py_DECREF(ret);

1989 return NULL;

1990 }

1991

1992 for (;;) {

1993 Py_BEGIN_ALLOW_THREADS

1994 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);

1995 Py_END_ALLOW_THREADS

1996 if (bzerror == BZ_STREAM_END) {

1997 break;

1998 } else if (bzerror != BZ_FINISH_OK) {

1999 BZ2_bzCompressEnd(bzs);

2000 Util_CatchBZ2Error(bzerror);

2001 PyBuffer_Release(&pdata);

2002 Py_DECREF(ret);

2003 return NULL;

2004 }

2005 if (bzs->avail_out == 0) {

2006 bufsize = Util_NewBufferSize(bufsize);

2007 if (_PyBytes_Resize(&ret, bufsize) < 0) {

2008 BZ2_bzCompressEnd(bzs);

2009 PyBuffer_Release(&pdata);

2010 return NULL;

2011 }

2012 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);

2013 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));

2014 }

2015 }

2016

2017 if (bzs->avail_out != 0) {

2018 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {

2019 ret = NULL;

2020 }

2021 }

2022 BZ2_bzCompressEnd(bzs);

2023

2024 PyBuffer_Release(&pdata);

2025 return ret;

2026 }

2027

2028 PyDoc_STRVAR(bz2_decompress__doc__,

2029 "decompress(data) -> decompressed data\n\

2030 \n\

2031 Decompress data in one shot. If you want to decompress data sequentially,\n\

2032 use an instance of BZ2Decompressor instead.\n\

2033 ");

2034

2035 static PyObject *

2036 bz2_decompress(PyObject *self, PyObject *args)

2037 {

2038 Py_buffer pdata;

2039 char *data;

2040 int datasize;

2041 int bufsize = SMALLCHUNK;

2042 PyObject *ret;

2043 bz_stream _bzs;

2044 bz_stream *bzs = &_bzs;

2045 int bzerror;

2046

2047 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))

2048 return NULL;

2049 data = pdata.buf;

2050 datasize = pdata.len;

2051

2052 if (datasize == 0) {

2053 PyBuffer_Release(&pdata);

2054 return PyBytes_FromStringAndSize("", 0);

2055 }

2056

2057 ret = PyBytes_FromStringAndSize(NULL, bufsize);

2058 if (!ret) {

2059 PyBuffer_Release(&pdata);

2060 return NULL;

2061 }

2062

2063 memset(bzs, 0, sizeof(bz_stream));

2064

2065 bzs->next_in = data;

2066 bzs->avail_in = datasize;

2067 bzs->next_out = BUF(ret);

2068 bzs->avail_out = bufsize;

2069

2070 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);

2071 if (bzerror != BZ_OK) {

2072 Util_CatchBZ2Error(bzerror);

2073 Py_DECREF(ret);

2074 PyBuffer_Release(&pdata);

2075 return NULL;

2076 }

2077

2078 for (;;) {

2079 Py_BEGIN_ALLOW_THREADS

2080 bzerror = BZ2_bzDecompress(bzs);

2081 Py_END_ALLOW_THREADS

2082 if (bzerror == BZ_STREAM_END) {

2083 break;

2084 } else if (bzerror != BZ_OK) {

2085 BZ2_bzDecompressEnd(bzs);

2086 Util_CatchBZ2Error(bzerror);

2087 PyBuffer_Release(&pdata);

2088 Py_DECREF(ret);

2089 return NULL;

2090 }

2091 if (bzs->avail_in == 0) {

2092 BZ2_bzDecompressEnd(bzs);

2093 PyErr_SetString(PyExc_ValueError,

2094 "couldn't find end of stream");

2095 PyBuffer_Release(&pdata);

2096 Py_DECREF(ret);

2097 return NULL;

2098 }

2099 if (bzs->avail_out == 0) {

2100 bufsize = Util_NewBufferSize(bufsize);

2101 if (_PyBytes_Resize(&ret, bufsize) < 0) {

2102 BZ2_bzDecompressEnd(bzs);

2103 PyBuffer_Release(&pdata);

2104 return NULL;

2105 }

2106 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);

2107 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));

2108 }

2109 }

2110

2111 if (bzs->avail_out != 0) {

2112 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {

2113 ret = NULL;

2114 }

2115 }

2116 BZ2_bzDecompressEnd(bzs);

2117 PyBuffer_Release(&pdata);

2118

2119 return ret;

2120 }

2121

2122 static PyMethodDef bz2_methods[] = {

2123 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,

2124 bz2_compress__doc__},

2125 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,

2126 bz2_decompress__doc__},

2127 {NULL, NULL} /* sentinel */

2128 };

2129

2130 /* ===================================================================== */

2131 /* Initialization function. */

2132

2133 PyDoc_STRVAR(bz2__doc__,

2134 "The python bz2 module provides a comprehensive interface for\n\

2135 the bz2 compression library. It implements a complete file\n\

2136 interface, one shot (de)compression functions, and types for\n\

2137 sequential (de)compression.\n\

2138 ");

2139

2140

2141 static struct PyModuleDef bz2module = {

2142 PyModuleDef_HEAD_INIT,

417 PyModuleDef_HEAD_INIT,

2143 "bz2",

418 "_bz2",

2144 bz2__doc__,

419 NULL,

2145 -1,

420 -1,

2146 bz2_methods,

421 NULL,

2147 NULL,

422 NULL,

2148 NULL,

423 NULL,

2149 NULL,

424 NULL,

2150 NULL

425 NULL

2151 };

426 };

2152

427

2153 PyMODINIT_FUNC

428 PyMODINIT_FUNC

2154 PyInit_bz2(void)

429 PyInit__bz2(void)

2155 {

430 {

2156 PyObject *m;

431 PyObject *m;

2157

432

2158 if (PyType_Ready(&BZ2File_Type) < 0)

433 if (PyType_Ready(&Compressor_Type) < 0)

2159 return NULL;

434 return NULL;

2160 if (PyType_Ready(&BZ2Comp_Type) < 0)

435 if (PyType_Ready(&Decompressor_Type) < 0)

2161 return NULL;

2162 if (PyType_Ready(&BZ2Decomp_Type) < 0)

2163 return NULL;

436 return NULL;

2164

437

2165 m = PyModule_Create(&bz2module);

438 m = PyModule_Create(&_bz2module);

2166 if (m == NULL)

439 if (m == NULL)

2167 return NULL;

440 return NULL;

2168

441

2169 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));

442 Py_INCREF(&Compressor_Type);

443 PyModule_AddObject(m, "Compressor", (PyObject *)&Compressor_Type);

2170

444

2171 Py_INCREF(&BZ2File_Type);

445 Py_INCREF(&Decompressor_Type);

2172 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);

446 PyModule_AddObject(m, "Decompressor", (PyObject *)&Decompressor_Type);

2173

447

2174 Py_INCREF(&BZ2Comp_Type);

2175 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);

2176

2177 Py_INCREF(&BZ2Decomp_Type);

2178 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);

2179 return m;

448 return m;

2180 }

449 }

OLD

NEW