[issue5863] bz2.BZ2File should accept other file-like objects. - Code Review (original) (raw)
OLD
NEW
1 /*
1 /* _bz2 - Low-level Python interface to libbzip2. */
2
2
3 python-bz2 - python bz2 library interface
4
5 Copyright (c) 2002 Gustavo Niemeyer niemeyer@conectiva.com
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8 */
9
10 #include "Python.h"
3 #include "Python.h"
11 #include <stdio.h>
4 #include "structmember.h"
12 #include <bzlib.h>
5 #include <bzlib.h>
13 #include "structmember.h"
14
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer niemeyer@conectiva.com\n\
23 ";
24
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t;
33 #else
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
35 #endif
36
37 #define BUF(v) PyBytes_AS_STRING(v)
38
39 #define MODE_CLOSED 0
40 #define MODE_READ 1
41 #define MODE_READ_EOF 2
42 #define MODE_WRITE 3
43
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
45
6
46
7
47 #ifdef BZ_CONFIG_ERROR
8 #ifndef BZ_CONFIG_ERROR
48
49 #if SIZEOF_LONG >= 8
50 #define BZS_TOTAL_OUT(bzs) \
51 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
53 #define BZS_TOTAL_OUT(bzs) \
54 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
55 #else
56 #define BZS_TOTAL_OUT(bzs) \
57 bzs->total_out_lo32
58 #endif
59
60 #else /* ! BZ_CONFIG_ERROR */
61
62 #define BZ2_bzRead bzRead
63 #define BZ2_bzReadOpen bzReadOpen
64 #define BZ2_bzReadClose bzReadClose
65 #define BZ2_bzWrite bzWrite
66 #define BZ2_bzWriteOpen bzWriteOpen
67 #define BZ2_bzWriteClose bzWriteClose
68 #define BZ2_bzCompress bzCompress
9 #define BZ2_bzCompress bzCompress
69 #define BZ2_bzCompressInit bzCompressInit
10 #define BZ2_bzCompressInit bzCompressInit
70 #define BZ2_bzCompressEnd bzCompressEnd
11 #define BZ2_bzCompressEnd bzCompressEnd
71 #define BZ2_bzDecompress bzDecompress
12 #define BZ2_bzDecompress bzDecompress
72 #define BZ2_bzDecompressInit bzDecompressInit
13 #define BZ2_bzDecompressInit bzDecompressInit
73 #define BZ2_bzDecompressEnd bzDecompressEnd
14 #define BZ2_bzDecompressEnd bzDecompressEnd
15 #endif /* ! BZ_CONFIG_ERROR */
74
16
75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
76
77 #endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object lock helpers; `obj` must have a `lock` member.
 * With WITH_THREAD defined, ACQUIRE_LOCK first calls
 * PyThread_acquire_lock(obj->lock, 0); only if that call reports failure
 * does it wrap a second call, PyThread_acquire_lock(obj->lock, 1), in
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS.
 * Without WITH_THREAD both macros expand to nothing. */
80 #ifdef WITH_THREAD
81 #define ACQUIRE_LOCK(obj) do { \
82 if (!PyThread_acquire_lock(obj->lock, 0)) { \
83 Py_BEGIN_ALLOW_THREADS \
84 PyThread_acquire_lock(obj->lock, 1); \
85 Py_END_ALLOW_THREADS \
86 } } while(0)
87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
88 #else
89 #define ACQUIRE_LOCK(obj)
90 #define RELEASE_LOCK(obj)
91 #endif
92
93 /* Bits in f_newlinetypes */
94 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
95 #define NEWLINE_CR 1 /* \r newline seen */
96 #define NEWLINE_LF 2 /* \n newline seen */
97 #define NEWLINE_CRLF 4 /* \r\n newline seen */
98
99 /* ===================================================================== */
100 /* Structure definitions. */
101
/* Instance layout of the (old) BZ2File object. */
102 typedef struct {
103 PyObject_HEAD
104 FILE *rawfp; /* NOTE(review): presumably the underlying stdio file; its use is not visible in this chunk */
105
106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
109
110 BZFILE *fp; /* libbzip2 handle passed to BZ2_bzRead / BZ2_bzWrite */
111 int mode; /* One of the MODE_* constants (MODE_CLOSED, MODE_READ, ...) */
112 Py_off_t pos; /* Advanced by the byte counts of BZ2_bzRead / BZ2_bzWrite calls */
113 Py_off_t size; /* Set to pos when BZ_STREAM_END is seen; seek() tests it against -1 */
114 #ifdef WITH_THREAD
115 PyThread_type_lock lock; /* Taken/released through ACQUIRE_LOCK / RELEASE_LOCK */
116 #endif
117 } BZ2FileObject;
118
17
119 typedef struct {
18 typedef struct {
120 PyObject_HEAD
19 PyObject_HEAD
121 bz_stream bzs;
20 bz_stream bzs;
122 int running;
21 int flushed;
123 #ifdef WITH_THREAD
22 } Compressor;
124 PyThread_type_lock lock;
125 #endif
126 } BZ2CompObject;
127
23
128 typedef struct {
24 typedef struct {
129 PyObject_HEAD
25 PyObject_HEAD
130 bz_stream bzs;
26 bz_stream bzs;
131 int running;
27 int eof;
132 PyObject *unused_data;
28 PyObject *unused_data;
133 #ifdef WITH_THREAD
29 } Decompressor;
134 PyThread_type_lock lock;
135 #endif
136 } BZ2DecompObject;
137
30
138 /* ===================================================================== */
139 /* Utility functions. */
140
31
141 /* Refuse regular I/O if there's data in the iteration-buffer.
32 /* Helper functions. */
142 * Mixing them would cause data to arrive out of order, as the read*
143 * methods don't use the iteration buffer. */
/* Return -1 with ValueError set when all three hold: a readahead buffer
 * exists, it still has unread bytes (f_bufend - f_bufptr > 0), and its
 * first byte is not NUL.  Return 0 otherwise. */
144 static int
145 check_iterbuffered(BZ2FileObject *f)
146 {
147 if (f->f_buf != NULL &&
148 (f->f_bufend - f->f_bufptr) > 0 &&
149 f->f_buf[0] != '\0') {
150 PyErr_SetString(PyExc_ValueError,
151 "Mixing iteration and read methods would lose data");
152 return -1;
153 }
154 return 0;
155 }
156
33
157 static int
34 static int
158 Util_CatchBZ2Error(int bzerror)
35 catch_bz2_error(int bzerror)
159 {
36 {
160 int ret = 0;
161 switch(bzerror) {
37 switch(bzerror) {
162 case BZ_OK:
38 case BZ_OK:
39 case BZ_RUN_OK:
40 case BZ_FLUSH_OK:
41 case BZ_FINISH_OK:
163 case BZ_STREAM_END:
42 case BZ_STREAM_END:
164 break;
43 return 0;
165
44
166 #ifdef BZ_CONFIG_ERROR
45 #ifdef BZ_CONFIG_ERROR
167 case BZ_CONFIG_ERROR:
46 case BZ_CONFIG_ERROR:
168 PyErr_SetString(PyExc_SystemError,
47 PyErr_SetString(PyExc_SystemError,
169 "the bz2 library was not compiled "
48 "libbzip2 was not compiled correctly");
170 "correctly");
49 return 1;
171 ret = 1;
172 break;
173 #endif
50 #endif
174
175 case BZ_PARAM_ERROR:
51 case BZ_PARAM_ERROR:
176 PyErr_SetString(PyExc_ValueError,
52 PyErr_SetString(PyExc_ValueError,
177 "the bz2 library has received wrong "
53 "Internal error - "
178 "parameters");
54 "invalid parameters passed to libbzip2");
179 ret = 1;
55 return 1;
180 break;
181
182 case BZ_MEM_ERROR:
56 case BZ_MEM_ERROR:
183 PyErr_NoMemory();
57 PyErr_NoMemory();
184 ret = 1;
58 return 1;
185 break;
186
187 case BZ_DATA_ERROR:
59 case BZ_DATA_ERROR:
188 case BZ_DATA_ERROR_MAGIC:
60 case BZ_DATA_ERROR_MAGIC:
189 PyErr_SetString(PyExc_IOError, "invalid data stream");
61 PyErr_SetString(PyExc_IOError, "Invalid data stream");
190 ret = 1;
62 return 1;
191 break;
192
193 case BZ_IO_ERROR:
63 case BZ_IO_ERROR:
194 PyErr_SetString(PyExc_IOError, "unknown IO error");
64 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
195 ret = 1;
65 return 1;
196 break;
197
198 case BZ_UNEXPECTED_EOF:
66 case BZ_UNEXPECTED_EOF:
199 PyErr_SetString(PyExc_EOFError,
67 PyErr_SetString(PyExc_EOFError,
200 "compressed file ended before the "
68 "Compressed file ended before the logical "
201 "logical end-of-stream was detected");
69 "end-of-stream was detected");
202 ret = 1;
70 return 1;
203 break;
204
205 case BZ_SEQUENCE_ERROR:
71 case BZ_SEQUENCE_ERROR:
206 PyErr_SetString(PyExc_RuntimeError,
72 PyErr_SetString(PyExc_RuntimeError,
207 "wrong sequence of bz2 library "
73 "Internal error - "
208 "commands used");
74 "Invalid sequence of commands sent to libbzip2");
209 ret = 1;
75 return 1;
210 break;
76 default:
77 PyErr_Format(PyExc_IOError,
78 "Unrecognized error from libbzip2: %d", bzerror);
79 return 1;
211 }
80 }
212 return ret;
213 }
81 }
214
82
215 #if BUFSIZ < 8192
83 #if BUFSIZ < 8192
216 #define SMALLCHUNK 8192
84 #define SMALLCHUNK 8192
217 #else
85 #else
218 #define SMALLCHUNK BUFSIZ
86 #define SMALLCHUNK BUFSIZ
219 #endif
87 #endif
220
88
221 #if SIZEOF_INT < 4
89 #if SIZEOF_INT < 4
222 #define BIGCHUNK (512 * 32)
90 #define BIGCHUNK (512 * 32)
223 #else
91 #else
224 #define BIGCHUNK (512 * 1024)
92 #define BIGCHUNK (512 * 1024)
225 #endif
93 #endif
226
94
227 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
95 static int
228 static size_t
96 grow_buffer(PyObject **buf)
229 Util_NewBufferSize(size_t currentsize)
97 {
230 {
98 size_t size = PyBytes_GET_SIZE(*buf);
231 if (currentsize > SMALLCHUNK) {
99 if (size <= SMALLCHUNK)
232 /* Keep doubling until we reach BIGCHUNK;
100 return _PyBytes_Resize(buf, size + SMALLCHUNK);
233 then keep adding BIGCHUNK. */
101 else if (size <= BIGCHUNK)
234 if (currentsize <= BIGCHUNK)
102 return _PyBytes_Resize(buf, size * 2);
235 return currentsize + currentsize;
103 else
236 else
104 return _PyBytes_Resize(buf, size + BIGCHUNK);
237 return currentsize + BIGCHUNK;
105 }
238 }
106
239 return currentsize + SMALLCHUNK;
107
240 }
108 /* Compressor class. */
241
109
242 /* This is a hacked version of Python's fileobject.c:get_line(). */
110 static PyObject *
243 static PyObject *
111 compress(Compressor *c, char *data, size_t len, int action)
244 Util_GetLine(BZ2FileObject *f, int n)
112 {
245 {
113 size_t data_size = 0;
246 char c;
114 PyObject *result;
247 char *buf, *end;
115
248 size_t total_v_size; /* total # of slots in buffer */
116 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
249 size_t used_v_size; /* # used slots in buffer */
117 if (result == NULL)
250 size_t increment; /* amount to increment the buffer */
118 return NULL;
251 PyObject *v;
119 c->bzs.next_in = data;
120 c->bzs.avail_in = len;
121 c->bzs.next_out = PyBytes_AS_STRING(result);
122 c->bzs.avail_out = PyBytes_GET_SIZE(result);
123 for (;;) {
124 char *this_out;
125 int bzerror;
126
127 Py_BEGIN_ALLOW_THREADS
128 this_out = c->bzs.next_out;
129 bzerror = BZ2_bzCompress(&c->bzs, action);
130 data_size += c->bzs.next_out - this_out;
131 Py_END_ALLOW_THREADS
132 if (catch_bz2_error(bzerror))
133 goto error;
134
135 /* In regular compression mode, stop when input data is exhausted.
136 In flushing mode, stop when all buffered data has been flushed. */
137 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
138 (action == BZ_FINISH && bzerror == BZ_STREAM_END))
139 break;
140
141 if (c->bzs.avail_out == 0) {
142 if (grow_buffer(&result) < 0)
143 goto error;
144 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
145 c->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
146 }
147 }
148 if (data_size != PyBytes_GET_SIZE(result))
149 if (_PyBytes_Resize(&result, data_size) < 0)
150 goto error;
151 return result;
152
153 error:
154 Py_XDECREF(result);
155 return NULL;
156 }
157
/* Compressor.compress(data): feed one block of data to the compressor.
 *
 * `arg` must support the buffer protocol (PyBUF_SIMPLE).  Returns whatever
 * compress(..., BZ_RUN) returns, or NULL with an exception set.  Raises
 * ValueError if flush() has already been called on this object. */
158 static PyObject *
159 Compressor_compress(Compressor *self, PyObject *arg)
160 {
161 Py_buffer buffer;
162 PyObject *result;
163
164 if (self->flushed) {
165 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
166 return NULL;
167 }
168 if (PyObject_GetBuffer(arg, &buffer, PyBUF_SIMPLE) < 0)
169 return NULL;
170 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
/* The buffer view is released on both the success and the failure path. */
171 PyBuffer_Release(&buffer);
172 return result;
173 }
174
/* Compressor.flush(): finish the stream by calling compress() with no
 * input and action BZ_FINISH, returning its result.  A second call raises
 * ValueError. */
175 static PyObject *
176 Compressor_flush(Compressor *self, PyObject *noargs)
177 {
178 if (self->flushed) {
179 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
180 return NULL;
181 }
/* The flag is set before compress() runs, so the object counts as
 * flushed even if that call fails. */
182 self->flushed = 1;
183 return compress(self, NULL, 0, BZ_FINISH);
184 }
185
186 static int
187 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
188 {
189 int compresslevel = 9;
252 int bzerror;
190 int bzerror;
253 int bytes_read;
191
254
192 if (!PyArg_ParseTuple(args, "|i:Compressor", &compresslevel))
255 total_v_size = n > 0 ? n : 100;
193 return -1;
256 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
194 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
257 if (v == NULL)
195 return catch_bz2_error(bzerror) ? -1 : 0;
258 return NULL;
196 }
259
197
260 buf = BUF(v);
198 static void
261 end = buf + total_v_size;
199 Compressor_dealloc(Compressor *self)
262
200 {
201 int bzerror = BZ2_bzCompressEnd(&self->bzs);
202 if (catch_bz2_error(bzerror))
203 PyErr_WriteUnraisable((PyObject *)self);
204 Py_TYPE(self)->tp_free((PyObject *)self);
205 }
206
/* Method table for _bz2.Compressor: compress() takes exactly one argument
 * (METH_O), flush() takes none (METH_NOARGS).  The {NULL} entry terminates
 * the table. */
207 static PyMethodDef Compressor_methods[] = {
208 {"compress", (PyCFunction)Compressor_compress, METH_O,
209 "Provide a block of data to the compressor."},
210 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
211 "Flush the buffers of the compressor."},
212 {NULL}
213 };
214
/* Type object for _bz2.Compressor.  Only the deallocator, flags, doc
 * string, method table, tp_init and tp_new slots are filled in; every
 * other slot is 0.  Instances are created with PyType_GenericNew and
 * initialised by Compressor_init. */
215 static PyTypeObject Compressor_Type = {
216 PyVarObject_HEAD_INIT(NULL, 0)
217 "_bz2.Compressor", /* tp_name */
218 sizeof(Compressor), /* tp_basicsize */
219 0, /* tp_itemsize */
220 (destructor)Compressor_dealloc, /* tp_dealloc */
221 0, /* tp_print */
222 0, /* tp_getattr */
223 0, /* tp_setattr */
224 0, /* tp_reserved */
225 0, /* tp_repr */
226 0, /* tp_as_number */
227 0, /* tp_as_sequence */
228 0, /* tp_as_mapping */
229 0, /* tp_hash */
230 0, /* tp_call */
231 0, /* tp_str */
232 0, /* tp_getattro */
233 0, /* tp_setattro */
234 0, /* tp_as_buffer */
235 Py_TPFLAGS_DEFAULT, /* tp_flags */
236 "Wrapper for bz2 compression functions.", /* tp_doc */
237 0, /* tp_traverse */
238 0, /* tp_clear */
239 0, /* tp_richcompare */
240 0, /* tp_weaklistoffset */
241 0, /* tp_iter */
242 0, /* tp_iternext */
243 Compressor_methods, /* tp_methods */
244 0, /* tp_members */
245 0, /* tp_getset */
246 0, /* tp_base */
247 0, /* tp_dict */
248 0, /* tp_descr_get */
249 0, /* tp_descr_set */
250 0, /* tp_dictoffset */
251 (initproc)Compressor_init, /* tp_init */
252 0, /* tp_alloc */
253 PyType_GenericNew, /* tp_new */
254 };
255
256
257 /* Decompressor class. */
258
259 static PyObject *
260 decompress(Decompressor *d, char *data, size_t len)
261 {
262 size_t data_size = 0;
263 PyObject *result;
264
265 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
266 if (result == NULL)
267 return result;
268 d->bzs.next_in = data;
269 d->bzs.avail_in = len;
270 d->bzs.next_out = PyBytes_AS_STRING(result);
271 d->bzs.avail_out = PyBytes_GET_SIZE(result);
263 for (;;) {
272 for (;;) {
273 char *this_out;
274 int bzerror;
275
264 Py_BEGIN_ALLOW_THREADS
276 Py_BEGIN_ALLOW_THREADS
265 do {
277 this_out = d->bzs.next_out;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
278 bzerror = BZ2_bzDecompress(&d->bzs);
267 f->pos++;
279 data_size += d->bzs.next_out - this_out;
268 if (bytes_read == 0)
269 break;
270 *buf++ = c;
271 } while (bzerror == BZ_OK && c != '\n' && buf != end);
272 Py_END_ALLOW_THREADS
280 Py_END_ALLOW_THREADS
281 if (catch_bz2_error(bzerror))
282 goto error;
273 if (bzerror == BZ_STREAM_END) {
283 if (bzerror == BZ_STREAM_END) {
274 f->size = f->pos;
284 d->eof = 1;
275 f->mode = MODE_READ_EOF;
285 if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */
276 break;
286 Py_CLEAR(d->unused_data);
277 } else if (bzerror != BZ_OK) {
287 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in,
278 Util_CatchBZ2Error(bzerror);
288 d->bzs.avail_in);
279 Py_DECREF(v);
289 if (d->unused_data == NULL)
280 return NULL;
290 goto error;
281 }
282 if (c == '\n')
283 break;
284 /* Must be because buf == end */
285 if (n > 0)
286 break;
287 used_v_size = total_v_size;
288 increment = total_v_size >> 2; /* mild exponential growth */
289 total_v_size += increment;
290 if (total_v_size > INT_MAX) {
291 PyErr_SetString(PyExc_OverflowError,
292 "line is longer than a Python string can hold");
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (_PyBytes_Resize(&v, total_v_size) < 0) {
297 return NULL;
298 }
299 buf = BUF(v) + used_v_size;
300 end = BUF(v) + total_v_size;
301 }
302
303 used_v_size = buf - BUF(v);
304 if (used_v_size != total_v_size) {
305 if (_PyBytes_Resize(&v, used_v_size) < 0) {
306 v = NULL;
307 }
308 }
309 return v;
310 }
311
312 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
/* Free the readahead buffer, if any, and reset f_buf to NULL.
 * f_bufptr and f_bufend are left untouched. */
313 static void
314 Util_DropReadAhead(BZ2FileObject *f)
315 {
316 if (f->f_buf != NULL) {
317 PyMem_Free(f->f_buf);
318 f->f_buf = NULL;
319 }
320 }
321
322 /* This is a hacked version of Python's fileobject.c:readahead(). */
/* Make sure the readahead buffer is usable.
 *
 * - If a buffer exists and still holds at least one unread byte, do
 *   nothing.
 * - If the file is already at EOF (MODE_READ_EOF), leave an empty buffer
 *   (f_bufptr == f_bufend == f_buf).
 * - Otherwise allocate `bufsize` bytes and fill them with one BZ2_bzRead
 *   call, advancing f->pos by the number of bytes read.
 *
 * Returns 0 on success, -1 with an exception set on allocation failure or
 * on a libbzip2 error. */
323 static int
324 Util_ReadAhead(BZ2FileObject *f, int bufsize)
325 {
326 int chunksize;
327 int bzerror;
328
329 if (f->f_buf != NULL) {
330 if((f->f_bufend - f->f_bufptr) >= 1)
331 return 0;
332 else
333 Util_DropReadAhead(f);
334 }
/* From here on f->f_buf is NULL: either it was NULL on entry or the
 * exhausted buffer was just dropped. */
335 if (f->mode == MODE_READ_EOF) {
336 f->f_bufptr = f->f_buf;
337 f->f_bufend = f->f_buf;
338 return 0;
339 }
340 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
341 PyErr_NoMemory();
342 return -1;
343 }
344 Py_BEGIN_ALLOW_THREADS
345 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
346 Py_END_ALLOW_THREADS
/* pos is advanced before bzerror is inspected. */
347 f->pos += chunksize;
348 if (bzerror == BZ_STREAM_END) {
/* End of stream: the total size is now known. */
349 f->size = f->pos;
350 f->mode = MODE_READ_EOF;
351 } else if (bzerror != BZ_OK) {
352 Util_CatchBZ2Error(bzerror);
353 Util_DropReadAhead(f);
354 return -1;
355 }
356 f->f_bufptr = f->f_buf;
357 f->f_bufend = f->f_buf + chunksize;
358 return 0;
359 }
360
361 /* This is a hacked version of Python's
362 * fileobject.c:readahead_get_line_skip(). */
/* Return a new bytes object of length skip + (length of the next line),
 * with the line stored starting at offset `skip`; the first `skip` bytes
 * are left for the caller to fill in.
 *
 * When the current readahead buffer holds no '\n', the function recurses
 * with a buffer 25% larger and `skip` increased by the bytes seen so far,
 * then copies its own partial data into the gap the deeper call left.
 * At end of data (empty buffer) the result has exactly `skip` bytes.
 * Returns NULL with an exception set on failure. */
363 static PyBytesObject *
364 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
365 {
366 PyBytesObject* s;
367 char *bufptr;
368 char *buf;
369 int len;
370
371 if (f->f_buf == NULL)
372 if (Util_ReadAhead(f, bufsize) < 0)
373 return NULL;
374
375 len = f->f_bufend - f->f_bufptr;
376 if (len == 0)
377 return (PyBytesObject *)
378 PyBytes_FromStringAndSize(NULL, skip);
379 bufptr = memchr(f->f_bufptr, '\n', len);
380 if (bufptr != NULL) {
381 bufptr++; /* Count the '\n' */
382 len = bufptr - f->f_bufptr;
383 s = (PyBytesObject *)
384 PyBytes_FromStringAndSize(NULL, skip+len);
385 if (s == NULL)
386 return NULL;
387 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
388 f->f_bufptr = bufptr;
/* Buffer fully consumed: release it so the next call refills. */
389 if (bufptr == f->f_bufend)
390 Util_DropReadAhead(f);
391 } else {
/* No newline in this buffer: keep the old buffer alive locally
 * (buf/bufptr) while a deeper call reads more data. */
392 bufptr = f->f_bufptr;
393 buf = f->f_buf;
394 f->f_buf = NULL; /* Force new readahead buffer */
395 s = Util_ReadAheadGetLineSkip(f, skip+len,
396 bufsize + (bufsize>>2));
397 if (s == NULL) {
398 PyMem_Free(buf);
399 return NULL;
400 }
401 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
402 PyMem_Free(buf);
403 }
404 return s;
405 }
406
407 /* ===================================================================== */
408 /* Methods of BZ2File. */
409
410 PyDoc_STRVAR(BZ2File_read__doc__,
411 "read([size]) -> string\n\
412 \n\
413 Read at most size uncompressed bytes, returned as a string. If the size\n\
414 argument is negative or omitted, read until EOF is reached.\n\
415 ");
416
417 /* This is a hacked version of Python's fileobject.c:file_read(). */
418 static PyObject *
419 BZ2File_read(BZ2FileObject *self, PyObject *args)
420 {
421 long bytesrequested = -1;
422 size_t bytesread, buffersize, chunksize;
423 int bzerror;
424 PyObject *ret = NULL;
425
426 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
427 return NULL;
428
429 ACQUIRE_LOCK(self);
430 switch (self->mode) {
431 case MODE_READ:
432 break;
433 case MODE_READ_EOF:
434 ret = PyBytes_FromStringAndSize("", 0);
435 goto cleanup;
436 case MODE_CLOSED:
437 PyErr_SetString(PyExc_ValueError,
438 "I/O operation on closed file");
439 goto cleanup;
440 default:
441 PyErr_SetString(PyExc_IOError,
442 "file is not ready for reading");
443 goto cleanup;
444 }
445
446 /* refuse to mix with f.next() */
447 if (check_iterbuffered(self))
448 goto cleanup;
449
450 if (bytesrequested < 0)
451 buffersize = Util_NewBufferSize((size_t)0);
452 else
453 buffersize = bytesrequested;
454 if (buffersize > INT_MAX) {
455 PyErr_SetString(PyExc_OverflowError,
456 "requested number of bytes is "
457 "more than a Python string can hold");
458 goto cleanup;
459 }
460 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
461 if (ret == NULL || buffersize == 0)
462 goto cleanup;
463 bytesread = 0;
464
465 for (;;) {
466 Py_BEGIN_ALLOW_THREADS
467 chunksize = BZ2_bzRead(&bzerror, self->fp,
468 BUF(ret)+bytesread,
469 buffersize-bytesread);
470 self->pos += chunksize;
471 Py_END_ALLOW_THREADS
472 bytesread += chunksize;
473 if (bzerror == BZ_STREAM_END) {
474 self->size = self->pos;
475 self->mode = MODE_READ_EOF;
476 break;
477 } else if (bzerror != BZ_OK) {
478 Util_CatchBZ2Error(bzerror);
479 Py_DECREF(ret);
480 ret = NULL;
481 goto cleanup;
482 }
483 if (bytesrequested < 0) {
484 buffersize = Util_NewBufferSize(buffersize);
485 if (_PyBytes_Resize(&ret, buffersize) < 0) {
486 ret = NULL;
487 goto cleanup;
488 }
291 }
489 } else {
490 break;
292 break;
491 }
293 }
492 }
294 if (d->bzs.avail_in == 0)
493 if (bytesread != buffersize) {
295 break;
494 if (_PyBytes_Resize(&ret, bytesread) < 0) {
296 if (d->bzs.avail_out == 0) {
495 ret = NULL;
297 if (grow_buffer(&result) < 0)
298 goto error;
299 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
300 d->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
496 }
301 }
497 }
302 }
498
303 if (data_size != PyBytes_GET_SIZE(result))
499 cleanup:
304 if (_PyBytes_Resize(&result, data_size) < 0)
500 RELEASE_LOCK(self);
305 goto error;
501 return ret;
306 return result;
502 }
307
503
308 error:
504 PyDoc_STRVAR(BZ2File_readline__doc__,
309 Py_XDECREF(result);
505 "readline([size]) -> string\n\
310 return NULL;
506 \n\
311 }
507 Return the next line from the file, as a string, retaining newline.\n\
312
508 A non-negative size argument will limit the maximum number of bytes to\n\
313 static PyObject *
509 return (an incomplete line may be returned then). Return an empty\n\
314 Decompressor_decompress(Decompressor *self, PyObject *arg)
510 string at EOF.\n\
315 {
511 ");
316 Py_buffer buffer;
512
317 PyObject *result;
513 static PyObject *
318
514 BZ2File_readline(BZ2FileObject *self, PyObject *args)
319 if (self->eof) {
515 {
320 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
516 PyObject *ret = NULL;
321 return NULL;
517 int sizehint = -1;
322 }
518
323 if (PyObject_GetBuffer(arg, &buffer, PyBUF_SIMPLE) < 0)
519 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
324 return NULL;
520 return NULL;
325 result = decompress(self, buffer.buf, buffer.len);
521
326 PyBuffer_Release(&buffer);
522 ACQUIRE_LOCK(self);
327 return result;
523 switch (self->mode) {
328 }
524 case MODE_READ:
329
525 break;
330 static int
526 case MODE_READ_EOF:
331 Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
527 ret = PyBytes_FromStringAndSize("", 0);
332 {
528 goto cleanup;
529 case MODE_CLOSED:
530 PyErr_SetString(PyExc_ValueError,
531 "I/O operation on closed file");
532 goto cleanup;
533 default:
534 PyErr_SetString(PyExc_IOError,
535 "file is not ready for reading");
536 goto cleanup;
537 }
538
539 /* refuse to mix with f.next() */
540 if (check_iterbuffered(self))
541 goto cleanup;
542
543 if (sizehint == 0)
544 ret = PyBytes_FromStringAndSize("", 0);
545 else
546 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
547
548 cleanup:
549 RELEASE_LOCK(self);
550 return ret;
551 }
552
553 PyDoc_STRVAR(BZ2File_readlines__doc__,
554 "readlines([size]) -> list\n\
555 \n\
556 Call readline() repeatedly and return a list of lines read.\n\
557 The optional size argument, if given, is an approximate bound on the\n\
558 total number of bytes in the lines returned.\n\
559 ");
560
561 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
562 static PyObject *
563 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
564 {
565 long sizehint = 0;
566 PyObject *list = NULL;
567 PyObject *line;
568 char small_buffer[SMALLCHUNK];
569 char *buffer = small_buffer;
570 size_t buffersize = SMALLCHUNK;
571 PyObject *big_buffer = NULL;
572 size_t nfilled = 0;
573 size_t nread;
574 size_t totalread = 0;
575 char *p, *q, *end;
576 int err;
577 int shortread = 0;
578 int bzerror;
333 int bzerror;
579
334
580 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
335 self->unused_data = PyBytes_FromStringAndSize("", 0);
581 return NULL;
336 if (self->unused_data == NULL)
582
337 return -1;
583 ACQUIRE_LOCK(self);
338 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
584 switch (self->mode) {
339 if (catch_bz2_error(bzerror)) {
585 case MODE_READ:
340 Py_CLEAR(self->unused_data);
586 break;
341 return -1;
587 case MODE_READ_EOF:
342 }
588 list = PyList_New(0);
343 return 0;
589 goto cleanup;
344 }
590 case MODE_CLOSED:
345
591 PyErr_SetString(PyExc_ValueError,
346 static void
592 "I/O operation on closed file");
347 Decompressor_dealloc(Decompressor *self)
593 goto cleanup;
348 {
594 default:
595 PyErr_SetString(PyExc_IOError,
596 "file is not ready for reading");
597 goto cleanup;
598 }
599
600 /* refuse to mix with f.next() */
601 if (check_iterbuffered(self))
602 goto cleanup;
603
604 if ((list = PyList_New(0)) == NULL)
605 goto cleanup;
606
607 for (;;) {
608 Py_BEGIN_ALLOW_THREADS
609 nread = BZ2_bzRead(&bzerror, self->fp,
610 buffer+nfilled, buffersize-nfilled);
611 self->pos += nread;
612 Py_END_ALLOW_THREADS
613 if (bzerror == BZ_STREAM_END) {
614 self->size = self->pos;
615 self->mode = MODE_READ_EOF;
616 if (nread == 0) {
617 sizehint = 0;
618 break;
619 }
620 shortread = 1;
621 } else if (bzerror != BZ_OK) {
622 Util_CatchBZ2Error(bzerror);
623 error:
624 Py_DECREF(list);
625 list = NULL;
626 goto cleanup;
627 }
628 totalread += nread;
629 p = memchr(buffer+nfilled, '\n', nread);
630 if (!shortread && p == NULL) {
631 /* Need a larger buffer to fit this line */
632 nfilled += nread;
633 buffersize *= 2;
634 if (buffersize > INT_MAX) {
635 PyErr_SetString(PyExc_OverflowError,
636 "line is longer than a Python string can hold");
637 goto error;
638 }
639 if (big_buffer == NULL) {
640 /* Create the big buffer */
641 big_buffer = PyBytes_FromStringAndSize(
642 NULL, buffersize);
643 if (big_buffer == NULL)
644 goto error;
645 buffer = PyBytes_AS_STRING(big_buffer);
646 memcpy(buffer, small_buffer, nfilled);
647 }
648 else {
649 /* Grow the big buffer */
650 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
651 big_buffer = NULL;
652 goto error;
653 }
654 buffer = PyBytes_AS_STRING(big_buffer);
655 }
656 continue;
657 }
658 end = buffer+nfilled+nread;
659 q = buffer;
660 while (p != NULL) {
661 /* Process complete lines */
662 p++;
663 line = PyBytes_FromStringAndSize(q, p-q);
664 if (line == NULL)
665 goto error;
666 err = PyList_Append(list, line);
667 Py_DECREF(line);
668 if (err != 0)
669 goto error;
670 q = p;
671 p = memchr(q, '\n', end-q);
672 }
673 /* Move the remaining incomplete line to the start */
674 nfilled = end-q;
675 memmove(buffer, q, nfilled);
676 if (sizehint > 0)
677 if (totalread >= (size_t)sizehint)
678 break;
679 if (shortread) {
680 sizehint = 0;
681 break;
682 }
683 }
684 if (nfilled != 0) {
685 /* Partial last line */
686 line = PyBytes_FromStringAndSize(buffer, nfilled);
687 if (line == NULL)
688 goto error;
689 if (sizehint > 0) {
690 /* Need to complete the last line */
691 PyObject *rest = Util_GetLine(self, 0);
692 if (rest == NULL) {
693 Py_DECREF(line);
694 goto error;
695 }
696 PyBytes_Concat(&line, rest);
697 Py_DECREF(rest);
698 if (line == NULL)
699 goto error;
700 }
701 err = PyList_Append(list, line);
702 Py_DECREF(line);
703 if (err != 0)
704 goto error;
705 }
706
707 cleanup:
708 RELEASE_LOCK(self);
709 if (big_buffer) {
710 Py_DECREF(big_buffer);
711 }
712 return list;
713 }
714
715 PyDoc_STRVAR(BZ2File_write__doc__,
716 "write(data) -> None\n\
717 \n\
718 Write the 'data' string to file. Note that due to buffering, close() may\n\
719 be needed before the file on disk reflects the data written.\n\
720 ");
721
722 /* This is a hacked version of Python's fileobject.c:file_write(). */
723 static PyObject *
724 BZ2File_write(BZ2FileObject *self, PyObject *args)
725 {
726 PyObject *ret = NULL;
727 Py_buffer pbuf;
728 char *buf;
729 int len;
730 int bzerror;
349 int bzerror;
731
350
732 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
351 Py_CLEAR(self->unused_data);
733 return NULL;
352 bzerror = BZ2_bzDecompressEnd(&self->bzs);
734 buf = pbuf.buf;
353 if (catch_bz2_error(bzerror))
735 len = pbuf.len;
354 PyErr_WriteUnraisable((PyObject *)self);
736
737 ACQUIRE_LOCK(self);
738 switch (self->mode) {
739 case MODE_WRITE:
740 break;
741
742 case MODE_CLOSED:
743 PyErr_SetString(PyExc_ValueError,
744 "I/O operation on closed file");
745 goto cleanup;
746
747 default:
748 PyErr_SetString(PyExc_IOError,
749 "file is not ready for writing");
750 goto cleanup;
751 }
752
753 Py_BEGIN_ALLOW_THREADS
754 BZ2_bzWrite (&bzerror, self->fp, buf, len);
755 self->pos += len;
756 Py_END_ALLOW_THREADS
757
758 if (bzerror != BZ_OK) {
759 Util_CatchBZ2Error(bzerror);
760 goto cleanup;
761 }
762
763 Py_INCREF(Py_None);
764 ret = Py_None;
765
766 cleanup:
767 PyBuffer_Release(&pbuf);
768 RELEASE_LOCK(self);
769 return ret;
770 }
771
772 PyDoc_STRVAR(BZ2File_writelines__doc__,
773 "writelines(sequence_of_strings) -> None\n\
774 \n\
775 Write the sequence of strings to the file. Note that newlines are not\n\
776 added. The sequence can be any iterable object producing strings. This is\n\
777 equivalent to calling write() for each string.\n\
778 ");
779
780 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
/* BZ2File.writelines(seq): write every item of `seq` with BZ2_bzWrite.
 *
 * `seq` may be a list (processed in slices of CHUNKSIZE) or any iterable
 * (drained CHUNKSIZE items at a time into a private list).  Items that are
 * not bytes objects are converted through PyObject_AsCharBuffer.
 * Returns None on success, NULL with an exception set on failure.
 * The object's lock is held for the whole call.
 *
 * NOTE(review): unlike BZ2File_write, this function never updates
 * self->pos -- confirm whether that is intended. */
781 static PyObject *
782 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
783 {
784 #define CHUNKSIZE 1000
785 PyObject *list = NULL;
786 PyObject *iter = NULL;
787 PyObject *ret = NULL;
788 PyObject *line;
789 int i, j, index, len, islist;
790 int bzerror;
791
792 ACQUIRE_LOCK(self);
793 switch (self->mode) {
794 case MODE_WRITE:
795 break;
796
797 case MODE_CLOSED:
798 PyErr_SetString(PyExc_ValueError,
799 "I/O operation on closed file");
800 goto error;
801
802 default:
803 PyErr_SetString(PyExc_IOError,
804 "file is not ready for writing");
805 goto error;
806 }
807
808 islist = PyList_Check(seq);
809 if (!islist) {
810 iter = PyObject_GetIter(seq);
811 if (iter == NULL) {
/* NOTE(review): this replaces whatever exception
 * PyObject_GetIter raised with a generic TypeError. */
812 PyErr_SetString(PyExc_TypeError,
813 "writelines() requires an iterable argument");
814 goto error;
815 }
816 list = PyList_New(CHUNKSIZE);
817 if (list == NULL)
818 goto error;
819 }
820
821 /* Strategy: slurp CHUNKSIZE lines into a private list,
822 checking that they are all strings, then write that list
823 without holding the interpreter lock, then come back for more. */
824 for (index = 0; ; index += CHUNKSIZE) {
825 if (islist) {
826 Py_XDECREF(list);
827 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
828 if (list == NULL)
829 goto error;
830 j = PyList_GET_SIZE(list);
831 }
832 else {
833 for (j = 0; j < CHUNKSIZE; j++) {
834 line = PyIter_Next(iter);
835 if (line == NULL) {
836 if (PyErr_Occurred())
837 goto error;
838 break;
839 }
840 PyList_SetItem(list, j, line);
841 }
842 }
/* j is the number of items gathered for this chunk. */
843 if (j == 0)
844 break;
845
846 /* Check that all entries are indeed byte strings. If not,
847 apply the same rules as for file.write() and
848 convert the rets to strings. This is slow, but
849 seems to be the only way since all conversion APIs
850 could potentially execute Python code. */
851 for (i = 0; i < j; i++) {
852 PyObject *v = PyList_GET_ITEM(list, i);
853 if (!PyBytes_Check(v)) {
854 const char *buffer;
855 Py_ssize_t len;
856 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
857 PyErr_SetString(PyExc_TypeError,
858 "writelines() "
859 "argument must be "
860 "a sequence of "
861 "bytes objects");
862 goto error;
863 }
864 line = PyBytes_FromStringAndSize(buffer,
865 len);
866 if (line == NULL)
867 goto error;
/* Swap the original item for its bytes copy in place. */
868 Py_DECREF(v);
869 PyList_SET_ITEM(list, i, line);
870 }
871 }
872
873 /* Since we are releasing the global lock, the
874 following code may *not* execute Python code. */
875 Py_BEGIN_ALLOW_THREADS
876 for (i = 0; i < j; i++) {
877 line = PyList_GET_ITEM(list, i);
878 len = PyBytes_GET_SIZE(line);
879 BZ2_bzWrite (&bzerror, self->fp,
880 PyBytes_AS_STRING(line), len);
881 if (bzerror != BZ_OK) {
/* Py_BLOCK_THREADS is issued before the exception
 * is set and before jumping out of the
 * ALLOW_THREADS region. */
882 Py_BLOCK_THREADS
883 Util_CatchBZ2Error(bzerror);
884 goto error;
885 }
886 }
887 Py_END_ALLOW_THREADS
888
/* A short chunk means the input is exhausted. */
889 if (j < CHUNKSIZE)
890 break;
891 }
892
893 Py_INCREF(Py_None);
894 ret = Py_None;
895
/* Shared exit path: reached on success as well (ret == Py_None). */
896 error:
897 RELEASE_LOCK(self);
898 Py_XDECREF(list);
899 Py_XDECREF(iter);
900 return ret;
901 #undef CHUNKSIZE
902 }
903
904 PyDoc_STRVAR(BZ2File_seek__doc__,
905 "seek(offset [, whence]) -> None\n\
906 \n\
907 Move to new file position. Argument offset is a byte count. Optional\n\
908 argument whence defaults to 0 (offset from start of file, offset\n\
909 should be >= 0); other values are 1 (move relative to current position,\n\
910 positive or negative), and 2 (move relative to end of file, usually\n\
911 negative, although many platforms allow seeking beyond the end of a file).\n\
912 \n\
913 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
914 the operation may be extremely slow.\n\
915 ");
916
917 static PyObject *
918 BZ2File_seek(BZ2FileObject *self, PyObject *args)
919 {
920 int where = 0;
921 PyObject *offobj;
922 Py_off_t offset;
923 char small_buffer[SMALLCHUNK];
924 char *buffer = small_buffer;
925 size_t buffersize = SMALLCHUNK;
926 Py_off_t bytesread = 0;
927 size_t readsize;
928 int chunksize;
929 int bzerror;
930 PyObject *ret = NULL;
931
932 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
933 return NULL;
934 #if !defined(HAVE_LARGEFILE_SUPPORT)
935 offset = PyLong_AsLong(offobj);
936 #else
937 offset = PyLong_Check(offobj) ?
938 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
939 #endif
940 if (PyErr_Occurred())
941 return NULL;
942
943 ACQUIRE_LOCK(self);
944 Util_DropReadAhead(self);
945 switch (self->mode) {
946 case MODE_READ:
947 case MODE_READ_EOF:
948 break;
949
950 case MODE_CLOSED:
951 PyErr_SetString(PyExc_ValueError,
952 "I/O operation on closed file");
953 goto cleanup;
954
955 default:
956 PyErr_SetString(PyExc_IOError,
957 "seek works only while reading");
958 goto cleanup;
959 }
960
961 if (where == 2) {
962 if (self->size == -1) {
963 assert(self->mode != MODE_READ_EOF);
964 for (;;) {
965 Py_BEGIN_ALLOW_THREADS
966 chunksize = BZ2_bzRead(&bzerror, self->fp,
967 buffer, buffersize);
968 self->pos += chunksize;
969 Py_END_ALLOW_THREADS
970
971 bytesread += chunksize;
972 if (bzerror == BZ_STREAM_END) {
973 break;
974 } else if (bzerror != BZ_OK) {
975 Util_CatchBZ2Error(bzerror);
976 goto cleanup;
977 }
978 }
979 self->mode = MODE_READ_EOF;
980 self->size = self->pos;
981 bytesread = 0;
982 }
983 offset = self->size + offset;
984 } else if (where == 1) {
985 offset = self->pos + offset;
986 }
987
988 /* Before getting here, offset must be the absolute position the file
989 * pointer should be set to. */
990
991 if (offset >= self->pos) {
992 /* we can move forward */
993 offset -= self->pos;
994 } else {
995 /* we cannot move back, so rewind the stream */
996 BZ2_bzReadClose(&bzerror, self->fp);
997 if (bzerror != BZ_OK) {
998 Util_CatchBZ2Error(bzerror);
999 goto cleanup;
1000 }
1001 rewind(self->rawfp);
1002 self->pos = 0;
1003 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1004 0, 0, NULL, 0);
1005 if (bzerror != BZ_OK) {
1006 Util_CatchBZ2Error(bzerror);
1007 goto cleanup;
1008 }
1009 self->mode = MODE_READ;
1010 }
1011
1012 if (offset <= 0 || self->mode == MODE_READ_EOF)
1013 goto exit;
1014
1015 /* Before getting here, offset must be set to the number of bytes
1016 * to walk forward. */
1017 for (;;) {
1018 if (offset-bytesread > buffersize)
1019 readsize = buffersize;
1020 else
1021 /* offset might be wider that readsize, but the result
1022 * of the subtraction is bound by buffersize (see the
1023 * condition above). buffersize is 8192. */
1024 readsize = (size_t)(offset-bytesread);
1025 Py_BEGIN_ALLOW_THREADS
1026 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
1027 self->pos += chunksize;
1028 Py_END_ALLOW_THREADS
1029 bytesread += chunksize;
1030 if (bzerror == BZ_STREAM_END) {
1031 self->size = self->pos;
1032 self->mode = MODE_READ_EOF;
1033 break;
1034 } else if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
1037 }
1038 if (bytesread == offset)
1039 break;
1040 }
1041
1042 exit:
1043 Py_INCREF(Py_None);
1044 ret = Py_None;
1045
1046 cleanup:
1047 RELEASE_LOCK(self);
1048 return ret;
1049 }
1050
1051 PyDoc_STRVAR(BZ2File_tell__doc__,
1052 "tell() -> int\n\
1053 \n\
1054 Return the current file position, an integer (may be a long integer).\n\
1055 ");
1056
1057 static PyObject *
1058 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1059 {
1060 PyObject *ret = NULL;
1061
1062 if (self->mode == MODE_CLOSED) {
1063 PyErr_SetString(PyExc_ValueError,
1064 "I/O operation on closed file");
1065 goto cleanup;
1066 }
1067
1068 #if !defined(HAVE_LARGEFILE_SUPPORT)
1069 ret = PyLong_FromLong(self->pos);
1070 #else
1071 ret = PyLong_FromLongLong(self->pos);
1072 #endif
1073
1074 cleanup:
1075 return ret;
1076 }
1077
1078 PyDoc_STRVAR(BZ2File_close__doc__,
1079 "close() -> None or (perhaps) an integer\n\
1080 \n\
1081 Close the file. Sets data attribute .closed to true. A closed file\n\
1082 cannot be used for further I/O operations. close() may be called more\n\
1083 than once without error.\n\
1084 ");
1085
1086 static PyObject *
1087 BZ2File_close(BZ2FileObject *self)
1088 {
1089 PyObject *ret = NULL;
1090 int bzerror = BZ_OK;
1091
1092 if (self->mode == MODE_CLOSED) {
1093 Py_RETURN_NONE;
1094 }
1095
1096 ACQUIRE_LOCK(self);
1097 switch (self->mode) {
1098 case MODE_READ:
1099 case MODE_READ_EOF:
1100 BZ2_bzReadClose(&bzerror, self->fp);
1101 break;
1102 case MODE_WRITE:
1103 BZ2_bzWriteClose(&bzerror, self->fp,
1104 0, NULL, NULL);
1105 break;
1106 }
1107 self->mode = MODE_CLOSED;
1108 fclose(self->rawfp);
1109 self->rawfp = NULL;
1110 if (bzerror == BZ_OK) {
1111 Py_INCREF(Py_None);
1112 ret = Py_None;
1113 }
1114 else {
1115 Util_CatchBZ2Error(bzerror);
1116 }
1117
1118 RELEASE_LOCK(self);
1119 return ret;
1120 }
1121
1122 PyDoc_STRVAR(BZ2File_enter_doc,
1123 "__enter__() -> self.");
1124
1125 static PyObject *
1126 BZ2File_enter(BZ2FileObject *self)
1127 {
1128 if (self->mode == MODE_CLOSED) {
1129 PyErr_SetString(PyExc_ValueError,
1130 "I/O operation on closed file");
1131 return NULL;
1132 }
1133 Py_INCREF(self);
1134 return (PyObject *) self;
1135 }
1136
1137 PyDoc_STRVAR(BZ2File_exit_doc,
1138 "__exit__(*excinfo) -> None. Closes the file.");
1139
1140 static PyObject *
1141 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1142 {
1143 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1144 if (!ret)
1145 /* If error occurred, pass through */
1146 return NULL;
1147 Py_DECREF(ret);
1148 Py_RETURN_NONE;
1149 }
1150
1151
1152 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1153
1154 static PyMethodDef BZ2File_methods[] = {
1155 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1156 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__ doc__},
1157 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readline s__doc__},
1158 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1159 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__ doc__},
1160 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1161 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1162 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1163 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1164 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1165 {NULL, NULL} /* sentinel */
1166 };
1167
1168
1169 /* ===================================================================== */
1170 /* Getters and setters of BZ2File. */
1171
1172 static PyObject *
1173 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1174 {
1175 return PyLong_FromLong(self->mode == MODE_CLOSED);
1176 }
1177
1178 static PyGetSetDef BZ2File_getset[] = {
1179 {"closed", (getter)BZ2File_get_closed, NULL,
1180 "True if the file is closed"},
1181 {NULL} /* Sentinel */
1182 };
1183
1184
1185 /* ===================================================================== */
1186 /* Slot definitions for BZ2File_Type. */
1187
1188 static int
1189 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1190 {
1191 static char *kwlist[] = {"filename", "mode", "buffering",
1192 "compresslevel", 0};
1193 PyObject *name_obj = NULL;
1194 char *name;
1195 char *mode = "r";
1196 int buffering = -1;
1197 int compresslevel = 9;
1198 int bzerror;
1199 int mode_char = 0;
1200
1201 self->size = -1;
1202
1203 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1204 kwlist, PyUnicode_FSConverter, &name_obj,
1205 &mode, &buffering,
1206 &compresslevel))
1207 return -1;
1208
1209 name = PyBytes_AsString(name_obj);
1210 if (compresslevel < 1 || compresslevel > 9) {
1211 PyErr_SetString(PyExc_ValueError,
1212 "compresslevel must be between 1 and 9");
1213 Py_DECREF(name_obj);
1214 return -1;
1215 }
1216
1217 for (;;) {
1218 int error = 0;
1219 switch (*mode) {
1220 case 'r':
1221 case 'w':
1222 if (mode_char)
1223 error = 1;
1224 mode_char = *mode;
1225 break;
1226
1227 case 'b':
1228 break;
1229
1230 default:
1231 error = 1;
1232 break;
1233 }
1234 if (error) {
1235 PyErr_Format(PyExc_ValueError,
1236 "invalid mode char %c", *mode);
1237 Py_DECREF(name_obj);
1238 return -1;
1239 }
1240 mode++;
1241 if (*mode == '\0')
1242 break;
1243 }
1244
1245 if (mode_char == 0) {
1246 mode_char = 'r';
1247 }
1248
1249 mode = (mode_char == 'r') ? "rb" : "wb";
1250
1251 self->rawfp = fopen(name, mode);
1252 Py_DECREF(name_obj);
1253 if (self->rawfp == NULL) {
1254 PyErr_SetFromErrno(PyExc_IOError);
1255 return -1;
1256 }
1257 /* XXX Ignore buffering */
1258
1259 /* From now on, we have stuff to dealloc, so jump to error label
1260 * instead of returning */
1261
1262 #ifdef WITH_THREAD
1263 self->lock = PyThread_allocate_lock();
1264 if (!self->lock) {
1265 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1266 goto error;
1267 }
1268 #endif
1269
1270 if (mode_char == 'r')
1271 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1272 0, 0, NULL, 0);
1273 else
1274 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1275 compresslevel, 0, 0);
1276
1277 if (bzerror != BZ_OK) {
1278 Util_CatchBZ2Error(bzerror);
1279 goto error;
1280 }
1281
1282 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1283
1284 return 0;
1285
1286 error:
1287 fclose(self->rawfp);
1288 self->rawfp = NULL;
1289 #ifdef WITH_THREAD
1290 if (self->lock) {
1291 PyThread_free_lock(self->lock);
1292 self->lock = NULL;
1293 }
1294 #endif
1295 return -1;
1296 }
1297
1298 static void
1299 BZ2File_dealloc(BZ2FileObject *self)
1300 {
1301 int bzerror;
1302 #ifdef WITH_THREAD
1303 if (self->lock)
1304 PyThread_free_lock(self->lock);
1305 #endif
1306 switch (self->mode) {
1307 case MODE_READ:
1308 case MODE_READ_EOF:
1309 BZ2_bzReadClose(&bzerror, self->fp);
1310 break;
1311 case MODE_WRITE:
1312 BZ2_bzWriteClose(&bzerror, self->fp,
1313 0, NULL, NULL);
1314 break;
1315 }
1316 Util_DropReadAhead(self);
1317 if (self->rawfp != NULL)
1318 fclose(self->rawfp);
1319 Py_TYPE(self)->tp_free((PyObject *)self);
355 Py_TYPE(self)->tp_free((PyObject *)self);
1320 }
356 }
1321
357
1322 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
358 static PyMethodDef Decompressor_methods[] = {
1323 static PyObject *
359 {"decompress", (PyCFunction)Decompressor_decompress, METH_O,
1324 BZ2File_getiter(BZ2FileObject *self)
360 "Provide a block of data to the decompressor."},
1325 {
361 {NULL}
1326 if (self->mode == MODE_CLOSED) {
362 };
1327 PyErr_SetString(PyExc_ValueError,
363
1328 "I/O operation on closed file");
364 static PyMemberDef Decompressor_members[] = {
1329 return NULL;
365 {"eof", T_INT, offsetof(Decompressor, eof), READONLY,
1330 }
366 "True if the end-of-stream marker has been reached."},
1331 Py_INCREF((PyObject*)self);
367 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1332 return (PyObject *)self;
368 "Data found after the end of the compressed stream."},
1333 }
369 {NULL}
1334
370 };
1335 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
371
1336 #define READAHEAD_BUFSIZE 8192
372 static PyTypeObject Decompressor_Type = {
1337 static PyObject *
1338 BZ2File_iternext(BZ2FileObject *self)
1339 {
1340 PyBytesObject* ret;
1341 ACQUIRE_LOCK(self);
1342 if (self->mode == MODE_CLOSED) {
1343 RELEASE_LOCK(self);
1344 PyErr_SetString(PyExc_ValueError,
1345 "I/O operation on closed file");
1346 return NULL;
1347 }
1348 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1349 RELEASE_LOCK(self);
1350 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1351 Py_XDECREF(ret);
1352 return NULL;
1353 }
1354 return (PyObject *)ret;
1355 }
1356
1357 /* ===================================================================== */
1358 /* BZ2File_Type definition. */
1359
1360 PyDoc_VAR(BZ2File__doc__) =
1361 PyDoc_STR(
1362 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1363 \n\
1364 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1365 writing. When opened for writing, the file will be created if it doesn't\n\
1366 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1367 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1368 is given, must be a number between 1 and 9.\n\
1369 Data read is always returned in bytes; data written ought to be bytes.\n\
1370 ");
1371
1372 static PyTypeObject BZ2File_Type = {
1373 PyVarObject_HEAD_INIT(NULL, 0)
373 PyVarObject_HEAD_INIT(NULL, 0)
1374 "bz2.BZ2File", /*tp_name*/
374 "_bz2.Decompressor", /* tp_name */
1375 sizeof(BZ2FileObject), /*tp_basicsize*/
375 sizeof(Decompressor), /* tp_basicsize */
1376 0, /*tp_itemsize*/
376 0, /* tp_itemsize */
1377 (destructor)BZ2File_dealloc, /*tp_dealloc*/
377 (destructor)Decompressor_dealloc, /* tp_dealloc */
1378 0, /*tp_print*/
378 0, /* tp_print */
1379 0, /*tp_getattr*/
379 0, /* tp_getattr */
1380 0, /*tp_setattr*/
380 0, /* tp_setattr */
1381 0, /*tp_reserved*/
381 0, /* tp_reserved */
1382 0, /*tp_repr*/
382 0, /* tp_repr */
1383 0, /*tp_as_number*/
383 0, /* tp_as_number */
1384 0, /*tp_as_sequence*/
384 0, /* tp_as_sequence */
1385 0, /*tp_as_mapping*/
385 0, /* tp_as_mapping */
1386 0, /*tp_hash*/
386 0, /* tp_hash */
1387 0, /*tp_call*/
387 0, /* tp_call */
1388 0, /*tp_str*/
388 0, /* tp_str */
1389 PyObject_GenericGetAttr,/*tp_getattro*/
389 0, /* tp_getattro */
1390 PyObject_GenericSetAttr,/*tp_setattro*/
390 0, /* tp_setattro */
1391 0, /*tp_as_buffer*/
391 0, /* tp_as_buffer */
1392 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
392 Py_TPFLAGS_DEFAULT, /* tp_flags */
1393 BZ2File__doc__, /*tp_doc*/
393 "Wrapper for bz2 decompression functions.", /* tp_doc */
1394 0, /*tp_traverse*/
394 0, /* tp_traverse */
1395 0, /*tp_clear*/
395 0, /* tp_clear */
1396 0, /*tp_richcompare*/
396 0, /* tp_richcompare */
1397 0, /*tp_weaklistoffset*/
397 0, /* tp_weaklistoffset */
1398 (getiterfunc)BZ2File_getiter, /*tp_iter*/
398 0, /* tp_iter */
1399 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
399 0, /* tp_iternext */
1400 BZ2File_methods, /*tp_methods*/
400 Decompressor_methods, /* tp_methods */
1401 0, /*tp_members*/
401 Decompressor_members, /* tp_members */
1402 BZ2File_getset, /*tp_getset*/
402 0, /* tp_getset */
1403 0, /*tp_base*/
403 0, /* tp_base */
1404 0, /*tp_dict*/
404 0, /* tp_dict */
1405 0, /*tp_descr_get*/
405 0, /* tp_descr_get */
1406 0, /*tp_descr_set*/
406 0, /* tp_descr_set */
1407 0, /*tp_dictoffset*/
407 0, /* tp_dictoffset */
1408 (initproc)BZ2File_init, /*tp_init*/
408 (initproc)Decompressor_init, /* tp_init */
1409 PyType_GenericAlloc, /*tp_alloc*/
409 0, /* tp_alloc */
1410 PyType_GenericNew, /*tp_new*/
410 PyType_GenericNew, /* tp_new */
1411 PyObject_Free, /*tp_free*/
411 };
1412 0, /*tp_is_gc*/
412
1413 };
413
1414
414 /* Module initialization. */
1415
415
1416 /* ===================================================================== */
416 static struct PyModuleDef _bz2module = {
1417 /* Methods of BZ2Comp. */
1418
1419 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1420 "compress(data) -> string\n\
1421 \n\
1422 Provide more data to the compressor object. It will return chunks of\n\
1423 compressed data whenever possible. When you've finished providing data\n\
1424 to compress, call the flush() method to finish the compression process,\n\
1425 and return what is left in the internal buffers.\n\
1426 ");
1427
1428 static PyObject *
1429 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1430 {
1431 Py_buffer pdata;
1432 char *data;
1433 int datasize;
1434 int bufsize = SMALLCHUNK;
1435 PY_LONG_LONG totalout;
1436 PyObject *ret = NULL;
1437 bz_stream *bzs = &self->bzs;
1438 int bzerror;
1439
1440 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1441 return NULL;
1442 data = pdata.buf;
1443 datasize = pdata.len;
1444
1445 if (datasize == 0) {
1446 PyBuffer_Release(&pdata);
1447 return PyBytes_FromStringAndSize("", 0);
1448 }
1449
1450 ACQUIRE_LOCK(self);
1451 if (!self->running) {
1452 PyErr_SetString(PyExc_ValueError,
1453 "this object was already flushed");
1454 goto error;
1455 }
1456
1457 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1458 if (!ret)
1459 goto error;
1460
1461 bzs->next_in = data;
1462 bzs->avail_in = datasize;
1463 bzs->next_out = BUF(ret);
1464 bzs->avail_out = bufsize;
1465
1466 totalout = BZS_TOTAL_OUT(bzs);
1467
1468 for (;;) {
1469 Py_BEGIN_ALLOW_THREADS
1470 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1471 Py_END_ALLOW_THREADS
1472 if (bzerror != BZ_RUN_OK) {
1473 Util_CatchBZ2Error(bzerror);
1474 goto error;
1475 }
1476 if (bzs->avail_in == 0)
1477 break; /* no more input data */
1478 if (bzs->avail_out == 0) {
1479 bufsize = Util_NewBufferSize(bufsize);
1480 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1481 BZ2_bzCompressEnd(bzs);
1482 goto error;
1483 }
1484 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1485 - totalout);
1486 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1487 }
1488 }
1489
1490 if (_PyBytes_Resize(&ret,
1491 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1492 goto error;
1493
1494 RELEASE_LOCK(self);
1495 PyBuffer_Release(&pdata);
1496 return ret;
1497
1498 error:
1499 RELEASE_LOCK(self);
1500 PyBuffer_Release(&pdata);
1501 Py_XDECREF(ret);
1502 return NULL;
1503 }
1504
1505 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1506 "flush() -> string\n\
1507 \n\
1508 Finish the compression process and return what is left in internal buffers.\n\
1509 You must not use the compressor object after calling this method.\n\
1510 ");
1511
1512 static PyObject *
1513 BZ2Comp_flush(BZ2CompObject *self)
1514 {
1515 int bufsize = SMALLCHUNK;
1516 PyObject *ret = NULL;
1517 bz_stream *bzs = &self->bzs;
1518 PY_LONG_LONG totalout;
1519 int bzerror;
1520
1521 ACQUIRE_LOCK(self);
1522 if (!self->running) {
1523 PyErr_SetString(PyExc_ValueError, "object was already "
1524 "flushed");
1525 goto error;
1526 }
1527 self->running = 0;
1528
1529 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1530 if (!ret)
1531 goto error;
1532
1533 bzs->next_out = BUF(ret);
1534 bzs->avail_out = bufsize;
1535
1536 totalout = BZS_TOTAL_OUT(bzs);
1537
1538 for (;;) {
1539 Py_BEGIN_ALLOW_THREADS
1540 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1541 Py_END_ALLOW_THREADS
1542 if (bzerror == BZ_STREAM_END) {
1543 break;
1544 } else if (bzerror != BZ_FINISH_OK) {
1545 Util_CatchBZ2Error(bzerror);
1546 goto error;
1547 }
1548 if (bzs->avail_out == 0) {
1549 bufsize = Util_NewBufferSize(bufsize);
1550 if (_PyBytes_Resize(&ret, bufsize) < 0)
1551 goto error;
1552 bzs->next_out = BUF(ret);
1553 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1554 - totalout);
1555 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1556 }
1557 }
1558
1559 if (bzs->avail_out != 0) {
1560 if (_PyBytes_Resize(&ret,
1561 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1562 goto error;
1563 }
1564
1565 RELEASE_LOCK(self);
1566 return ret;
1567
1568 error:
1569 RELEASE_LOCK(self);
1570 Py_XDECREF(ret);
1571 return NULL;
1572 }
1573
1574 static PyMethodDef BZ2Comp_methods[] = {
1575 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1576 BZ2Comp_compress__doc__},
1577 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1578 BZ2Comp_flush__doc__},
1579 {NULL, NULL} /* sentinel */
1580 };
1581
1582
1583 /* ===================================================================== */
1584 /* Slot definitions for BZ2Comp_Type. */
1585
1586 static int
1587 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1588 {
1589 int compresslevel = 9;
1590 int bzerror;
1591 static char *kwlist[] = {"compresslevel", 0};
1592
1593 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1594 kwlist, &compresslevel))
1595 return -1;
1596
1597 if (compresslevel < 1 || compresslevel > 9) {
1598 PyErr_SetString(PyExc_ValueError,
1599 "compresslevel must be between 1 and 9");
1600 goto error;
1601 }
1602
1603 #ifdef WITH_THREAD
1604 self->lock = PyThread_allocate_lock();
1605 if (!self->lock) {
1606 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1607 goto error;
1608 }
1609 #endif
1610
1611 memset(&self->bzs, 0, sizeof(bz_stream));
1612 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1613 if (bzerror != BZ_OK) {
1614 Util_CatchBZ2Error(bzerror);
1615 goto error;
1616 }
1617
1618 self->running = 1;
1619
1620 return 0;
1621 error:
1622 #ifdef WITH_THREAD
1623 if (self->lock) {
1624 PyThread_free_lock(self->lock);
1625 self->lock = NULL;
1626 }
1627 #endif
1628 return -1;
1629 }
1630
1631 static void
1632 BZ2Comp_dealloc(BZ2CompObject *self)
1633 {
1634 #ifdef WITH_THREAD
1635 if (self->lock)
1636 PyThread_free_lock(self->lock);
1637 #endif
1638 BZ2_bzCompressEnd(&self->bzs);
1639 Py_TYPE(self)->tp_free((PyObject *)self);
1640 }
1641
1642
1643 /* ===================================================================== */
1644 /* BZ2Comp_Type definition. */
1645
1646 PyDoc_STRVAR(BZ2Comp__doc__,
1647 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1648 \n\
1649 Create a new compressor object. This object may be used to compress\n\
1650 data sequentially. If you want to compress data in one shot, use the\n\
1651 compress() function instead. The compresslevel parameter, if given,\n\
1652 must be a number between 1 and 9.\n\
1653 ");
1654
1655 static PyTypeObject BZ2Comp_Type = {
1656 PyVarObject_HEAD_INIT(NULL, 0)
1657 "bz2.BZ2Compressor", /*tp_name*/
1658 sizeof(BZ2CompObject), /*tp_basicsize*/
1659 0, /*tp_itemsize*/
1660 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1661 0, /*tp_print*/
1662 0, /*tp_getattr*/
1663 0, /*tp_setattr*/
1664 0, /*tp_reserved*/
1665 0, /*tp_repr*/
1666 0, /*tp_as_number*/
1667 0, /*tp_as_sequence*/
1668 0, /*tp_as_mapping*/
1669 0, /*tp_hash*/
1670 0, /*tp_call*/
1671 0, /*tp_str*/
1672 PyObject_GenericGetAttr,/*tp_getattro*/
1673 PyObject_GenericSetAttr,/*tp_setattro*/
1674 0, /*tp_as_buffer*/
1675 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1676 BZ2Comp__doc__, /*tp_doc*/
1677 0, /*tp_traverse*/
1678 0, /*tp_clear*/
1679 0, /*tp_richcompare*/
1680 0, /*tp_weaklistoffset*/
1681 0, /*tp_iter*/
1682 0, /*tp_iternext*/
1683 BZ2Comp_methods, /*tp_methods*/
1684 0, /*tp_members*/
1685 0, /*tp_getset*/
1686 0, /*tp_base*/
1687 0, /*tp_dict*/
1688 0, /*tp_descr_get*/
1689 0, /*tp_descr_set*/
1690 0, /*tp_dictoffset*/
1691 (initproc)BZ2Comp_init, /*tp_init*/
1692 PyType_GenericAlloc, /*tp_alloc*/
1693 PyType_GenericNew, /*tp_new*/
1694 PyObject_Free, /*tp_free*/
1695 0, /*tp_is_gc*/
1696 };
1697
1698
1699 /* ===================================================================== */
1700 /* Members of BZ2Decomp. */
1701
1702 #undef OFF
1703 #define OFF(x) offsetof(BZ2DecompObject, x)
1704
1705 static PyMemberDef BZ2Decomp_members[] = {
1706 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1707 {NULL} /* Sentinel */
1708 };
1709
1710
1711 /* ===================================================================== */
1712 /* Methods of BZ2Decomp. */
1713
1714 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1715 "decompress(data) -> string\n\
1716 \n\
1717 Provide more data to the decompressor object. It will return chunks\n\
1718 of decompressed data whenever possible. If you try to decompress data\n\
1719 after the end of stream is found, EOFError will be raised. If any data\n\
1720 was found after the end of stream, it'll be ignored and saved in\n\
1721 unused_data attribute.\n\
1722 ");
1723
1724 static PyObject *
1725 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1726 {
1727 Py_buffer pdata;
1728 char *data;
1729 int datasize;
1730 int bufsize = SMALLCHUNK;
1731 PY_LONG_LONG totalout;
1732 PyObject *ret = NULL;
1733 bz_stream *bzs = &self->bzs;
1734 int bzerror;
1735
1736 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1737 return NULL;
1738 data = pdata.buf;
1739 datasize = pdata.len;
1740
1741 ACQUIRE_LOCK(self);
1742 if (!self->running) {
1743 PyErr_SetString(PyExc_EOFError, "end of stream was "
1744 "already found");
1745 goto error;
1746 }
1747
1748 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1749 if (!ret)
1750 goto error;
1751
1752 bzs->next_in = data;
1753 bzs->avail_in = datasize;
1754 bzs->next_out = BUF(ret);
1755 bzs->avail_out = bufsize;
1756
1757 totalout = BZS_TOTAL_OUT(bzs);
1758
1759 for (;;) {
1760 Py_BEGIN_ALLOW_THREADS
1761 bzerror = BZ2_bzDecompress(bzs);
1762 Py_END_ALLOW_THREADS
1763 if (bzerror == BZ_STREAM_END) {
1764 if (bzs->avail_in != 0) {
1765 Py_DECREF(self->unused_data);
1766 self->unused_data =
1767 PyBytes_FromStringAndSize(bzs->next_in,
1768 bzs->avail_in);
1769 }
1770 self->running = 0;
1771 break;
1772 }
1773 if (bzerror != BZ_OK) {
1774 Util_CatchBZ2Error(bzerror);
1775 goto error;
1776 }
1777 if (bzs->avail_in == 0)
1778 break; /* no more input data */
1779 if (bzs->avail_out == 0) {
1780 bufsize = Util_NewBufferSize(bufsize);
1781 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1782 BZ2_bzDecompressEnd(bzs);
1783 goto error;
1784 }
1785 bzs->next_out = BUF(ret);
1786 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1787 - totalout);
1788 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1789 }
1790 }
1791
1792 if (bzs->avail_out != 0) {
1793 if (_PyBytes_Resize(&ret,
1794 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1795 goto error;
1796 }
1797
1798 RELEASE_LOCK(self);
1799 PyBuffer_Release(&pdata);
1800 return ret;
1801
1802 error:
1803 RELEASE_LOCK(self);
1804 PyBuffer_Release(&pdata);
1805 Py_XDECREF(ret);
1806 return NULL;
1807 }
1808
1809 static PyMethodDef BZ2Decomp_methods[] = {
1810 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_de compress__doc__},
1811 {NULL, NULL} /* sentinel */
1812 };
1813
1814
1815 /* ===================================================================== */
1816 /* Slot definitions for BZ2Decomp_Type. */
1817
1818 static int
1819 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1820 {
1821 int bzerror;
1822
1823 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1824 return -1;
1825
1826 #ifdef WITH_THREAD
1827 self->lock = PyThread_allocate_lock();
1828 if (!self->lock) {
1829 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1830 goto error;
1831 }
1832 #endif
1833
1834 self->unused_data = PyBytes_FromStringAndSize("", 0);
1835 if (!self->unused_data)
1836 goto error;
1837
1838 memset(&self->bzs, 0, sizeof(bz_stream));
1839 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1840 if (bzerror != BZ_OK) {
1841 Util_CatchBZ2Error(bzerror);
1842 goto error;
1843 }
1844
1845 self->running = 1;
1846
1847 return 0;
1848
1849 error:
1850 #ifdef WITH_THREAD
1851 if (self->lock) {
1852 PyThread_free_lock(self->lock);
1853 self->lock = NULL;
1854 }
1855 #endif
1856 Py_CLEAR(self->unused_data);
1857 return -1;
1858 }
1859
1860 static void
1861 BZ2Decomp_dealloc(BZ2DecompObject *self)
1862 {
1863 #ifdef WITH_THREAD
1864 if (self->lock)
1865 PyThread_free_lock(self->lock);
1866 #endif
1867 Py_XDECREF(self->unused_data);
1868 BZ2_bzDecompressEnd(&self->bzs);
1869 Py_TYPE(self)->tp_free((PyObject *)self);
1870 }
1871
1872
1873 /* ===================================================================== */
1874 /* BZ2Decomp_Type definition. */
1875
1876 PyDoc_STRVAR(BZ2Decomp__doc__,
1877 "BZ2Decompressor() -> decompressor object\n\
1878 \n\
1879 Create a new decompressor object. This object may be used to decompress\n\
1880 data sequentially. If you want to decompress data in one shot, use the\n\
1881 decompress() function instead.\n\
1882 ");
1883
1884 static PyTypeObject BZ2Decomp_Type = {
1885 PyVarObject_HEAD_INIT(NULL, 0)
1886 "bz2.BZ2Decompressor", /*tp_name*/
1887 sizeof(BZ2DecompObject), /*tp_basicsize*/
1888 0, /*tp_itemsize*/
1889 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1890 0, /*tp_print*/
1891 0, /*tp_getattr*/
1892 0, /*tp_setattr*/
1893 0, /*tp_reserved*/
1894 0, /*tp_repr*/
1895 0, /*tp_as_number*/
1896 0, /*tp_as_sequence*/
1897 0, /*tp_as_mapping*/
1898 0, /*tp_hash*/
1899 0, /*tp_call*/
1900 0, /*tp_str*/
1901 PyObject_GenericGetAttr,/*tp_getattro*/
1902 PyObject_GenericSetAttr,/*tp_setattro*/
1903 0, /*tp_as_buffer*/
1904 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1905 BZ2Decomp__doc__, /*tp_doc*/
1906 0, /*tp_traverse*/
1907 0, /*tp_clear*/
1908 0, /*tp_richcompare*/
1909 0, /*tp_weaklistoffset*/
1910 0, /*tp_iter*/
1911 0, /*tp_iternext*/
1912 BZ2Decomp_methods, /*tp_methods*/
1913 BZ2Decomp_members, /*tp_members*/
1914 0, /*tp_getset*/
1915 0, /*tp_base*/
1916 0, /*tp_dict*/
1917 0, /*tp_descr_get*/
1918 0, /*tp_descr_set*/
1919 0, /*tp_dictoffset*/
1920 (initproc)BZ2Decomp_init, /*tp_init*/
1921 PyType_GenericAlloc, /*tp_alloc*/
1922 PyType_GenericNew, /*tp_new*/
1923 PyObject_Free, /*tp_free*/
1924 0, /*tp_is_gc*/
1925 };
1926
1927
1928 /* ===================================================================== */
1929 /* Module functions. */
1930
1931 PyDoc_STRVAR(bz2_compress__doc__,
1932 "compress(data [, compresslevel=9]) -> string\n\
1933 \n\
1934 Compress data in one shot. If you want to compress data sequentially,\n\
1935 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1936 given, must be a number between 1 and 9.\n\
1937 ");
1938
1939 static PyObject *
1940 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1941 {
1942 int compresslevel=9;
1943 Py_buffer pdata;
1944 char *data;
1945 int datasize;
1946 int bufsize;
1947 PyObject *ret = NULL;
1948 bz_stream _bzs;
1949 bz_stream *bzs = &_bzs;
1950 int bzerror;
1951 static char *kwlist[] = {"data", "compresslevel", 0};
1952
1953 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1954 kwlist, &pdata,
1955 &compresslevel))
1956 return NULL;
1957 data = pdata.buf;
1958 datasize = pdata.len;
1959
1960 if (compresslevel < 1 || compresslevel > 9) {
1961 PyErr_SetString(PyExc_ValueError,
1962 "compresslevel must be between 1 and 9");
1963 PyBuffer_Release(&pdata);
1964 return NULL;
1965 }
1966
1967 /* Conforming to bz2 manual, this is large enough to fit compressed
1968 * data in one shot. We will check it later anyway. */
1969 bufsize = datasize + (datasize/100+1) + 600;
1970
1971 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1972 if (!ret) {
1973 PyBuffer_Release(&pdata);
1974 return NULL;
1975 }
1976
1977 memset(bzs, 0, sizeof(bz_stream));
1978
1979 bzs->next_in = data;
1980 bzs->avail_in = datasize;
1981 bzs->next_out = BUF(ret);
1982 bzs->avail_out = bufsize;
1983
1984 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1985 if (bzerror != BZ_OK) {
1986 Util_CatchBZ2Error(bzerror);
1987 PyBuffer_Release(&pdata);
1988 Py_DECREF(ret);
1989 return NULL;
1990 }
1991
1992 for (;;) {
1993 Py_BEGIN_ALLOW_THREADS
1994 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1995 Py_END_ALLOW_THREADS
1996 if (bzerror == BZ_STREAM_END) {
1997 break;
1998 } else if (bzerror != BZ_FINISH_OK) {
1999 BZ2_bzCompressEnd(bzs);
2000 Util_CatchBZ2Error(bzerror);
2001 PyBuffer_Release(&pdata);
2002 Py_DECREF(ret);
2003 return NULL;
2004 }
2005 if (bzs->avail_out == 0) {
2006 bufsize = Util_NewBufferSize(bufsize);
2007 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2008 BZ2_bzCompressEnd(bzs);
2009 PyBuffer_Release(&pdata);
2010 return NULL;
2011 }
2012 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2013 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2014 }
2015 }
2016
2017 if (bzs->avail_out != 0) {
2018 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2019 ret = NULL;
2020 }
2021 }
2022 BZ2_bzCompressEnd(bzs);
2023
2024 PyBuffer_Release(&pdata);
2025 return ret;
2026 }
2027
2028 PyDoc_STRVAR(bz2_decompress__doc__,
2029 "decompress(data) -> decompressed data\n\
2030 \n\
2031 Decompress data in one shot. If you want to decompress data sequentially,\n\
2032 use an instance of BZ2Decompressor instead.\n\
2033 ");
2034
2035 static PyObject *
2036 bz2_decompress(PyObject *self, PyObject *args)
2037 {
2038 Py_buffer pdata;
2039 char *data;
2040 int datasize;
2041 int bufsize = SMALLCHUNK;
2042 PyObject *ret;
2043 bz_stream _bzs;
2044 bz_stream *bzs = &_bzs;
2045 int bzerror;
2046
2047 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2048 return NULL;
2049 data = pdata.buf;
2050 datasize = pdata.len;
2051
2052 if (datasize == 0) {
2053 PyBuffer_Release(&pdata);
2054 return PyBytes_FromStringAndSize("", 0);
2055 }
2056
2057 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2058 if (!ret) {
2059 PyBuffer_Release(&pdata);
2060 return NULL;
2061 }
2062
2063 memset(bzs, 0, sizeof(bz_stream));
2064
2065 bzs->next_in = data;
2066 bzs->avail_in = datasize;
2067 bzs->next_out = BUF(ret);
2068 bzs->avail_out = bufsize;
2069
2070 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2071 if (bzerror != BZ_OK) {
2072 Util_CatchBZ2Error(bzerror);
2073 Py_DECREF(ret);
2074 PyBuffer_Release(&pdata);
2075 return NULL;
2076 }
2077
2078 for (;;) {
2079 Py_BEGIN_ALLOW_THREADS
2080 bzerror = BZ2_bzDecompress(bzs);
2081 Py_END_ALLOW_THREADS
2082 if (bzerror == BZ_STREAM_END) {
2083 break;
2084 } else if (bzerror != BZ_OK) {
2085 BZ2_bzDecompressEnd(bzs);
2086 Util_CatchBZ2Error(bzerror);
2087 PyBuffer_Release(&pdata);
2088 Py_DECREF(ret);
2089 return NULL;
2090 }
2091 if (bzs->avail_in == 0) {
2092 BZ2_bzDecompressEnd(bzs);
2093 PyErr_SetString(PyExc_ValueError,
2094 "couldn't find end of stream");
2095 PyBuffer_Release(&pdata);
2096 Py_DECREF(ret);
2097 return NULL;
2098 }
2099 if (bzs->avail_out == 0) {
2100 bufsize = Util_NewBufferSize(bufsize);
2101 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2102 BZ2_bzDecompressEnd(bzs);
2103 PyBuffer_Release(&pdata);
2104 return NULL;
2105 }
2106 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2107 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2108 }
2109 }
2110
2111 if (bzs->avail_out != 0) {
2112 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2113 ret = NULL;
2114 }
2115 }
2116 BZ2_bzDecompressEnd(bzs);
2117 PyBuffer_Release(&pdata);
2118
2119 return ret;
2120 }
2121
2122 static PyMethodDef bz2_methods[] = {
2123 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2124 bz2_compress__doc__},
2125 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2126 bz2_decompress__doc__},
2127 {NULL, NULL} /* sentinel */
2128 };
2129
2130 /* ===================================================================== */
2131 /* Initialization function. */
2132
2133 PyDoc_STRVAR(bz2__doc__,
2134 "The python bz2 module provides a comprehensive interface for\n\
2135 the bz2 compression library. It implements a complete file\n\
2136 interface, one shot (de)compression functions, and types for\n\
2137 sequential (de)compression.\n\
2138 ");
2139
2140
2141 static struct PyModuleDef bz2module = {
2142 PyModuleDef_HEAD_INIT,
417 PyModuleDef_HEAD_INIT,
2143 "bz2",
418 "_bz2",
2144 bz2__doc__,
419 NULL,
2145 -1,
420 -1,
2146 bz2_methods,
421 NULL,
2147 NULL,
422 NULL,
2148 NULL,
423 NULL,
2149 NULL,
424 NULL,
2150 NULL
425 NULL
2151 };
426 };
2152
427
2153 PyMODINIT_FUNC
428 PyMODINIT_FUNC
2154 PyInit_bz2(void)
429 PyInit__bz2(void)
2155 {
430 {
2156 PyObject *m;
431 PyObject *m;
2157
432
2158 if (PyType_Ready(&BZ2File_Type) < 0)
433 if (PyType_Ready(&Compressor_Type) < 0)
2159 return NULL;
434 return NULL;
2160 if (PyType_Ready(&BZ2Comp_Type) < 0)
435 if (PyType_Ready(&Decompressor_Type) < 0)
2161 return NULL;
2162 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2163 return NULL;
436 return NULL;
2164
437
2165 m = PyModule_Create(&bz2module);
438 m = PyModule_Create(&_bz2module);
2166 if (m == NULL)
439 if (m == NULL)
2167 return NULL;
440 return NULL;
2168
441
2169 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
442 Py_INCREF(&Compressor_Type);
443 PyModule_AddObject(m, "Compressor", (PyObject *)&Compressor_Type);
2170
444
2171 Py_INCREF(&BZ2File_Type);
445 Py_INCREF(&Decompressor_Type);
2172 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
446 PyModule_AddObject(m, "Decompressor", (PyObject *)&Decompressor_Type);
2173
447
2174 Py_INCREF(&BZ2Comp_Type);
2175 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2176
2177 Py_INCREF(&BZ2Decomp_Type);
2178 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2179 return m;
448 return m;
2180 }
449 }
OLD
NEW