[issue4565] Rewrite the IO stack in C - Code Review (original) (raw)

OLD

NEW

(Empty)

1 """

2 Python implementation of the io module.

3 """

4

5 import os

6 import abc

7 import codecs

8 # Import _thread instead of threading to reduce startup cost

9 try:

10 from _thread import allocate_lock as Lock

11 except ImportError:

12 from _dummy_thread import allocate_lock as Lock

13

14 import io

15 from io import __all__

16

17 # open() uses st_blksize whenever we can

18 DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes

19

20 # NOTE: Base classes defined here are registered with the "official" ABCs

21 # defined in io.py. We don't use real inheritance though, because we don't

22 # want to inherit the C implementations.

23

24

class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize with OS error details.

        characters_written is the number of characters (or bytes)
        successfully written before the stream blocked; buffered code
        re-raises with a partial count, so it must be an int.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            # Fix: error message read "a integer"; validate eagerly so the
            # attribute is always a usable int.
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written

34

35

def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # Validate argument types first so callers get a clear TypeError
    # rather than a confusing failure deeper in the stack.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    # Each mode character may appear at most once, and only known ones.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        # 'U' implies reading for backwards compatibility.
        reading = True
    # Reject inconsistent mode combinations.
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Build the raw (unbuffered) file object; its mode string is
    # reassembled from the parsed flags.
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    if buffering is None:
        buffering = -1
    line_buffering = False
    # buffering == 1 means line buffering; a tty defaults to it too.
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            # Prefer the filesystem's block size when it is available.
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            # Unbuffered binary I/O: return the raw file object directly.
            return raw
        raise ValueError("can't have unbuffered text I/O")
    # Wrap the raw stream in the buffered class matching the mode.
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    # Text mode: add the decoding/newline-translating layer on top.
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text

211

212

class DocDescriptor:
    """Descriptor that computes builtins.open.__doc__ on attribute access."""

    def __get__(self, obj, typ):
        # Prepend the full call signature so that help() on the wrapper
        # shows how to invoke open(), followed by open()'s own docstring.
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__

221

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # The descriptor replaces the literal docstring above, so that the
    # exposed __doc__ always mirrors open()'s signature and docstring.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # No OpenWrapper instance is ever created: "constructing" one
        # simply forwards to open() and returns the resulting stream.
        stream = open(*args, **kwargs)
        return stream

234

235

class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream cannot perform a requested operation.

    Subclasses both ValueError and IOError, so callers may catch either.
    """

238

239

class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'r') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset offset. offset is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        # Implemented as a no-op relative seek.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled (_IOBase__closed) so subclasses can't clobber it.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)


    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Peek at buffered data so we can read up to (and
                # including) the next newline in a single read().
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # No peek() available: read one byte at a time.
                return 1
        if limit is None:
            limit = -1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)

504

505

class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Fix: readinto() may return None for a non-blocking stream
            # with no data available (see its docstring); previously this
            # fell through to ``del b[None:]`` and raised TypeError.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block as has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

559

# Register with the "official" ABCs from io (see NOTE at top of file),
# and register the C-implemented FileIO as a RawIOBase so isinstance()
# checks treat both implementations uniformly.
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)

563

564

class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n: int = None) -> bytes:
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Default implementation: read the data, then copy it into the
        # caller-supplied buffer.
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            # array.array targets are handled specially: convert the
            # bytes to an array of the same kind before slice-assigning.
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array('b', data)
        return n

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

io.BufferedIOBase.register(BufferedIOBase)

641

642

class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember the raw stream that all operations delegate to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and sanity-check the position it reports."""
        position = self.raw.seek(pos, whence)
        if position < 0:
            raise IOError("seek() returned an invalid position")
        return position

    def tell(self):
        """Ask the raw stream for its position, rejecting negative answers."""
        position = self.raw.tell()
        if position < 0:
            raise IOError("tell() returned an invalid position")
        return position

    def truncate(self, pos=None):
        """Truncate the raw stream to pos (default: current position)."""
        # Flush the stream.  We're mixing buffered I/O with lower-level
        # I/O, and a flush may be necessary to synch both views of the
        # current file state.
        self.flush()
        target = self.tell() if pos is None else pos
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Forward flush to the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort), then close the raw stream; idempotent."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            # If flush() fails, just give up -- but still close the
            # underlying raw stream.
            pass
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()

724

725

class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-loading it with initial_bytes."""
        content = bytearray()
        if initial_bytes is not None:
            content += initial_bytes
        self._buffer = content
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes; None or a negative n reads to EOF."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None or n < 0:
            size = len(self._buffer)
        else:
            size = n
        if self._pos >= len(self._buffer):
            return b""
        stop = min(len(self._buffer), self._pos + size)
        data = bytes(self._buffer[self._pos:stop])
        self._pos = stop
        return data

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position, zero-filling any seek gap."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos = start + count
        return count

    def seek(self, pos, whence=0):
        """Move the position; whence is 0 (absolute), 1 (relative), 2 (end)."""
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            # Relative and end-relative seeks clamp at zero, not raise.
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Discard everything after pos (default: current position)."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        # Reposition at the cut point, re-validating pos via seek().
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True

823

824

class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        # The buffer state (_read_buf/_read_pos) is only touched while
        # holding this lock.
        self._read_lock = Lock()

    def _reset_read_buf(self):
        # Discard buffered, not-yet-consumed data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # Caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    # Remember whether we hit EOF (b"") or would-block
                    # (None) so an empty result reports the right one.
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        # Caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top up the buffer with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # Logical position = raw stream position minus the data that is
        # buffered but not yet consumed.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Adjust a relative seek for buffered-but-unread data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

956

class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.  If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        # max_buffer_size caps how much data may accumulate when the raw
        # stream would block; beyond it, write() reports a partial write.
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes in b, flushing when the buffer overflows.

        Returns the number of bytes accepted.  Raises BlockingIOError
        (with characters_written set) when the raw stream would block
        and the buffered data cannot all be retained.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Truncate the underlying file to pos (default: current position)."""
        with self._write_lock:
            # Flush first so pending data is counted in the file position.
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                # Defensive check: a raw stream reporting more bytes than
                # it was given (or a negative count) is broken.
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Keep the partial progress the raw stream reported, then
            # re-raise with the cumulative count.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Logical position: raw position plus still-buffered bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush buffered data, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

1049

1050

class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        # Each side gets its own buffering layer; all methods below are
        # straight delegations to one side or the other.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        # Normalize n=None to "read everything" before delegating.
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # Close the writer first so its buffered data is flushed before
        # the reader side is torn down.
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # NOTE(review): only reflects the writer side; the reader could
        # in principle be closed independently.
        return self.writer.closed

1115

1116

class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        # Initialize both halves on the same raw stream; the methods
        # below keep the read and write buffers from stepping on each
        # other.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        # If there is pending write data the writer's view of the
        # position is authoritative; otherwise the reader's is.
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate to pos (default: current logical position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        # Flush pending writes first so reads see up-to-date data.
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)

1188

1189

class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    # Every concrete method below is a stub that raises via
    # IOBase._unsupported; subclasses provide the real implementations.

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

1236

# Register with the "official" ABC from the io module as a virtual
# subclass -- no real inheritance, to avoid picking up the C
# implementations (see the NOTE near the top of the file).
io.TextIOBase.register(TextIOBase)

1238

1239

class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        # Bitmask of newline kinds seen so far (_LF | _CR | _CRLF).
        self.seennl = 0
        # True when the previous chunk ended with '\r' and it is being
        # held back until we know whether a '\n' follows.
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        # "count and FLAG" evaluates to FLAG when count is nonzero,
        # otherwise to 0, so each kind sets exactly its own bit.
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        # State is (buffered_bytes, flags): bit 0 of flags carries
        # pendingcr, the remaining bits are the wrapped decoder's flags.
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        # Inverse of getstate(): split off bit 0 for pendingcr and pass
        # the rest down to the wrapped decoder.
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        # Map the seennl bitmask to the value documented for
        # TextIOBase.newlines: None, a string, or a tuple of strings.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]

1323

1324

class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        # Flushing leaves the decoder in a clean state, so tell() is
        # usable again even after next() disabled it.
        self._telling = self._seekable

    def close(self):
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the underlying buffer.

        Returns the number of characters written (the length of s before
        newline translation).
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Lazily create the incremental encoder for self._encoding.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Lazily create the incremental decoder, wrapped in the newline
        # decoder when universal-newlines mode is active.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: peel off 64-bit fields one at a time.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie encoding the current stream position."""
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate the underlying buffer to pos (default: tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def seek(self, cookie, whence=0):
        """Seek to a position previously returned by tell().

        Only cookies from tell() (whence=0), zero cur-relative seeks
        (whence=1) and zero end-relative seeks (whence=2) are supported.
        """
        if self.closed:
            # BUG FIX: the message previously said "tell on closed file",
            # copy-pasted from tell(); report the actual operation.
            raise ValueError("seek of closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters (all of them if n < 0)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def __next__(self):
        # Disable tell() during iteration: _read_chunk skips the
        # snapshot bookkeeping while _telling is false, which makes
        # line iteration faster.
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, keeping at most limit characters."""
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # (the dead local "more_line = ''" was removed here)
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None

1806

1807

class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    initial_value gives the starting contents of the stream; the newline
    argument has the same meaning as in TextIOWrapper's constructor.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", newline="\n"):
        # Back the text layer with an in-memory bytes buffer, always
        # encoded as strict UTF-8.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        if initial_value:
            if not isinstance(initial_value, str):
                initial_value = str(initial_value)
            # Pre-populate, then rewind so reads start at the beginning.
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

OLD

NEW