[Python-Dev] Decoding incomplete unicode (original) (raw)

Wed Jul 28 14:46:47 CEST 2004

pending = b''  # bytes of an incomplete multibyte sequence, kept for the next read()

def read(self, size=-1):
    """Read bytes from ``fp`` and return them decoded as UTF-8 text.

    A multibyte sequence that is cut off at the end of the bytes read so
    far is not an error: its bytes are stored in ``self.pending`` and are
    prepended to the data of the next attempt.  When nothing could be
    decoded yet, the stream is re-read one byte at a time until at least
    one character is complete or the stream ends.

    ``fp`` is a free name that is not defined in this excerpt; it is
    assumed to be a binary stream whose ``read(n)`` returns bytes and
    returns an empty result at end of stream -- TODO confirm against the
    enclosing code.

    Args:
        size: Number of bytes to request from ``fp`` on the first
            attempt.  ``-1`` (the default), any other negative value, or
            ``None`` means "read everything in one go".

    Returns:
        The decoded text.  An empty string means that no complete
        character was available (end of stream, or ``size == 0``).

    Raises:
        UnicodeDecodeError: If the data contains an illegal sequence, or
            if the data ends in the middle of a multibyte sequence
            (reason ``"toofew"``) at end of stream or on a read-all call.
    """
    import codecs

    read_all = size is None or size < 0
    request = size
    outputbuffer = ''  # stays empty when there was nothing to decode

    while True:
        chunk = fp.read(request)
        # An empty chunk marks the end of the stream, unless nothing was
        # requested in the first place.  This must be judged on the raw
        # chunk, before the pending bytes are prepended.
        at_eof = not chunk and request != 0

        r = chunk
        if self.pending:
            r = self.pending + chunk
            self.pending = b''

        remainder = b''
        if r:
            # A fresh incremental decoder returns the text of the complete
            # prefix and buffers a truncated trailing sequence instead of
            # failing; illegal sequences still raise UnicodeDecodeError.
            decoder = codecs.getincrementaldecoder('utf-8')()
            outputbuffer = decoder.decode(r)
            remainder = decoder.getstate()[0]

        if remainder:
            if at_eof or read_all:
                # No more data will arrive to complete the sequence.
                raise UnicodeDecodeError(
                    'utf-8', r, len(r) - len(remainder), len(r), "toofew")
            self.pending = remainder

        if (read_all            # one time read up
                or outputbuffer # output buffer isn't empty
                or not chunk):  # nothing was read: end of stream or size == 0
            break

        request = 1  # read 1 byte in next try

    return outputbuffer