[Python-checkins] python/dist/src/Lib/email Parser.py,1.21,1.22 (original) (raw)

Sat May 8 23:46:44 EDT 2004

def __init__(self, fp):
    """Remember the file-like object fp and start with nothing pushed back."""
    # Stack of pushed-back lines; readline() drains it (last pushed, first
    # returned) before it reads from fp again.
    self.unread = []
    self.fp = fp
def readline(self):
    """Return the next line of data.

    Lines handed back through unreadline() are served first, the most
    recently pushed one first.  Only when nothing is pending is a fresh
    line read from the wrapped file object.
    """
    pending = self.unread
    if not pending:
        return self.fp.readline()
    return pending.pop()
def unreadline(self, line):
    """Push line back so that the next readline() returns it again."""
    pushback = self.unread
    # Appending keeps the list a stack: readline() pops from the same end.
    pushback.append(line)
def peekline(self):
    """Return the next line without consuming it."""
    upcoming = self.readline()
    # Hand it straight back so the following readline() sees the same line.
    self.unreadline(upcoming)
    return upcoming
def read(self):
    """Return all remaining data and empty the pushback stack.

    The result is every pushed-back line, in the order readline() would
    have returned them, followed by whatever is left in the wrapped file
    object.
    """
    remainder = self.fp.read()
    if self.unread:
        # self.unread is a stack (readline() pops from the end), so it has
        # to be reversed to get reading order.  Pushed-back lines already
        # carry their own line endings, so they are concatenated without
        # inserting any separator.
        remainder = "".join(self.unread[::-1]) + remainder
        self.unread = []
    return remainder
def readuntil(self, re, afterblank=0, includematch=0):
    """Read a line at a time until one matches the given pattern.

    re is a compiled pattern object; each line is tested with
    re.match(line).  (The parameter name shadows the re module inside
    this method.)

    Returns a 2-tuple (text, match):

    * text is everything read before the matching line, joined together;
      if includematch is true the matching line is included as well.
    * match is the match object for the matching line, or None if
      end-of-file was reached first, in which case text is everything
      that was read.

    If afterblank is true, a line only counts as a match when the line
    immediately before it was blank (as decided by NLCRE.match).  Blank
    lines are kept in the returned text.  The matching line is consumed,
    so the next readline() returns the line following it.
    """
    prematch = []
    blankseen = 0
    while 1:
        line = self.readline()
        if not line:
            # end of file
            return EMPTYSTRING.join(prematch), None
        if afterblank and NLCRE.match(line):
            # Remember the blank line for the next iteration, and keep it
            # in the returned text rather than dropping it.
            blankseen = 1
            prematch.append(line)
            continue
        m = re.match(line)
        if m and (not afterblank or blankseen):
            if includematch:
                prematch.append(line)
            return EMPTYSTRING.join(prematch), m
        # Only clear the flag after the match test, so that blankseen still
        # describes the line *before* the one just tested.
        blankseen = 0
        prematch.append(line)
def _parsemessage(self, container, fp):
    """Parse the body belonging to container from fp.

    The body is walked from top to bottom, recursing for nested
    multipart/* and message/* parts.  How it is read depends on
    container's boundary parameter and content type:

    * boundary present: split the body on the boundary lines, attaching
      one sub-object per part; text before the first boundary becomes
      container.preamble (None when empty).
    * message/rfc822: parse one nested message (headers, then body).
    * message/delivery-status: parse consecutive header blocks, one
      sub-object per block, until a line starting with "--" or
      end-of-file.
    * any other message/* type: parse one nested body without headers.
    * anything else: nothing is read here.

    fp must provide readline(), unreadline(), peekline() and readuntil().

    Returns the object that should receive the data following this block
    (the caller attaches it): container itself, or the nested message
    object for message/* types other than delivery-status.

    Raises Errors.BoundaryError if a multipart container has no boundary,
    or if a part is followed neither by a boundary line nor by a line
    ending.
    """
    boundary = container.get_boundary()
    isdigest = (container.get_content_type() == 'multipart/digest')
    if boundary:
        separator = '--' + boundary
        boundaryRE = re.compile(
                r'(?P<sep>' + re.escape(separator) +
                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)$')
        preamble, matchobj = fp.readuntil(boundaryRE)
        if not matchobj:
            # Broken - we hit the end of file. Just set the body
            # to the text.
            container.set_payload(preamble)
            return container
        if preamble:
            container.preamble = preamble
        else:
            # The module docs specify an empty preamble is None, not ''
            container.preamble = None
        while 1:
            subobj = self._class()
            if isdigest:
                subobj.set_default_type('message/rfc822')
                firstline = fp.peekline()
                if firstline.strip():
                    # we have MIME headers. all good.
                    self._parseheaders(subobj, fp)
                else:
                    # no MIME headers. this is allowed for multipart/digest
                    # Consume the extra blank line
                    fp.readline()
            else:
                self._parseheaders(subobj, fp)
            container.attach(subobj)
            hassubparts = (subobj.get_content_maintype() in
                                            ( "message", "multipart" ))
            if hassubparts:
                subobj = self._parsemessage(subobj, fp)
            trailer, matchobj = fp.readuntil(boundaryRE)
            if matchobj is None or trailer:
                # Strip exactly one trailing line ending from the part.
                # The alternation tries "\r\n" first and \Z anchors at the
                # very end of the string, so a CRLF is removed as a unit
                # instead of leaving a stray "\r" behind.
                mo = re.search(r'(?P<sep>\r\n|\r|\n)\Z', trailer)
                if not mo:
                    raise Errors.BoundaryError(
                      'No terminating boundary and no trailing empty line')
                linesep = mo.group('sep')
                trailer = trailer[:-len(linesep)]
            if trailer:
                self._attach_trailer(subobj, trailer)
            if matchobj is None or matchobj.group('end'):
                # That was the last piece of data. Let our caller attach
                # the epilogue to us. But before we do that, push the
                # line ending of the match group back into the readline
                # buffer, as it's part of the epilogue.
                if matchobj:
                    fp.unreadline(matchobj.group('linesep'))
                return container
    elif container.get_content_maintype() == "multipart":
        # Very bad.  A message is a multipart with no boundary!
        raise Errors.BoundaryError(
                'multipart message with no defined boundary')
    elif container.get_content_maintype() == "message":
        ct = container.get_content_type()
        if ct == "message/rfc822":
            submessage = self._class()
            self._parseheaders(submessage, fp)
            self._parsemessage(submessage, fp)
            container.attach(submessage)
            return submessage
        elif ct == "message/delivery-status":
            # This special kind of type contains blocks of headers
            # separated by a blank line.  We'll represent each header
            # block as a separate Message object
            while 1:
                nextblock = self._class()
                self._parseheaders(nextblock, fp)
                container.attach(nextblock)
                # next peek ahead to see whether we've hit the end or not.
                # An empty string means end-of-file; without that check
                # the loop would keep attaching empty blocks forever.
                nextline = fp.peekline()
                if not nextline or nextline[:2] == "--":
                    break
            return container
        else:
            # Other sort of message object (e.g. external-body)
            msg = self._class()
            self._parsemessage(msg, fp)
            container.attach(msg)
            return msg
    else:
        # single body section. We let our caller set the payload.
        return container
def _attach_trailer(self, obj, trailer):
    """Store trailer on obj in the place that suits obj's content type.

    For message/* and multipart/* objects the text becomes obj.epilogue;
    for every other main type it becomes the payload.
    """
    is_container = obj.get_content_maintype() in ("message", "multipart")
    if not is_container:
        obj.set_payload(trailer)
        return
    obj.epilogue = trailer

[Python-checkins] python/dist/src/Lib/email Parser.py,1.21,1.22 (original) (raw)

Index: Parser.py