[Python-checkins] python/dist/src/Lib/email FeedParser.py,1.1,1.2 (original) (raw)

Sat May 8 23:29:25 EDT 2004

          if lastheader:
              # XXX reconsider the joining of folded lines
    for line in self._input:
        if line is NeedMoreData:
            yield None # Need More Data
            continue
        self._input.unreadline(line)
        if not completing:
            headers = []
            # Now collect all headers.
            for line in self._input:
                if line is NeedMoreData:
                    yield None # Need More Data
                    continue
                if not self.headerRE.match(line):
                    self._parse_headers(headers)
                    # A message/rfc822 has no body and no internal 
                    # boundary.
                    if self._cur.get_content_maintype() == "message":
                        self._new_sub_object()
                        completing = False
                        headers = []
                        continue
                    if line.strip():
                        # No blank line between headers and body. 
                        # Push this line back, it's the first line of 
                        # the body.
                        self._input.unreadline(line)
                    break
                else:
                    headers.append(line)
            else:
                # We're done with the data and are still inside the headers
                self._parse_headers(headers)
        # Now we're dealing with the body
        boundary = self._cur.get_boundary()
        isdigest = (self._cur.get_content_type() == 'multipart/digest')
        if boundary and not self._cur._finishing:
            separator = '--' + boundary
            self._cur._boundaryRE = re.compile(
                    r'(?P<sep>' + re.escape(separator) +
                    r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)$')
            for r in self._input.readuntil(self._cur._boundaryRE):
                if r is NeedMoreData:
                     yield NeedMoreData
                else:
                    preamble, matchobj = r
                    break
            if not matchobj:
                # Broken - we hit the end of file. Just set the body 
                # to the text.
                if completing:
                    self._attach_trailer(last, preamble)
                else:
                    self._attach_preamble(self._cur, preamble)
                # XXX move back to the parent container.
                self._pop_container()
                completing = True
                continue
            if preamble:
                if completing:
                    preamble = preamble[:-len(matchobj.group('linesep'))]
                    self._attach_trailer(last, preamble)
                else:
                    self._attach_preamble(self._cur, preamble)
            elif not completing:
                # The module docs specify an empty preamble is None, not ''
                self._cur.preamble = None
                # If we _are_ completing, the last object gets no payload
            if matchobj.group('end'):
                # That was the end boundary tag. Bounce back to the
                # parent container
                last = self._pop_container()
                self._input.unreadline(matchobj.group('linesep'))
                completing = True
                continue
            # A number of MTAs produced by a nameless large company
            # we shall call "SicroMoft" produce repeated boundary 
            # lines.
            while True:
                line = self._input.peekline()
                if line is NeedMoreData:
                    yield None
                    continue
                if self._cur._boundaryRE.match(line):
                    self._input.readline()
                else:
                    break
            self._new_sub_object()
            completing = False
            if isdigest:
                self._cur.set_default_type('message/rfc822')
                continue
        else:
            # non-multipart or after end-boundary
            if last is not self._root:
                last = self._pop_container()
            if self._cur.get_content_maintype() == "message":
                # We double-pop to leave the RFC822 object
                self._pop_container()
                completing = True
            elif self._cur._boundaryRE and last <> self._root:
                completing = True
            else:
                # Non-multipart top level, or in the trailer of the 
                # top level multipart
                while not self._input.is_done():
                    yield None
                data = list(self._input)
                body = EMPTYSTRING.join(data)
                self._attach_trailer(last, body)
def _attach_trailer(self, obj, trailer):
    """Store trailing text on obj.

    When obj's content main type is "multipart" or "message" the text
    becomes obj.epilogue; for any other main type it is handed to
    obj.set_payload().
    """
    maintype = obj.get_content_maintype()
    if maintype != "multipart" and maintype != "message":
        # Not a container type: the text is the object's payload.
        obj.set_payload(trailer)
        return
    obj.epilogue = trailer
def _attach_preamble(self, obj, trailer):
    """Store leading text on obj.

    The text arrives in the parameter named `trailer`.  When obj's
    content main type is "multipart" or "message" the text becomes
    obj.preamble; for any other main type it is handed to
    obj.set_payload().
    """
    is_container = obj.get_content_maintype() in ("multipart", "message")
    if not is_container:
        # Not a container type: the text is the object's payload.
        obj.set_payload(trailer)
        return
    obj.preamble = trailer
def _new_sub_object(self):
    """Create a new object with self._class() and make it current.

    If self._objectstack is non-empty, the new object is first attached
    to the object on top of the stack.  It is then pushed onto the
    stack, given _boundaryRE = None and _finishing = False, and stored
    as self._cur.
    """
    stack = self._objectstack
    child = self._class()
    if stack:
        # Hang the new object off the current top of the stack.
        stack[-1].attach(child)
    stack.append(child)
    child._boundaryRE = None
    child._finishing = False
    self._cur = child
def _pop_container(self):
    """Pop the top of self._objectstack and return the popped object.

    If the stack still holds objects afterwards, self._cur is moved to
    the new top of the stack.  If the stack is now empty, self._cur is
    left unchanged and its _finishing attribute is set to True.
    """
    stack = self._objectstack
    popped = stack.pop()
    if not stack:
        # Nothing left to move back to; flag the current object instead.
        self._cur._finishing = True
    else:
        self._cur = stack[-1]
    return popped
def _pop_message(self):
    """Pop the top of self._msgstack and return the popped object.

    Afterwards self._cur is the new top of the stack, or None when the
    stack has been emptied.
    """
    stack = self._msgstack
    popped = stack.pop()
    if not stack:
        self._cur = None
    else:
        self._cur = stack[-1]
    return popped

[Python-checkins] python/dist/src/Lib/email FeedParser.py,1.1,1.2 (original) (raw)

Index: FeedParser.py

- - def _parsegen(self): - # Parse any currently available text - self._new_sub_object() - self._root = self._cur - completing = False - last = None