(original) (raw)

changeset: 69334:a95d936ce8eb user: R David Murray rdmurray@bitdance.com date: Wed Apr 13 16:46:05 2011 -0400 files: Doc/library/email.parser.rst Lib/email/generator.py Lib/email/parser.py Lib/test/test_email/test_email.py Misc/NEWS description: #11684: Complete parser bytes interface by adding BytesHeaderParser Patch by Steffen Daode Nurpmeso. diff -r fafc84b45a9e -r a95d936ce8eb Doc/library/email.parser.rst --- a/Doc/library/email.parser.rst Wed Apr 13 11:50:34 2011 -0700 +++ b/Doc/library/email.parser.rst Wed Apr 13 16:46:05 2011 -0400 @@ -94,12 +94,14 @@ The :class:`Parser` class, imported from the :mod:`email.parser` module, provides an API that can be used to parse a message when the complete contents of the message are available in a string or file. The :mod:`email.parser` -module also provides a second class, called :class:`HeaderParser` which can be -used if you're only interested in the headers of the message. -:class:`HeaderParser` can be much faster in these situations, since it does not -attempt to parse the message body, instead setting the payload to the raw body -as a string. :class:`HeaderParser` has the same API as the :class:`Parser` -class. +module also provides header-only parsers, called :class:`HeaderParser` and +:class:`BytesHeaderParser`, which can be used if you're only interested in the +headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser` +can be much faster in these situations, since they do not attempt to parse the +message body, instead setting the payload to the raw body as a string. They +have the same API as the :class:`Parser` and :class:`BytesParser` classes. + +.. versionadded:: 3.3 BytesHeaderParser .. class:: Parser(_class=email.message.Message) diff -r fafc84b45a9e -r a95d936ce8eb Lib/email/generator.py --- a/Lib/email/generator.py Wed Apr 13 11:50:34 2011 -0700 +++ b/Lib/email/generator.py Wed Apr 13 16:46:05 2011 -0400 @@ -297,10 +297,12 @@ # message/rfc822. 
Such messages are generated by, for example, # Groupwise when forwarding unadorned messages. (Issue 7970.) So # in that case we just emit the string body. - payload = msg.get_payload() + payload = msg._payload if isinstance(payload, list): g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) payload = s.getvalue() + else: + payload = self._encode(payload) self._fp.write(payload) # This used to be a module level function; we use a classmethod for this diff -r fafc84b45a9e -r a95d936ce8eb Lib/email/parser.py --- a/Lib/email/parser.py Wed Apr 13 11:50:34 2011 -0700 +++ b/Lib/email/parser.py Wed Apr 13 16:46:05 2011 -0400 @@ -4,7 +4,7 @@ """A parser of RFC 2822 and MIME email messages.""" -__all__ = ['Parser', 'HeaderParser'] +__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] import warnings from io import StringIO, TextIOWrapper @@ -114,3 +114,11 @@ """ text = text.decode('ASCII', errors='surrogateescape') return self.parser.parsestr(text, headersonly) + + +class BytesHeaderParser(BytesParser): + def parse(self, fp, headersonly=True): + return BytesParser.parse(self, fp, headersonly=True) + + def parsebytes(self, text, headersonly=True): + return BytesParser.parsebytes(self, text, headersonly=True) diff -r fafc84b45a9e -r a95d936ce8eb Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Wed Apr 13 11:50:34 2011 -0700 +++ b/Lib/test/test_email/test_email.py Wed Apr 13 16:46:05 2011 -0400 @@ -177,6 +177,17 @@ gen.flatten(msg, False) self.assertEqual(out.getvalue(), msgdata) + def test_byte_message_rfc822_only(self): + # Make sure new bytes header parser also passes this. 
+ with openfile('msg_46.txt', 'rb') as fp: + msgdata = fp.read() + parser = email.parser.BytesHeaderParser() + msg = parser.parsebytes(msgdata) + out = BytesIO() + gen = email.generator.BytesGenerator(out) + gen.flatten(msg) + self.assertEqual(out.getvalue(), msgdata) + def test_get_decoded_payload(self): eq = self.assertEqual msg = self._msgobj('msg_10.txt') @@ -2749,6 +2760,7 @@ class TestParsers(TestEmailBase): + def test_header_parser(self): eq = self.assertEqual # Parse only the headers of a complex multipart MIME document @@ -2760,6 +2772,18 @@ self.assertFalse(msg.is_multipart()) self.assertTrue(isinstance(msg.get_payload(), str)) + def test_bytes_header_parser(self): + eq = self.assertEqual + # Parse only the headers of a complex multipart MIME document + with openfile('msg_02.txt', 'rb') as fp: + msg = email.parser.BytesHeaderParser().parse(fp) + eq(msg['from'], 'ppp-request@zzz.org') + eq(msg['to'], 'ppp@zzz.org') + eq(msg.get_content_type(), 'multipart/mixed') + self.assertFalse(msg.is_multipart()) + self.assertTrue(isinstance(msg.get_payload(), str)) + self.assertTrue(isinstance(msg.get_payload(decode=True), bytes)) + def test_whitespace_continuation(self): eq = self.assertEqual # This message contains a line after the Subject: header that has only diff -r fafc84b45a9e -r a95d936ce8eb Misc/NEWS --- a/Misc/NEWS Wed Apr 13 11:50:34 2011 -0700 +++ b/Misc/NEWS Wed Apr 13 16:46:05 2011 -0400 @@ -103,6 +103,8 @@ Library ------- +- Issue #11684: complete email.parser bytes API by adding BytesHeaderParser. + - The bz2 module now handles 4GiB+ input buffers correctly. - Issue #9233: Fix json.loads('{}') to return a dict (instead of a list), when