cpython: a95d936ce8eb (original) (raw)
Mercurial > cpython
changeset 69334:a95d936ce8eb
#11684: Complete parser bytes interface by adding BytesHeaderParser. Patch by Steffen Daode Nurpmeso. [#11684]
R David Murray rdmurray@bitdance.com | |
---|---|
date | Wed, 13 Apr 2011 16:46:05 -0400 |
parents | fafc84b45a9e |
children | 602d317d2257 |
files | Doc/library/email.parser.rst Lib/email/generator.py Lib/email/parser.py Lib/test/test_email/test_email.py Misc/NEWS |
diffstat | 5 files changed, 46 insertions(+), 8 deletions(-): Doc/library/email.parser.rst 14, Lib/email/generator.py 4, Lib/email/parser.py 10, Lib/test/test_email/test_email.py 24, Misc/NEWS 2 |
line wrap: on
line diff
--- a/Doc/library/email.parser.rst
+++ b/Doc/library/email.parser.rst
@@ -94,12 +94,14 @@ Parser class API
The :class:`Parser` class, imported from the :mod:`email.parser` module,
provides an API that can be used to parse a message when the complete contents
of the message are available in a string or file. The :mod:`email.parser`
-module also provides a second class, called :class:`HeaderParser` which can be
-used if you're only interested in the headers of the message.
-:class:`HeaderParser` can be much faster in these situations, since it does not
-attempt to parse the message body, instead setting the payload to the raw body
-as a string. :class:`HeaderParser` has the same API as the :class:`Parser`
-class.
+module also provides header-only parsers, called :class:`HeaderParser` and
+:class:`BytesHeaderParser`, which can be used if you're only interested in the
+headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser`
+can be much faster in these situations, since they do not attempt to parse the
+message body, instead setting the payload to the raw body as a string. They
+have the same API as the :class:`Parser` and :class:`BytesParser` classes.
+
+.. versionadded:: 3.3 BytesHeaderParser
.. class:: Parser(_class=email.message.Message)
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -297,10 +297,12 @@ class Generator:
         # message/rfc822. Such messages are generated by, for example,
         # Groupwise when forwarding unadorned messages. (Issue 7970.) So
         # in that case we just emit the string body.
-        payload = msg.get_payload()
+        payload = msg._payload
         if isinstance(payload, list):
             g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
             payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
         self._fp.write(payload)
 
     # This used to be a module level function; we use a classmethod for this
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -4,7 +4,7 @@
 
 """A parser of RFC 2822 and MIME email messages."""
 
-__all__ = ['Parser', 'HeaderParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
 
 import warnings
 from io import StringIO, TextIOWrapper
@@ -114,3 +114,11 @@ class BytesParser:
         """
         text = text.decode('ASCII', errors='surrogateescape')
         return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+    def parse(self, fp, headersonly=True):
+        return BytesParser.parse(self, fp, headersonly=True)
+
+    def parsebytes(self, text, headersonly=True):
+        return BytesParser.parsebytes(self, text, headersonly=True)
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -177,6 +177,17 @@ class TestMessageAPI(TestEmailBase):
         gen.flatten(msg, False)
         self.assertEqual(out.getvalue(), msgdata)
 
+    def test_byte_message_rfc822_only(self):
+        # Make sure new bytes header parser also passes this.
+        with openfile('msg_46.txt', 'rb') as fp:
+            msgdata = fp.read()
+        parser = email.parser.BytesHeaderParser()
+        msg = parser.parsebytes(msgdata)
+        out = BytesIO()
+        gen = email.generator.BytesGenerator(out)
+        gen.flatten(msg)
+        self.assertEqual(out.getvalue(), msgdata)
+
     def test_get_decoded_payload(self):
         eq = self.assertEqual
         msg = self._msgobj('msg_10.txt')
@@ -2749,6 +2760,7 @@ Do you like this message?
 
 
 class TestParsers(TestEmailBase):
+
     def test_header_parser(self):
         eq = self.assertEqual
         # Parse only the headers of a complex multipart MIME document
@@ -2760,6 +2772,18 @@ class TestParsers(TestEmailBase):
         self.assertFalse(msg.is_multipart())
         self.assertTrue(isinstance(msg.get_payload(), str))
 
+    def test_bytes_header_parser(self):
+        eq = self.assertEqual
+        # Parse only the headers of a complex multipart MIME document
+        with openfile('msg_02.txt', 'rb') as fp:
+            msg = email.parser.BytesHeaderParser().parse(fp)
+        eq(msg['from'], 'ppp-request@zzz.org')
+        eq(msg['to'], 'ppp@zzz.org')
+        eq(msg.get_content_type(), 'multipart/mixed')
+        self.assertFalse(msg.is_multipart())
+        self.assertTrue(isinstance(msg.get_payload(), str))
+        self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
+
     def test_whitespace_continuation(self):
         eq = self.assertEqual
         # This message contains a line after the Subject: header that has only
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -103,6 +103,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #11684: complete email.parser bytes API by adding BytesHeaderParser.
+