cpython: 8dd2f5754b2f (original) (raw)

Mercurial > cpython

changeset 77630:8dd2f5754b2f

#15114: the strict mode of HTMLParser and the HTMLParseError exception are deprecated now that the parser is able to parse invalid markup. [#15114]

Ezio Melotti ezio.melotti@gmail.com
date Sat, 23 Jun 2012 15:27:51 +0200
parents 9945d7dfa72c
children 0e8285321659
files Doc/library/html.parser.rst Lib/html/parser.py Lib/test/test_htmlparser.py Misc/NEWS
diffstat 4 files changed, 35 insertions(+), 18 deletions(-): Doc/library/html.parser.rst 21, Lib/html/parser.py 21, Lib/test/test_htmlparser.py 6, Misc/NEWS 5

line wrap: on

line diff

--- a/Doc/library/html.parser.rst +++ b/Doc/library/html.parser.rst @@ -16,13 +16,14 @@ This module defines a class :class:`HTMLParser` which serves as the basis for parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML. -.. class:: HTMLParser(strict=True) +.. class:: HTMLParser(strict=False)

+ An exception is defined as well: @@ -46,6 +51,10 @@ An exception is defined as well: detected, and :attr:`offset` is the number of characters into the line at which the construct starts.

+ Example HTML Parser Application -------------------------------

--- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -10,6 +10,7 @@ import _markupbase import re +import warnings

Regular expressions used for parsing

@@ -113,14 +114,16 @@ class HTMLParser(_markupbase.ParserBase) CDATA_CONTENT_ELEMENTS = ("script", "style")

@@ -271,8 +274,8 @@ class HTMLParser(_markupbase.ParserBase) # See also parse_declaration in _markupbase def parse_html_declaration(self, i): rawdata = self.rawdata

@@ -292,8 +295,8 @@ class HTMLParser(_markupbase.ParserBase) # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state def parse_bogus_comment(self, i, report=1): rawdata = self.rawdata

--- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -102,7 +102,8 @@ class TestCaseBase(unittest.TestCase): class HTMLParserStrictTestCase(TestCaseBase): def get_collector(self):

def test_processing_instruction_only(self): self._run_check("<?processing instruction>", [ @@ -594,7 +595,8 @@ class HTMLParserTolerantTestCase(HTMLPar class AttributesStrictTestCase(TestCaseBase): def get_collector(self):

def test_attr_syntax(self): output = [

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -43,6 +43,9 @@ Core and Builtins Library ------- +- Issue #15114: the strict mode of HTMLParser and the HTMLParseError exception