#15114: The html.parser module now raises a DeprecationWarning when the strict argument of HTMLParser or the HTMLParser.error method are used. · python/cpython@88ebfb1 (original) (raw)

4 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -74,7 +74,7 @@ as they are encountered::
74 74 def handle_data(self, data):
75 75 print("Encountered some data :", data)
76 76
77 - parser = MyHTMLParser(strict=False)
77 + parser = MyHTMLParser()
78 78 parser.feed('<html><head><title>Test</title></head>'
79 79 '<body><h1>Parse me!</h1></body></html>')
80 80
@@ -272,7 +272,7 @@ examples::
272 272 def handle_decl(self, data):
273 273 print("Decl :", data)
274 274
275 - parser = MyHTMLParser(strict=False)
275 + parser = MyHTMLParser()
276 276
277 277 Parsing a doctype::
278 278
Original file line number Diff line number Diff line change
@@ -94,6 +94,8 @@ def __str__(self):
94 94 return result
95 95
96 96
97 +_strict_sentinel = object()
98 +
97 99 class HTMLParser(_markupbase.ParserBase):
98 100 """Find tags and other markup and call handler functions.
99 101
@@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase):
116 118
117 119 CDATA_CONTENT_ELEMENTS = ("script", "style")
118 120
119 -def __init__(self, strict=False):
121 +def __init__(self, strict=_strict_sentinel):
120 122 """Initialize and reset this instance.
121 123
122 124 If strict is set to False (the default) the parser will parse invalid
123 125 markup, otherwise it will raise an error. Note that the strict mode
124 - is deprecated.
126 + and argument are deprecated.
125 127 """
126 -if strict:
127 -warnings.warn("The strict mode is deprecated.",
128 +if strict is not _strict_sentinel:
129 +warnings.warn("The strict argument and mode are deprecated.",
128 130 DeprecationWarning, stacklevel=2)
131 +else:
132 +strict = False # default
129 133 self.strict = strict
130 134 self.reset()
131 135
@@ -151,6 +155,8 @@ def close(self):
151 155 self.goahead(1)
152 156
153 157 def error(self, message):
158 +warnings.warn("The 'error' method is deprecated.",
159 +DeprecationWarning, stacklevel=2)
154 160 raise HTMLParseError(message, self.getpos())
155 161
156 162 __starttag_text = None
Original file line number Diff line number Diff line change
@@ -96,7 +96,9 @@ def parse(source=source):
96 96 parser = self.get_collector()
97 97 parser.feed(source)
98 98 parser.close()
99 -self.assertRaises(html.parser.HTMLParseError, parse)
99 +with self.assertRaises(html.parser.HTMLParseError):
100 +with self.assertWarns(DeprecationWarning):
101 +parse()
100 102
101 103
102 104 class HTMLParserStrictTestCase(TestCaseBase):
@@ -360,7 +362,16 @@ def test_condcoms(self):
360 362 class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
361 363
362 364 def get_collector(self):
363 -return EventCollector(strict=False)
365 +return EventCollector()
366 +
367 +def test_deprecation_warnings(self):
368 +with self.assertWarns(DeprecationWarning):
369 +EventCollector(strict=True)
370 +with self.assertWarns(DeprecationWarning):
371 +EventCollector(strict=False)
372 +with self.assertRaises(html.parser.HTMLParseError):
373 +with self.assertWarns(DeprecationWarning):
374 +EventCollector().error('test')
364 375
365 376 def test_tolerant_parsing(self):
366 377 self._run_check('<html te>>xt&a<<bc\n'
@@ -676,7 +687,7 @@ def test_entityrefs_in_attributes(self):
676 687 class AttributesTolerantTestCase(AttributesStrictTestCase):
677 688
678 689 def get_collector(self):
679 -return EventCollector(strict=False)
690 +return EventCollector()
680 691
681 692 def test_attr_funky_names2(self):
682 693 self._run_check(
Original file line number Diff line number Diff line change
@@ -31,6 +31,9 @@ Core and Builtins
31 31 Library
32 32 -------
33 33
34 +- Issue #15114: The html.parser module now raises a DeprecationWarning when the
35 + strict argument of HTMLParser or the HTMLParser.error method are used.
36 +
34 37 - Issue #19410: Undo the special-casing removal of '' for
35 38 importlib.machinery.FileFinder.
36 39