bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) · python/cpython@e6d9fcb (original) (raw)

`@@ -34,6 +34,7 @@

`

34

34

`except UnicodeEncodeError:

`

35

35

`raise unittest.SkipTest("filename is not encodable to utf8")

`

36

36

`SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

`

``

37

`+

UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

`

37

38

``

38

39

`SAMPLE_XML = """\

`

39

40

`

`

`@@ -1739,6 +1740,37 @@ def eq(self, other):

`

1739

1740

`self.assertIsInstance(e[0].tag, str)

`

1740

1741

`self.assertEqual(e[0].tag, 'changed')

`

1741

1742

``

``

1743

`+

def check_expat224_utf8_bug(self, text):

`

``

1744

`+

xml = b'<a b="%s"/>' % text

`

``

1745

`+

root = ET.XML(xml)

`

``

1746

`+

self.assertEqual(root.get('b'), text.decode('utf-8'))

`

``

1747

+

``

1748

`+

def test_expat224_utf8_bug(self):

`

``

1749

`+

# bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.

`

``

1750

`+

# Check that Expat 2.2.4 fixed the bug.

`

``

1751

`+

`

``

1752

`+

# Test buffer bounds at odd and even positions.

`

``

1753

+

``

1754

`+

text = b'\xc3\xa0' * 1024

`

``

1755

`+

self.check_expat224_utf8_bug(text)

`

``

1756

+

``

1757

`+

text = b'x' + b'\xc3\xa0' * 1024

`

``

1758

`+

self.check_expat224_utf8_bug(text)

`

``

1759

+

``

1760

`+

def test_expat224_utf8_bug_file(self):

`

``

1761

`+

with open(UTF8_BUG_XMLFILE, 'rb') as fp:

`

``

1762

`+

raw = fp.read()

`

``

1763

`+

root = ET.fromstring(raw)

`

``

1764

`+

xmlattr = root.get('b')

`

``

1765

+

``

1766

`+

# "Parse" manually the XML file to extract the value of the 'b'

`

``

1767

`+

# attribute of the <a b='xxx' /> XML element

`

``

1768

`+

text = raw.decode('utf-8').strip()

`

``

1769

`+

text = text.replace('\r\n', ' ')

`

``

1770

`+

text = text[6:-4]

`

``

1771

`+

self.assertEqual(root.get('b'), text)

`

``

1772

+

``

1773

+

1742

1774

``

1743

1775

`# --------------------------------------------------------------------

`

1744

1776

``