bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) (#3746) · python/cpython@ad051cb (original) (raw)

`@@ -33,6 +33,7 @@

`

33

33

`except UnicodeEncodeError:

`

34

34

`raise unittest.SkipTest("filename is not encodable to utf8")

`

35

35

`SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

`

``

36

`+

UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

`

36

37

``

37

38

`SAMPLE_XML = """\

`

38

39

`

`

`@@ -1724,6 +1725,37 @@ def eq(self, other):

`

1724

1725

`self.assertIsInstance(e[0].tag, str)

`

1725

1726

`self.assertEqual(e[0].tag, 'changed')

`

1726

1727

``

``

1728

`+

def check_expat224_utf8_bug(self, text):

`

``

1729

`+

xml = b'<a b="%s"/>' % text

`

``

1730

`+

root = ET.XML(xml)

`

``

1731

`+

self.assertEqual(root.get('b'), text.decode('utf-8'))

`

``

1732

+

``

1733

`+

def test_expat224_utf8_bug(self):

`

``

1734

`+

# bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.

`

``

1735

`+

# Check that Expat 2.2.4 fixed the bug.

`

``

1736

`+

`

``

1737

`+

# Test buffer bounds at odd and even positions.

`

``

1738

+

``

1739

`+

text = b'\xc3\xa0' * 1024

`

``

1740

`+

self.check_expat224_utf8_bug(text)

`

``

1741

+

``

1742

`+

text = b'x' + b'\xc3\xa0' * 1024

`

``

1743

`+

self.check_expat224_utf8_bug(text)

`

``

1744

+

``

1745

`+

def test_expat224_utf8_bug_file(self):

`

``

1746

`+

with open(UTF8_BUG_XMLFILE, 'rb') as fp:

`

``

1747

`+

raw = fp.read()

`

``

1748

`+

root = ET.fromstring(raw)

`

``

1749

`+

xmlattr = root.get('b')

`

``

1750

+

``

1751

`+

# "Parse" manually the XML file to extract the value of the 'b'

`

``

1752

`+

# attribute of the <a b='xxx' /> XML element

`

``

1753

`+

text = raw.decode('utf-8').strip()

`

``

1754

`+

text = text.replace('\r\n', ' ')

`

``

1755

`+

text = text[6:-4]

`

``

1756

`+

self.assertEqual(root.get('b'), text)

`

``

1757

+

``

1758

+

1727

1759

``

1728

1760

`# --------------------------------------------------------------------

`

1729

1761

``