bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) · python/cpython@e6d9fcb (original) (raw)
`@@ -34,6 +34,7 @@
`
34
34
`except UnicodeEncodeError:
`
35
35
`raise unittest.SkipTest("filename is not encodable to utf8")
`
36
36
`SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
`
``
37
`+
# Path of the XML data file read by test_expat224_utf8_bug_file().
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
`
37
38
``
38
39
`SAMPLE_XML = """\
`
39
40
`
`
`@@ -1739,6 +1740,37 @@ def eq(self, other):
`
1739
1740
`self.assertIsInstance(e[0].tag, str)
`
1740
1741
`self.assertEqual(e[0].tag, 'changed')
`
1741
1742
``
``
1743
`+
def check_expat224_utf8_bug(self, text):
    """Parse an element carrying *text* in its 'b' attribute and verify it.

    *text* is a bytes object holding UTF-8 encoded data.  It is embedded
    verbatim into a one-element document, so it must not contain
    characters that are special inside a double-quoted attribute value.
    The attribute value reported by the parser must equal the decoded
    text.
    """
    # The template needs a %s placeholder inside the attribute value:
    # formatting an empty bytes template with an argument raises
    # TypeError, and the assertion below reads attribute 'b'.
    xml = b'<a b="%s"/>' % text
    root = ET.XML(xml)
    self.assertEqual(root.get('b'), text.decode('utf-8'))
`
``
1747
+
``
1748
`+
def test_expat224_utf8_bug(self):
    # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
    # Check that Expat 2.2.4 fixed the bug.
    #
    # Test buffer bounds at odd and even positions.

    # 1024 two-byte UTF-8 sequences, each starting at an even offset
    # within the attribute value.
    text = b'\xc3\xa0' * 1024
    self.check_expat224_utf8_bug(text)

    # A single leading ASCII byte shifts every two-byte sequence to an
    # odd offset within the attribute value.
    text = b'x' + b'\xc3\xa0' * 1024
    self.check_expat224_utf8_bug(text)
`
``
1759
+
``
1760
`+
def test_expat224_utf8_bug_file(self):
    # Parse the data file with ElementTree and compare the value of the
    # 'b' attribute with the value extracted by hand from the raw bytes.
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # Mirror what the parser reports for a CRLF line break inside an
    # attribute value: a single space.
    text = text.replace('\r\n', ' ')
    # Drop the 6-character prefix "<a b='" and the 4-character
    # suffix "' />" around the attribute value.
    text = text[6:-4]
    self.assertEqual(xmlattr, text)
`
``
1772
+
``
1773
+
1742
1774
``
1743
1775
`# --------------------------------------------------------------------
`
1744
1776
``