bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) (#3746) · python/cpython@ad051cb (original) (raw)
`@@ -33,6 +33,7 @@
`
33
33
`except UnicodeEncodeError:
`
34
34
`raise unittest.SkipTest("filename is not encodable to utf8")
`
35
35
`SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
`
``
36
`+
# Sample document read by test_expat224_utf8_bug_file (bpo-31170, Expat UTF-8
# decoder regression).
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
`
36
37
``
37
38
`SAMPLE_XML = """\
`
38
39
`
`
`@@ -1724,6 +1725,37 @@ def eq(self, other):
`
1724
1725
`self.assertIsInstance(e[0].tag, str)
`
1725
1726
`self.assertEqual(e[0].tag, 'changed')
`
1726
1727
``
``
1728
`+
def check_expat224_utf8_bug(self, text):
    """Parse an element whose 'b' attribute is *text* and check the result.

    *text* is UTF-8 encoded bytes.  It is inserted verbatim between double
    quotes, so it must not contain a double quote or XML markup.  The
    parsed attribute value must equal *text* decoded as UTF-8.
    """
    # The template needs the %s placeholder: formatting an empty bytes
    # literal with an argument raises TypeError, and the assertion below
    # reads the 'b' attribute of the parsed element.
    xml = b'<a b="%s"/>' % text
    root = ET.XML(xml)
    self.assertEqual(root.get('b'), text.decode('utf-8'))
`
``
1732
+
``
1733
`+
def test_expat224_utf8_bug(self):
    # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
    # Check that Expat 2.2.4 fixed the bug.
    #
    # Test buffer bounds at odd and even positions.

    # 1024 two-byte sequences starting at the first byte of the value.
    text = b'\xc3\xa0' * 1024
    self.check_expat224_utf8_bug(text)

    # Same payload shifted by one ASCII byte, so every two-byte sequence
    # starts at the opposite parity.
    text = b'x' + b'\xc3\xa0' * 1024
    self.check_expat224_utf8_bug(text)
`
``
1744
+
``
1745
`+
def test_expat224_utf8_bug_file(self):
    # Parse the sample file with ElementTree and compare its 'b' attribute
    # with the value extracted from the raw bytes by hand.
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # An XML parser reports each line break inside an attribute value as a
    # single space, whatever the line-ending convention of the file.
    # Replace '\r\n' first so that it yields one space, not two.
    for newline in ('\r\n', '\r', '\n'):
        text = text.replace(newline, ' ')
    # Drop the leading "<a b='" (6 characters) and trailing "' />" (4).
    text = text[6:-4]
    self.assertEqual(xmlattr, text)
`
``
1757
+
``
1758
+
1727
1759
``
1728
1760
`# --------------------------------------------------------------------
`
1729
1761
``