@@ -30,6 +30,7 @@ |
|
|
30 |
30 |
|
31 |
31 |
# Paths of the XML fixtures, each resolved with findfile() in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") |
32 |
32 |
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") |

33 |
# Fixture read by test_expat224_utf8_bug_file().
+UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") |
33 |
34 |
|
34 |
35 |
SAMPLE_XML = """\ |
35 |
36 |
|
@@ -1494,6 +1495,36 @@ def test_issue10777(self): |
|
|
1494 |
1495 |
ET.register_namespace('test10777', 'http://myuri/') |
1495 |
1496 |
ET.register_namespace('test10777', 'http://myuri/') |
1496 |
1497 |
|
|
1498 |
def check_expat224_utf8_bug(self, text):
    """Parse an element whose 'b' attribute is *text* and check the value.

    *text* is a UTF-8 encoded bytes object.  It is embedded as the value
    of the 'b' attribute of a one-element document, the document is parsed
    with ET.XML(), and the attribute read back must equal *text* decoded
    as UTF-8.
    """
    # The template must contain a '%s' placeholder: with an empty template
    # the bytes %-formatting raises TypeError before the parser is called.
    xml = b'<a b="%s"/>' % text
    root = ET.XML(xml)
    self.assertEqual(root.get('b'), text.decode('utf-8'))
|
1502 |
+ |
|
1503 |
def test_expat224_utf8_bug(self):
    # bpo-31170: the UTF-8 decoder of Expat 2.2.3 was buggy; make sure
    # the fix that came with Expat 2.2.4 is in effect.
    #
    # The one-byte prefix shifts every two-byte sequence by one byte, so
    # buffer bounds are tested at both even and odd positions.
    two_byte_run = b'\xc3\xa0' * 1024
    for prefix in (b'', b'x'):
        self.check_expat224_utf8_bug(prefix + two_byte_run)
|
1514 |
+ |
|
1515 |
def test_expat224_utf8_bug_file(self):
    # Parse the fixture file and compare the 'b' attribute of the root
    # element with the value cut out of the raw file contents by hand.
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the XML element: decode the bytes, trim surrounding
    # whitespace and replace each CRLF with a single space (presumably to
    # mirror how the parser normalizes line breaks inside attribute
    # values -- confirm against the fixture).
    text = raw.decode('utf-8').strip()
    text = text.replace('\r\n', ' ')
    # Drop the 6 leading and 4 trailing characters around the value
    # (presumably the "<a b='" opener and the "' />" closer -- confirm
    # against the fixture file).
    text = text[6:-4]
    # Compare the value fetched above instead of reading it a second time.
    self.assertEqual(xmlattr, text)
|
1527 |
+ |
1497 |
1528 |
|
1498 |
1529 |
# -------------------------------------------------------------------- |
1499 |
1530 |
|