cpython: 846c165cf643 (original) (raw)
--- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -16,10 +16,11 @@ from xml.sax.handler import feature_name from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs +import gc import os.path import shutil from test import support -from test.support import findfile, run_unittest +from test.support import findfile, run_unittest, TESTFN TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") @@ -95,6 +96,126 @@ class XmlTestBase(unittest.TestCase): self.assertEqual(attrs["attr"], "val") self.assertEqual(attrs.getQNameByName("attr"), "attr") + +def xml_str(doc, encoding=None):
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* rendered by xml_str() and encoded to bytes.

    *encoding* is the codec used to encode the resulting text.
    *decl_encoding* is forwarded to xml_str() as its ``encoding`` argument;
    when left at the Ellipsis default it falls back to *encoding*.  Ellipsis
    (not None) is the "not given" marker because callers pass None
    explicitly.  Characters the codec cannot represent are emitted as XML
    character references.
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    return text.encode(encoding, 'xmlcharrefreplace')
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc*, rendered by xml_str(), to the TESTFN file.

    The file is opened in text mode with *encoding*; characters the codec
    cannot represent are written as XML character references.
    *decl_encoding* is forwarded to xml_str() as its ``encoding`` argument
    and falls back to *encoding* when left at the Ellipsis default (None is
    a value callers pass explicitly, so it cannot serve as the marker).
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    with open(TESTFN, 'w', encoding=encoding,
              errors='xmlcharrefreplace') as out:
        out.write(xml_str(doc, declared))
class ParseTest(unittest.TestCase):
    """Tests for the inputs accepted by xml.sax.parse() and parseString().

    Every check feeds some encoded form of ``self.data`` to the parser,
    regenerates the document through XMLGenerator and compares the result
    with ``xml_str(self.data, 'utf-8')``.
    """
    # NOTE(review): ``self.data`` (the sample document) is not defined in the
    # visible part of this class -- confirm it is provided as a class
    # attribute before relying on these tests.

    def tearDown(self):
        # Several tests write TESTFN through make_xml_file(); remove it so
        # no stray file is left behind for other tests.
        support.unlink(TESTFN)

    def check_parse(self, f):
        """Parse *f* with xml.sax.parse() and verify the regenerated XML."""
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def _check_parse_bytes_sources(self, encoding, decl_encoding=...):
        """Check one encoded variant via BytesIO, file name and binary file.

        *encoding* and *decl_encoding* are passed straight through to
        xml_bytes() and make_xml_file(), including the Ellipsis default.
        """
        self.check_parse(
            BytesIO(xml_bytes(self.data, encoding, decl_encoding)))
        make_xml_file(self.data, encoding, decl_encoding)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)

    def test_parse_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            # Text input with a declaration naming *encoding*.
            self.check_parse(StringIO(xml_str(self.data, encoding)))
            make_xml_file(self.data, encoding)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)
            # Text input taken from the bare document / decl_encoding=None.
            self.check_parse(StringIO(self.data))
            make_xml_file(self.data, encoding, None)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self._check_parse_bytes_sources(encoding)
            self._check_parse_bytes_sources(encoding, None)
        # accept UTF-8 with BOM
        self._check_parse_bytes_sources('utf-8-sig', 'utf-8')
        self._check_parse_bytes_sources('utf-8-sig', None)
        # accept data with declared encoding
        self._check_parse_bytes_sources('iso-8859-1')
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(
                BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with support.check_warnings(('unclosed file', ResourceWarning)):
            # XXX Failed parser leaks an opened file.
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            # Collect leaked file.
            gc.collect()
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            # Named ``source`` so the builtin input() is not shadowed.
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def check_parseString(self, s):
        """Parse *s* with xml.sax.parseString() and verify the output."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_bytes(self.data, encoding))
            self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
+ class MakeParserTest(unittest.TestCase): def test_make_parser2(self): # Creating parsers several times in a row should succeed. @@ -1115,6 +1236,7 @@ class XmlReaderTest(XmlTestBase): def test_main(): run_unittest(MakeParserTest,
ParseTest,[](#l1.147) SaxutilsTest,[](#l1.148) PrepareInputSourceTest,[](#l1.149) StringXmlgenTest,[](#l1.150)