cpython: 3ac1b21fbb42 (original) (raw)
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -15,10 +15,11 @@ from xml.sax.handler import feature_name
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from cStringIO import StringIO
 import io
+import gc
 import os.path
 import shutil
 import test.test_support as support
-from test.test_support import findfile, run_unittest
+from test.test_support import findfile, run_unittest, TESTFN
 import unittest
 
 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
@@ -90,6 +91,111 @@ class XmlTestBase(unittest.TestCase):
         self.assertEqual(attrs["attr"], "val")
         self.assertEqual(attrs.getQNameByName("attr"), "attr")
 
+
+def xml_unicode(doc, encoding=None):
+
+def xml_bytes(doc, encoding, decl_encoding=Ellipsis):
+    if decl_encoding is Ellipsis:
+        decl_encoding = encoding
+    return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
+
+def make_xml_file(doc, encoding, decl_encoding=Ellipsis):
+    if decl_encoding is Ellipsis:
+        decl_encoding = encoding
+    with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
+        f.write(xml_unicode(doc, decl_encoding))
+
+
+class ParseTest(unittest.TestCase):
+    def check_parse(self, f):
+        from xml.sax import parse
+        result = StringIO()
+        parse(f, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))
+
+    def test_parse_bytes(self):
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding)))
+            make_xml_file(self.data, encoding)
+            self.check_parse(TESTFN)
+            with io.open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None)))
+            make_xml_file(self.data, encoding, None)
+            self.check_parse(TESTFN)
+            with io.open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+        # accept UTF-8 with BOM
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
+        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
+        make_xml_file(self.data, 'utf-8-sig', None)
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # accept data with declared encoding
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1')))
+        make_xml_file(self.data, 'iso-8859-1')
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # fail on non-UTF-8 incompatible data without declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with self.assertRaises(SAXException):
+            self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            with self.assertRaises(SAXException):
+                self.check_parse(f)
+
+    def test_parse_InputSource(self):
+        # accept data without declared but with explicitly specified encoding
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with io.open(TESTFN, 'rb') as f:
+            input = InputSource()
+            input.setByteStream(f)
+            input.setEncoding('iso-8859-1')
+            self.check_parse(input)
+
+    def check_parseString(self, s):
+        from xml.sax import parseString
+        result = StringIO()
+        parseString(s, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))
+
+    def test_parseString_bytes(self):
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parseString(xml_bytes(self.data, encoding))
+            self.check_parseString(xml_bytes(self.data, encoding, None))
+        # accept UTF-8 with BOM
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
+        # accept data with declared encoding
+        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
+        # fail on non-UTF-8 incompatible data without declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
+
+
 class MakeParserTest(unittest.TestCase):
     def test_make_parser2(self):
         # Creating parsers several times in a row should succeed.
@@ -949,6 +1055,7 @@ class XmlReaderTest(XmlTestBase):
 
 def test_main():
     run_unittest(MakeParserTest,
+                 ParseTest,
                  SaxutilsTest,
                  PrepareInputSourceTest,
                  StringXmlgenTest,