(original) (raw)
"""Stream-reader test for UTF-8 input with and without a leading BOM signature.

The same text is decoded through ``codecs.getreader(enc)`` from two byte
payloads (one prefixed with ``codecs.BOM_UTF8``, one bare) using a range of
``read()`` size hints, and the decoded result is compared to the expected
unicode string.

Runs unchanged on Python 2.6+ and Python 3.3+.
"""

import codecs
import unittest

try:
    from StringIO import StringIO  # Python 2: byte-oriented in-memory stream
except ImportError:
    # Python 3 has no StringIO module; the codec reader needs a byte stream.
    from io import BytesIO as StringIO

# inspired by test code in
# test_multibytecodec_support.py test_incrementalencoder()

# 1st (hex-coded) utf8 is 2-byte, 2nd is 3-byte,
unistring = u"ABC\u00A1\u2200XYZ"
u8_bytes = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
u8sig_bytes = codecs.BOM_UTF8 + u8_bytes

# NOTE: with enc = "utf_8" the _bare test passes and the _bom test fails.
enc = "utf_8_sig"

# Size hints handed to StreamReader.read(); None means "call read() with no
# argument".  list(range(...)) keeps the concatenation valid on Python 3.
_SIZEHINTS = [None] + list(range(1, 11)) + [64, 128, 256, 512, 1024]


def m(got, exp, msg):
    """Return *msg* followed by the repr of the actual and expected values."""
    return "%s\n .....got:%r\n expected:%r" % (msg, got, exp)


class Test_utf8sig(unittest.TestCase):
    """Decode the fixtures through a stream reader using various read sizes."""

    def _read_all(self, reader_cls, payload, sizehint):
        """Drain a fresh reader over *payload* and return the decoded text.

        ``reader_cls`` is the StreamReader class for the codec under test.
        ``sizehint`` is passed to every ``read()`` call, or omitted when it
        is None.  Reading stops at the first empty result, exactly as a
        naive consumer loop would.
        """
        istream = reader_cls(StringIO(payload))
        chunks = []
        while True:
            if sizehint is None:
                data = istream.read()
            else:
                data = istream.read(sizehint)
            if not data:
                break
            chunks.append(data)
        # Joined as text: the reader yields unicode, not bytes.
        return u"".join(chunks)

    def _check_stream(self, payload, label):
        """Assert that *payload* decodes to ``unistring`` for every size hint."""
        reader_cls = codecs.getreader(enc)
        for sizehint in _SIZEHINTS:
            got = self._read_all(reader_cls, payload, sizehint)
            self.assertEqual(got, unistring, m(got, unistring, label))

    def test_stream_bom(self):
        """Input that starts with the UTF-8 BOM signature."""
        self._check_stream(
            u8sig_bytes,
            "enc:%r stream read (with BOM-sig present)" % enc)

    def test_stream_bare(self):
        """Input with no BOM signature."""
        self._check_stream(
            u8_bytes,
            "enc %r: stream read (no BOM-sig present)" % enc)


if __name__ == "__main__":
    unittest.main()

#===eof===