bpo-28080: Add support for the fallback encoding in ZIP files (GH-32007) · python/cpython@a25a985 (original) (raw)
`@@ -21,8 +21,10 @@
`
21
21
`from random import randint, random, randbytes
`
22
22
``
23
23
`from test.support import script_helper
`
24
``
`-
from test.support import (findfile, requires_zlib, requires_bz2,
`
25
``
`-
requires_lzma, captured_stdout, requires_subprocess)
`
``
24
`+
from test.support import (
`
``
25
`+
findfile, requires_zlib, requires_bz2, requires_lzma,
`
``
26
`+
captured_stdout, captured_stderr, requires_subprocess
`
``
27
`+
)
`
26
28
`from test.support.os_helper import (
`
27
29
`TESTFN, unlink, rmtree, temp_dir, temp_cwd, fd_count
`
28
30
`)
`
`@@ -3210,5 +3212,139 @@ def test_inheritance(self, alpharep):
`
3210
3212
`assert isinstance(file, cls)
`
3211
3213
``
3212
3214
``
``
3215
`+
class EncodedMetadataTests(unittest.TestCase):
    """Tests for the ``metadata_encoding`` argument of ``zipfile.ZipFile``.

    setUp() builds an archive with three members.  The first two member
    names are patched to raw Shift JIS bytes, so they only read back as the
    intended Han characters when ``metadata_encoding='shift_jis'`` is given.
    The third name is written by zipfile itself and is expected to read back
    unchanged whatever metadata encoding is requested.
    """

    file_names = ['\u4e00', '\u4e8c', '\u4e09']  # Han 'one', 'two', 'three'
    file_content = [
        "This is pure ASCII.\n".encode('ascii'),
        # This is modern Japanese. (UTF-8)
        "\u3053\u308c\u306f\u73fe\u4ee3\u7684\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('utf-8'),
        # This is obsolete Japanese. (Shift JIS)
        "\u3053\u308c\u306f\u53e4\u3044\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('shift_jis'),
    ]

    def setUp(self):
        self.addCleanup(unlink, TESTFN)
        # Create .zip of 3 members with Han names encoded in Shift JIS.
        # Each name is 1 Han character encoding to 2 bytes in Shift JIS.
        # The ASCII names are arbitrary as long as they are length 2 and
        # not otherwise contained in the zip file.
        # Data elements are encoded bytes (ascii, utf-8, shift_jis).
        placeholders = ["n1", "n2"] + self.file_names[2:]
        with zipfile.ZipFile(TESTFN, mode="w") as tf:
            for temp, content in zip(placeholders, self.file_content):
                tf.writestr(temp, content, zipfile.ZIP_STORED)
        # Hack in the Shift JIS names with flag bit 11 (UTF-8) unset.
        # The replacement is length-preserving (2 bytes for 2 bytes), so no
        # offset or size stored in the archive needs to be adjusted.
        with open(TESTFN, "rb") as tf:
            data = tf.read()
        for name, temp in zip(self.file_names, placeholders[:2]):
            data = data.replace(temp.encode('ascii'),
                                name.encode('shift_jis'))
        with open(TESTFN, "wb") as tf:
            tf.write(data)

    def _names_decoded_as(self, encoding):
        """Return the member names as seen when decoding with *encoding*.

        The two Shift JIS encoded names are re-decoded with *encoding*
        (producing mojibake unless it is Shift JIS); the third name is
        returned unchanged.
        """
        hacked = [name.encode('shift_jis').decode(encoding)
                  for name in self.file_names[:2]]
        return hacked + self.file_names[2:]

    def _test_read(self, zipfp, expected_names, expected_content):
        """Check names, sizes and data of every member of *zipfp*.

        *expected_names* and *expected_content* are parallel sequences;
        names are compared order-independently, contents per member.
        """
        # Check the namelist
        names = zipfp.namelist()
        self.assertEqual(sorted(names), sorted(expected_names))

        # Check infolist
        infos = zipfp.infolist()
        names = [zi.filename for zi in infos]
        self.assertEqual(sorted(names), sorted(expected_names))

        # check getinfo
        for name, content in zip(expected_names, expected_content):
            info = zipfp.getinfo(name)
            self.assertEqual(info.filename, name)
            self.assertEqual(info.file_size, len(content))
            self.assertEqual(zipfp.read(name), content)

    def test_read_with_metadata_encoding(self):
        # Read the ZIP archive with correct metadata_encoding
        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
            self._test_read(zipfp, self.file_names, self.file_content)

    def test_read_without_metadata_encoding(self):
        # Read the ZIP archive without metadata_encoding
        expected_names = self._names_decoded_as('cp437')
        with zipfile.ZipFile(TESTFN, "r") as zipfp:
            self._test_read(zipfp, expected_names, self.file_content)

    def test_read_with_incorrect_metadata_encoding(self):
        # Read the ZIP archive with incorrect metadata_encoding
        expected_names = self._names_decoded_as('koi8-u')
        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='koi8-u') as zipfp:
            self._test_read(zipfp, expected_names, self.file_content)

    def test_read_with_unsuitable_metadata_encoding(self):
        # Read the ZIP archive with metadata_encoding unsuitable for
        # decoding metadata
        for encoding in ('ascii', 'utf-8'):
            with self.subTest(encoding=encoding):
                with self.assertRaises(UnicodeDecodeError):
                    zipfile.ZipFile(TESTFN, "r", metadata_encoding=encoding)

    def test_read_after_append(self):
        newname = '\u56db'  # Han 'four'
        expected_names = self._names_decoded_as('cp437')
        expected_names.append(newname)
        expected_content = (*self.file_content, b"newcontent")

        with zipfile.ZipFile(TESTFN, "a") as zipfp:
            zipfp.writestr(newname, "newcontent")
            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))

        with zipfile.ZipFile(TESTFN, "r") as zipfp:
            self._test_read(zipfp, expected_names, expected_content)

        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
            for i, (name, content) in enumerate(zip(expected_names, expected_content)):
                info = zipfp.getinfo(name)
                self.assertEqual(info.filename, name)
                self.assertEqual(info.file_size, len(content))
                if i < 2:
                    # The two hacked members are expected to be unreadable
                    # once the archive has been appended to.
                    with self.assertRaises(zipfile.BadZipFile):
                        zipfp.read(name)
                else:
                    self.assertEqual(zipfp.read(name), content)

    def test_write_with_metadata_encoding(self):
        # metadata_encoding must be rejected for every writing mode
        for mode in ("w", "x", "a"):
            with self.subTest(mode=mode):
                with self.assertRaisesRegex(ValueError,
                                            "^metadata_encoding is only"):
                    zipfile.ZipFile("nonesuch.zip", mode,
                                    metadata_encoding="shift_jis")

    def test_cli_with_metadata_encoding(self):
        errmsg = "Non-conforming encodings not supported with -c."
        args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"]
        with captured_stdout() as stdout, captured_stderr() as stderr:
            self.assertRaises(SystemExit, zipfile.main, args)
        self.assertEqual(stdout.getvalue(), "")
        self.assertIn(errmsg, stderr.getvalue())

        # -t only has to run to completion; its output is not inspected,
        # so it is captured solely to keep the test run quiet.
        with captured_stdout():
            zipfile.main(["--metadata-encoding=shift_jis", "-t", TESTFN])

        with captured_stdout() as stdout:
            zipfile.main(["--metadata-encoding=shift_jis", "-l", TESTFN])
        listing = stdout.getvalue()
        for name in self.file_names:
            self.assertIn(name, listing)

    def test_cli_with_metadata_encoding_extract(self):
        # Depending on the locale, the Han member names may not be
        # encodable with the filesystem encoding; extraction cannot be
        # tested in that case.
        for name in self.file_names:
            try:
                os.fsencode(name)
            except UnicodeEncodeError:
                self.skipTest(f'cannot encode file name {name!r}')

        os.mkdir(TESTFN2)
        self.addCleanup(rmtree, TESTFN2)
        zipfile.main(["--metadata-encoding=shift_jis", "-e", TESTFN, TESTFN2])
        listing = os.listdir(TESTFN2)
        for name in self.file_names:
            self.assertIn(name, listing)
`
``
3347
+
``
3348
+
3213
3349
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
`