bpo-28080: Add support for the fallback encoding in ZIP files (GH-32007) · python/cpython@a25a985 (original) (raw)

`@@ -21,8 +21,10 @@

`

21

21

`from random import randint, random, randbytes

`

22

22

``

23

23

`from test.support import script_helper

`

24

``

`-

from test.support import (findfile, requires_zlib, requires_bz2,

`

25

``

`-

requires_lzma, captured_stdout, requires_subprocess)

`

``

24

`+

from test.support import (

`

``

25

`+

findfile, requires_zlib, requires_bz2, requires_lzma,

`

``

26

`+

captured_stdout, captured_stderr, requires_subprocess

`

``

27

`+

)

`

26

28

`from test.support.os_helper import (

`

27

29

`TESTFN, unlink, rmtree, temp_dir, temp_cwd, fd_count

`

28

30

`)

`

`@@ -3210,5 +3212,139 @@ def test_inheritance(self, alpharep):

`

3210

3212

`assert isinstance(file, cls)

`

3211

3213

``

3212

3214

``

``

3215

`+

class EncodedMetadataTests(unittest.TestCase):

`

``

3216

`+

file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'

`

``

3217

`+

file_content = [

`

``

3218

`+

"This is pure ASCII.\n".encode('ascii'),

`

``

3219

`+

# This is modern Japanese. (UTF-8)

`

``

3220

`+

"\u3053\u308c\u306f\u73fe\u4ee3\u7684\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('utf-8'),

`

``

3221

`+

# This is obsolete Japanese. (Shift JIS)

`

``

3222

`+

"\u3053\u308c\u306f\u53e4\u3044\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('shift_jis'),

`

``

3223

`+

]

`

``

3224

+

``

3225

`+

def setUp(self):

`

``

3226

`+

self.addCleanup(unlink, TESTFN)

`

``

3227

`+

# Create .zip of 3 members with Han names encoded in Shift JIS.

`

``

3228

`+

# Each name is 1 Han character encoding to 2 bytes in Shift JIS.

`

``

3229

`+

# The ASCII names are arbitrary as long as they are length 2 and

`

``

3230

`+

# not otherwise contained in the zip file.

`

``

3231

`+

# Data elements are encoded bytes (ascii, utf-8, shift_jis).

`

``

3232

`+

placeholders = ["n1", "n2"] + self.file_names[2:]

`

``

3233

`+

with zipfile.ZipFile(TESTFN, mode="w") as tf:

`

``

3234

`+

for temp, content in zip(placeholders, self.file_content):

`

``

3235

`+

tf.writestr(temp, content, zipfile.ZIP_STORED)

`

``

3236

`+

# Hack in the Shift JIS names with flag bit 11 (UTF-8) unset.

`

``

3237

`+

with open(TESTFN, "rb") as tf:

`

``

3238

`+

data = tf.read()

`

``

3239

`+

for name, temp in zip(self.file_names, placeholders[:2]):

`

``

3240

`+

data = data.replace(temp.encode('ascii'),

`

``

3241

`+

name.encode('shift_jis'))

`

``

3242

`+

with open(TESTFN, "wb") as tf:

`

``

3243

`+

tf.write(data)

`

``

3244

+

``

3245

`+

def _test_read(self, zipfp, expected_names, expected_content):

`

``

3246

`+

# Check the namelist

`

``

3247

`+

names = zipfp.namelist()

`

``

3248

`+

self.assertEqual(sorted(names), sorted(expected_names))

`

``

3249

+

``

3250

`+

# Check infolist

`

``

3251

`+

infos = zipfp.infolist()

`

``

3252

`+

names = [zi.filename for zi in infos]

`

``

3253

`+

self.assertEqual(sorted(names), sorted(expected_names))

`

``

3254

+

``

3255

`+

# check getinfo

`

``

3256

`+

for name, content in zip(expected_names, expected_content):

`

``

3257

`+

info = zipfp.getinfo(name)

`

``

3258

`+

self.assertEqual(info.filename, name)

`

``

3259

`+

self.assertEqual(info.file_size, len(content))

`

``

3260

`+

self.assertEqual(zipfp.read(name), content)

`

``

3261

+

``

3262

`+

def test_read_with_metadata_encoding(self):

`

``

3263

`+

# Read the ZIP archive with correct metadata_encoding

`

``

3264

`+

with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:

`

``

3265

`+

self._test_read(zipfp, self.file_names, self.file_content)

`

``

3266

+

``

3267

`+

def test_read_without_metadata_encoding(self):

`

``

3268

`+

# Read the ZIP archive without metadata_encoding

`

``

3269

`+

expected_names = [name.encode('shift_jis').decode('cp437')

`

``

3270

`+

for name in self.file_names[:2]] + self.file_names[2:]

`

``

3271

`+

with zipfile.ZipFile(TESTFN, "r") as zipfp:

`

``

3272

`+

self._test_read(zipfp, expected_names, self.file_content)

`

``

3273

+

``

3274

`+

def test_read_with_incorrect_metadata_encoding(self):

`

``

3275

`+

# Read the ZIP archive with incorrect metadata_encoding

`

``

3276

`+

expected_names = [name.encode('shift_jis').decode('koi8-u')

`

``

3277

`+

for name in self.file_names[:2]] + self.file_names[2:]

`

``

3278

`+

with zipfile.ZipFile(TESTFN, "r", metadata_encoding='koi8-u') as zipfp:

`

``

3279

`+

self._test_read(zipfp, expected_names, self.file_content)

`

``

3280

+

``

3281

`+

def test_read_with_unsuitable_metadata_encoding(self):

`

``

3282

`+

# Read the ZIP archive with metadata_encoding unsuitable for

`

``

3283

`+

# decoding metadata

`

``

3284

`+

with self.assertRaises(UnicodeDecodeError):

`

``

3285

`+

zipfile.ZipFile(TESTFN, "r", metadata_encoding='ascii')

`

``

3286

`+

with self.assertRaises(UnicodeDecodeError):

`

``

3287

`+

zipfile.ZipFile(TESTFN, "r", metadata_encoding='utf-8')

`

``

3288

+

``

3289

`+

def test_read_after_append(self):

`

``

3290

`+

newname = '\u56db' # Han 'four'

`

``

3291

`+

expected_names = [name.encode('shift_jis').decode('cp437')

`

``

3292

`+

for name in self.file_names[:2]] + self.file_names[2:]

`

``

3293

`+

expected_names.append(newname)

`

``

3294

`+

expected_content = (*self.file_content, b"newcontent")

`

``

3295

+

``

3296

`+

with zipfile.ZipFile(TESTFN, "a") as zipfp:

`

``

3297

`+

zipfp.writestr(newname, "newcontent")

`

``

3298

`+

self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))

`

``

3299

+

``

3300

`+

with zipfile.ZipFile(TESTFN, "r") as zipfp:

`

``

3301

`+

self._test_read(zipfp, expected_names, expected_content)

`

``

3302

+

``

3303

`+

with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:

`

``

3304

`+

self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))

`

``

3305

`+

for i, (name, content) in enumerate(zip(expected_names, expected_content)):

`

``

3306

`+

info = zipfp.getinfo(name)

`

``

3307

`+

self.assertEqual(info.filename, name)

`

``

3308

`+

self.assertEqual(info.file_size, len(content))

`

``

3309

`+

if i < 2:

`

``

3310

`+

with self.assertRaises(zipfile.BadZipFile):

`

``

3311

`+

zipfp.read(name)

`

``

3312

`+

else:

`

``

3313

`+

self.assertEqual(zipfp.read(name), content)

`

``

3314

+

``

3315

`+

def test_write_with_metadata_encoding(self):

`

``

3316

`+

ZF = zipfile.ZipFile

`

``

3317

`+

for mode in ("w", "x", "a"):

`

``

3318

`+

with self.assertRaisesRegex(ValueError,

`

``

3319

`+

"^metadata_encoding is only"):

`

``

3320

`+

ZF("nonesuch.zip", mode, metadata_encoding="shift_jis")

`

``

3321

+

``

3322

`+

def test_cli_with_metadata_encoding(self):

`

``

3323

`+

errmsg = "Non-conforming encodings not supported with -c."

`

``

3324

`+

args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"]

`

``

3325

`+

with captured_stdout() as stdout:

`

``

3326

`+

with captured_stderr() as stderr:

`

``

3327

`+

self.assertRaises(SystemExit, zipfile.main, args)

`

``

3328

`+

self.assertEqual(stdout.getvalue(), "")

`

``

3329

`+

self.assertIn(errmsg, stderr.getvalue())

`

``

3330

+

``

3331

`+

with captured_stdout() as stdout:

`

``

3332

`+

zipfile.main(["--metadata-encoding=shift_jis", "-t", TESTFN])

`

``

3333

`+

listing = stdout.getvalue()

`

``

3334

+

``

3335

`+

with captured_stdout() as stdout:

`

``

3336

`+

zipfile.main(["--metadata-encoding=shift_jis", "-l", TESTFN])

`

``

3337

`+

listing = stdout.getvalue()

`

``

3338

`+

for name in self.file_names:

`

``

3339

`+

self.assertIn(name, listing)

`

``

3340

+

``

3341

`+

os.mkdir(TESTFN2)

`

``

3342

`+

self.addCleanup(rmtree, TESTFN2)

`

``

3343

`+

zipfile.main(["--metadata-encoding=shift_jis", "-e", TESTFN, TESTFN2])

`

``

3344

`+

listing = os.listdir(TESTFN2)

`

``

3345

`+

for name in self.file_names:

`

``

3346

`+

self.assertIn(name, listing)

`

``

3347

+

``

3348

+

3213

3349

`if __name__ == "__main__":

`

3214

3350

`unittest.main()

`