bpo-34726: Fix handling of hash-based pycs in zipimport. (GH-10327) · python/cpython@a6e956b (original) (raw)

`@@ -578,33 +578,53 @@ def _eq_mtime(t1, t2):

`

578

578

`# dostime only stores even seconds, so be lenient

`

579

579

`return abs(t1 - t2) <= 1

`

580

580

``

``

581

+

581

582

`# Given the contents of a .py[co] file, unmarshal the data

`

582

583

`# and return the code object. Return None if the magic word doesn't

`

583

``

`-

# match (we do this instead of raising an exception as we fall back

`

``

584

`+

# match, or if the recorded .py[co] metadata does not match the source,

`

``

585

`+

# (we do this instead of raising an exception as we fall back

`

584

586

`# to .py if available and we don't want to mask other errors).

`

585

``

`-

def _unmarshal_code(pathname, data, mtime):

`

586

``

`-

if len(data) < 16:

`

587

``

`-

raise ZipImportError('bad pyc data')

`

588

``

-

589

``

`-

if data[:4] != _bootstrap_external.MAGIC_NUMBER:

`

590

``

`-

_bootstrap._verbose_message('{!r} has bad magic', pathname)

`

591

``

`-

return None # signal caller to try alternative

`

592

``

-

593

``

`-

flags = _unpack_uint32(data[4:8])

`

594

``

`-

if flags != 0:

`

595

``

`-

# Hash-based pyc. We currently refuse to handle checked hash-based

`

596

``

`-

# pycs. We could validate hash-based pycs against the source, but it

`

597

``

`-

# seems likely that most people putting hash-based pycs in a zipfile

`

598

``

`-

# will use unchecked ones.

`

``

587

`+

def _unmarshal_code(self, pathname, fullpath, fullname, data):

`

``

588

`+

exc_details = {

`

``

589

`+

'name': fullname,

`

``

590

`+

'path': fullpath,

`

``

591

`+

}

`

``

592

+

``

593

`+

try:

`

``

594

`+

flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)

`

``

595

`+

except ImportError:

`

``

596

`+

return None

`

``

597

+

``

598

`+

hash_based = flags & 0b1 != 0

`

``

599

`+

if hash_based:

`

``

600

`+

check_source = flags & 0b10 != 0

`

599

601

`if (_imp.check_hash_based_pycs != 'never' and

`

600

``

`-

(flags != 0x1 or _imp.check_hash_based_pycs == 'always')):

`

601

``

`-

return None

`

602

``

`-

elif mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):

`

603

``

`-

_bootstrap._verbose_message('{!r} has bad mtime', pathname)

`

604

``

`-

return None # signal caller to try alternative

`

``

602

`+

(check_source or _imp.check_hash_based_pycs == 'always')):

`

``

603

`+

source_bytes = _get_pyc_source(self, fullpath)

`

``

604

`+

if source_bytes is not None:

`

``

605

`+

source_hash = _imp.source_hash(

`

``

606

`+

_bootstrap_external._RAW_MAGIC_NUMBER,

`

``

607

`+

source_bytes,

`

``

608

`+

)

`

``

609

+

``

610

`+

try:

`

``

611

`+

_bootstrap_external._validate_hash_pyc(

`

``

612

`+

data, source_hash, fullname, exc_details)

`

``

613

`+

except ImportError:

`

``

614

`+

return None

`

``

615

`+

else:

`

``

616

`+

source_mtime, source_size = \

`

``

617

`+

_get_mtime_and_size_of_source(self, fullpath)

`

``

618

+

``

619

`+

if source_mtime:

`

``

620

`+

# We don't use _bootstrap_external._validate_timestamp_pyc

`

``

621

`+

# to allow for a more lenient timestamp check.

`

``

622

`+

if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or

`

``

623

`+

_unpack_uint32(data[12:16]) != source_size):

`

``

624

`+

_bootstrap._verbose_message(

`

``

625

`+

f'bytecode is stale for {fullname!r}')

`

``

626

`+

return None

`

605

627

``

606

``

`-

# XXX the pyc's size field is ignored; timestamp collisions are probably

`

607

``

`-

# unimportant with zip files.

`

608

628

`code = marshal.loads(data[16:])

`

609

629

`if not isinstance(code, _code_type):

`

610

630

`raise TypeError(f'compiled module {pathname!r} is not a code object')

`

`@@ -639,9 +659,9 @@ def _parse_dostime(d, t):

`

639

659

`-1, -1, -1))

`

640

660

``

641

661

`# Given a path to a .pyc file in the archive, return the

`

642

``

`-

# modification time of the matching .py file, or 0 if no source

`

643

``

`-

# is available.

`

644

``

`-

def _get_mtime_of_source(self, path):

`

``

662

`+

# modification time of the matching .py file and its size,

`

``

663

`+

# or (0, 0) if no source is available.

`

``

664

`+

def _get_mtime_and_size_of_source(self, path):

`

645

665

`try:

`

646

666

`# strip 'c' or 'o' from *.py[co]

`

647

667

`assert path[-1:] in ('c', 'o')

`

`@@ -651,9 +671,27 @@ def _get_mtime_of_source(self, path):

`

651

671

`# with an embedded pyc time stamp

`

652

672

`time = toc_entry[5]

`

653

673

`date = toc_entry[6]

`

654

``

`-

return _parse_dostime(date, time)

`

``

674

`+

uncompressed_size = toc_entry[3]

`

``

675

`+

return _parse_dostime(date, time), uncompressed_size

`

655

676

`except (KeyError, IndexError, TypeError):

`

656

``

`-

return 0

`

``

677

`+

return 0, 0

`

``

678

+

``

679

+

``

680

`+

# Given a path to a .pyc file in the archive, return the

`

``

681

`+

# contents of the matching .py file, or None if no source

`

``

682

`+

# is available.

`

``

683

`+

def _get_pyc_source(self, path):

`

``

684

`+

# strip 'c' or 'o' from *.py[co]

`

``

685

`+

assert path[-1:] in ('c', 'o')

`

``

686

`+

path = path[:-1]

`

``

687

+

``

688

`+

try:

`

``

689

`+

toc_entry = self._files[path]

`

``

690

`+

except KeyError:

`

``

691

`+

return None

`

``

692

`+

else:

`

``

693

`+

return _get_data(self.archive, toc_entry)

`

``

694

+

657

695

``

658

696

`# Get the code object associated with the module specified by

`

659

697

`# 'fullname'.

`

`@@ -670,8 +708,7 @@ def _get_module_code(self, fullname):

`

670

708

`modpath = toc_entry[0]

`

671

709

`data = _get_data(self.archive, toc_entry)

`

672

710

`if isbytecode:

`

673

``

`-

mtime = _get_mtime_of_source(self, fullpath)

`

674

``

`-

code = _unmarshal_code(modpath, data, mtime)

`

``

711

`+

code = _unmarshal_code(self, modpath, fullpath, fullname, data)

`

675

712

`else:

`

676

713

`code = _compile_source(modpath, data)

`

677

714

`if code is None:

`