bpo-34726: Fix handling of hash-based pycs in zipimport. (GH-10327) · python/cpython@a6e956b (original) (raw)
`@@ -578,33 +578,53 @@ def _eq_mtime(t1, t2):
`
578
578
`# dostime only stores even seconds, so be lenient
`
579
579
`return abs(t1 - t2) <= 1
`
580
580
``
``
581
+
581
582
`# Given the contents of a .py[co] file, unmarshal the data
`
582
583
`# and return the code object. Return None if the magic word doesn't
`
583
``
`-
# match (we do this instead of raising an exception as we fall back
`
``
584
`+
# match, or if the recorded .py[co] metadata does not match the source,
`
``
585
`+
# (we do this instead of raising an exception as we fall back
`
584
586
`# to .py if available and we don't want to mask other errors).
`
585
``
`-
def _unmarshal_code(pathname, data, mtime):
`
586
``
`-
if len(data) < 16:
`
587
``
`-
raise ZipImportError('bad pyc data')
`
588
``
-
589
``
`-
if data[:4] != _bootstrap_external.MAGIC_NUMBER:
`
590
``
`-
_bootstrap._verbose_message('{!r} has bad magic', pathname)
`
591
``
`-
return None # signal caller to try alternative
`
592
``
-
593
``
`-
flags = _unpack_uint32(data[4:8])
`
594
``
`-
if flags != 0:
`
595
``
`-
# Hash-based pyc. We currently refuse to handle checked hash-based
`
596
``
`-
# pycs. We could validate hash-based pycs against the source, but it
`
597
``
`-
# seems likely that most people putting hash-based pycs in a zipfile
`
598
``
`-
# will use unchecked ones.
`
``
587
`+
def _unmarshal_code(self, pathname, fullpath, fullname, data):
`
``
588
`+
exc_details = {
`
``
589
`+
'name': fullname,
`
``
590
`+
'path': fullpath,
`
``
591
`+
}
`
``
592
+
``
593
`+
try:
`
``
594
`+
flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
`
``
595
`+
except ImportError:
`
``
596
`+
return None
`
``
597
+
``
598
`+
hash_based = flags & 0b1 != 0
`
``
599
`+
if hash_based:
`
``
600
`+
check_source = flags & 0b10 != 0
`
599
601
`if (_imp.check_hash_based_pycs != 'never' and
`
600
``
`-
(flags != 0x1 or _imp.check_hash_based_pycs == 'always')):
`
601
``
`-
return None
`
602
``
`-
elif mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):
`
603
``
`-
_bootstrap._verbose_message('{!r} has bad mtime', pathname)
`
604
``
`-
return None # signal caller to try alternative
`
``
602
`+
(check_source or _imp.check_hash_based_pycs == 'always')):
`
``
603
`+
source_bytes = _get_pyc_source(self, fullpath)
`
``
604
`+
if source_bytes is not None:
`
``
605
`+
source_hash = _imp.source_hash(
`
``
606
`+
_bootstrap_external._RAW_MAGIC_NUMBER,
`
``
607
`+
source_bytes,
`
``
608
`+
)
`
``
609
+
``
610
`+
try:
`
``
611
`+
_bootstrap_external._validate_hash_pyc(
`
``
612
`+
data, source_hash, fullname, exc_details)
`
``
613
`+
except ImportError:
`
``
614
`+
return None
`
``
615
`+
else:
`
``
616
`+
source_mtime, source_size = \
`
``
617
`+
_get_mtime_and_size_of_source(self, fullpath)
`
``
618
+
``
619
`+
if source_mtime:
`
``
620
`+
# We don't use _bootstrap_external._validate_timestamp_pyc
`
``
621
`+
# to allow for a more lenient timestamp check.
`
``
622
`+
if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
`
``
623
`+
_unpack_uint32(data[12:16]) != source_size):
`
``
624
`+
_bootstrap._verbose_message(
`
``
625
`+
f'bytecode is stale for {fullname!r}')
`
``
626
`+
return None
`
605
627
``
606
``
`-
# XXX the pyc's size field is ignored; timestamp collisions are probably
`
607
``
`-
# unimportant with zip files.
`
608
628
`code = marshal.loads(data[16:])
`
609
629
`if not isinstance(code, _code_type):
`
610
630
`raise TypeError(f'compiled module {pathname!r} is not a code object')
`
`@@ -639,9 +659,9 @@ def _parse_dostime(d, t):
`
639
659
`-1, -1, -1))
`
640
660
``
641
661
`# Given a path to a .pyc file in the archive, return the
`
642
``
`-
# modification time of the matching .py file, or 0 if no source
`
643
``
`-
# is available.
`
644
``
`-
def _get_mtime_of_source(self, path):
`
``
662
`+
# modification time of the matching .py file and its size,
`
``
663
`+
# or (0, 0) if no source is available.
`
``
664
`+
def _get_mtime_and_size_of_source(self, path):
`
645
665
`try:
`
646
666
`# strip 'c' or 'o' from *.py[co]
`
647
667
`assert path[-1:] in ('c', 'o')
`
`@@ -651,9 +671,27 @@ def _get_mtime_of_source(self, path):
`
651
671
`# with an embedded pyc time stamp
`
652
672
`time = toc_entry[5]
`
653
673
`date = toc_entry[6]
`
654
``
`-
return _parse_dostime(date, time)
`
``
674
`+
uncompressed_size = toc_entry[3]
`
``
675
`+
return _parse_dostime(date, time), uncompressed_size
`
655
676
`except (KeyError, IndexError, TypeError):
`
656
``
`-
return 0
`
``
677
`+
return 0, 0
`
``
678
+
``
679
+
``
680
`+
# Given a path to a .pyc file in the archive, return the
`
``
681
`+
# contents of the matching .py file, or None if no source
`
``
682
`+
# is available.
`
``
683
`+
def _get_pyc_source(self, path):
`
``
684
`+
# strip 'c' or 'o' from *.py[co]
`
``
685
`+
assert path[-1:] in ('c', 'o')
`
``
686
`+
path = path[:-1]
`
``
687
+
``
688
`+
try:
`
``
689
`+
toc_entry = self._files[path]
`
``
690
`+
except KeyError:
`
``
691
`+
return None
`
``
692
`+
else:
`
``
693
`+
return _get_data(self.archive, toc_entry)
`
``
694
+
657
695
``
658
696
`# Get the code object associated with the module specified by
`
659
697
`# 'fullname'.
`
`@@ -670,8 +708,7 @@ def _get_module_code(self, fullname):
`
670
708
`modpath = toc_entry[0]
`
671
709
`data = _get_data(self.archive, toc_entry)
`
672
710
`if isbytecode:
`
673
``
`-
mtime = _get_mtime_of_source(self, fullpath)
`
674
``
`-
code = _unmarshal_code(modpath, data, mtime)
`
``
711
`+
code = _unmarshal_code(self, modpath, fullpath, fullname, data)
`
675
712
`else:
`
676
713
`code = _compile_source(modpath, data)
`
677
714
`if code is None:
`