[3.4] bpo-32072: Fix issues with binary plists. (GH-4455) (#4658) · python/cpython@c59731d (original) (raw)
`@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
`
590
590
``
591
591
`_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
`
592
592
``
``
593
`+
_undefined = object()
`
``
594
+
593
595
`class _BinaryPlistParser:
`
594
596
`"""
`
595
597
` Read or write a binary plist file, following the description of the binary
`
`@@ -620,7 +622,8 @@ def parse(self, fp):
`
620
622
` ) = struct.unpack('>6xBBQQQ', trailer)
`
621
623
`self._fp.seek(offset_table_offset)
`
622
624
`self._object_offsets = self._read_ints(num_objects, offset_size)
`
623
``
`-
return self._read_object(self._object_offsets[top_object])
`
``
625
`+
self._objects = [_undefined] * num_objects
`
``
626
`+
return self._read_object(top_object)
`
624
627
``
625
628
`except (OSError, IndexError, struct.error):
`
626
629
`raise InvalidFileException()
`
`@@ -646,71 +649,77 @@ def _read_ints(self, n, size):
`
646
649
`def _read_refs(self, n):
`
647
650
`return self._read_ints(n, self._ref_size)
`
648
651
``
649
``
`-
def _read_object(self, offset):
`
``
652
`+
def _read_object(self, ref):
`
650
653
`"""
`
651
``
`-
read the object at offset.
`
``
654
`+
read the object by reference.
`
652
655
``
653
656
` May recursively read sub-objects (content of an array/dict/set)
`
654
657
` """
`
``
658
`+
result = self._objects[ref]
`
``
659
`+
if result is not _undefined:
`
``
660
`+
return result
`
``
661
+
``
662
`+
offset = self._object_offsets[ref]
`
655
663
`self._fp.seek(offset)
`
656
664
`token = self._fp.read(1)[0]
`
657
665
`tokenH, tokenL = token & 0xF0, token & 0x0F
`
658
666
``
659
667
`if token == 0x00:
`
660
``
`-
return None
`
``
668
`+
result = None
`
661
669
``
662
670
`elif token == 0x08:
`
663
``
`-
return False
`
``
671
`+
result = False
`
664
672
``
665
673
`elif token == 0x09:
`
666
``
`-
return True
`
``
674
`+
result = True
`
667
675
``
668
676
`# The referenced source code also mentions URL (0x0c, 0x0d) and
`
669
677
`# UUID (0x0e), but neither can be generated using the Cocoa libraries.
`
670
678
``
671
679
`elif token == 0x0f:
`
672
``
`-
return b''
`
``
680
`+
result = b''
`
673
681
``
674
682
`elif tokenH == 0x10: # int
`
675
``
`-
return int.from_bytes(self._fp.read(1 << tokenL),
`
676
``
`-
'big', signed=tokenL >= 3)
`
``
683
`+
result = int.from_bytes(self._fp.read(1 << tokenL),
`
``
684
`+
'big', signed=tokenL >= 3)
`
677
685
``
678
686
`elif token == 0x22: # real
`
679
``
`-
return struct.unpack('>f', self._fp.read(4))[0]
`
``
687
`+
result = struct.unpack('>f', self._fp.read(4))[0]
`
680
688
``
681
689
`elif token == 0x23: # real
`
682
``
`-
return struct.unpack('>d', self._fp.read(8))[0]
`
``
690
`+
result = struct.unpack('>d', self._fp.read(8))[0]
`
683
691
``
684
692
`elif token == 0x33: # date
`
685
693
`f = struct.unpack('>d', self._fp.read(8))[0]
`
686
694
`# timestamp 0 of binary plists corresponds to 1/1/2001
`
687
695
`# (year of Mac OS X 10.0), instead of 1/1/1970.
`
688
``
`-
return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)
`
``
696
`+
result = datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)
`
689
697
``
690
698
`elif tokenH == 0x40: # data
`
691
699
`s = self._get_size(tokenL)
`
692
700
`if self._use_builtin_types:
`
693
``
`-
return self._fp.read(s)
`
``
701
`+
result = self._fp.read(s)
`
694
702
`else:
`
695
``
`-
return Data(self._fp.read(s))
`
``
703
`+
result = Data(self._fp.read(s))
`
696
704
``
697
705
`elif tokenH == 0x50: # ascii string
`
698
706
`s = self._get_size(tokenL)
`
699
707
`result = self._fp.read(s).decode('ascii')
`
700
``
`-
return result
`
``
708
`+
result = result
`
701
709
``
702
710
`elif tokenH == 0x60: # unicode string
`
703
711
`s = self._get_size(tokenL)
`
704
``
`-
return self._fp.read(s * 2).decode('utf-16be')
`
``
712
`+
result = self._fp.read(s * 2).decode('utf-16be')
`
705
713
``
706
714
`# tokenH == 0x80 is documented as 'UID' and appears to be used for
`
707
715
`# keyed-archiving, not in plists.
`
708
716
``
709
717
`elif tokenH == 0xA0: # array
`
710
718
`s = self._get_size(tokenL)
`
711
719
`obj_refs = self._read_refs(s)
`
712
``
`-
return [self._read_object(self._object_offsets[x])
`
713
``
`-
for x in obj_refs]
`
``
720
`+
result = []
`
``
721
`+
self._objects[ref] = result
`
``
722
`+
result.extend(self._read_object(x) for x in obj_refs)
`
714
723
``
715
724
`# tokenH == 0xB0 is documented as 'ordset', but is not actually
`
716
725
`# implemented in the Apple reference code.
`
`@@ -723,12 +732,15 @@ def _read_object(self, offset):
`
723
732
`key_refs = self._read_refs(s)
`
724
733
`obj_refs = self._read_refs(s)
`
725
734
`result = self._dict_type()
`
``
735
`+
self._objects[ref] = result
`
726
736
`for k, o in zip(key_refs, obj_refs):
`
727
``
`-
result[self._read_object(self._object_offsets[k])
`
728
``
`-
] = self._read_object(self._object_offsets[o])
`
729
``
`-
return result
`
``
737
`+
result[self._read_object(k)] = self._read_object(o)
`
730
738
``
731
``
`-
raise InvalidFileException()
`
``
739
`+
else:
`
``
740
`+
raise InvalidFileException()
`
``
741
+
``
742
`+
self._objects[ref] = result
`
``
743
`+
return result
`
732
744
``
733
745
`def _count_to_size(count):
`
734
746
`if count < 1 << 8:
`
`@@ -743,6 +755,8 @@ def _count_to_size(count):
`
743
755
`else:
`
744
756
`return 8
`
745
757
``
``
758
`+
_scalars = (str, int, float, datetime.datetime, bytes)
`
``
759
+
746
760
`class _BinaryPlistWriter (object):
`
747
761
`def __init__(self, fp, sort_keys, skipkeys):
`
748
762
`self._fp = fp
`
`@@ -798,24 +812,25 @@ def _flatten(self, value):
`
798
812
`# First check if the object is in the object table, not used for
`
799
813
`# containers to ensure that two subcontainers with the same contents
`
800
814
`# will be serialized as distinct values.
`
801
``
`-
if isinstance(value, (
`
802
``
`-
str, int, float, datetime.datetime, bytes, bytearray)):
`
``
815
`+
if isinstance(value, _scalars):
`
803
816
`if (type(value), value) in self._objtable:
`
804
817
`return
`
805
818
``
806
819
`elif isinstance(value, Data):
`
807
820
`if (type(value.data), value.data) in self._objtable:
`
808
821
`return
`
809
822
``
``
823
`+
elif id(value) in self._objidtable:
`
``
824
`+
return
`
``
825
+
810
826
`# Add to objectreference map
`
811
827
`refnum = len(self._objlist)
`
812
828
`self._objlist.append(value)
`
813
``
`-
try:
`
814
``
`-
if isinstance(value, Data):
`
815
``
`-
self._objtable[(type(value.data), value.data)] = refnum
`
816
``
`-
else:
`
817
``
`-
self._objtable[(type(value), value)] = refnum
`
818
``
`-
except TypeError:
`
``
829
`+
if isinstance(value, _scalars):
`
``
830
`+
self._objtable[(type(value), value)] = refnum
`
``
831
`+
elif isinstance(value, Data):
`
``
832
`+
self._objtable[(type(value.data), value.data)] = refnum
`
``
833
`+
else:
`
819
834
`self._objidtable[id(value)] = refnum
`
820
835
``
821
836
`# And finally recurse into containers
`
`@@ -842,12 +857,11 @@ def _flatten(self, value):
`
842
857
`self._flatten(o)
`
843
858
``
844
859
`def _getrefnum(self, value):
`
845
``
`-
try:
`
846
``
`-
if isinstance(value, Data):
`
847
``
`-
return self._objtable[(type(value.data), value.data)]
`
848
``
`-
else:
`
849
``
`-
return self._objtable[(type(value), value)]
`
850
``
`-
except TypeError:
`
``
860
`+
if isinstance(value, _scalars):
`
``
861
`+
return self._objtable[(type(value), value)]
`
``
862
`+
elif isinstance(value, Data):
`
``
863
`+
return self._objtable[(type(value.data), value.data)]
`
``
864
`+
else:
`
851
865
`return self._objidtable[id(value)]
`
852
866
``
853
867
`def _write_size(self, token, size):
`