[3.4] bpo-32072: Fix issues with binary plists. (GH-4455) (#4658) · python/cpython@c59731d (original) (raw)

`@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):

`

590

590

``

591

591

`_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

`

592

592

``

``

593

`+

_undefined = object()

`

``

594

+

593

595

`class _BinaryPlistParser:

`

594

596

`"""

`

595

597

` Read or write a binary plist file, following the description of the binary

`

`@@ -620,7 +622,8 @@ def parse(self, fp):

`

620

622

` ) = struct.unpack('>6xBBQQQ', trailer)

`

621

623

`self._fp.seek(offset_table_offset)

`

622

624

`self._object_offsets = self._read_ints(num_objects, offset_size)

`

623

``

`-

return self._read_object(self._object_offsets[top_object])

`

``

625

`+

self._objects = [_undefined] * num_objects

`

``

626

`+

return self._read_object(top_object)

`

624

627

``

625

628

`except (OSError, IndexError, struct.error):

`

626

629

`raise InvalidFileException()

`

`@@ -646,71 +649,77 @@ def _read_ints(self, n, size):

`

646

649

`def _read_refs(self, n):

`

647

650

`return self._read_ints(n, self._ref_size)

`

648

651

``

649

``

`-

def _read_object(self, offset):

`

``

652

`+

def _read_object(self, ref):

`

650

653

`"""

`

651

``

`-

read the object at offset.

`

``

654

`+

read the object by reference.

`

652

655

``

653

656

` May recursively read sub-objects (content of an array/dict/set)

`

654

657

` """

`

``

658

`+

result = self._objects[ref]

`

``

659

`+

if result is not _undefined:

`

``

660

`+

return result

`

``

661

+

``

662

`+

offset = self._object_offsets[ref]

`

655

663

`self._fp.seek(offset)

`

656

664

`token = self._fp.read(1)[0]

`

657

665

`tokenH, tokenL = token & 0xF0, token & 0x0F

`

658

666

``

659

667

`if token == 0x00:

`

660

``

`-

return None

`

``

668

`+

result = None

`

661

669

``

662

670

`elif token == 0x08:

`

663

``

`-

return False

`

``

671

`+

result = False

`

664

672

``

665

673

`elif token == 0x09:

`

666

``

`-

return True

`

``

674

`+

result = True

`

667

675

``

668

676

`# The referenced source code also mentions URL (0x0c, 0x0d) and

`

669

677

`# UUID (0x0e), but neither can be generated using the Cocoa libraries.

`

670

678

``

671

679

`elif token == 0x0f:

`

672

``

`-

return b''

`

``

680

`+

result = b''

`

673

681

``

674

682

`elif tokenH == 0x10: # int

`

675

``

`-

return int.from_bytes(self._fp.read(1 << tokenL),

`

676

``

`-

'big', signed=tokenL >= 3)

`

``

683

`+

result = int.from_bytes(self._fp.read(1 << tokenL),

`

``

684

`+

'big', signed=tokenL >= 3)

`

677

685

``

678

686

`elif token == 0x22: # real

`

679

``

`-

return struct.unpack('>f', self._fp.read(4))[0]

`

``

687

`+

result = struct.unpack('>f', self._fp.read(4))[0]

`

680

688

``

681

689

`elif token == 0x23: # real

`

682

``

`-

return struct.unpack('>d', self._fp.read(8))[0]

`

``

690

`+

result = struct.unpack('>d', self._fp.read(8))[0]

`

683

691

``

684

692

`elif token == 0x33: # date

`

685

693

`f = struct.unpack('>d', self._fp.read(8))[0]

`

686

694

`# timestamp 0 of binary plists corresponds to 1/1/2001

`

687

695

`# (year of Mac OS X 10.0), instead of 1/1/1970.

`

688

``

`-

return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)

`

``

696

`+

result = datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)

`

689

697

``

690

698

`elif tokenH == 0x40: # data

`

691

699

`s = self._get_size(tokenL)

`

692

700

`if self._use_builtin_types:

`

693

``

`-

return self._fp.read(s)

`

``

701

`+

result = self._fp.read(s)

`

694

702

`else:

`

695

``

`-

return Data(self._fp.read(s))

`

``

703

`+

result = Data(self._fp.read(s))

`

696

704

``

697

705

`elif tokenH == 0x50: # ascii string

`

698

706

`s = self._get_size(tokenL)

`

699

707

`result = self._fp.read(s).decode('ascii')

`

700

``

`-

return result

`

``

708

`+

result = result

`

701

709

``

702

710

`elif tokenH == 0x60: # unicode string

`

703

711

`s = self._get_size(tokenL)

`

704

``

`-

return self._fp.read(s * 2).decode('utf-16be')

`

``

712

`+

result = self._fp.read(s * 2).decode('utf-16be')

`

705

713

``

706

714

`# tokenH == 0x80 is documented as 'UID' and appears to be used for

`

707

715

`# keyed-archiving, not in plists.

`

708

716

``

709

717

`elif tokenH == 0xA0: # array

`

710

718

`s = self._get_size(tokenL)

`

711

719

`obj_refs = self._read_refs(s)

`

712

``

`-

return [self._read_object(self._object_offsets[x])

`

713

``

`-

for x in obj_refs]

`

``

720

`+

result = []

`

``

721

`+

self._objects[ref] = result

`

``

722

`+

result.extend(self._read_object(x) for x in obj_refs)

`

714

723

``

715

724

`# tokenH == 0xB0 is documented as 'ordset', but is not actually

`

716

725

`# implemented in the Apple reference code.

`

`@@ -723,12 +732,15 @@ def _read_object(self, offset):

`

723

732

`key_refs = self._read_refs(s)

`

724

733

`obj_refs = self._read_refs(s)

`

725

734

`result = self._dict_type()

`

``

735

`+

self._objects[ref] = result

`

726

736

`for k, o in zip(key_refs, obj_refs):

`

727

``

`-

result[self._read_object(self._object_offsets[k])

`

728

``

`-

] = self._read_object(self._object_offsets[o])

`

729

``

`-

return result

`

``

737

`+

result[self._read_object(k)] = self._read_object(o)

`

730

738

``

731

``

`-

raise InvalidFileException()

`

``

739

`+

else:

`

``

740

`+

raise InvalidFileException()

`

``

741

+

``

742

`+

self._objects[ref] = result

`

``

743

`+

return result

`

732

744

``

733

745

`def _count_to_size(count):

`

734

746

`if count < 1 << 8:

`

`@@ -743,6 +755,8 @@ def _count_to_size(count):

`

743

755

`else:

`

744

756

`return 8

`

745

757

``

``

758

`+

_scalars = (str, int, float, datetime.datetime, bytes)

`

``

759

+

746

760

`class _BinaryPlistWriter (object):

`

747

761

`def __init__(self, fp, sort_keys, skipkeys):

`

748

762

`self._fp = fp

`

`@@ -798,24 +812,25 @@ def _flatten(self, value):

`

798

812

`# First check if the object is in the object table, not used for

`

799

813

`# containers to ensure that two subcontainers with the same contents

`

800

814

`# will be serialized as distinct values.

`

801

``

`-

if isinstance(value, (

`

802

``

`-

str, int, float, datetime.datetime, bytes, bytearray)):

`

``

815

`+

if isinstance(value, _scalars):

`

803

816

`if (type(value), value) in self._objtable:

`

804

817

`return

`

805

818

``

806

819

`elif isinstance(value, Data):

`

807

820

`if (type(value.data), value.data) in self._objtable:

`

808

821

`return

`

809

822

``

``

823

`+

elif id(value) in self._objidtable:

`

``

824

`+

return

`

``

825

+

810

826

`# Add to objectreference map

`

811

827

`refnum = len(self._objlist)

`

812

828

`self._objlist.append(value)

`

813

``

`-

try:

`

814

``

`-

if isinstance(value, Data):

`

815

``

`-

self._objtable[(type(value.data), value.data)] = refnum

`

816

``

`-

else:

`

817

``

`-

self._objtable[(type(value), value)] = refnum

`

818

``

`-

except TypeError:

`

``

829

`+

if isinstance(value, _scalars):

`

``

830

`+

self._objtable[(type(value), value)] = refnum

`

``

831

`+

elif isinstance(value, Data):

`

``

832

`+

self._objtable[(type(value.data), value.data)] = refnum

`

``

833

`+

else:

`

819

834

`self._objidtable[id(value)] = refnum

`

820

835

``

821

836

`# And finally recurse into containers

`

`@@ -842,12 +857,11 @@ def _flatten(self, value):

`

842

857

`self._flatten(o)

`

843

858

``

844

859

`def _getrefnum(self, value):

`

845

``

`-

try:

`

846

``

`-

if isinstance(value, Data):

`

847

``

`-

return self._objtable[(type(value.data), value.data)]

`

848

``

`-

else:

`

849

``

`-

return self._objtable[(type(value), value)]

`

850

``

`-

except TypeError:

`

``

860

`+

if isinstance(value, _scalars):

`

``

861

`+

return self._objtable[(type(value), value)]

`

``

862

`+

elif isinstance(value, Data):

`

``

863

`+

return self._objtable[(type(value.data), value.data)]

`

``

864

`+

else:

`

851

865

`return self._objidtable[id(value)]

`

852

866

``

853

867

`def _write_size(self, token, size):

`