[3.4] bpo-32072: Fix issues with binary plists. (GH-4455) (#4658) · python/cpython@c59731d (original) (raw)

`@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):

590

591

`_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

592

593

_undefined = object()

594

+

593

595

`class _BinaryPlistParser:

594

596

`"""

595

597

` Read or write a binary plist file, following the description of the binary

`@@ -620,7 +622,8 @@ def parse(self, fp):

620

622

` ) = struct.unpack('>6xBBQQQ', trailer)

621

623

`self._fp.seek(offset_table_offset)

622

624

`self._object_offsets = self._read_ints(num_objects, offset_size)

623

return self._read_object(self._object_offsets[top_object])

625

self._objects = [_undefined] * num_objects

626

return self._read_object(top_object)

624

627

625

628

`except (OSError, IndexError, struct.error):

626

629

`raise InvalidFileException()

`@@ -646,71 +649,77 @@ def _read_ints(self, n, size):

646

649

`def _read_refs(self, n):

647

650

`return self._read_ints(n, self._ref_size)

648

651

649

def _read_object(self, offset):

652

def _read_object(self, ref):

650

653

`"""

651

read the object at offset.

654

read the object by reference.

652

655

653

656

` May recursively read sub-objects (content of an array/dict/set)

654

657

` """

658

result = self._objects[ref]

659

if result is not _undefined:

660

return result

661

+

662

offset = self._object_offsets[ref]

655

663

`self._fp.seek(offset)

656

664

`token = self._fp.read(1)[0]

657

665

`tokenH, tokenL = token & 0xF0, token & 0x0F

658

666

659

667

`if token == 0x00:

660

return None

668

result = None

661

669

662

670

`elif token == 0x08:

663

return False

671

result = False

664

672

665

673

`elif token == 0x09:

666

return True

674

result = True

667

675

668

676

`# The referenced source code also mentions URL (0x0c, 0x0d) and

669

677

`# UUID (0x0e), but neither can be generated using the Cocoa libraries.

670

678

671

679

`elif token == 0x0f:

672

return b''

680

result = b''

673

681

674

682

`elif tokenH == 0x10: # int

675

return int.from_bytes(self._fp.read(1 << tokenL),

676

'big', signed=tokenL >= 3)

683

result = int.from_bytes(self._fp.read(1 << tokenL),

684

'big', signed=tokenL >= 3)

677

685

678

686

`elif token == 0x22: # real

679

return struct.unpack('>f', self._fp.read(4))[0]

687

result = struct.unpack('>f', self._fp.read(4))[0]

680

688

681

689

`elif token == 0x23: # real

682

return struct.unpack('>d', self._fp.read(8))[0]

690

result = struct.unpack('>d', self._fp.read(8))[0]

683

691

684

692

`elif token == 0x33: # date

685

693

`f = struct.unpack('>d', self._fp.read(8))[0]

686

694

`# timestamp 0 of binary plists corresponds to 1/1/2001

687

695

`# (year of Mac OS X 10.0), instead of 1/1/1970.

688

return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)

696

result = datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)

689

697

690

698

`elif tokenH == 0x40: # data

691

699

`s = self._get_size(tokenL)

692

700

`if self._use_builtin_types:

693

return self._fp.read(s)

701

result = self._fp.read(s)

694

702

`else:

695

return Data(self._fp.read(s))

703

result = Data(self._fp.read(s))

696

704

697

705

`elif tokenH == 0x50: # ascii string

698

706

`s = self._get_size(tokenL)

699

707

`result = self._fp.read(s).decode('ascii')

700

return result

708

result = result

701

709

702

710

`elif tokenH == 0x60: # unicode string

703

711

`s = self._get_size(tokenL)

704

return self._fp.read(s * 2).decode('utf-16be')

712

result = self._fp.read(s * 2).decode('utf-16be')

705

713

706

714

`# tokenH == 0x80 is documented as 'UID' and appears to be used for

707

715

`# keyed-archiving, not in plists.

708

716

709

717

`elif tokenH == 0xA0: # array

710

718

`s = self._get_size(tokenL)

711

719

`obj_refs = self._read_refs(s)

712

return [self._read_object(self._object_offsets[x])

713

for x in obj_refs]

720

result = []

721

self._objects[ref] = result

722

result.extend(self._read_object(x) for x in obj_refs)

714

723

715

724

`# tokenH == 0xB0 is documented as 'ordset', but is not actually

716

725

`# implemented in the Apple reference code.

`@@ -723,12 +732,15 @@ def _read_object(self, offset):

723

732

`key_refs = self._read_refs(s)

724

733

`obj_refs = self._read_refs(s)

725

734

`result = self._dict_type()

735

self._objects[ref] = result

726

736

`for k, o in zip(key_refs, obj_refs):

727

result[self._read_object(self._object_offsets[k])

728

] = self._read_object(self._object_offsets[o])

729

return result

737

result[self._read_object(k)] = self._read_object(o)

730

738

731

raise InvalidFileException()

739

else:

740

raise InvalidFileException()

741

+

742

self._objects[ref] = result

743

return result

732

744

733

745

`def _count_to_size(count):

734

746

`if count < 1 << 8:

`@@ -743,6 +755,8 @@ def _count_to_size(count):

743

755

`else:

744

756

`return 8

745

757

758

_scalars = (str, int, float, datetime.datetime, bytes)

759

+

746

760

`class _BinaryPlistWriter (object):

747

761

`def __init__(self, fp, sort_keys, skipkeys):

748

762

`self._fp = fp

`@@ -798,24 +812,25 @@ def _flatten(self, value):

798

812

`# First check if the object is in the object table, not used for

799

813

`# containers to ensure that two subcontainers with the same contents

800

814

`# will be serialized as distinct values.

801

if isinstance(value, (

802

str, int, float, datetime.datetime, bytes, bytearray)):

815

if isinstance(value, _scalars):

803

816

`if (type(value), value) in self._objtable:

804

817

`return

805

818

806

819

`elif isinstance(value, Data):

807

820

`if (type(value.data), value.data) in self._objtable:

808

821

`return

809

822

823

elif id(value) in self._objidtable:

824

return

825

+

810

826

`# Add to objectreference map

811

827

`refnum = len(self._objlist)

812

828

`self._objlist.append(value)

813

try:

814

if isinstance(value, Data):

815

self._objtable[(type(value.data), value.data)] = refnum

816

else:

817

self._objtable[(type(value), value)] = refnum

818

except TypeError:

829

if isinstance(value, _scalars):

830

self._objtable[(type(value), value)] = refnum

831

elif isinstance(value, Data):

832

self._objtable[(type(value.data), value.data)] = refnum

833

else:

819

834

`self._objidtable[id(value)] = refnum

820

835

821

836

`# And finally recurse into containers

`@@ -842,12 +857,11 @@ def _flatten(self, value):

842

857

`self._flatten(o)

843

858

844

859

`def _getrefnum(self, value):

845

try:

846

if isinstance(value, Data):

847

return self._objtable[(type(value.data), value.data)]

848

else:

849

return self._objtable[(type(value), value)]

850

except TypeError:

860

if isinstance(value, _scalars):

861

return self._objtable[(type(value), value)]

862

elif isinstance(value, Data):

863

return self._objtable[(type(value.data), value.data)]

864

else:

851

865

`return self._objidtable[id(value)]

852

866

853

867

`def _write_size(self, token, size):