[Python-checkins] bpo-26707: Enable plistlib to read UID keys. (GH-12153)

Serhiy Storchaka webhook-mailer at python.org
Wed May 15 16:14:50 EDT 2019


https://github.com/python/cpython/commit/c981ad16b0f9740bd3381c96b4227a1faa1a88d9
commit: c981ad16b0f9740bd3381c96b4227a1faa1a88d9
branch: master
author: Jon Janzen <jjjonjanzen at gmail.com>
committer: Serhiy Storchaka <storchaka at gmail.com>
date: 2019-05-15T23:14:38+03:00
summary:

bpo-26707: Enable plistlib to read UID keys. (GH-12153)

Plistlib currently throws an exception when asked to decode a valid
.plist file that was generated by Apple's NSKeyedArchiver. Specifically,
this is caused by a byte 0x80 (signifying a UID) not being understood.

This fixes the problem by enabling the binary plist reader and writer
to read and write plistlib.UID objects.

files:
A Misc/NEWS.d/next/Library/2019-03-04-01-28-33.bpo-26707.QY4kRZ.rst
M Doc/library/plistlib.rst
M Doc/whatsnew/3.8.rst
M Lib/plistlib.py
M Lib/test/test_plistlib.py
M Mac/Tools/plistlib_generate_testdata.py
M Misc/ACKS

diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst
index 8bd6b63a8ee5..d84fcac0ef23 100644
--- a/Doc/library/plistlib.rst
+++ b/Doc/library/plistlib.rst
@@ -36,6 +36,10 @@ or :class:`datetime.datetime` objects.
 .. versionchanged:: 3.4
    New API, old API deprecated.  Support for binary format plists added.
 
+.. versionchanged:: 3.8
+   Support added for reading and writing :class:`UID` tokens in binary plists as used
+   by NSKeyedArchiver and NSKeyedUnarchiver.
+
 .. seealso::
 
    `PList manual page <https://developer.apple.com/library/content/documentation/Cocoa/Conceptual/PropertyLists/>`_
@@ -179,6 +183,16 @@ The following classes are available:
 
    .. deprecated:: 3.4 Use a :class:`bytes` object instead.
 
+.. class:: UID(data)
+
+   Wraps an :class:`int`.  This is used when reading or writing NSKeyedArchiver
+   encoded data, which contains UIDs (see the PList manual).
+
+   It has one attribute, :attr:`data`, which can be used to retrieve the int value
+   of the UID.  :attr:`data` must be in the range ``0 <= data < 2**64``.
+
+   .. versionadded:: 3.8
+
 
 The following constants are available:
 
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index a2af201215c2..c135183095ca 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -394,6 +394,14 @@ to a path.
 (Contributed by Joannah Nanjekye in :issue:`26978`)
 
 
+plistlib
+--------
+
+Added new :class:`plistlib.UID` and enabled support for reading and writing
+NSKeyedArchiver-encoded binary plists.
+(Contributed by Jon Janzen in :issue:`26707`.)
+
+
 socket
 ------
 
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index 248f5143f4ed..0133c89bdc66 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -48,7 +48,7 @@
 __all__ = [
     "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
     "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
-    "load", "dump", "loads", "dumps"
+    "load", "dump", "loads", "dumps", "UID"
 ]
 
 import binascii
@@ -175,6 +175,34 @@ def __repr__(self):
 #
 
 
+class UID:
+    def __init__(self, data):
+        if not isinstance(data, int):
+            raise TypeError("data must be an int")
+        if data >= 1 << 64:
+            raise ValueError("UIDs cannot be >= 2**64")
+        if data < 0:
+            raise ValueError("UIDs must be positive")
+        self.data = data
+
+    def __index__(self):
+        return self.data
+
+    def __repr__(self):
+        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
+
+    def __reduce__(self):
+        return self.__class__, (self.data,)
+
+    def __eq__(self, other):
+        if not isinstance(other, UID):
+            return NotImplemented
+        return self.data == other.data
+
+    def __hash__(self):
+        return hash(self.data)
+
+
 #
 # XML support
 #
@@ -649,8 +677,9 @@ def _read_object(self, ref):
             s = self._get_size(tokenL)
             result = self._fp.read(s * 2).decode('utf-16be')
 
-        # tokenH == 0x80 is documented as 'UID' and appears to be used for
-        # keyed-archiving, not in plists.
+        elif tokenH == 0x80:  # UID
+            # used by NSKeyedArchiver plist files
+            result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big'))
 
         elif tokenH == 0xA0:  # array
             s = self._get_size(tokenL)
@@ -874,6 +903,20 @@ def _write_object(self, value):
 
             self._fp.write(t)
 
+        elif isinstance(value, UID):
+            if value.data < 0:
+                raise ValueError("UIDs must be positive")
+            elif value.data < 1 << 8:
+                self._fp.write(struct.pack('>BB', 0x80, value))
+            elif value.data < 1 << 16:
+                self._fp.write(struct.pack('>BH', 0x81, value))
+            elif value.data < 1 << 32:
+                self._fp.write(struct.pack('>BL', 0x83, value))
+            elif value.data < 1 << 64:
+                self._fp.write(struct.pack('>BQ', 0x87, value))
+            else:
+                raise OverflowError(value)
+
         elif isinstance(value, (list, tuple)):
             refs = [self._getrefnum(o) for o in value]
             s = len(refs)
diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py
index 8d8e0a750a22..5c2d0265079a 100644
--- a/Lib/test/test_plistlib.py
+++ b/Lib/test/test_plistlib.py
@@ -1,5 +1,7 @@
 # Copyright (C) 2003-2013 Python Software Foundation
-
+import copy
+import operator
+import pickle
 import unittest
 import plistlib
 import os
@@ -10,6 +12,8 @@
 from test import support
 from io import BytesIO
 
+from plistlib import UID
+
 ALL_FORMATS=(plistlib.FMT_XML, plistlib.FMT_BINARY)
 
 # The testdata is generated using Mac/Tools/plistlib_generate_testdata.py
@@ -88,6 +92,17 @@
         ZwB0AHwAiACUAJoApQCuALsAygDTAOQA7QD4AQQBDwEdASsBNgE3ATgBTwFn
         AW4BcAFyAXQBdgF/AYMBhQGHAYwBlQGbAZ0BnwGhAaUBpwGwAbkBwAHBAcIB
         xQHHAsQC0gAAAAAAAAIBAAAAAAAAADkAAAAAAAAAAAAAAAAAAALs'''),
+    'KEYED_ARCHIVE': binascii.a2b_base64(b'''
+        YnBsaXN0MDDUAQIDBAUGHB1YJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVy
+        VCR0b3ASAAGGoKMHCA9VJG51bGzTCQoLDA0OVnB5dHlwZVYkY2xhc3NZTlMu
+        c3RyaW5nEAGAAl8QE0tleUFyY2hpdmUgVUlEIFRlc3TTEBESExQZWiRjbGFz
+        c25hbWVYJGNsYXNzZXNbJGNsYXNzaGludHNfEBdPQ19CdWlsdGluUHl0aG9u
+        VW5pY29kZaQVFhcYXxAXT0NfQnVpbHRpblB5dGhvblVuaWNvZGVfEBBPQ19Q
+        eXRob25Vbmljb2RlWE5TU3RyaW5nWE5TT2JqZWN0ohobXxAPT0NfUHl0aG9u
+        U3RyaW5nWE5TU3RyaW5nXxAPTlNLZXllZEFyY2hpdmVy0R4fVHJvb3SAAQAI
+        ABEAGgAjAC0AMgA3ADsAQQBIAE8AVgBgAGIAZAB6AIEAjACVAKEAuwDAANoA
+        7QD2AP8BAgEUAR0BLwEyATcAAAAAAAACAQAAAAAAAAAgAAAAAAAAAAAAAAAA
+        AAABOQ=='''),
 }
 
 
@@ -151,6 +166,14 @@ def test_invalid_type(self):
             with self.subTest(fmt=fmt):
                 self.assertRaises(TypeError, plistlib.dumps, pl, fmt=fmt)
 
+    def test_invalid_uid(self):
+        with self.assertRaises(TypeError):
+            UID("not an int")
+        with self.assertRaises(ValueError):
+            UID(2 ** 64)
+        with self.assertRaises(ValueError):
+            UID(-19)
+
     def test_int(self):
         for pl in [0, 2**8-1, 2**8, 2**16-1, 2**16, 2**32-1, 2**32,
                    2**63-1, 2**64-1, 1, -2**63]:
@@ -200,6 +223,45 @@ def test_indentation_dict_mix(self):
         data = {'1': {'2': [{'3': [[[[[{'test': b'aaaaaa'}]]]]]}]}}
         self.assertEqual(plistlib.loads(plistlib.dumps(data)), data)
 
+    def test_uid(self):
+        data = UID(1)
+        self.assertEqual(plistlib.loads(plistlib.dumps(data, fmt=plistlib.FMT_BINARY)), data)
+        dict_data = {
+            'uid0': UID(0),
+            'uid2': UID(2),
+            'uid8': UID(2 ** 8),
+            'uid16': UID(2 ** 16),
+            'uid32': UID(2 ** 32),
+            'uid63': UID(2 ** 63)
+        }
+        self.assertEqual(plistlib.loads(plistlib.dumps(dict_data, fmt=plistlib.FMT_BINARY)), dict_data)
+
+    def test_uid_data(self):
+        uid = UID(1)
+        self.assertEqual(uid.data, 1)
+
+    def test_uid_eq(self):
+        self.assertEqual(UID(1), UID(1))
+        self.assertNotEqual(UID(1), UID(2))
+        self.assertNotEqual(UID(1), "not uid")
+
+    def test_uid_hash(self):
+        self.assertEqual(hash(UID(1)), hash(UID(1)))
+
+    def test_uid_repr(self):
+        self.assertEqual(repr(UID(1)), "UID(1)")
+
+    def test_uid_index(self):
+        self.assertEqual(operator.index(UID(1)), 1)
+
+    def test_uid_pickle(self):
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            self.assertEqual(pickle.loads(pickle.dumps(UID(19), protocol=proto)), UID(19))
+
+    def test_uid_copy(self):
+        self.assertEqual(copy.copy(UID(1)), UID(1))
+        self.assertEqual(copy.deepcopy(UID(1)), UID(1))
+
     def test_appleformatting(self):
         for use_builtin_types in (True, False):
             for fmt in ALL_FORMATS:
@@ -648,6 +710,38 @@ def test_dataobject_deprecated(self):
         self.assertEqual(cur, in_data)
 
 
+class TestKeyedArchive(unittest.TestCase):
+    def test_keyed_archive_data(self):
+        # This is the structure of a NSKeyedArchive packed plist
+        data = {
+            '$version': 100000,
+            '$objects': [
+                '$null', {
+                    'pytype': 1,
+                    '$class': UID(2),
+                    'NS.string': 'KeyArchive UID Test'
+                },
+                {
+                    '$classname': 'OC_BuiltinPythonUnicode',
+                    '$classes': [
+                        'OC_BuiltinPythonUnicode',
+                        'OC_PythonUnicode',
+                        'NSString',
+                        'NSObject'
+                    ],
+                    '$classhints': [
+                        'OC_PythonString', 'NSString'
+                    ]
+                }
+            ],
+            '$archiver': 'NSKeyedArchiver',
+            '$top': {
+                'root': UID(1)
+            }
+        }
+        self.assertEqual(plistlib.loads(TESTDATA["KEYED_ARCHIVE"]), data)
+
+
 class MiscTestCase(unittest.TestCase):
     def test__all__(self):
         blacklist = {"PlistFormat", "PLISTHEADER"}
@@ -655,7 +749,7 @@ def test__all__(self):
 
 
 def test_main():
-    support.run_unittest(TestPlistlib, TestPlistlibDeprecated, MiscTestCase)
+    support.run_unittest(TestPlistlib, TestPlistlibDeprecated, TestKeyedArchive, MiscTestCase)
 
 
 if __name__ == '__main__':
diff --git a/Mac/Tools/plistlib_generate_testdata.py b/Mac/Tools/plistlib_generate_testdata.py
index 057b61765b8a..3349c604a37a 100755
--- a/Mac/Tools/plistlib_generate_testdata.py
+++ b/Mac/Tools/plistlib_generate_testdata.py
@@ -5,6 +5,7 @@
 from Cocoa import NSPropertyListXMLFormat_v1_0, NSPropertyListBinaryFormat_v1_0
 from Cocoa import CFUUIDCreateFromString, NSNull, NSUUID, CFPropertyListCreateData
 from Cocoa import NSURL
+from Cocoa import NSKeyedArchiver
 
 import datetime
 from collections import OrderedDict
@@ -89,6 +90,8 @@ def main():
         else:
             print("    %s: binascii.a2b_base64(b'''\n        %s'''),"%(fmt_name, _encode_base64(bytes(data)).decode('ascii')[:-1]))
 
+    keyed_archive_data = NSKeyedArchiver.archivedDataWithRootObject_("KeyArchive UID Test")
+    print("    'KEYED_ARCHIVE': binascii.a2b_base64(b'''\n        %s''')," % (_encode_base64(bytes(keyed_archive_data)).decode('ascii')[:-1]))
     print("}")
     print()
 
diff --git a/Misc/ACKS b/Misc/ACKS
index ec5b017d515a..f5a31a878c52 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -754,6 +754,7 @@ Geert Jansen
 Jack Jansen
 Hans-Peter Jansen
 Bill Janssen
+Jon Janzen
 Thomas Jarosch
 Juhana Jauhiainen
 Rajagopalasarma Jayakrishnan
diff --git a/Misc/NEWS.d/next/Library/2019-03-04-01-28-33.bpo-26707.QY4kRZ.rst b/Misc/NEWS.d/next/Library/2019-03-04-01-28-33.bpo-26707.QY4kRZ.rst
new file mode 100644
index 000000000000..ab76540c9eec
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-03-04-01-28-33.bpo-26707.QY4kRZ.rst
@@ -0,0 +1 @@
+Enable plistlib to read and write binary plist files that were created by NSKeyedArchiver. Specifically, this allows plistlib to process 0x80 tokens as UID objects.
\ No newline at end of file



More information about the Python-checkins mailing list