[Python-checkins] python/dist/src/Lib pickle.py,1.77,1.78 pickletools.py,1.4,1.5

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Mon, 27 Jan 2003 13:44:27 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv17632

Modified Files:
	pickle.py pickletools.py 
Log Message:
Begin documenting protocol 2.


Index: pickle.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickle.py,v
retrieving revision 1.77
retrieving revision 1.78
diff -C2 -d -r1.77 -r1.78
*** pickle.py	27 Jan 2003 21:25:41 -0000	1.77
--- pickle.py	27 Jan 2003 21:44:24 -0000	1.78
***************
*** 128,131 ****
--- 128,147 ----
  FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py
  
+ # Protocol 2 (not yet implemented) (XXX comments will be added later)
+ 
+ NEWOBJ          = '\x81'
+ PROTO           = '\x80'
+ EXT2            = '\x83'
+ EXT1            = '\x82'
+ TUPLE1          = '\x85'
+ EXT4            = '\x84'
+ TUPLE3          = '\x87'
+ TUPLE2          = '\x86'
+ NEWFALSE        = '\x89'
+ NEWTRUE         = '\x88'
+ LONG2           = '\x8b'
+ LONG1           = '\x8a'
+ LONG4           = '\x8c'
+ 
  
  __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])

Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickletools.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** pickletools.py	27 Jan 2003 19:40:47 -0000	1.4
--- pickletools.py	27 Jan 2003 21:44:25 -0000	1.5
***************
*** 579,582 ****
--- 579,699 ----
               """)
  
+ # Protocol 2 formats
+ 
+ def decode_long(data):
+     r"""Decode a long from a two's complement little-endian binary string.
+     >>> decode_long("\xff\x00")
+     255L
+     >>> decode_long("\xff\x7f")
+     32767L
+     >>> decode_long("\x00\xff")
+     -256L
+     >>> decode_long("\x00\x80")
+     -32768L
+     >>> 
+     """
+     x = 0L
+     i = 0L
+     for c in data:
+         x |= long(ord(c)) << i
+         i += 8L
+     if i and (x & (1L << (i-1L))):
+         x -= 1L << i
+     return x
+ 
+ def read_long1(f):
+     r"""
+     >>> import StringIO
+     >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
+     255L
+     >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
+     32767L
+     >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
+     -256L
+     >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
+     -32768L
+     >>> 
+     """
+ 
+     n = read_uint1(f)
+     data = f.read(n)
+     if len(data) != n:
+         raise ValueError("not enough data in stream to read long1")
+     return decode_long(data)
+ 
+ long1 = ArgumentDescriptor(
+     name="long1",
+     n=TAKEN_FROM_ARGUMENT,
+     reader=read_long1,
+     doc="""A binary long, little-endian, using 1-byte size.
+ 
+     This first reads one byte as an unsigned size, then reads that
+     many bytes and interprets them as a little-endian long.
+     """)
+ 
+ def read_long2(f):
+     r"""
+     >>> import StringIO
+     >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
+     255L
+     >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
+     32767L
+     >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
+     -256L
+     >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
+     -32768L
+     >>> 
+     """
+ 
+     n = read_uint2(f)
+     data = f.read(n)
+     if len(data) != n:
+         raise ValueError("not enough data in stream to read long2")
+     return decode_long(data)
+ 
+ long2 = ArgumentDescriptor(
+     name="long2",
+     n=TAKEN_FROM_ARGUMENT,
+     reader=read_long2,
+     doc="""A binary long, little-endian, using 2-byte size.
+ 
+     This first reads two bytes as an unsigned size, then reads that
+     many bytes and interprets them as a little-endian long.
+     """)
+ 
+ def read_long4(f):
+     r"""Read a long whose byte count is given by a 4-byte signed size.
+     >>> import StringIO
+     >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
+     255L
+     >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
+     32767L
+     >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
+     -256L
+     >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
+     -32768L
+     >>> 
+     """
+     # The size is read as a signed int, so a negative count is rejected.
+     n = read_int4(f)
+     if n < 0:
+         raise ValueError("long4 byte count < 0: %d" % n)
+     data = f.read(n)
+     if len(data) != n:
+         raise ValueError("not enough data in stream to read long4")
+     return decode_long(data)
+ 
+ long4 = ArgumentDescriptor(
+     name="long4",
+     n=TAKEN_FROM_ARGUMENT,
+     reader=read_long4,
+     doc="""A binary representation of a long, little-endian.
+ 
+     This first reads four bytes as a signed size (but requires the
+     size to be >= 0), then reads that many bytes and interprets them
+     as a little-endian long.
+     """)
+ 
+ 
  ##############################################################################
  # Object descriptors.  The stack used by the pickle machine holds objects,
***************
*** 628,631 ****
--- 745,753 ----
                              "a Python bool.")
  
+ pybool = StackObject(
+              name='bool',
+              obtype=(bool,),
+              doc="A Python bool object.")
+ 
  pyfloat = StackObject(
                name='float',
***************
*** 1437,1440 ****
--- 1559,1728 ----
        returns is pushed on the stack.  See PERSID for more detail.
        """),
+ 
+     # Protocol 2 opcodes
+ 
+     I(name='PROTO',
+       code='\x80',
+       arg=uint1,
+       stack_before=[],
+       stack_after=[],
+       proto=2,
+       doc="""Protocol version indicator.
+ 
+       For protocol 2 and above, a pickle must start with this opcode.
+       The argument is the protocol version, an int in range(2, 256).
+       """),
+ 
+     I(name='NEWOBJ',
+       code='\x81',
+       arg=None,
+       stack_before=[anyobject, anyobject],
+       stack_after=[anyobject],
+       proto=2,
+       doc="""Build an object instance.
+ 
+       The stack before should be thought of as containing a class
+       object followed by an argument tuple (the tuple being the stack
+       top).  Call these cls and args.  They are popped off the stack,
+       and the value returned by cls.__new__(cls, *args) is pushed back
+       onto the stack.
+       """),
+ 
+     I(name='EXT1',
+       code='\x82',
+       arg=uint1,
+       stack_before=[],
+       stack_after=[anyobject],
+       proto=2,
+       doc="""Extension code.
+ 
+       This code and the similar EXT2 and EXT4 allow using a registry
+       of popular objects that are pickled by name, typically classes.
+       It is envisioned that through a global negotiation and
+       registration process, third parties can set up a mapping between
+       ints and object names.
+ 
+       In order to guarantee pickle interchangeability, the extension
+       code registry ought to be global, although a range of codes may
+       be reserved for private use.
+       """),
+ 
+     I(name='EXT2',
+       code='\x83',
+       arg=uint2,
+       stack_before=[],
+       stack_after=[anyobject],
+       proto=2,
+       doc="""Extension code.
+ 
+       See EXT1.
+       """),
+ 
+     I(name='EXT4',
+       code='\x84',
+       arg=int4,
+       stack_before=[],
+       stack_after=[anyobject],
+       proto=2,
+       doc="""Extension code.
+ 
+       See EXT1.
+       """),
+ 
+     I(name='TUPLE1',
+       code='\x85',
+       arg=None,
+       stack_before=[anyobject],
+       stack_after=[pytuple],
+       proto=2,
+       doc="""One-tuple.
+ 
+       This code pops one value off the stack and pushes a tuple of
+       length 1 whose one item is that value back onto it.  IOW:
+ 
+           stack[-1] = tuple(stack[-1:])
+       """),
+ 
+     I(name='TUPLE2',
+       code='\x86',
+       arg=None,
+       stack_before=[anyobject, anyobject],
+       stack_after=[pytuple],
+       proto=2,
+       doc="""Two-tuple.
+ 
+       This code pops two values off the stack and pushes a tuple
+       of length 2 whose items are those values back onto it.  IOW:
+ 
+           stack[-2:] = [tuple(stack[-2:])]
+       """),
+ 
+     I(name='TUPLE3',
+       code='\x87',
+       arg=None,
+       stack_before=[anyobject, anyobject, anyobject],
+       stack_after=[pytuple],
+       proto=2,
+       doc="""Three-tuple.
+ 
+       This code pops three values off the stack and pushes a tuple
+       of length 3 whose items are those values back onto it.  IOW:
+ 
+           stack[-3:] = [tuple(stack[-3:])]
+       """),
+ 
+     I(name='NEWTRUE',
+       code='\x88',
+       arg=None,
+       stack_before=[],
+       stack_after=[pybool],
+       proto=2,
+       doc="""True.
+ 
+       Push True onto the stack."""),
+ 
+     I(name='NEWFALSE',
+       code='\x89',
+       arg=None,
+       stack_before=[],
+       stack_after=[pybool],
+       proto=2,
+       doc="""False.
+ 
+       Push False onto the stack."""),
+ 
+     I(name="LONG1",
+       code='\x8a',
+       arg=long1,
+       stack_before=[],
+       stack_after=[pylong],
+       proto=2,
+       doc="""Long integer using one-byte length.
+ 
+       A more efficient encoding of a Python long; the long1 encoding
+       says it all."""),
+ 
+     I(name="LONG2",
+       code='\x8b',
+       arg=long2,
+       stack_before=[],
+       stack_after=[pylong],
+       proto=2,
+       doc="""Long integer using two-byte length.
+ 
+       A more efficient encoding of a Python long; the long2 encoding
+       says it all."""),
+ 
+     I(name="LONG4",
+       code='\x8c',
+       arg=long4,
+       stack_before=[],
+       stack_after=[pylong],
+       proto=2,
+       doc="""Long integer using four-byte length.
+ 
+       A more efficient encoding of a Python long; the long4 encoding
+       says it all."""),
+ 
  ]
  del I