[Python-checkins] python/nondist/sandbox/pickletools pickletools.py,1.9,1.10
tim_one@users.sourceforge.net
tim_one@users.sourceforge.net
Sat, 25 Jan 2003 21:09:36 -0800
Update of /cvsroot/python/python/nondist/sandbox/pickletools
In directory sc8-pr-cvs1:/tmp/cvs-serv12571
Modified Files:
pickletools.py
Log Message:
Add the 3 string spellings.
Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/pickletools/pickletools.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** pickletools.py 26 Jan 2003 04:27:03 -0000 1.9
--- pickletools.py 26 Jan 2003 05:09:34 -0000 1.10
***************
*** 107,112 ****
# the opcode stream, immediately following an opcode.
! UP_TO_NEWLINE = -1 # represents the "number of bytes" consumed by an
! # argument delimited by the next newline character
class ArgumentDescriptor(object):
--- 107,117 ----
# the opcode stream, immediately following an opcode.
! # Represents the number of bytes consumed by an argument delimited by the
! # next newline character.
! UP_TO_NEWLINE = -1
!
! # Represents the number of bytes consumed by a two-argument opcode where
! # the first argument gives the number of bytes in the second argument.
! TAKEN_FROM_ARGUMENT = -2
class ArgumentDescriptor(object):
***************
*** 132,136 ****
self.name = name
! assert isinstance(n, int) and (n >= 0 or n is UP_TO_NEWLINE)
self.n = n
--- 137,144 ----
self.name = name
! assert isinstance(n, int) and (n >= 0 or
! n is UP_TO_NEWLINE or
! n is TAKEN_FROM_ARGUMENT)
!
self.n = n
***************
*** 203,216 ****
! def read_stringnl(f, decode=True):
"""
>>> import StringIO
! >>> read_stringnl(StringIO.StringIO("abcd\\nefg\\n"))
'abcd'
>>> read_stringnl(StringIO.StringIO("\\n"))
''
! >>> read_stringnl(StringIO.StringIO("abcd"))
Traceback (most recent call last):
...
--- 211,232 ----
! def read_stringnl(f, decode=True, stripquotes=True):
"""
>>> import StringIO
! >>> read_stringnl(StringIO.StringIO("'abcd'\\nefg\\n"))
'abcd'
>>> read_stringnl(StringIO.StringIO("\\n"))
+ Traceback (most recent call last):
+ ...
+ ValueError: no string quotes around ''
+
+ >>> read_stringnl(StringIO.StringIO("\\n"), stripquotes=False)
''
! >>> read_stringnl(StringIO.StringIO("''\\n"))
! ''
!
! >>> read_stringnl(StringIO.StringIO('"abcd"'))
Traceback (most recent call last):
...
***************
*** 218,222 ****
Embedded escapes are undone in the result.
! >>> read_stringnl(StringIO.StringIO("a\\\\nb\\x00c\\td\\ne"))
'a\\nb\\x00c\\td'
"""
--- 234,238 ----
Embedded escapes are undone in the result.
! >>> read_stringnl(StringIO.StringIO("'a\\\\nb\\x00c\\td'\\n'e'"))
'a\\nb\\x00c\\td'
"""
***************
*** 226,229 ****
--- 242,257 ----
raise ValueError("no newline found when trying to read stringnl")
data = data[:-1] # lose the newline
+
+ if stripquotes:
+ for q in "'\"":
+ if data.startswith(q):
+ if not data.endswith(q):
+ raise ValueError("string quote %r not found at both "
+ "ends of %r" % (q, data))
+ data = data[1:-1]
+ break
+ else:
+ raise ValueError("no string quotes around %r" % data)
+
# I'm not sure when 'string_escape' was added to the std codecs; it's
# crazy not to use it if it's there.
***************
*** 238,245 ****
doc="""A newline-terminated string.
! This is a repr-style string, with embedded escapes.
""")
def read_decimalnl_short(f):
"""
--- 266,336 ----
doc="""A newline-terminated string.
! This is a repr-style string, with embedded escapes, and
! bracketing quotes.
""")
+ def read_string4(f):
+ """
+ >>> import StringIO
+ >>> read_string4(StringIO.StringIO("\\x00\\x00\\x00\\x00abc"))
+ ''
+ >>> read_string4(StringIO.StringIO("\\x03\\x00\\x00\\x00abcdef"))
+ 'abc'
+ >>> read_string4(StringIO.StringIO("\\x00\\x00\\x00\\x03abcdef"))
+ Traceback (most recent call last):
+ ...
+ ValueError: expected 50331648 bytes in a string4, but only 6 remain
+ """
+
+ n = read_int4(f)
+ if n < 0:
+ raise ValueError("string4 byte count < 0: %d" % n)
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a string4, but only %d remain" %
+ (n, len(data)))
+
+ string4 = ArgumentDescriptor(
+ name="string4",
+ n=TAKEN_FROM_ARGUMENT,
+ reader=read_string4,
+ doc="""A counted string.
+
+ The first argument is a 4-byte little-endian signed int giving
+ the number of bytes in the string, and the second argument is
+ that many bytes.
+ """)
+
+
+ def read_string1(f):
+ """
+ >>> import StringIO
+ >>> read_string1(StringIO.StringIO("\\x00"))
+ ''
+ >>> read_string1(StringIO.StringIO("\\x03abcdef"))
+ 'abc'
+ """
+
+ n = read_uint1(f)
+ assert n >= 0
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a string1, but only %d remain" %
+ (n, len(data)))
+
+ string1 = ArgumentDescriptor(
+ name="string1",
+ n=TAKEN_FROM_ARGUMENT,
+ reader=read_string1,
+ doc="""A counted string.
+
+ The first argument is a 1-byte unsigned int giving the number
+ of bytes in the string, and the second argument is that many
+ bytes.
+ """)
+
def read_decimalnl_short(f):
"""
***************
*** 254,258 ****
"""
! s = read_stringnl(f, decode=False)
if s.endswith("L"):
raise ValueError("trailing 'L' not allowed in %r" % s)
--- 345,349 ----
"""
! s = read_stringnl(f, decode=False, stripquotes=False)
if s.endswith("L"):
raise ValueError("trailing 'L' not allowed in %r" % s)
***************
*** 289,293 ****
"""
! s = read_stringnl(f, decode=False)
if not s.endswith("L"):
raise ValueError("trailing 'L' required in %r" % s)
--- 380,384 ----
"""
! s = read_stringnl(f, decode=False, stripquotes=False)
if not s.endswith("L"):
raise ValueError("trailing 'L' required in %r" % s)
***************
*** 325,329 ****
-1.25
"""
! s = read_stringnl(f, decode=False)
return float(s)
--- 416,420 ----
-1.25
"""
! s = read_stringnl(f, decode=False, stripquotes=False)
return float(s)
***************
*** 429,432 ****
--- 520,533 ----
doc="A Python float object.")
+ pystring = StackObject(
+ name='str',
+ obtype=str,
+ doc="A Python string object.")
+
+ pyunicode = StackObject(
+ name='unicode',
+ obtype=unicode,
+ doc="A Python Unicode string object.")
+
pynone = StackObject(
name="None",
***************
*** 532,536 ****
opcodes = [
! # Six ways to spell integers.
I(name='INT',
--- 633,637 ----
opcodes = [
! # Ways to spell integers.
I(name='INT',
***************
*** 540,544 ****
stack_after=[pyinteger_or_bool],
proto=0,
! doc="""Newline-terminated decimal integer literal.
The intent may have been that this always fit in a short Python int,
--- 641,645 ----
stack_after=[pyinteger_or_bool],
proto=0,
! doc="""Push an integer or bool. The argument is a decimal literal string.
The intent may have been that this always fit in a short Python int,
***************
*** 549,558 ****
Another difference is due to that, when bool was introduced as a
! distinct type in 2.3, builtin int singletons True and False were
! also added to 2.2.2. For compatibility in both directions, True gets
! pickled as INT + "I01\\n", and False as INT + "I00\\n". Leading zeroes
! are never produced for a genuine integer. The 2.3 (and later)
! unpicklers special-case these and return bool instead; earlier
! unpicklers ignore the leading "0" and return the int.
"""),
--- 650,659 ----
Another difference is due to that, when bool was introduced as a
! distinct type in 2.3, builtin names True and False were also added to
! 2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
! True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
! Leading zeroes are never produced for a genuine integer. The 2.3
! (and later) unpicklers special-case these and return bool instead;
! earlier unpicklers ignore the leading "0" and return the int.
"""),
***************
*** 563,567 ****
stack_after=[pylong],
proto=0,
! doc="""Newline-terminated decimal integer literal.
The same as INT, except that the literal ends with 'L', and always
--- 664,668 ----
stack_after=[pylong],
proto=0,
! doc="""Push a long integer. The argument is a decimal literal string.
The same as INT, except that the literal ends with 'L', and always
***************
*** 576,580 ****
stack_after=[pyint],
proto=1,
! doc="""Four-byte signed integer.
This handles the full range of Python (short) integers on a 32-bit
--- 677,681 ----
stack_after=[pyint],
proto=1,
! doc="""Push a four-byte signed integer.
This handles the full range of Python (short) integers on a 32-bit
***************
*** 590,594 ****
stack_after=[pyint],
proto=1,
! doc="""One-byte unsigned integer.
This is a space optimization for pickling very small non-negative ints,
--- 691,695 ----
stack_after=[pyint],
proto=1,
! doc="""Push a one-byte unsigned integer.
This is a space optimization for pickling very small non-negative ints,
***************
*** 602,606 ****
stack_after=[pyint],
proto=1,
! doc="""Two-byte unsigned integer.
This is a space optimization for pickling small positive ints, in
--- 703,707 ----
stack_after=[pyint],
proto=1,
! doc="""Push a two-byte unsigned integer.
This is a space optimization for pickling small positive ints, in
***************
*** 608,612 ****
"""),
! # Two ways to spell floats.
I(name='FLOAT',
--- 709,713 ----
"""),
! # Ways to spell floats.
I(name='FLOAT',
***************
*** 646,649 ****
--- 747,791 ----
"""),
+ # Ways to spell strings (8-bit, not Unicode).
+
+ I(name='STRING',
+ code='S',
+ args=[stringnl],
+ stack_before=[],
+ stack_after=[pystring],
+ proto=0,
+ doc="""Push a Python string object.
+
+ The argument is a repr-style string, with bracketing quote characters,
+ and perhaps embedded escapes. The argument extends until the next
+ newline character.
+ """),
+
+ I(name='BINSTRING',
+ code='T',
+ args=[string4],
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 4-byte little-endian signed int
+ giving the number of bytes in the string, and the second is that many
+ bytes, which are taken literally as the string content.
+ """),
+
+ I(name='SHORT_BINSTRING',
+ code='U',
+ args=[string1],
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 1-byte unsigned int giving
+ the number of bytes in the string, and the second is that many bytes,
+ which are taken literally as the string content.
+ """),
+
# A way to spell None.
***************
*** 737,773 ****
I(name='REDUCE',
code='R',
- args=[],
- stack_before=[],
- stack_after=[],
- proto=0,
- doc="""XXX One-line description goes here.
-
- XXX Doc body goes here.
- """),
-
- I(name='STRING',
- code='S',
- args=[],
- stack_before=[],
- stack_after=[],
- proto=0,
- doc="""XXX One-line description goes here.
-
- XXX Doc body goes here.
- """),
-
- I(name='BINSTRING',
- code='T',
- args=[],
- stack_before=[],
- stack_after=[],
- proto=0,
- doc="""XXX One-line description goes here.
-
- XXX Doc body goes here.
- """),
-
- I(name='SHORT_BINSTRING',
- code='U',
args=[],
stack_before=[],
--- 879,882 ----