[Python-checkins] python/nondist/sandbox/pickletools pickletools.py,1.9,1.10

Sat, 25 Jan 2003 21:09:36 -0800

Update of /cvsroot/python/python/nondist/sandbox/pickletools
In directory sc8-pr-cvs1:/tmp/cvs-serv12571

Modified Files:
	pickletools.py 
Log Message:
Add the 3 string spellings.

Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/pickletools/pickletools.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** pickletools.py	26 Jan 2003 04:27:03 -0000	1.9
--- pickletools.py	26 Jan 2003 05:09:34 -0000	1.10
***************
*** 107,112 ****
  # the opcode stream, immediately following an opcode.

! UP_TO_NEWLINE = -1   # represents the "number of bytes" consumed by an
!                      # argument delimited by the next newline character

  class ArgumentDescriptor(object):
--- 107,117 ----
  # the opcode stream, immediately following an opcode.

! # Represents the number of bytes consumed by an argument delimited by the
! # next newline character.
! UP_TO_NEWLINE = -1
! 
! # Represents the number of bytes consumed by a two-argument opcode where
! # the first argument gives the number of bytes in the second argument.
! TAKEN_FROM_ARGUMENT = -2

  class ArgumentDescriptor(object):
***************
*** 132,136 ****
          self.name = name

!         assert isinstance(n, int) and (n >= 0 or n is UP_TO_NEWLINE)
          self.n = n

--- 137,144 ----
          self.name = name

!         assert isinstance(n, int) and (n >= 0 or
!                                        n is UP_TO_NEWLINE or
!                                        n is TAKEN_FROM_ARGUMENT)
! 
          self.n = n

***************
*** 203,216 ****

! def read_stringnl(f, decode=True):
      """
      >>> import StringIO
!     >>> read_stringnl(StringIO.StringIO("abcd\\nefg\\n"))
      'abcd'

      >>> read_stringnl(StringIO.StringIO("\\n"))
      ''

!     >>> read_stringnl(StringIO.StringIO("abcd"))
      Traceback (most recent call last):
      ...
--- 211,232 ----

! def read_stringnl(f, decode=True, stripquotes=True):
      """
      >>> import StringIO
!     >>> read_stringnl(StringIO.StringIO("'abcd'\\nefg\\n"))
      'abcd'

      >>> read_stringnl(StringIO.StringIO("\\n"))
+     Traceback (most recent call last):
+     ...
+     ValueError: no string quotes around ''
+ 
+     >>> read_stringnl(StringIO.StringIO("\\n"), stripquotes=False)
      ''

!     >>> read_stringnl(StringIO.StringIO("''\\n"))
!     ''
! 
!     >>> read_stringnl(StringIO.StringIO('"abcd"'))
      Traceback (most recent call last):
      ...
***************
*** 218,222 ****

      Embedded escapes are undone in the result.
!     >>> read_stringnl(StringIO.StringIO("a\\\\nb\\x00c\\td\\ne"))
      'a\\nb\\x00c\\td'
      """
--- 234,238 ----

      Embedded escapes are undone in the result.
!     >>> read_stringnl(StringIO.StringIO("'a\\\\nb\\x00c\\td'\\n'e'"))
      'a\\nb\\x00c\\td'
      """
***************
*** 226,229 ****
--- 242,257 ----
          raise ValueError("no newline found when trying to read stringnl")
      data = data[:-1]    # lose the newline
+ 
+     if stripquotes:
+         for q in "'\"":
+             if data.startswith(q):
+                 if not data.endswith(q):
+                     raise ValueError("string quote %r not found at both "
+                                      "ends of %r" % (q, data))
+                 data = data[1:-1]
+                 break
+         else:
+             raise ValueError("no string quotes around %r" % data)
+ 
      # I'm not sure when 'string_escape' was added to the std codecs; it's
      # crazy not to use it if it's there.
***************
*** 238,245 ****
                 doc="""A newline-terminated string.

!                    This is a repr-style string, with embedded escapes.
                     """)

  def read_decimalnl_short(f):
      """
--- 266,336 ----
                 doc="""A newline-terminated string.

!                    This is a repr-style string, with embedded escapes, and
!                    bracketing quotes.
                     """)

+ def read_string4(f):
+     """
+     >>> import StringIO
+     >>> read_string4(StringIO.StringIO("\\x00\\x00\\x00\\x00abc"))
+     ''
+     >>> read_string4(StringIO.StringIO("\\x03\\x00\\x00\\x00abcdef"))
+     'abc'
+     >>> read_string4(StringIO.StringIO("\\x00\\x00\\x00\\x03abcdef"))
+     Traceback (most recent call last):
+     ...
+     ValueError: expected 50331648 bytes in a string4, but only 6 remain
+     """
+ 
+     n = read_int4(f)
+     if n < 0:
+         raise ValueError("string4 byte count < 0: %d" % n)
+     data = f.read(n)
+     if len(data) == n:
+         return data
+     raise ValueError("expected %d bytes in a string4, but only %d remain" %
+                      (n, len(data)))
+ 
+ string4 = ArgumentDescriptor(
+               name="string4",
+               n=TAKEN_FROM_ARGUMENT,
+               reader=read_string4,
+               doc="""A counted string.
+ 
+               The first argument is a 4-byte little-endian signed int giving
+               the number of bytes in the string, and the second argument is
+               that many bytes.
+               """)
+ 
+ 
+ def read_string1(f):
+     """
+     >>> import StringIO
+     >>> read_string1(StringIO.StringIO("\\x00"))
+     ''
+     >>> read_string1(StringIO.StringIO("\\x03abcdef"))
+     'abc'
+     """
+ 
+     n = read_uint1(f)
+     assert n >= 0
+     data = f.read(n)
+     if len(data) == n:
+         return data
+     raise ValueError("expected %d bytes in a string1, but only %d remain" %
+                      (n, len(data)))
+ 
+ string1 = ArgumentDescriptor(
+               name="string1",
+               n=TAKEN_FROM_ARGUMENT,
+               reader=read_string1,
+               doc="""A counted string.
+ 
+               The first argument is a 1-byte unsigned int giving the number
+               of bytes in the string, and the second argument is that many
+               bytes.
+               """)
+ 
  def read_decimalnl_short(f):
      """
***************
*** 254,258 ****
      """

!     s = read_stringnl(f, decode=False)
      if s.endswith("L"):
          raise ValueError("trailing 'L' not allowed in %r" % s)
--- 345,349 ----
      """

!     s = read_stringnl(f, decode=False, stripquotes=False)
      if s.endswith("L"):
          raise ValueError("trailing 'L' not allowed in %r" % s)
***************
*** 289,293 ****
      """

!     s = read_stringnl(f, decode=False)
      if not s.endswith("L"):
          raise ValueError("trailing 'L' required in %r" % s)
--- 380,384 ----
      """

!     s = read_stringnl(f, decode=False, stripquotes=False)
      if not s.endswith("L"):
          raise ValueError("trailing 'L' required in %r" % s)
***************
*** 325,329 ****
      -1.25
      """
!     s = read_stringnl(f, decode=False)
      return float(s)

--- 416,420 ----
      -1.25
      """
!     s = read_stringnl(f, decode=False, stripquotes=False)
      return float(s)

***************
*** 429,432 ****
--- 520,533 ----
                doc="A Python float object.")

+ pystring = StackObject(
+                name='str',
+                obtype=str,
+                doc="A Python string object.")
+ 
+ pyunicode = StackObject(
+                 name='unicode',
+                 obtype=unicode,
+                 doc="A Python Unicode string object.")
+ 
  pynone = StackObject(
               name="None",
***************
*** 532,536 ****
  opcodes = [

!     # Six ways to spell integers.

      I(name='INT',
--- 633,637 ----
  opcodes = [

!     # Ways to spell integers.

      I(name='INT',
***************
*** 540,544 ****
        stack_after=[pyinteger_or_bool],
        proto=0,
!       doc="""Newline-terminated decimal integer literal.

        The intent may have been that this always fit in a short Python int,
--- 641,645 ----
        stack_after=[pyinteger_or_bool],
        proto=0,
!       doc="""Push an integer or bool.  The argument is a decimal literal string.

        The intent may have been that this always fit in a short Python int,
***************
*** 549,558 ****

        Another difference is due to that, when bool was introduced as a
!       distinct type in 2.3, builtin int singletons True and False were
!       also added to 2.2.2.  For compatibility in both directions, True gets
!       pickled as INT + "I01\\n", and False as INT + "I00\\n".  Leading zeroes
!       are never produced for a genuine integer.  The 2.3 (and later)
!       unpicklers special-case these and return bool instead; earlier
!       unpicklers ignore the leading "0" and return the int.
        """),

--- 650,659 ----

        Another difference is due to that, when bool was introduced as a
!       distinct type in 2.3, builtin names True and False were also added to
!       2.2.2, mapping to ints 1 and 0.  For compatibility in both directions,
!       True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
!       Leading zeroes are never produced for a genuine integer.  The 2.3
!       (and later) unpicklers special-case these and return bool instead;
!       earlier unpicklers ignore the leading "0" and return the int.
        """),

***************
*** 563,567 ****
        stack_after=[pylong],
        proto=0,
!       doc="""Newline-terminated decimal integer literal.

        The same as INT, except that the literal ends with 'L', and always
--- 664,668 ----
        stack_after=[pylong],
        proto=0,
!       doc="""Push a long integer.  The argument is a decimal literal string.

        The same as INT, except that the literal ends with 'L', and always
***************
*** 576,580 ****
        stack_after=[pyint],
        proto=1,
!       doc="""Four-byte signed integer.

        This handles the full range of Python (short) integers on a 32-bit
--- 677,681 ----
        stack_after=[pyint],
        proto=1,
!       doc="""Push a four-byte signed integer.

        This handles the full range of Python (short) integers on a 32-bit
***************
*** 590,594 ****
        stack_after=[pyint],
        proto=1,
!       doc="""One-byte unsigned integer.

        This is a space optimization for pickling very small non-negative ints,
--- 691,695 ----
        stack_after=[pyint],
        proto=1,
!       doc="""Push a one-byte unsigned integer.

        This is a space optimization for pickling very small non-negative ints,
***************
*** 602,606 ****
        stack_after=[pyint],
        proto=1,
!       doc="""Two-byte unsigned integer.

        This is a space optimization for pickling small positive ints, in
--- 703,707 ----
        stack_after=[pyint],
        proto=1,
!       doc="""Push a two-byte unsigned integer.

        This is a space optimization for pickling small positive ints, in
***************
*** 608,612 ****
        """),

!     # Two ways to spell floats.

      I(name='FLOAT',
--- 709,713 ----
        """),

!     # Ways to spell floats.

      I(name='FLOAT',
***************
*** 646,649 ****
--- 747,791 ----
        """),

+     # Ways to spell strings (8-bit, not Unicode).
+ 
+     I(name='STRING',
+       code='S',
+       args=[stringnl],
+       stack_before=[],
+       stack_after=[pystring],
+       proto=0,
+       doc="""Push a Python string object.
+ 
+       The argument is a repr-style string, with bracketing quote characters,
+       and perhaps embedded escapes.  The argument extends until the next
+       newline character.
+       """),
+ 
+     I(name='BINSTRING',
+       code='T',
+       args=[string4],
+       stack_before=[],
+       stack_after=[pystring],
+       proto=1,
+       doc="""Push a Python string object.
+ 
+       There are two arguments:  the first is a 4-byte little-endian signed int
+       giving the number of bytes in the string, and the second is that many
+       bytes, which are taken literally as the string content.
+       """),
+ 
+     I(name='SHORT_BINSTRING',
+       code='U',
+       args=[string1],
+       stack_before=[],
+       stack_after=[pystring],
+       proto=1,
+       doc="""Push a Python string object.
+ 
+       There are two arguments:  the first is a 1-byte unsigned int giving
+       the number of bytes in the string, and the second is that many bytes,
+       which are taken literally as the string content.
+       """),
+ 
      # A way to spell None.

***************
*** 737,773 ****
      I(name='REDUCE',
        code='R',
-       args=[],
-       stack_before=[],
-       stack_after=[],
-       proto=0,
-       doc="""XXX One-line description goes here.
- 
-       XXX Doc body goes here.
-       """),
- 
-     I(name='STRING',
-       code='S',
-       args=[],
-       stack_before=[],
-       stack_after=[],
-       proto=0,
-       doc="""XXX One-line description goes here.
- 
-       XXX Doc body goes here.
-       """),
- 
-     I(name='BINSTRING',
-       code='T',
-       args=[],
-       stack_before=[],
-       stack_after=[],
-       proto=0,
-       doc="""XXX One-line description goes here.
- 
-       XXX Doc body goes here.
-       """),
- 
-     I(name='SHORT_BINSTRING',
-       code='U',
        args=[],
        stack_before=[],
--- 879,882 ----