[pypy-commit] pypy numpy-record-dtypes: merged upstream
alex_gaynor
noreply at buildbot.pypy.org
Sun Mar 4 03:26:22 CET 2012
Author: Alex Gaynor <alex.gaynor at gmail.com>
Branch: numpy-record-dtypes
Changeset: r53161:070a10dbf7b0
Date: 2012-03-03 21:08 -0500
http://bitbucket.org/pypy/pypy/changeset/070a10dbf7b0/
Log: merged upstream
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -2,16 +2,95 @@
# One-liner implementation of cPickle
#
-from pickle import *
+from pickle import Pickler, dump, dumps, PickleError, PicklingError, UnpicklingError, _EmptyClass
from pickle import __doc__, __version__, format_version, compatible_formats
+from types import *
+from copy_reg import dispatch_table
+from copy_reg import _extension_registry, _inverted_registry, _extension_cache
+import marshal, struct, sys
try: from __pypy__ import builtinify
except ImportError: builtinify = lambda f: f
+# These are purely informational; no code uses these.
+format_version = "2.0" # File format version we write
+compatible_formats = ["1.0", # Original protocol 0
+ "1.1", # Protocol 0 with INST added
+ "1.2", # Original protocol 1
+ "1.3", # Protocol 1 with BINFLOAT added
+ "2.0", # Protocol 2
+ ] # Old format versions we can read
+
+# Keep in sync with pickle.py. This is the highest protocol number we
+# know how to read.
+HIGHEST_PROTOCOL = 2
BadPickleGet = KeyError
UnpickleableError = PicklingError
+MARK = ord('(') # push special markobject on stack
+STOP = ord('.') # every pickle ends with STOP
+POP = ord('0') # discard topmost stack item
+POP_MARK = ord('1') # discard stack top through topmost markobject
+DUP = ord('2') # duplicate top stack item
+FLOAT = ord('F') # push float object; decimal string argument
+INT = ord('I') # push integer or bool; decimal string argument
+BININT = ord('J') # push four-byte signed int
+BININT1 = ord('K') # push 1-byte unsigned int
+LONG = ord('L') # push long; decimal string argument
+BININT2 = ord('M') # push 2-byte unsigned int
+NONE = ord('N') # push None
+PERSID = ord('P') # push persistent object; id is taken from string arg
+BINPERSID = ord('Q') # push persistent object; id is taken from stack
+REDUCE = ord('R') # apply callable to argtuple, both on stack
+STRING = ord('S') # push string; NL-terminated string argument
+BINSTRING = ord('T') # push string; counted binary string argument
+SHORT_BINSTRING = ord('U') # push string; counted binary string argument < 256 bytes
+UNICODE = ord('V') # push Unicode string; raw-unicode-escaped argument
+BINUNICODE = ord('X') # push Unicode string; counted UTF-8 string argument
+APPEND = ord('a') # append stack top to list below it
+BUILD = ord('b') # call __setstate__ or __dict__.update()
+GLOBAL = ord('c') # push self.find_class(modname, name); 2 string args
+DICT = ord('d') # build a dict from stack items
+EMPTY_DICT = ord('}') # push empty dict
+APPENDS = ord('e') # extend list on stack by topmost stack slice
+GET = ord('g') # push item from memo on stack; index is string arg
+BINGET = ord('h') # push item from memo on stack; index is 1-byte arg
+INST = ord('i') # build & push class instance
+LONG_BINGET = ord('j') # push item from memo on stack; index is 4-byte arg
+LIST = ord('l') # build list from topmost stack items
+EMPTY_LIST = ord(']') # push empty list
+OBJ = ord('o') # build & push class instance
+PUT = ord('p') # store stack top in memo; index is string arg
+BINPUT = ord('q') # store stack top in memo; index is 1-byte arg
+LONG_BINPUT = ord('r') # store stack top in memo; index is 4-byte arg
+SETITEM = ord('s') # add key+value pair to dict
+TUPLE = ord('t') # build tuple from topmost stack items
+EMPTY_TUPLE = ord(')') # push empty tuple
+SETITEMS = ord('u') # modify dict by adding topmost key+value pairs
+BINFLOAT = ord('G') # push float; arg is 8-byte float encoding
+
+TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
+FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
+
+# Protocol 2
+
+PROTO = ord('\x80') # identify pickle protocol
+NEWOBJ = ord('\x81') # build object by applying cls.__new__ to argtuple
+EXT1 = ord('\x82') # push object from extension registry; 1-byte index
+EXT2 = ord('\x83') # ditto, but 2-byte index
+EXT4 = ord('\x84') # ditto, but 4-byte index
+TUPLE1 = ord('\x85') # build 1-tuple from stack top
+TUPLE2 = ord('\x86') # build 2-tuple from two topmost stack items
+TUPLE3 = ord('\x87') # build 3-tuple from three topmost stack items
+NEWTRUE = ord('\x88') # push True
+NEWFALSE = ord('\x89') # push False
+LONG1 = ord('\x8a') # push long from < 256 bytes
+LONG4 = ord('\x8b') # push really big long
+
+_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+
+
# ____________________________________________________________
# XXX some temporary dark magic to produce pickled dumps that are
# closer to the ones produced by cPickle in CPython
@@ -44,3 +123,474 @@
file = StringIO()
Pickler(file, protocol).dump(obj)
return file.getvalue()
+
+# Why use struct.pack() for pickling but marshal.loads() for
+# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
+# marshal.loads() is twice as fast as struct.unpack()!
+mloads = marshal.loads
+
+# Unpickling machinery
+
+class Unpickler(object):
+
+ def __init__(self, file):
+ """This takes a file-like object for reading a pickle data stream.
+
+ The protocol version of the pickle is detected automatically, so no
+ proto argument is needed.
+
+ The file-like object must have two methods, a read() method that
+ takes an integer argument, and a readline() method that requires no
+ arguments. Both methods should return a string. Thus the file-like
+ object can be a file object opened for reading, a StringIO object,
+ or any other custom object that meets this interface.
+ """
+ self.readline = file.readline
+ self.read = file.read
+ self.memo = {}
+
+ def load(self):
+ """Read a pickled object representation from the open file.
+
+ Return the reconstituted object hierarchy specified in the file.
+ """
+ self.mark = object() # any new unique object
+ self.stack = []
+ self.append = self.stack.append
+ try:
+ key = ord(self.read(1))
+ while key != STOP:
+ self.dispatch[key](self)
+ key = ord(self.read(1))
+ except TypeError:
+ if self.read(1) == '':
+ raise EOFError
+ raise
+ return self.stack.pop()
+
+ # Return largest index k such that self.stack[k] is self.mark.
+ # If the stack doesn't contain a mark, eventually raises IndexError.
+ # This could be sped up by maintaining another stack, of indices at which
+ # the mark appears. For that matter, the latter stack would suffice,
+ # and we wouldn't need to push mark objects on self.stack at all.
+ # Doing so is probably a good thing, though, since if the pickle is
+ # corrupt (or hostile) we may get a clue from finding self.mark embedded
+ # in unpickled objects.
+ def marker(self):
+ k = len(self.stack)-1
+ while self.stack[k] is not self.mark: k -= 1
+ return k
+
+ dispatch = {}
+
+ def load_proto(self):
+ proto = ord(self.read(1))
+ if not 0 <= proto <= 2:
+ raise ValueError, "unsupported pickle protocol: %d" % proto
+ dispatch[PROTO] = load_proto
+
+ def load_persid(self):
+ pid = self.readline()[:-1]
+ self.append(self.persistent_load(pid))
+ dispatch[PERSID] = load_persid
+
+ def load_binpersid(self):
+ pid = self.stack.pop()
+ self.append(self.persistent_load(pid))
+ dispatch[BINPERSID] = load_binpersid
+
+ def load_none(self):
+ self.append(None)
+ dispatch[NONE] = load_none
+
+ def load_false(self):
+ self.append(False)
+ dispatch[NEWFALSE] = load_false
+
+ def load_true(self):
+ self.append(True)
+ dispatch[NEWTRUE] = load_true
+
+ def load_int(self):
+ data = self.readline()
+ if data == FALSE[1:]:
+ val = False
+ elif data == TRUE[1:]:
+ val = True
+ else:
+ try:
+ val = int(data)
+ except ValueError:
+ val = long(data)
+ self.append(val)
+ dispatch[INT] = load_int
+
+ def load_binint(self):
+ self.append(mloads('i' + self.read(4)))
+ dispatch[BININT] = load_binint
+
+ def load_binint1(self):
+ self.append(ord(self.read(1)))
+ dispatch[BININT1] = load_binint1
+
+ def load_binint2(self):
+ self.append(mloads('i' + self.read(2) + '\000\000'))
+ dispatch[BININT2] = load_binint2
+
+ def load_long(self):
+ self.append(long(self.readline()[:-1], 0))
+ dispatch[LONG] = load_long
+
+ def load_long1(self):
+ n = ord(self.read(1))
+ bytes = self.read(n)
+ self.append(decode_long(bytes))
+ dispatch[LONG1] = load_long1
+
+ def load_long4(self):
+ n = mloads('i' + self.read(4))
+ bytes = self.read(n)
+ self.append(decode_long(bytes))
+ dispatch[LONG4] = load_long4
+
+ def load_float(self):
+ self.append(float(self.readline()[:-1]))
+ dispatch[FLOAT] = load_float
+
+ def load_binfloat(self, unpack=struct.unpack):
+ self.append(unpack('>d', self.read(8))[0])
+ dispatch[BINFLOAT] = load_binfloat
+
+ def load_string(self):
+ rep = self.readline()
+ if len(rep) < 3:
+ raise ValueError, "insecure string pickle"
+ if rep[0] == "'" == rep[-2]:
+ rep = rep[1:-2]
+ elif rep[0] == '"' == rep[-2]:
+ rep = rep[1:-2]
+ else:
+ raise ValueError, "insecure string pickle"
+ self.append(rep.decode("string-escape"))
+ dispatch[STRING] = load_string
+
+ def load_binstring(self):
+ L = mloads('i' + self.read(4))
+ self.append(self.read(L))
+ dispatch[BINSTRING] = load_binstring
+
+ def load_unicode(self):
+ self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+ dispatch[UNICODE] = load_unicode
+
+ def load_binunicode(self):
+ L = mloads('i' + self.read(4))
+ self.append(unicode(self.read(L),'utf-8'))
+ dispatch[BINUNICODE] = load_binunicode
+
+ def load_short_binstring(self):
+ L = ord(self.read(1))
+ self.append(self.read(L))
+ dispatch[SHORT_BINSTRING] = load_short_binstring
+
+ def load_tuple(self):
+ k = self.marker()
+ self.stack[k:] = [tuple(self.stack[k+1:])]
+ dispatch[TUPLE] = load_tuple
+
+ def load_empty_tuple(self):
+ self.stack.append(())
+ dispatch[EMPTY_TUPLE] = load_empty_tuple
+
+ def load_tuple1(self):
+ self.stack[-1] = (self.stack[-1],)
+ dispatch[TUPLE1] = load_tuple1
+
+ def load_tuple2(self):
+ self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
+ dispatch[TUPLE2] = load_tuple2
+
+ def load_tuple3(self):
+ self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
+ dispatch[TUPLE3] = load_tuple3
+
+ def load_empty_list(self):
+ self.stack.append([])
+ dispatch[EMPTY_LIST] = load_empty_list
+
+ def load_empty_dictionary(self):
+ self.stack.append({})
+ dispatch[EMPTY_DICT] = load_empty_dictionary
+
+ def load_list(self):
+ k = self.marker()
+ self.stack[k:] = [self.stack[k+1:]]
+ dispatch[LIST] = load_list
+
+ def load_dict(self):
+ k = self.marker()
+ d = {}
+ items = self.stack[k+1:]
+ for i in range(0, len(items), 2):
+ key = items[i]
+ value = items[i+1]
+ d[key] = value
+ self.stack[k:] = [d]
+ dispatch[DICT] = load_dict
+
+ # INST and OBJ differ only in how they get a class object. It's not
+ # only sensible to do the rest in a common routine, the two routines
+ # previously diverged and grew different bugs.
+ # klass is the class to instantiate, and k points to the topmost mark
+ # object, following which are the arguments for klass.__init__.
+ def _instantiate(self, klass, k):
+ args = tuple(self.stack[k+1:])
+ del self.stack[k:]
+ instantiated = 0
+ if (not args and
+ type(klass) is ClassType and
+ not hasattr(klass, "__getinitargs__")):
+ try:
+ value = _EmptyClass()
+ value.__class__ = klass
+ instantiated = 1
+ except RuntimeError:
+ # In restricted execution, assignment to inst.__class__ is
+ # prohibited
+ pass
+ if not instantiated:
+ try:
+ value = klass(*args)
+ except TypeError, err:
+ raise TypeError, "in constructor for %s: %s" % (
+ klass.__name__, str(err)), sys.exc_info()[2]
+ self.append(value)
+
+ def load_inst(self):
+ module = self.readline()[:-1]
+ name = self.readline()[:-1]
+ klass = self.find_class(module, name)
+ self._instantiate(klass, self.marker())
+ dispatch[INST] = load_inst
+
+ def load_obj(self):
+ # Stack is ... markobject classobject arg1 arg2 ...
+ k = self.marker()
+ klass = self.stack.pop(k+1)
+ self._instantiate(klass, k)
+ dispatch[OBJ] = load_obj
+
+ def load_newobj(self):
+ args = self.stack.pop()
+ cls = self.stack[-1]
+ obj = cls.__new__(cls, *args)
+ self.stack[-1] = obj
+ dispatch[NEWOBJ] = load_newobj
+
+ def load_global(self):
+ module = self.readline()[:-1]
+ name = self.readline()[:-1]
+ klass = self.find_class(module, name)
+ self.append(klass)
+ dispatch[GLOBAL] = load_global
+
+ def load_ext1(self):
+ code = ord(self.read(1))
+ self.get_extension(code)
+ dispatch[EXT1] = load_ext1
+
+ def load_ext2(self):
+ code = mloads('i' + self.read(2) + '\000\000')
+ self.get_extension(code)
+ dispatch[EXT2] = load_ext2
+
+ def load_ext4(self):
+ code = mloads('i' + self.read(4))
+ self.get_extension(code)
+ dispatch[EXT4] = load_ext4
+
+ def get_extension(self, code):
+ nil = []
+ obj = _extension_cache.get(code, nil)
+ if obj is not nil:
+ self.append(obj)
+ return
+ key = _inverted_registry.get(code)
+ if not key:
+ raise ValueError("unregistered extension code %d" % code)
+ obj = self.find_class(*key)
+ _extension_cache[code] = obj
+ self.append(obj)
+
+ def find_class(self, module, name):
+ # Subclasses may override this
+ __import__(module)
+ mod = sys.modules[module]
+ klass = getattr(mod, name)
+ return klass
+
+ def load_reduce(self):
+ args = self.stack.pop()
+ func = self.stack[-1]
+ value = self.stack[-1](*args)
+ self.stack[-1] = value
+ dispatch[REDUCE] = load_reduce
+
+ def load_pop(self):
+ del self.stack[-1]
+ dispatch[POP] = load_pop
+
+ def load_pop_mark(self):
+ k = self.marker()
+ del self.stack[k:]
+ dispatch[POP_MARK] = load_pop_mark
+
+ def load_dup(self):
+ self.append(self.stack[-1])
+ dispatch[DUP] = load_dup
+
+ def load_get(self):
+ self.append(self.memo[self.readline()[:-1]])
+ dispatch[GET] = load_get
+
+ def load_binget(self):
+ i = ord(self.read(1))
+ self.append(self.memo[repr(i)])
+ dispatch[BINGET] = load_binget
+
+ def load_long_binget(self):
+ i = mloads('i' + self.read(4))
+ self.append(self.memo[repr(i)])
+ dispatch[LONG_BINGET] = load_long_binget
+
+ def load_put(self):
+ self.memo[self.readline()[:-1]] = self.stack[-1]
+ dispatch[PUT] = load_put
+
+ def load_binput(self):
+ i = ord(self.read(1))
+ self.memo[repr(i)] = self.stack[-1]
+ dispatch[BINPUT] = load_binput
+
+ def load_long_binput(self):
+ i = mloads('i' + self.read(4))
+ self.memo[repr(i)] = self.stack[-1]
+ dispatch[LONG_BINPUT] = load_long_binput
+
+ def load_append(self):
+ value = self.stack.pop()
+ self.stack[-1].append(value)
+ dispatch[APPEND] = load_append
+
+ def load_appends(self):
+ stack = self.stack
+ mark = self.marker()
+ lst = stack[mark - 1]
+ lst.extend(stack[mark + 1:])
+ del stack[mark:]
+ dispatch[APPENDS] = load_appends
+
+ def load_setitem(self):
+ stack = self.stack
+ value = stack.pop()
+ key = stack.pop()
+ dict = stack[-1]
+ dict[key] = value
+ dispatch[SETITEM] = load_setitem
+
+ def load_setitems(self):
+ stack = self.stack
+ mark = self.marker()
+ dict = stack[mark - 1]
+ for i in range(mark + 1, len(stack), 2):
+ dict[stack[i]] = stack[i + 1]
+
+ del stack[mark:]
+ dispatch[SETITEMS] = load_setitems
+
+ def load_build(self):
+ stack = self.stack
+ state = stack.pop()
+ inst = stack[-1]
+ setstate = getattr(inst, "__setstate__", None)
+ if setstate:
+ setstate(state)
+ return
+ slotstate = None
+ if isinstance(state, tuple) and len(state) == 2:
+ state, slotstate = state
+ if state:
+ try:
+ d = inst.__dict__
+ try:
+ for k, v in state.iteritems():
+ d[intern(k)] = v
+ # keys in state don't have to be strings
+ # don't blow up, but don't go out of our way
+ except TypeError:
+ d.update(state)
+
+ except RuntimeError:
+ # XXX In restricted execution, the instance's __dict__
+ # is not accessible. Use the old way of unpickling
+ # the instance variables. This is a semantic
+ # difference when unpickling in restricted
+ # vs. unrestricted modes.
+ # Note, however, that cPickle has never tried to do the
+ # .update() business, and always uses
+ # PyObject_SetItem(inst.__dict__, key, value) in a
+ # loop over state.items().
+ for k, v in state.items():
+ setattr(inst, k, v)
+ if slotstate:
+ for k, v in slotstate.items():
+ setattr(inst, k, v)
+ dispatch[BUILD] = load_build
+
+ def load_mark(self):
+ self.append(self.mark)
+ dispatch[MARK] = load_mark
+
+#from pickle import decode_long
+
+def decode_long(data):
+ r"""Decode a long from a two's complement little-endian binary string.
+
+ >>> decode_long('')
+ 0L
+ >>> decode_long("\xff\x00")
+ 255L
+ >>> decode_long("\xff\x7f")
+ 32767L
+ >>> decode_long("\x00\xff")
+ -256L
+ >>> decode_long("\x00\x80")
+ -32768L
+ >>> decode_long("\x80")
+ -128L
+ >>> decode_long("\x7f")
+ 127L
+ """
+
+ nbytes = len(data)
+ if nbytes == 0:
+ return 0L
+ ind = nbytes - 1
+ while ind and ord(data[ind]) == 0:
+ ind -= 1
+ n = ord(data[ind])
+ while ind:
+ n <<= 8
+ ind -= 1
+ if ord(data[ind]):
+ n += ord(data[ind])
+ if ord(data[nbytes - 1]) >= 128:
+ n -= 1L << (nbytes << 3)
+ return n
+
+def load(f):
+ return Unpickler(f).load()
+
+def loads(str):
+ f = StringIO(str)
+ return Unpickler(f).load()
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1032,8 +1032,8 @@
def __setstate(self, string):
if len(string) != 4 or not (1 <= ord(string[2]) <= 12):
raise TypeError("not enough arguments")
- yhi, ylo, self._month, self._day = map(ord, string)
- self._year = yhi * 256 + ylo
+ self._month, self._day = ord(string[2]), ord(string[3])
+ self._year = ord(string[0]) * 256 + ord(string[1])
def __reduce__(self):
return (self.__class__, self._getstate())
@@ -1421,9 +1421,10 @@
def __setstate(self, string, tzinfo):
if len(string) != 6 or ord(string[0]) >= 24:
raise TypeError("an integer is required")
- self._hour, self._minute, self._second, us1, us2, us3 = \
- map(ord, string)
- self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ self._hour, self._minute, self._second = ord(string[0]), \
+ ord(string[1]), ord(string[2])
+ self._microsecond = (((ord(string[3]) << 8) | \
+ ord(string[4])) << 8) | ord(string[5])
self._tzinfo = tzinfo
def __reduce__(self):
@@ -1903,10 +1904,11 @@
return (basestate, self._tzinfo)
def __setstate(self, string, tzinfo):
- (yhi, ylo, self._month, self._day, self._hour,
- self._minute, self._second, us1, us2, us3) = map(ord, string)
- self._year = yhi * 256 + ylo
- self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ (self._month, self._day, self._hour, self._minute,
+ self._second) = (ord(string[2]), ord(string[3]), ord(string[4]),
+ ord(string[5]), ord(string[6]))
+ self._year = ord(string[0]) * 256 + ord(string[1])
+ self._microsecond = (((ord(string[7]) << 8) | ord(string[8])) << 8) | ord(string[9])
self._tzinfo = tzinfo
def __reduce__(self):
diff --git a/pypy/module/select/test/test_ztranslation.py b/pypy/module/select/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_ztranslation.py
@@ -0,0 +1,5 @@
+
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test_select_translates():
+ checkmodule('select')
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -62,6 +62,14 @@
@jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
@enforceargs(None, None, int, int, int)
def copy_string_contents(src, dst, srcstart, dststart, length):
+ """Copies 'length' characters from the 'src' string to the 'dst'
+ string, starting at positions 'srcstart' and 'dststart' respectively."""
+ # xxx Warning: don't try to do this at home. It relies on a lot
+ # of details to be sure that it works correctly in all cases.
+ # Notably: no GC operation may happen at all from the first
+ # cast_ptr_to_adr() onward, because it might move the strings. The
+ # keepalive_until_here() calls are obscurely essential to make sure
+ # that the strings stay alive longer than the raw_memcopy().
assert srcstart >= 0
assert dststart >= 0
assert length >= 0
More information about the pypy-commit
mailing list