[pypy-svn] r12988 - in pypy/dist/pypy: objspace/flow translator/pickle

tismer at codespeak.net tismer at codespeak.net
Wed Jun 1 21:52:12 CEST 2005


Author: tismer
Date: Wed Jun  1 21:52:12 2005
New Revision: 12988

Modified:
   pypy/dist/pypy/objspace/flow/model.py
   pypy/dist/pypy/translator/pickle/genpickle.py
Log:
pickling works in principle.

todo:

- add something to translator that reduces what we save

- add tests (only makes sense once the monster fits into memory)

Big problem:
I generate a 50 MB Python file. When I try to import it, Python
grows to almost 1.6 GB, swaps itself to death, and finally crashes
with an error that I cannot debug since my space is filled.
No idea why it crashes; maybe the code object is just too large.
The crash happens *before* a .pyc file is created!


Modified: pypy/dist/pypy/objspace/flow/model.py
==============================================================================
--- pypy/dist/pypy/objspace/flow/model.py	(original)
+++ pypy/dist/pypy/objspace/flow/model.py	Wed Jun  1 21:52:12 2005
@@ -8,7 +8,7 @@
 
 """
     memory size before and after introduction of __slots__
-    using targetpypymain
+    using targetpypymain with -no-c
 
     slottified          annotation  ann+genc
     -------------------------------------------
@@ -26,9 +26,28 @@
     Probably an effect of less fragmentation.
 """
 
+COUNTOBJECTS = False
+
 __metaclass__ = type
 
-class FunctionGraph:
+class Missing:
+    pass
+
+class Slotted:
+    __slots__ = []
+    from copy_reg import _slotnames
+    _slotnames = classmethod(_slotnames)
+    def __getstate__(self):
+        names = self._slotnames()
+        return tuple([getattr(self, name, Missing) for name in names])
+    def __setstate__(self, args):
+        names = self._slotnames()
+        [setattr(self, name, value) for name, value in zip(names, args)
+         if value is not Missing]
+        
+class FunctionGraph(Slotted):
+    __slots__ = """func source name startblock returnblock exceptblock""".split()
+    
     def __init__(self, name, startblock, return_var=None):
         self.name        = name    # function name (possibly mangled already)
         self.startblock  = startblock
@@ -69,7 +88,7 @@
         from pypy.translator.tool.graphpage import SingleGraphPage
         SingleGraphPage(self).display()
 
-class Link:
+class Link(Slotted):
 
     __slots__ = """args target exitcase prevblock
                 last_exception last_exc_value""".split()
@@ -110,7 +129,7 @@
     def __repr__(self):
         return "link from %s to %s" % (str(self.prevblock), str(self.target))
 
-class Block:
+class Block(Slotted):
     __slots__ = """isstartblock inputargs operations exitswitch
                 exits exc_handler""".split()
     
@@ -180,12 +199,13 @@
         self.exits = exits
 
 
-class Variable:
+class Variable(Slotted):
     __slots__ = ["_name", "concretetype"]
-    
+
     countall = 0
-    countmax = 0
-    countcurr = 0
+    if COUNTOBJECTS:
+        countmax = 0
+        countcurr = 0
 
     def name(self):
         name = self._name
@@ -201,13 +221,15 @@
     def __init__(self, name=None):
         self._name = Variable.countall
         Variable.countall += 1
-        Variable.countcurr += 1
-        Variable.countmax = max(Variable.countmax, Variable.countcurr)
+        if COUNTOBJECTS:
+            Variable.countcurr += 1
+            Variable.countmax = max(Variable.countmax, Variable.countcurr)
         if name is not None:
             self.rename(name)
 
-    def __del__(self):
-        Variable.countcurr -= 1
+    if COUNTOBJECTS:
+        def __del__(self):
+            Variable.countcurr -= 1
 
     def __repr__(self):
         return '%s' % self.name
@@ -228,18 +250,18 @@
         self._name = name + '_' + self.name[1:]
 
 
-class Constant(Hashable):
+class Constant(Hashable, Slotted):
     __slots__ = ["concretetype"]
 
 
-class SpaceOperation:
+class SpaceOperation(Slotted):
     __slots__ = "opname args result offset".split()
-    
-    def __init__(self, opname, args, result):
+
+    def __init__(self, opname, args, result, offset=-1):
         self.opname = opname      # operation name
         self.args   = list(args)  # mixed list of var/const
         self.result = result      # either Variable or Constant instance
-        self.offset = -1          # offset in code string, to be added later
+        self.offset = offset      # offset in code string
 
     def __eq__(self, other):
         return (self.__class__ is other.__class__ and 
@@ -256,11 +278,12 @@
     def __repr__(self):
         return "%r = %s(%s)" % (self.result, self.opname, ", ".join(map(repr, self.args)))
 
-class Atom(object):
+class Atom:
     def __init__(self, name):
         self.name = name
     def __repr__(self):
         return self.name
+
 last_exception = Atom('last_exception')
 # if Block().exitswitch == Constant(last_exception), it means that we are
 # interested in catching the exception that the *last operation* of the

Modified: pypy/dist/pypy/translator/pickle/genpickle.py
==============================================================================
--- pypy/dist/pypy/translator/pickle/genpickle.py	(original)
+++ pypy/dist/pypy/translator/pickle/genpickle.py	Wed Jun  1 21:52:12 2005
@@ -17,6 +17,7 @@
 import pickle
 
 from types import *
+import types
 
 # ____________________________________________________________
 
@@ -90,15 +91,10 @@
         return name
 
     def nameof_module(self, value):
-        assert value is os or not hasattr(value, "__file__") or \
-               not (value.__file__.endswith('.pyc') or
-                    value.__file__.endswith('.py') or
-                    value.__file__.endswith('.pyo')), \
-               "%r is not a builtin module (probably :)"%value
+        # all allowed here, we reproduce ourselves
         name = self.uniquename('mod%s'%value.__name__)
         self.initcode_python(name, "__import__(%r)" % (value.__name__,))
         return name
-        
 
     def nameof_int(self, value):
         return repr(value)
@@ -156,6 +152,7 @@
                 func, ob, typ))
             return name
 
+    # old version:
     def should_translate_attr(self, pbc, attr):
         ann = self.translator.annotator
         if ann is None or isinstance(pbc, ObjSpace):
@@ -169,16 +166,27 @@
             return True
         return False
 
+    # new version: save if we don't know
+    def should_translate_attr(self, pbc, attr):
+        ann = self.translator.annotator
+        if ann:
+            classdef = ann.getuserclasses().get(pbc.__class__)
+        else:
+            classdef = None
+        ignore = getattr(pbc.__class__, 'NOT_RPYTHON_ATTRIBUTES', [])
+        if attr in ignore:
+            return False
+        if classdef:
+            return classdef.about_attribute(attr) is not None
+        # by default, render if we don't know anything
+        return True
+
     def nameof_builtin_function_or_method(self, func):
         if func.__self__ is None:
             # builtin function
             # where does it come from? Python2.2 doesn't have func.__module__
             for modname, module in sys.modules.items():
-                if hasattr(module, '__file__'):
-                    if (module.__file__.endswith('.py') or
-                        module.__file__.endswith('.pyc') or
-                        module.__file__.endswith('.pyo')):
-                        continue    # skip non-builtin modules
+                # here we don't ignore extension modules
                 if func is getattr(module, func.__name__, None):
                     break
             else:
@@ -280,6 +288,8 @@
         type(type.__dict__['__dict__']): "type(type.__dict__['__dict__'])",
         # type 'member_descriptor':
         type(type.__dict__['__basicsize__']): "type(type.__dict__['__basicsize__'])",
+        # type 'instancemethod':
+        type(Exception().__init__): 'type(Exception().__init__)',
         }
 
     def nameof_type(self, cls):
@@ -288,6 +298,13 @@
         name = self.uniquename('gtype_%s' % cls.__name__)
         if getattr(__builtin__, cls.__name__, None) is cls:
             expr = cls.__name__    # type available from __builtin__
+        elif cls in types.__dict__.values():
+            for key, value in types.__dict__.items():
+                if value is cls:
+                    break
+            self.initcode.append('from types import %s as %s' % (
+                key, name))
+            return name
         else:
             expr = self.typename_mapping[cls]
         self.initcode_python(name, expr)
@@ -318,7 +335,7 @@
             try:
                 __import__(module)
                 mod = sys.modules[module]
-            except (ImportError, KeyError):
+            except (ImportError, KeyError, TypeError):
                 pass
             else:
                 if dic is mod.__dict__:
@@ -363,7 +380,14 @@
             base_class = None
             base = cls
         def initinstance():
-            content = instance.__dict__.items()
+            if hasattr(instance, '__setstate__'):
+                # the instance knows what to do
+                args = self.nameof(restorestate)
+                yield '%s.__setstate__(%s)' % (name, args)
+                return
+            assert type(restorestate) is dict, (
+                "%s has no dict and no __setstate__" % name)
+            content = restorestate.items()
             content.sort()
             for key, value in content:
                 if self.should_translate_attr(instance, key):
@@ -374,18 +398,23 @@
             restorer = reduced[0]
             restorename = self.save_global(restorer)
             restoreargs = reduced[1]
+            if len(reduced) > 2:
+                restorestate = reduced[2]
+            else:
+                restorestate = None
             # ignore possible dict, handled later by initinstance filtering
             # in other cases, we expect that the class knows what to pickle.
         else:
             restoreargs = (base, cls)
             restorename = '%s.__new__' % base
+            restorestate = instance.__dict__
         restoreargsname = self.nameof(restoreargs)
-        if isinstance(cls, type):
+        if isinstance(klass, type):
             self.initcode.append('%s = %s(*%s)' % (name, restorename,
                                                    restoreargsname))
         else:
             self.initcode.append('%s = new.instance(%s)' % (name, cls))
-        if hasattr(instance, '__dict__'):
+        if restorestate is not None:
             self.later(initinstance())
         return name
 
@@ -461,7 +490,7 @@
         self.initcode.append('%s = %r' % (lnostrname, lnostr))
         argobj = self.nameof(args)
         codeobj = self.uniquename('gcode_' + code.co_name)
-        self.initcode.append('%s = new.code(%s)' % (codeobj, argobj))
+        self.initcode.append('%s = new.code(*%s)' % (codeobj, argobj))
         return codeobj
 
     def nameof_file(self, fil):



More information about the Pypy-commit mailing list