[pypy-svn] r13127 - in pypy/dist/pypy: objspace/flow translator/pickle

tismer at codespeak.net tismer at codespeak.net
Mon Jun 6 23:08:11 CEST 2005


Author: tismer
Date: Mon Jun  6 23:08:09 2005
New Revision: 13127

Added:
   pypy/dist/pypy/translator/pickle/loader.py   (contents, props changed)
   pypy/dist/pypy/translator/pickle/main.py   (contents, props changed)
   pypy/dist/pypy/translator/pickle/writer.py   (contents, props changed)
Modified:
   pypy/dist/pypy/objspace/flow/model.py
   pypy/dist/pypy/translator/pickle/genpickle.py
Log:
Changed a few things to make the generated source smaller.
It is now about 37 MB.

Added writers and loaders for plain text and zipfile.

Todo:

- write main, which decides which file format to use
- integrate with translate_pypy (will use main.load and main.save)
- write tests (eek, how do I test such stuff, any ideas?)


Modified: pypy/dist/pypy/objspace/flow/model.py
==============================================================================
--- pypy/dist/pypy/objspace/flow/model.py	(original)
+++ pypy/dist/pypy/objspace/flow/model.py	Mon Jun  6 23:08:09 2005
@@ -293,7 +293,13 @@
         return "%r = %s(%s)" % (self.result, self.opname, ", ".join(map(repr, self.args)))
 
     def __reduce_ex__(self, *args):
-        return SpaceOperation, (self.opname, self.args, self.result, self.offset)
+        # avoid lots of useless list entities
+        return _sop, (self.opname, self.result, self.offset) + tuple(self.args)
+    __reduce__ = __reduce_ex__
+
+# a small and efficient restorer
+def _sop(opname, result, offset, *args):
+    return SpaceOperation(opname, args, result, offset)
 
 class Atom:
     def __init__(self, name):

Modified: pypy/dist/pypy/translator/pickle/genpickle.py
==============================================================================
--- pypy/dist/pypy/translator/pickle/genpickle.py	(original)
+++ pypy/dist/pypy/translator/pickle/genpickle.py	Mon Jun  6 23:08:09 2005
@@ -3,6 +3,10 @@
 The purpose is to create something that allows
 to restart code generation after flowing and maybe
 annotation.
+
+The generated source appeared to be way too large
+for the CPython compiler. Therefore, we cut the
+source into pieces and compile them separately.
 """
 from __future__ import generators, division, nested_scopes
 import __future__
@@ -17,11 +21,12 @@
 from pypy.objspace.flow.flowcontext import SpamBlock, EggBlock
 from pypy.annotation.model import SomeInteger, SomeObject, SomeChar, SomeBool
 from pypy.annotation.model import SomeList, SomeString, SomeTuple
+from pypy.annotation.unaryop import SomeInstance
 from pypy.interpreter.baseobjspace import ObjSpace
 from pypy.translator.pickle import slotted
 
 from pickle import whichmodule, PicklingError
-from copy_reg import _reduce_ex, _reconstructor
+from copy_reg import _reconstructor
 
 import pickle
 
@@ -33,7 +38,7 @@
 
 class GenPickle:
 
-    def __init__(self, translator, outfile = None):
+    def __init__(self, translator, writer = None):
         self.translator = translator
         self.initcode = [
             'from __future__ import %s\n' % ', '.join(all_feature_names) +
@@ -56,8 +61,6 @@
         self.namespace.make_reserved_names('None False True')
         self.namespace.make_reserved_names('new types sys')
         self.namespace.make_reserved_names(' '.join(all_feature_names))
-        self.inline_consts = True # save lots of space
-        self._nesting = 0 # for formatting nested tuples etc.
         # we distinguish between the "user program" and other stuff.
         # "user program" will never use save_global.
         self.domains = (
@@ -68,7 +71,7 @@
             '__main__',
             )
         self.shortnames = {
-            SpaceOperation: 'S',
+            SpaceOperation: 'SOP',
             Variable:       'V',
             Constant:       'C',
             Block:          'B',
@@ -83,9 +86,9 @@
             SomeList:       'SL',
             SomeString:     'SS',
             SomeTuple:      'ST',
+            SomeInstance:   'SIN',
             }
-        self.outfile = outfile
-        self._partition = 1234
+        self.writer = writer
 
     def nameof(self, obj, debug=None, namehint=None):
         key = Constant(obj)
@@ -125,21 +128,20 @@
                 self.picklenames[key] = name
             return name
 
-    def nameofargs(self, tup):
+    def nameofargs(self, tup, plain_tuple = False):
         """ a string with the nameofs, concatenated """
-        if len(tup) < 5:
-            # see if there is nesting to be expected
-            for each in tup:
-                if type(each) is tuple:
-                    break
-            else:
-                return ', '.join([self.nameof(arg) for arg in tup])
-        # we always wrap into multi-lines, this is simple and readable
-        self._nesting += 1
-        space = '  ' * self._nesting
-        ret = '\n' + space + (',\n' + space).join(
-            [self.nameof(arg) for arg in tup]) + ',\n' + space
-        self._nesting -= 1
+        # see if we can build a compact representation
+        for each in tup:
+            if type(each) is tuple and len(each) > 2:
+                break
+        else:
+            ret = ', '.join([self.nameof(arg) for arg in tup])
+            if plain_tuple and len(tup) == 1:
+                ret += ','
+            if len(ret) <= 90:
+                return ret
+        ret = '\n ' + (',\n ').join(
+            [self.nameof(arg) for arg in tup]) + ',\n '
         return ret
 
     def uniquename(self, basename):
@@ -184,27 +186,18 @@
     # the compiler folds the consts the same way as we do.
     # note that true pickling is more exact, here.
     nameof_long = nameof_float = nameof_bool = nameof_NoneType = nameof_int
-
-    def nameof_str(self, value):
-        if self.inline_consts:
-            return repr(value)
-        name = self.uniquename('gstr_' + value[:32])
-        self.initcode_python(name, repr(value))
-        return name
-
-    def nameof_unicode(self, value):
-        if self.inline_consts:
-            return repr(value)
-        name = self.uniquename('guni_' + str(value[:32]))
-        self.initcode_python(name, repr(value))
-        return name
+    nameof_str = nameof_unicode = nameof_int
 
     def skipped_function(self, func):
-        # debugging only!  Generates a placeholder for missing functions
+        # Generates a placeholder for missing functions
         # that raises an exception when called.
+        # The original code object is retained in an
+        # attribute '_skipped_code'
         name = self.uniquename('gskippedfunc_' + func.__name__)
+        codename = self.nameof(func.func_code)
         self.initcode.append('def %s(*a,**k):\n' 
                              '  raise NotImplementedError' % name)
+        self.initcode.append('%s._skipped_code = %s' % (name, codename) )
         return name
 
     def nameof_staticmethod(self, sm):
@@ -390,31 +383,23 @@
         return name
 
     def nameof_tuple(self, tup):
-        # instead of defining myriads of tuples, it seems to
-        # be cheaper to create them inline, although they don't
-        # get constant folded like strings and numbers.
-        if self.inline_consts:
-            argstr = self.nameofargs(tup)
-            if len(tup) == 1 and not argstr.rstrip().endswith(','):
-                argstr += ','
-            return '(%s)' % argstr
-        name = self.uniquename('g%dtuple' % len(tup))
-        args = [self.nameof(x) for x in tup]
-        args = ', '.join(args)
-        if args:
-            args += ','
-        self.initcode_python(name, '(%s)' % args)
+        chunk = 20
+        name = self.uniquename('T%d' % len(tup))
+        argstr = self.nameofargs(tup[:chunk], True)
+        self.initcode_python(name, '(%s)' % argstr)
+        for i in range(chunk, len(tup), chunk):
+            argstr = self.nameofargs(tup[i:i+chunk], True)
+            self.initcode.append('%s += (%s)' % (name, argstr) )
         return name
 
     def nameof_list(self, lis):
+        chunk = 20
         name = self.uniquename('L%d' % len(lis))
-        extend = self.nameof(_ex)
         def initlist():
             chunk = 20
             for i in range(0, len(lis), chunk):
-                items = lis[i:i+chunk]
-                itemstr = self.nameofargs(items)
-                yield '%s(%s, %s)' % (extend, name, itemstr)
+                argstr = self.nameofargs(lis[i:i+chunk])
+                yield '%s += [%s]' % (name, argstr)
         self.initcode_python(name, '[]')
         self.later(initlist())
         return name
@@ -488,9 +473,9 @@
                 yield '%s.__setstate__(%s)' % (name, args)
                 return
             elif type(restorestate) is tuple:
-                setstate = self.nameof(slotted.__setstate__)
-                args = self.nameof(restorestate)
-                yield '%s(%s, %s)' % (setstate, name, args)
+                setstate = self.nameof(_set)
+                argstr = self.nameofargs(restorestate)
+                yield '%s(%s, %s)' % (setstate, name, argstr)
                 return
             assert type(restorestate) is dict, (
                 "%s has no dict and no __setstate__" % name)
@@ -518,12 +503,14 @@
                     ' please update %s' % (cls.__name__, __name__) )
                 restorestate = slotted.__getstate__(instance)
                 restorer = _rec
-                restoreargs = klass, object, None
+                restoreargs = klass,
             else:
                 restorer = reduced[0]
+                restoreargs = reduced[1]
                 if restorer is _reconstructor:
                     restorer = _rec
-                restoreargs = reduced[1]
+                    if restoreargs[1:] == (object, None):
+                        restoreargs = restoreargs[:1]
                 if len(reduced) > 2:
                     restorestate = reduced[2]
                 else:
@@ -603,8 +590,6 @@
         args = (func.func_code, func.func_globals, func.func_name,
                 func.func_defaults, func.func_closure)
         pyfuncobj = self.uniquename('gfunc_' + func.__name__)
-        # touch code,to avoid extra indentation
-        self.nameof(func.func_code)
         self.initcode.append('%s = new.function(%s)' % (pyfuncobj,
                              self.nameofargs(args)) )
         if func.__dict__:
@@ -635,20 +620,6 @@
                 code.co_varnames, code.co_filename, code.co_name,
                 code.co_firstlineno, code.co_lnotab, code.co_freevars,
                 code.co_cellvars)
-        if not self.inline_consts:
-            # make the code, filename and lnotab strings nicer
-            codestr = code.co_code
-            codestrname = self.uniquename('gcodestr_' + code.co_name)
-            self.picklenames[Constant(codestr)] = codestrname
-            self.initcode.append('%s = %r' % (codestrname, codestr))
-            fnstr = code.co_filename
-            fnstrname = self.uniquename('gfname_' + code.co_name)
-            self.picklenames[Constant(fnstr)] = fnstrname
-            self.initcode.append('%s = %r' % (fnstrname, fnstr))
-            lnostr = code.co_lnotab
-            lnostrname = self.uniquename('glnotab_' + code.co_name)
-            self.picklenames[Constant(lnostr)] = lnostrname
-            self.initcode.append('%s = %r' % (lnostrname, lnostr))
         argstr = self.nameofargs(args)
         codeobj = self.uniquename('gcode_' + code.co_name)
         self.initcode.append('%s = new.code(%s)' % (codeobj, argstr))
@@ -672,34 +643,20 @@
     def later(self, gen):
         self.latercode.append((gen, self.debugstack))
 
-    def spill_source(self, final):
-        def write_block(lines):
-            if not lines:
-                return
-            txt = '\n'.join(lines)
-            print >> self.outfile, txt
-            print >> self.outfile, '## SECTION ##'
-
-        if not self.outfile:
-            return
-        chunk = self._partition
-        while len(self.initcode) >= chunk:
-            write_block(self.initcode[:chunk])
-            del self.initcode[:chunk]
-        if final and self.initcode:
-            write_block(self.initcode)
-            del self.initcode[:]
-
     def collect_initcode(self):
+        writer = self.writer
         while self.latercode:
             gen, self.debugstack = self.latercode.pop()
             #self.initcode.extend(gen) -- eats TypeError! bad CPython!
             for line in gen:
                 self.initcode.append(line)
             self.debugstack = ()
-            if len(self.initcode) >= self._partition:
-                self.spill_source(False)
-        self.spill_source(True)
+            if writer:
+                for line in self.initcode:
+                    writer.write(line)
+                del self.initcode[:]
+        if writer:
+            writer.close()
 
     def getfrozenbytecode(self):
         self.initcode.append('')
@@ -733,8 +690,8 @@
 
 # some shortcuts, to make the pickle smaller
 
-def _ex(lis, *args):
-    lis.extend(args)
+def _rec(klass, base=object, state=None):
+    return _reconstructor(klass, base, state)
 
-def _rec(*args):
-    return _reconstructor(*args)
+def _set(obj, *args):
+    slotted.__setstate__(obj, args)

Added: pypy/dist/pypy/translator/pickle/loader.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/loader.py	Mon Jun  6 23:08:09 2005
@@ -0,0 +1,50 @@
+import zipfile, marshal, md5
+
+class Loader:
+    def __init__(self, fname):
+        self.f = self.open_file(fname)
+
+    def open_file(self, fname):
+        raise SyntaxError, "implement open_file"
+
+    def next_block(self):
+        raise SyntaxError, "implement next_block"
+
+    def load(self):
+        dic = {}
+        for blk in self.next_block():
+            exec blk in dic
+        try:
+            return dic['ginst_Translator']
+        finally:
+            self.close()
+
+    def close(self):
+        self.f.close()
+
+
+class TextLoader(Loader):
+
+    def open_file(self, fname):
+        return file(fname)
+
+    def next_block(self):
+        data = self.f.read().split('## SECTION ##\n')
+        while data:
+            yield data.pop(0)
+    
+class ZipLoader(Loader):
+    """ load compiled code from a ZIP file """
+        
+    def open_file(self, fname):
+        return zipfile.ZipFile(fname, "r")
+
+    def next_block(self):
+        root = self.f.read('root')
+        dump = self.f.read(root)
+        assert md5.new(dump).hexdigest() == root, "broken checksum"
+        blocknames = marshal.loads(dump)
+        for name in blocknames:
+            dump = self.f.read(name)
+            assert md5.new(dump).hexdigest() == name, "broken checksum"
+            yield marshal.loads(dump)

Added: pypy/dist/pypy/translator/pickle/main.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/main.py	Mon Jun  6 23:08:09 2005
@@ -0,0 +1,5 @@
+def load(fname):
+    pass
+
+def save(translator, fname):
+    pass

Added: pypy/dist/pypy/translator/pickle/writer.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/writer.py	Mon Jun  6 23:08:09 2005
@@ -0,0 +1,64 @@
+import zipfile, marshal, md5
+
+class Writer:
+    def __init__(self, fname):
+        self.pieces = []
+        self.chunksize = 100000
+        self.count = 0
+        self.blocknum = 0
+        self.f = self.open_file(fname)
+
+    def open_file(self, fname):
+        raise SyntaxError, "implement open_file"
+
+    def write(self, text):
+        self.pieces.append(text)
+        self.count += len(text) + 1
+        if self.count >= self.chunksize:
+            src = '\n'.join(self.pieces)
+            del self.pieces[:]
+            self.count -= self.chunksize
+            self.putblock(src)
+            self.blocknum += 1
+
+    def close(self):
+        src = '\n'.join(self.pieces)
+        self.putblock(src)
+        self.finalize()
+        self.f.close()
+
+    def finalize(self):
+        pass
+
+
+class TextWriter(Writer):
+
+    def open_file(self, fname):
+        return file(fname, 'w')
+
+    def putblock(self, src):
+        print >> self.f, src
+        print >> self.f, '## SECTION ##'
+    
+class ZipWriter(Writer):
+    """ write compiled code to a ZIP file """
+
+    def __init__(self, fname):
+        Writer.__init__(self, fname)
+        self.blocknames = []
+        
+    def open_file(self, fname):
+        return zipfile.ZipFile(fname, "w", zipfile.ZIP_DEFLATED)
+
+    def putblock(self, src):
+        cod = compile(src, 'block_%d' % self.blocknum, 'exec')
+        dump = marshal.dumps(cod)
+        digest = md5.new(dump).hexdigest()
+        self.blocknames.append(digest)
+        self.f.writestr(digest, dump)
+
+    def finalize(self):
+        dump = marshal.dumps(self.blocknames)
+        digest = md5.new(dump).hexdigest()
+        self.f.writestr(digest, dump)
+        self.f.writestr('root', digest)



More information about the Pypy-commit mailing list