[pypy-svn] r13127 - in pypy/dist/pypy: objspace/flow translator/pickle
tismer at codespeak.net
tismer at codespeak.net
Mon Jun 6 23:08:11 CEST 2005
Author: tismer
Date: Mon Jun 6 23:08:09 2005
New Revision: 13127
Added:
pypy/dist/pypy/translator/pickle/loader.py (contents, props changed)
pypy/dist/pypy/translator/pickle/main.py (contents, props changed)
pypy/dist/pypy/translator/pickle/writer.py (contents, props changed)
Modified:
pypy/dist/pypy/objspace/flow/model.py
pypy/dist/pypy/translator/pickle/genpickle.py
Log:
changed a few things to make the generated source smaller.
It is now about 37 MB.
Added writers and loaders for plain text and zipfile.
todo:
- write main, deciding which file format to use
- integrate with translate_pypy (will use main.load and main.save)
- write tests (eek, how do I test such stuff, any ideas?)
Modified: pypy/dist/pypy/objspace/flow/model.py
==============================================================================
--- pypy/dist/pypy/objspace/flow/model.py (original)
+++ pypy/dist/pypy/objspace/flow/model.py Mon Jun 6 23:08:09 2005
@@ -293,7 +293,13 @@
return "%r = %s(%s)" % (self.result, self.opname, ", ".join(map(repr, self.args)))
def __reduce_ex__(self, *args):
- return SpaceOperation, (self.opname, self.args, self.result, self.offset)
+ # avoid lots of useless list entities
+ return _sop, (self.opname, self.result, self.offset) + tuple(self.args)
+ __reduce__ = __reduce_ex__
+
+# a small and efficient restorer
+def _sop(opname, result, offset, *args):
+ return SpaceOperation(opname, args, result, offset)
class Atom:
def __init__(self, name):
Modified: pypy/dist/pypy/translator/pickle/genpickle.py
==============================================================================
--- pypy/dist/pypy/translator/pickle/genpickle.py (original)
+++ pypy/dist/pypy/translator/pickle/genpickle.py Mon Jun 6 23:08:09 2005
@@ -3,6 +3,10 @@
The purpose is to create something that allows
to restart code generation after flowing and maybe
annotation.
+
+The generated source appeared to be way too large
+for the CPython compiler. Therefore, we cut the
+source into pieces and compile them separately.
"""
from __future__ import generators, division, nested_scopes
import __future__
@@ -17,11 +21,12 @@
from pypy.objspace.flow.flowcontext import SpamBlock, EggBlock
from pypy.annotation.model import SomeInteger, SomeObject, SomeChar, SomeBool
from pypy.annotation.model import SomeList, SomeString, SomeTuple
+from pypy.annotation.unaryop import SomeInstance
from pypy.interpreter.baseobjspace import ObjSpace
from pypy.translator.pickle import slotted
from pickle import whichmodule, PicklingError
-from copy_reg import _reduce_ex, _reconstructor
+from copy_reg import _reconstructor
import pickle
@@ -33,7 +38,7 @@
class GenPickle:
- def __init__(self, translator, outfile = None):
+ def __init__(self, translator, writer = None):
self.translator = translator
self.initcode = [
'from __future__ import %s\n' % ', '.join(all_feature_names) +
@@ -56,8 +61,6 @@
self.namespace.make_reserved_names('None False True')
self.namespace.make_reserved_names('new types sys')
self.namespace.make_reserved_names(' '.join(all_feature_names))
- self.inline_consts = True # save lots of space
- self._nesting = 0 # for formatting nested tuples etc.
# we distinguish between the "user program" and other stuff.
# "user program" will never use save_global.
self.domains = (
@@ -68,7 +71,7 @@
'__main__',
)
self.shortnames = {
- SpaceOperation: 'S',
+ SpaceOperation: 'SOP',
Variable: 'V',
Constant: 'C',
Block: 'B',
@@ -83,9 +86,9 @@
SomeList: 'SL',
SomeString: 'SS',
SomeTuple: 'ST',
+ SomeInstance: 'SIN',
}
- self.outfile = outfile
- self._partition = 1234
+ self.writer = writer
def nameof(self, obj, debug=None, namehint=None):
key = Constant(obj)
@@ -125,21 +128,20 @@
self.picklenames[key] = name
return name
- def nameofargs(self, tup):
+ def nameofargs(self, tup, plain_tuple = False):
""" a string with the nameofs, concatenated """
- if len(tup) < 5:
- # see if there is nesting to be expected
- for each in tup:
- if type(each) is tuple:
- break
- else:
- return ', '.join([self.nameof(arg) for arg in tup])
- # we always wrap into multi-lines, this is simple and readable
- self._nesting += 1
- space = ' ' * self._nesting
- ret = '\n' + space + (',\n' + space).join(
- [self.nameof(arg) for arg in tup]) + ',\n' + space
- self._nesting -= 1
+ # see if we can build a compact representation
+ for each in tup:
+ if type(each) is tuple and len(each) > 2:
+ break
+ else:
+ ret = ', '.join([self.nameof(arg) for arg in tup])
+ if plain_tuple and len(tup) == 1:
+ ret += ','
+ if len(ret) <= 90:
+ return ret
+ ret = '\n ' + (',\n ').join(
+ [self.nameof(arg) for arg in tup]) + ',\n '
return ret
def uniquename(self, basename):
@@ -184,27 +186,18 @@
# the compiler folds the consts the same way as we do.
# note that true pickling is more exact, here.
nameof_long = nameof_float = nameof_bool = nameof_NoneType = nameof_int
-
- def nameof_str(self, value):
- if self.inline_consts:
- return repr(value)
- name = self.uniquename('gstr_' + value[:32])
- self.initcode_python(name, repr(value))
- return name
-
- def nameof_unicode(self, value):
- if self.inline_consts:
- return repr(value)
- name = self.uniquename('guni_' + str(value[:32]))
- self.initcode_python(name, repr(value))
- return name
+ nameof_str = nameof_unicode = nameof_int
def skipped_function(self, func):
- # debugging only! Generates a placeholder for missing functions
+ # Generates a placeholder for missing functions
# that raises an exception when called.
+ # The original code object is retained in an
+ # attribute '_skipped_code'
name = self.uniquename('gskippedfunc_' + func.__name__)
+ codename = self.nameof(func.func_code)
self.initcode.append('def %s(*a,**k):\n'
' raise NotImplementedError' % name)
+ self.initcode.append('%s._skipped_code = %s' % (name, codename) )
return name
def nameof_staticmethod(self, sm):
@@ -390,31 +383,23 @@
return name
def nameof_tuple(self, tup):
- # instead of defining myriads of tuples, it seems to
- # be cheaper to create them inline, although they don't
- # get constant folded like strings and numbers.
- if self.inline_consts:
- argstr = self.nameofargs(tup)
- if len(tup) == 1 and not argstr.rstrip().endswith(','):
- argstr += ','
- return '(%s)' % argstr
- name = self.uniquename('g%dtuple' % len(tup))
- args = [self.nameof(x) for x in tup]
- args = ', '.join(args)
- if args:
- args += ','
- self.initcode_python(name, '(%s)' % args)
+ chunk = 20
+ name = self.uniquename('T%d' % len(tup))
+ argstr = self.nameofargs(tup[:chunk], True)
+ self.initcode_python(name, '(%s)' % argstr)
+ for i in range(chunk, len(tup), chunk):
+ argstr = self.nameofargs(tup[i:i+chunk], True)
+ self.initcode.append('%s += (%s)' % (name, argstr) )
return name
def nameof_list(self, lis):
+ chunk = 20
name = self.uniquename('L%d' % len(lis))
- extend = self.nameof(_ex)
def initlist():
chunk = 20
for i in range(0, len(lis), chunk):
- items = lis[i:i+chunk]
- itemstr = self.nameofargs(items)
- yield '%s(%s, %s)' % (extend, name, itemstr)
+ argstr = self.nameofargs(lis[i:i+chunk])
+ yield '%s += [%s]' % (name, argstr)
self.initcode_python(name, '[]')
self.later(initlist())
return name
@@ -488,9 +473,9 @@
yield '%s.__setstate__(%s)' % (name, args)
return
elif type(restorestate) is tuple:
- setstate = self.nameof(slotted.__setstate__)
- args = self.nameof(restorestate)
- yield '%s(%s, %s)' % (setstate, name, args)
+ setstate = self.nameof(_set)
+ argstr = self.nameofargs(restorestate)
+ yield '%s(%s, %s)' % (setstate, name, argstr)
return
assert type(restorestate) is dict, (
"%s has no dict and no __setstate__" % name)
@@ -518,12 +503,14 @@
' please update %s' % (cls.__name__, __name__) )
restorestate = slotted.__getstate__(instance)
restorer = _rec
- restoreargs = klass, object, None
+ restoreargs = klass,
else:
restorer = reduced[0]
+ restoreargs = reduced[1]
if restorer is _reconstructor:
restorer = _rec
- restoreargs = reduced[1]
+ if restoreargs[1:] == (object, None):
+ restoreargs = restoreargs[:1]
if len(reduced) > 2:
restorestate = reduced[2]
else:
@@ -603,8 +590,6 @@
args = (func.func_code, func.func_globals, func.func_name,
func.func_defaults, func.func_closure)
pyfuncobj = self.uniquename('gfunc_' + func.__name__)
- # touch code,to avoid extra indentation
- self.nameof(func.func_code)
self.initcode.append('%s = new.function(%s)' % (pyfuncobj,
self.nameofargs(args)) )
if func.__dict__:
@@ -635,20 +620,6 @@
code.co_varnames, code.co_filename, code.co_name,
code.co_firstlineno, code.co_lnotab, code.co_freevars,
code.co_cellvars)
- if not self.inline_consts:
- # make the code, filename and lnotab strings nicer
- codestr = code.co_code
- codestrname = self.uniquename('gcodestr_' + code.co_name)
- self.picklenames[Constant(codestr)] = codestrname
- self.initcode.append('%s = %r' % (codestrname, codestr))
- fnstr = code.co_filename
- fnstrname = self.uniquename('gfname_' + code.co_name)
- self.picklenames[Constant(fnstr)] = fnstrname
- self.initcode.append('%s = %r' % (fnstrname, fnstr))
- lnostr = code.co_lnotab
- lnostrname = self.uniquename('glnotab_' + code.co_name)
- self.picklenames[Constant(lnostr)] = lnostrname
- self.initcode.append('%s = %r' % (lnostrname, lnostr))
argstr = self.nameofargs(args)
codeobj = self.uniquename('gcode_' + code.co_name)
self.initcode.append('%s = new.code(%s)' % (codeobj, argstr))
@@ -672,34 +643,20 @@
def later(self, gen):
self.latercode.append((gen, self.debugstack))
- def spill_source(self, final):
- def write_block(lines):
- if not lines:
- return
- txt = '\n'.join(lines)
- print >> self.outfile, txt
- print >> self.outfile, '## SECTION ##'
-
- if not self.outfile:
- return
- chunk = self._partition
- while len(self.initcode) >= chunk:
- write_block(self.initcode[:chunk])
- del self.initcode[:chunk]
- if final and self.initcode:
- write_block(self.initcode)
- del self.initcode[:]
-
def collect_initcode(self):
+ writer = self.writer
while self.latercode:
gen, self.debugstack = self.latercode.pop()
#self.initcode.extend(gen) -- eats TypeError! bad CPython!
for line in gen:
self.initcode.append(line)
self.debugstack = ()
- if len(self.initcode) >= self._partition:
- self.spill_source(False)
- self.spill_source(True)
+ if writer:
+ for line in self.initcode:
+ writer.write(line)
+ del self.initcode[:]
+ if writer:
+ writer.close()
def getfrozenbytecode(self):
self.initcode.append('')
@@ -733,8 +690,8 @@
# some shortcuts, to make the pickle smaller
-def _ex(lis, *args):
- lis.extend(args)
+def _rec(klass, base=object, state=None):
+ return _reconstructor(klass, base, state)
-def _rec(*args):
- return _reconstructor(*args)
+def _set(obj, *args):
+ slotted.__setstate__(obj, args)
Added: pypy/dist/pypy/translator/pickle/loader.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/loader.py Mon Jun 6 23:08:09 2005
@@ -0,0 +1,50 @@
+import zipfile, marshal, md5
+
+class Loader:
+ def __init__(self, fname):
+ self.f = self.open_file(fname)
+
+ def open_file(self, fname):
+ raise SyntaxError, "implement open_file"
+
+ def next_block(self):
+ raise SyntaxError, "implement next_block"
+
+ def load(self):
+ dic = {}
+ for blk in self.next_block():
+ exec blk in dic
+ try:
+ return dic['ginst_Translator']
+ finally:
+ self.close()
+
+ def close(self):
+ self.f.close()
+
+
+class TextLoader(Loader):
+
+ def open_file(self, fname):
+ return file(fname)
+
+ def next_block(self):
+ data = self.f.read().split('## SECTION ##\n')
+ while data:
+ yield data.pop(0)
+
+class ZipLoader(Loader):
+ """ load compiled code from a ZIP file """
+
+ def open_file(self, fname):
+ return zipfile.ZipFile(fname, "r")
+
+ def next_block(self):
+ root = self.f.read('root')
+ dump = self.f.read(root)
+ assert md5.new(dump).hexdigest() == root, "broken checksum"
+ blocknames = marshal.loads(dump)
+ for name in blocknames:
+ dump = self.f.read(name)
+ assert md5.new(dump).hexdigest() == name, "broken checksum"
+ yield marshal.loads(dump)
Added: pypy/dist/pypy/translator/pickle/main.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/main.py Mon Jun 6 23:08:09 2005
@@ -0,0 +1,5 @@
+def load(fname):
+ pass
+
+def save(translator, fname):
+ pass
Added: pypy/dist/pypy/translator/pickle/writer.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/pickle/writer.py Mon Jun 6 23:08:09 2005
@@ -0,0 +1,64 @@
+import zipfile, marshal, md5
+
+class Writer:
+ def __init__(self, fname):
+ self.pieces = []
+ self.chunksize = 100000
+ self.count = 0
+ self.blocknum = 0
+ self.f = self.open_file(fname)
+
+ def open_file(self, fname):
+ raise SyntaxError, "implement open_file"
+
+ def write(self, text):
+ self.pieces.append(text)
+ self.count += len(text) + 1
+ if self.count >= self.chunksize:
+ src = '\n'.join(self.pieces)
+ del self.pieces[:]
+ self.count -= self.chunksize
+ self.putblock(src)
+ self.blocknum += 1
+
+ def close(self):
+ src = '\n'.join(self.pieces)
+ self.putblock(src)
+ self.finalize()
+ self.f.close()
+
+ def finalize(self):
+ pass
+
+
+class TextWriter(Writer):
+
+ def open_file(self, fname):
+ return file(fname, 'w')
+
+ def putblock(self, src):
+ print >> self.f, src
+ print >> self.f, '## SECTION ##'
+
+class ZipWriter(Writer):
+ """ write compiled code to a ZIP file """
+
+ def __init__(self, fname):
+ Writer.__init__(self, fname)
+ self.blocknames = []
+
+ def open_file(self, fname):
+ return zipfile.ZipFile(fname, "w", zipfile.ZIP_DEFLATED)
+
+ def putblock(self, src):
+ cod = compile(src, 'block_%d' % self.blocknum, 'exec')
+ dump = marshal.dumps(cod)
+ digest = md5.new(dump).hexdigest()
+ self.blocknames.append(digest)
+ self.f.writestr(digest, dump)
+
+ def finalize(self):
+ dump = marshal.dumps(self.blocknames)
+ digest = md5.new(dump).hexdigest()
+ self.f.writestr(digest, dump)
+ self.f.writestr('root', digest)
More information about the Pypy-commit
mailing list