[Python-3000] Adaptation vs. Generic Functions
Tim Hochberg
tim.hochberg at cox.net
Sun Apr 9 23:22:28 CEST 2006
"""This is my attempt to rewrite pprint.py to be based on protocols. Or
overloading or specialization or whatever they end up being called ;-). My
first attempt only touched about 20 lines of the original pprint and
probably
had 90% of the usability gain that this one does, but for some reason I
decided
that wasn't enough work, and I went back and tried to make it completely
extensible.
This took far too much time, as touching one part of the code led to another
part of the code until I had ripped the whole thing apart. Then I had to
figure
out how to put it back together sensibly. My first,
simplest-thing-that-could-
possibly-work, version was slow. About 20x slower than the original
pprint.py
for pformat, based on pprint._perfcheck. This led to another round of
reorganization and optimization. The current version is slightly faster than
the original for percheck, but slightly slower for saferepr.
Let me add this cautionary note -- this primarily intended as for
illustrative purposes. There's a good chance it has bugs. It does,
however, pass
test_pprint except for test_subclassing, which it can't really be
expected to
pass.
Rather than having you scroll all through this long message/file, I'll
instead
place all the examples up here at the top. Assuming that you save this as
pprint2.py, you should be able to run the examples just by executing the
file.
"""
if __name__ == "__main__":
import pprint2
# Here's that same old object that I stole Fred Lundh's librarybook.
data = (
"this is a string", [1, 2, 3, 4], ("more tuples",
1.0, 2.3, 4.5), "this is yet another string"
)
# [1] You can use pprint just as before.
pprint2.pprint(data)
print
# ('this is a string',
# [1, 2, 3, 4],
# ('more tuples', 1.0, 2.3, 4.5),
# 'this is yet another string')
# [2] However, the core object is actually pprinter, which by
default returns
# a string instead of printing to stdout.
print pprint2.pprinter(data)
print
# Same as for [1]
# [3] Let's extend pprinter so that integers are printed in hex. We use
# register_simple because we don't care about all of the details
regarding
# indentation and recursion. register_simple wants a function that takes
# a single object and returns a string.
mypprinter = pprint2.PrettyPrinter(pprint2.pprinter)
print mypprinter(data)
print
# Still the same as for [1]
@mypprinter.register_simple(int)
@mypprinter.register_simple(long)
def format_int_as_hex(obj):
return hex(obj)
print mypprinter(data)
print
# ('this is a string',
# [0x1, 0x2, 0x3, 0x4],
# ('more tuples', 1.0, 2.3, 4.5),
# 'this is yet another string')
# Pretty cool! Note that pprinter itself is unchanged.
print pprint2.pprinter(data)
print
# Still the same as for [1].
# Ok, just to finish things off, let's override something using the full
# interface. This interface is not fully documented or even fully
# understood (I copied it from pprint.py, but I'm sure it warped in
# transition.
#
# This example is pretty useless, but at least you get to see full
# overriding. Note that this function must return an iterable.
# For real examples, see the registrations at the bottom of the program.
# (Although be warned, they can be messy!)
@mypprinter.register(list)
def format_list(obj, context, indent, allowance, level):
yield ("<list @%s [indent=%s, allowance=%s, level=%s]>" %
(id(obj), indent, allowance, level))
print mypprinter(data)
print
# ('this is a string',
# <list @12020992 [indent=1, allowance=1, level=3]>,
# ('more tuples', 1.0, 2.3, 4.5),
# 'this is yet another string')
# I'm not sure why level is 3 instead of 2, that my be a buglet. I'm not
# going to track it down right now.
# OK, that's it for now. Back to my normally scheduled life!
"""
I include Registry and Protocol here since the Protocol implemention keeps
changing. Tracking down a compatible implementation would probably be hard.
I use Registries instead of dicts for my Protocol class. So far, I like this
approach. It makes runtime extension of Protocols simple while exiling the
increased complexity to the Registry class.
While I like the semantics, the implementation is questionable. As currently
implemented lookups are potentially slow while setting and deleting is fast.
This is exactly backwards! This could be reimplemented with the same
semantics
using some sort of view observer magic as suggested by Nick Coghlan.
"""
_missing = object()
_unknown = object()
class Registry(dict):
"""A dict like object that forwards item requests to its parent if
neeeded."""
def __init__(self, *parents):
dict.__init__(self)
self.parents = list(parents)
def __contains__(self, key):
return self.get(key, _missing) is not _missing
def __getitem__(self, key):
x = self._get(key, set())
if x is _missing:
raise KeyError('not found')
return x
def _get(self, key, visited):
# Get a value from self or parents. Return _missing on failure
# visited contains the ids of objects searched so far.
myid = _id(self)
if myid in visited:
return _missing
visited.add(myid)
x = dict.get(self, key, _missing)
if x is not _missing:
return x
for p in self.parents:
if isinstance(p, Registry):
x = p._get(key, visited)
else:
x = p.get(key, _missing)
if x is not _missing:
return x
return _missing
def get(self, key, default=None):
x = self._get(key, set())
if x is _missing:
return default
return x
"""
Yet another Protocol implementation. This one is pretty similar to most
of the
other recent ones except that it uses Registries.
"""
class Protocol(object):
"""Declare a protocol object that subclasses parents if provided"""
def __init__(self, *parents):
self.registry = Registry(*(x.registry for x in parents))
def register(self, *args):
"""Function decorator to register as an adapter for given keys"""
if len(args) == 1:
args = args[0]
def helper(adapter):
if adapter is None:
adapter = null_adapter
self.registry[args] = adapter
return adapter
return helper
def signatures(self, arg):
"""Find signatures for given call arguments"""
# Default behaviour dispatches on the type of the first argument
return _type(arg).__mro__
def default_adapter(self, *args):
"""Call result when no adapter was found"""
raise TypeError("Can't adapt <%s> to %s" %
(_commajoin(x.__class__.__name__ for x in args),
self.__class__.__name__))
def find_adapter(self, *args):
"""Find an adapter for args"""
for key in self.signatures(*args):
adapter = self.registry.get(key, _missing)
if adapter is not _missing:
return adapter
return self.default_adapter
def __call__(self, *args):
"""Adapt supplied arguments to this protocol"""
return self.find_adapter(*args)(*args)
"""
Here we have the actual pprint2 stuff.
The first order of business is just defining some marker and helper
classes.
Nothing exciting.
"""
import sys
class Marker(object):
def __init__(self, value):
self.value = value
# These marker classes are used to tell PrettyPrinter to use the formatter
# for a long object (long in the sense that len(pformat(obj)) is long).
class LongList(Marker): pass
class LongTuple(Marker): pass
class LongSequence(Marker): pass
class LongDict(Marker): pass
long_types = {list : LongList, tuple : LongTuple, dict : LongDict}
# More marker classes. Recursive is used, suprise, for recursive objects and
# Generic is used to format objects that are not otherwise formatable.
class Recursive(Marker): pass
# Generic is a little different from the other marker classes as it doesn't
# wrap it's arguments.
class Generic(object): pass
# This is used to pass context information down to the registered
formatters.
class Context(object):
__slots__ = ['locked', 'width', 'maxlevels', 'indent_per_level',
'readable', 'recursive', 'adapter_cache',
'long_adapter_cache', 'formatter']
def __init__(self, width, maxlevels, indent_per_level, formatter):
self.locked = set()
self.width = width
self.maxlevels = maxlevels
self.indent_per_level = indent_per_level
self.readable = True
self.recursive = False
self.formatter = formatter
self.adapter_cache = {}
self.long_adapter_cache = {}
# Some microoptimizations stolen from the original
_id = id
_len = len
_type = type
_commajoin = ', '.join
_join = ''.join
"""
Now we have reached the core of the implementation.
"""
class PrettyPrinter(Protocol):
"""PrettyPrinter is a Protocol that accepts objects and either
returns a
pretty representation of the object or writes the representation
to a
stream if that is supplied.
"""
# Override signatures so that only superclasses with the same repr are
# return. This centralizes a bunch of logic in pprint.py, in one place
# simplifies things a lot farther down. Also, always yield Generic, so
# that the default behavious can be overridden.
def signatures(self, *args):
repr = _type(args[0]).__repr__
for x in _type(args[0]).__mro__:
if x.__repr__ is not repr:
break
yield x
yield Generic
# Here to ease changing formatting of simple types, like ints for
example.
def register_simple(self, arg):
"""Register a simple converter that takes a obj and returns a
string"""
def helper(adapter):
if adapter is None:
adapter = null_adapter
def func(obj, context, indent, allowance, level):
yield adapter(obj)
self.registry[arg] = func
return adapter
return helper
# pprint makes a distinction between objects with long representations,
# which it splits between lines and others. This finds the correct
formatter
# for a given type.
def _find_long_adapter(self, objtype, obj, context):
try:
return context.long_adapter_cache[objtype]
except KeyError:
for sig in self.signatures(obj, context):
longtype = long_types.get(sig, None)
if longtype:
try:
adapter = self.registry[longtype]
break
except KeyError:
pass
else:
adapter = None
context.long_adapter_cache[objtype] = adapter
return adapter
# This is the core of the algorithm. It's really quite simple if you
look
# past two complicating factors. First, there's a bunch of caching going
# on, that's essential for performance reasons. Second, everything is
# implemented in terms of generators. This allows the adapters to not
# worry about the whole stream issue.
def _format(self, obj, context, indent=0, allowance=0, level=0):
max_length = context.width - indent - allowance - 1
objtype = type(obj)
try:
adapter = context.adapter_cache[objtype]
except:
adapter = context.adapter_cache[objtype] =
self.find_adapter(obj)
chunkmaker = adapter(obj, context, indent, allowance, level+1)
rep = ''
for chunk in chunkmaker:
rep += chunk
if (len(rep) > max_length):
# If max_length gets too long, we try to use a long adapter
# instead. If that works, we clear rep and break out. Other-
# wise, we keep rep and still break out, but still with the
# old chunkmaker.
longadapter = self._find_long_adapter(objtype, obj, context)
if longadapter:
rep = ''
context.locked.discard(_id(obj))
chunkmaker = longadapter(Marker(obj),
context, indent, allowance,
level+1)
break
# Yield rep and any remaining chunks.
yield rep
for chunk in chunkmaker:
yield chunk
# This just does some error checking, sets up the stream, and hands
things
# off to _format.
def __call__(self, obj, stream=None, indent=1, width=80, depth=None):
indent = int(indent)
width = int(width)
if indent < 0:
raise ValueError("indent must be >= 0")
if not (depth is None or depth > 0):
raise ValueError("depth must be > 0")
if width <= 0:
raise ValueError("width must be > 0")
context = Context(width, depth, indent, self._format)
if stream is None:
return _join(self._format(obj, context))
else:
for chunk in self._format(obj, context):
stream.write(chunk)
# recreate the pprint interface.
def isrecursive(self, obj):
context = Context(80, None, 1, self._format)
for chunk in self._format(obj, context):
pass
return context.recursive
def isreadable(self, obj):
context = Context(80, None, 1, self._format)
for chunk in self._format(obj, context):
pass
return context.readable and not context.recursive
"""
Now that we have the Protocol object, we define two instances. One
(saferepr)
that doesn't do anything special with long lines, and one (pprinter) that
does. pprinter extends saferepr, so any changes to saferepr will be
picked up
automagically by pprinter.
With those two in place, it's a simple matter to recreate the pprint.py
module
interface. Then we're done except for actually creating and registering
all of
the functions.
"""
saferepr = PrettyPrinter()
pprinter = PrettyPrinter(saferepr)
# Recreate the module interface.
pformat = pprinter
isreadable = saferepr.isreadable
isrecursive = saferepr.isrecursive
def pprint(obj, indent=1, width=80, depth=None, stream=None):
if stream is None:
stream = sys.stdout
pprinter(obj, stream, indent, width, depth)
stream.write('\n')
"""
Way down here at the bottom is where we define and register all the
behaviour
of everything. Don't look too closely at these implementations -- I
ripped them
from pprint.py and modified them to fit here without thinking about them too
much. They may have suffered in the transition.
"""
@saferepr.register(Generic)
def _format_generic(obj, context, indent, allowance, level):
rep = repr(obj)
if rep.startswith('<'):
context.readable = False
yield rep
@saferepr.register(Recursive)
def _format_recursive(markerobj, context, indent, allowance, level):
obj = markerobj.value
context.recursive = True
yield ("<Recursion on %s with id=%s>"
% (_type(obj).__name__, _id(obj)))
@saferepr.register(float)
def _format_float(obj, context, indent, allowance, level):
yield str(obj)
@saferepr.register(str)
def _format_str(obj, context, indent, allowance, level):
if 'locale' not in sys.modules:
yield repr(obj)
return
if "'" in obj and '"' not in obj:
closure = '"'
quotes = {'"': '\\"'}
else:
closure = "'"
quotes = {"'": "\\'"}
qget = quotes.get
chars = []
write = chars.append
for char in obj:
if char.isalpha():
write(char)
else:
write(qget(char, repr(char)[1:-1]))
yield ("%s%s%s" % (closure, _join(chars), closure))
@saferepr.register(dict)
def _format_dict(obj, context, indent, allowance, level):
objid = _id(obj)
if objid in context.locked:
for chunk in context.formatter(Recursive(obj), context,
indent, allowance, level):
yield chunk
else:
context.locked.add(objid)
yield '{'
if context.maxlevels and level > context.maxlevels:
yield "..."
else:
if obj:
format = context.formatter
items = obj.iteritems()
# This should really be sorted, but tests don't expect it.
k, v = items.next()
for chunk in format(k, context, indent, allowance, level):
yield chunk
yield ': '
for chunk in format(v, context, indent, allowance, level):
yield chunk
for k, v in items:
yield ', '
for chunk in format(k, context):
yield chunk
yield ': '
for chunk in format(v, context):
yield chunk
yield '}'
context.locked.remove(objid)
@saferepr.register(list)
@saferepr.register(tuple)
def _format_sequence(obj, context, indent, allowance, level):
typ = _type(obj)
if issubclass(typ, list):
if not obj:
yield "[]"
return
format = "[%s]"
elif _len(obj) == 1:
format = "(%s,)"
else:
if not obj:
yield "()"
return
format = "(%s)"
objid = _id(obj)
if context.maxlevels and level > context.maxlevels:
yield format % "..."
return
if objid in context.locked:
for chunk in context.formatter(Recursive(obj), context,
indent, allowance, level):
yield chunk
return
context.locked.add(objid)
components = []
append = components.append
formatter = context.formatter
for o in obj:
for orepr in formatter(o, context, indent, allowance, level):
append(orepr)
yield format % _commajoin(components)
context.locked.remove(objid)
@pprinter.register(LongList)
def _format_longlist(longobj, context, indent, allowance, level):
yield '['
for chunk in context.formatter(LongSequence(longobj.value), context,
indent,
allowance, level):
yield chunk
yield ']'
@pprinter.register(LongTuple)
def _format_longtuple(longobj, context, indent, allowance, level):
obj = longobj.value
yield '('
for chunk in context.formatter(LongSequence(longobj.value), context,
indent,
allowance, level):
yield chunk
if _len(obj) == 1:
yield ','
yield ')'
@pprinter.register(LongSequence)
def _format_longsequence(longobj, context, indent, allowance, level):
obj = longobj.value
if context.indent_per_level > 1:
yield (context.indent_per_level - 1) * ' '
length = _len(obj)
if length:
objid = _id(obj)
if objid in context.locked:
for chunk in context.formatter(Recursive(obj), context,
indent, allowance, level):
yield chunk
return
context.locked.add(objid)
format = context.formatter
indent += context.indent_per_level
objiter = iter(obj)
for chunk in format(objiter.next(), context, indent,
allowance+1, level):
yield chunk
if length > 1:
indentation = ',\n' + ' '*indent
for ent in objiter:
yield indentation
for chunk in format(ent, context, indent, allowance+1,
level):
yield chunk
context.locked.remove(objid)
@pprinter.register(LongDict) # could break out into keys and use
sequence somehow.
def _format_longdict(longobj, context, indent, allowance, level):
obj = longobj.value
yield '{'
if context.indent_per_level > 1:
yield (context.indent_per_level - 1) * ' '
length = _len(obj)
if length:
format = context.formatter
objid = _id(obj)
if objid in context.locked:
for chunk in context.formatter(Recursive(obj), context,
indent, allowance, level):
yield chunk
return
context.locked.add(objid)
indent += context.indent_per_level
items = obj.items()
items.sort()
itemsiter = iter(items)
key, ent = itemsiter.next()
valindent = indent
valallow = allowance
for chunk in format(key, context, indent, allowance, level):
valindent += _len(chunk)
yield chunk
yield ': '
valindent += 2
valallow += 1
for chunk in format(ent, context):
yield chunk
if length > 1:
indentation = ',\n'+' '*indent
for key, ent in itemsiter:
yield indentation
for chunk in format(key, context, indent, allowance, level):
yield chunk
yield ': '
for chunk in format(ent, context, valindent, valallow,
level):
yield chunk
context.locked.remove(objid)
yield '}'
def _perfcheck(object=None):
import time
if object is None:
object = [("string", (1, 2), [3, 4], {5: 6, 7: 8})] * 10000
p = PrettyPrinter()
t1 = time.time()
saferepr(object)
t2 = time.time()
pformat(object)
t3 = time.time()
print "safe_repr:", t2 - t1
print "pformat:", t3 - t2
#~ _perfcheck()
More information about the Python-3000
mailing list