Multidimensional Dictionaries: Possible?
Lutz Ehrlich
ehrlich at EMBL-Heidelberg.DE
Mon Jan 31 11:44:33 EST 2000
G'day,
please check out the code below. Might be what you're looking for.
Cheers,
Lutz
"""
$Id: GrowDict.py,v 1.7 1999/11/16 17:23:08 ehrlich Exp $
This class implements a growing dictionary that you can index
however you want. It effectively catches all KeyError occurrences.
Some features include saving/loading from a stream (file or url or whatever
looks like a file in Python) and dumping the thing as XML. You can add two dictionaries,
thereby merging their respective trees.
A TimeStampGrowDict can keep a list of modification timestamps and can be sorted according
to the latest timestamp date.
These classes can come in handy when you need a hierarchical container for your
data but you don't know the layout beforehand. You could even emulate a file system ;-)
Usage:
import GrowDict
reload(GrowDict)
gd = GrowDict.GrowDict()
gd[0]['ppp'][3455]= [1,2,3]
gd['comment'] = 'test this'
gd.pretty()
out = open('gd.pretty','w')
gd.pretty(out)
out.close()
GrowDict.gd_save(open('gd1.pickle','w'),gd)
gd2= GrowDict.gd_load(open('gd1.pickle'))
gd3 = GrowDict.GrowDict()
gd3[100]['ff'] = 'test data'
# merging of dicts
gd4 = gd + gd3
# dumping as XML
gd4.toxml()
f = open('gd.xml','w')
gd.toxml(f)
f.close()
# dicts with timestamps
t1 = GrowDict.TimeStampGrowDict('first timestamp text')
t1[0]['gaggag'] = [1,2,43]
t1.stamp('added list data')
t2 = GrowDict.TimeStampGrowDict('comparison data')
t2['data'] = 'whatsoever'
if t1 > t2: print 't1 has been modified later than t2'
"""
import sys
import copy
import DictWithDefault
class GrowDict(DictWithDefault.DictWithDefault):
    """
    Dictionary that grows nested dictionaries on demand.

    Looking up a missing key never raises KeyError: a fresh instance of
    ``self.defaultClass`` is stored under that key and returned, so
    ``gd[0]['ppp'][3455] = value`` works without any prior setup.
    The actual key/value storage is the plain dict ``self.data``.
    """

    def __init__(self):
        self.data = {}
        # class instantiated for keys that are looked up but missing
        self.defaultClass = GrowDict

    def __getitem__(self, key):
        """Return the value for key, creating an empty child if it is missing."""
        try:
            return self.data[key]
        except KeyError:
            item = copy.copy(self.defaultClass())
            self.data[key] = item
            return item

    def clean(self):
        """
        Delete empty GrowDict leaves, pruning bottom-up.

        Children are cleaned before their emptiness is tested, so a subtree
        that only contained empty leaves is removed as well.
        """
        # iterate over a snapshot: entries are deleted while looping
        for key, value in list(self.items()):
            if isinstance(value, GrowDict):
                value.clean()
                if len(value.items()) == 0:
                    del self[key]

    def depth(self):
        """
        Return how many layers I contain (0 for an empty dictionary).

        For unbalanced trees this is the depth of the deepest branch.
        """
        deepest = 0
        for key, value in self.items():
            if isinstance(value, GrowDict):
                level = value.depth() + 1
            else:
                level = 1
            if level > deepest:
                deepest = level
        return deepest

    def pretty(self, f=sys.stdout, indent=0):
        """
        Write an indented text dump to the stream f.

        Every key goes on its own line, prefixed by ``indent`` tabs; nested
        GrowDicts follow one level deeper, other values are written on the
        next line with one extra tab.
        """
        for key, value in self.items():
            f.write('\t' * indent + _encode(key) + '\n')
            if isinstance(value, GrowDict):
                value.pretty(f, indent + 1)
            else:
                f.write('\t' * (indent + 1) + _encode(value) + '\n')

    def toxml(self, f=sys.stdout):
        """
        Convert myself to a DOM document and dump it as XML to the stream f.

        The document is also kept on the instance as ``self.doc``.
        """
        self.doc = convert(self)
        dumpAsXML(self.doc, f)

    def __deepcopy__(self, memo):
        """
        Return a new instance of my class holding deep copies of all items.

        Only the items are copied; the new instance is otherwise set up by
        calling my class without arguments.
        """
        res = self.__class__()
        # register early so shared or cyclic references resolve to this copy
        memo[id(self)] = res
        for key, value in self.items():
            res[copy.deepcopy(key, memo)] = copy.deepcopy(value, memo)
        return res

    def __add__(self, other):
        """
        addition happens on all levels. The addition semantics are:
        0) When a key of 'other' is not found in 'self', the entire
           subtree is inserted. No problem here.
        1) A key of 'other' is found in 'self' as well: Three choices
           here:
           a) Both values are not GrowDicts: Addition is done by catenation
              a[0] = 'rrr' b[0] = 5 then (a+b)[0] = ['rrr',5]
           b) Both values are GrowDicts: Addition is done recursively for the
              values
           c) One value is a GrowDict (or subclassed), the other is not:
              This raises IOError until I can think of a decent solution.

        Neither operand is modified; the result only holds copies.
        """
        res = copy.deepcopy(self)
        for key, value in other.items():
            new_key = copy.deepcopy(key)
            if not res.has_key(new_key):  # insert the entire subtree
                res[new_key] = copy.deepcopy(value)
                continue
            mine = res[new_key]
            mine_is_gd = isinstance(mine, GrowDict)
            theirs_is_gd = isinstance(value, GrowDict)
            if mine_is_gd and theirs_is_gd:
                # both are GDs: the recursive add copies both sides itself
                res[new_key] = mine + value
            elif mine_is_gd or theirs_is_gd:
                raise IOError('merging subtree GDs of different depth')
            else:
                # non-GrowDicts, catenate them; copy so the result does not
                # share mutable values with 'other'
                res[new_key] = [mine, copy.deepcopy(value)]
        return res
def _encode(it):
return str(it)
def gd_save(stream, item):
    """
    Pickle 'item' to 'stream' and close the stream.

    stream -- an open, writable file-like object, or a file name (the file
              is then created/overwritten in binary mode)
    item   -- any picklable object

    The stream is always closed on return, whether it was opened here or
    handed in by the caller, and also when pickling fails.
    """
    try:
        import cPickle as pickle  # C implementation where available
    except ImportError:
        import pickle
    if isinstance(stream, str):  # accepting fileNames as well
        stream = open(stream, 'wb')
    try:
        pickle.Pickler(stream).dump(item)
    finally:
        stream.close()
def gd_load(stream):
    """
    Unpickle and return one object from 'stream'.

    stream -- an open, readable file-like object, or a file name

    A file opened here from a name is closed again before returning; a
    stream handed in by the caller is left open.

    SECURITY: unpickling can execute arbitrary code, only load data from
    sources you trust.
    """
    try:
        import cPickle as pickle  # C implementation where available
    except ImportError:
        import pickle
    opened_here = isinstance(stream, str)
    if opened_here:  # accepting fileNames as well
        # NOTE(review): default (text) mode is kept so existing pickle files
        # written in text mode still load; confirm before switching to 'rb'.
        stream = open(stream)
    try:
        return pickle.Unpickler(stream).load()
    finally:
        if opened_here:
            stream.close()
"""
XML conversion
"""
# Pick an XML backend: 4DOM if it can be imported, otherwise fall back to
# xml.dom.core with a pretty printer stand-in.  Either way the module ends
# up with two names: PP (writes a document to a stream) and
# localCreateDocument (creates an empty document).
try:
    import DOM
    import DOM.Ext
    import DOM.Ext.Factory
    PP = DOM.Ext.PrettyPrint
    localCreateDocument = DOM.Ext.Factory.CreateDocument
except ImportError:
    try:
        from xml.dom.core import createDocument
    except ImportError:
        raise ImportError(
            """Couldn't find any XML support for your Python. Please"""
            """ install 4DOM from http://www.fourthought.com/opentech/projects/4DOM/""")

    def dummyPP(a, b=sys.stdout):
        """
        Stand-in for the 4DOM pretty printer.

        Prints a notice on standard output, then writes a.toxml() to the
        stream b without any pretty printing.
        """
        sys.stdout.write(
            """PrettyPrinting not supported right now. Please install 4DOM from """
            """http://www.fourthought.com/opentech/projects/4DOM/""" + '\n')
        b.write(a.toxml())

    PP = dummyPP
    localCreateDocument = createDocument
def convert(growDict):
    """
    given a GrowDict (or a plain dict), convert it to a DOM document

    The document gets a single root element 'GD'; the entries of growDict
    are inserted below it by _traverse. Returns the new document.
    """
    doc = localCreateDocument()
    root = doc.createElement('GD')
    doc.appendChild(root)
    # the document itself serves as the node factory
    _traverse(root, growDict, doc)
    return doc
def dumpAsXML(aDOM, f=sys.stdout):
    """
    write the document object model aDOM as XML to the stream f, using
    whichever printer the module bound to PP
    """
    printer = PP
    printer(aDOM, f)
def _traverse(node, growDict, factory):
    """
    traverse the growDict hierarchy and insert the appropriate DOM elements

    node     -- DOM node that receives the children
    growDict -- a GrowDict or plain dict to descend into; anything else is
                treated as a leaf value
    factory  -- object providing createElement() and createTextNode()

    Each dictionary item becomes an 'entry' element whose 'key' attribute
    holds the encoded key; a leaf value becomes a text node.
    """
    if isinstance(growDict, (dict, GrowDict)):
        for key, value in growDict.items():
            newNode = factory.createElement("entry")
            newNode.setAttribute("key", _encode(key))
            # fill the entry first, then hook it into the tree
            _traverse(newNode, value, factory)
            node.appendChild(newNode)
    else:
        node.appendChild(factory.createTextNode(_encode(growDict)))
import DateTime
class TimeStamp:
    """
    simple class to implement timestamps consisting of a date and a
    comment

    The date is taken from DateTime.now() when the instance is created.
    TimeStamps compare by date only; the comment is ignored.
    """

    def __init__(self, comment=''):
        self.date = DateTime.now()
        self.comment = comment

    def __str__(self):
        # %s formatting so that a non-string comment does not raise
        return '<TimeStamp: %s %s>' % (self.date, self.comment)

    __repr__ = __str__

    def __cmp__(self, other):
        return cmp(self.date, other.date)

    def toFn(self):
        """
        Return the date as 'YYYY_MM_DD_HH:MM', with month, day, hour and
        minute zero-padded to two digits (presumably for use in file
        names -- note the ':' in the result).
        """
        d = self.date
        return '%s_%s_%s_%s:%s' % (
            str(d.year),
            str(d.month).zfill(2),
            str(d.day).zfill(2),
            str(d.hour).zfill(2),
            str(d.minute).zfill(2),
        )
class TimeStampGrowDict(GrowDict):
    """
    GrowDict that additionally carries a list of TimeStamps, newest first.

    Two instances compare by their newest stamp only: the one stamped
    earlier is the lesser one.
    """

    def __init__(self, comment=''):
        # the creation stamp is the first entry of the history
        self.timeStampL = [TimeStamp(comment)]
        GrowDict.__init__(self)

    def stamp(self, comment):
        """Put a new TimeStamp carrying 'comment' at the front of the history."""
        self.timeStampL[:0] = [TimeStamp(comment)]

    def __cmp__(self, other):
        mine = self.timeStampL[0]
        theirs = other.timeStampL[0]
        return cmp(mine, theirs)

    def pretty(self, f=sys.stdout, indent=0):
        """
        Dump like GrowDict.pretty; at the top level (indent 0) all stamps
        are written first, one per line.
        """
        if indent == 0:
            for entry in self.timeStampL:
                f.write(str(entry) + '\n')
        GrowDict.pretty(self, f, indent)
--
______________________________________________________________________
Lutz Ehrlich http://www.embl-heidelberg.de/~ehrlich
mailto:lutz.ehrlich at embl-heidelberg.de
European Molecular Biology Laboratory phone: +49-6221-387-140
Meyerhofstr. 1 fax : +49-6221-387-517
D-69012 Heidelberg, Germany
More information about the Python-list
mailing list