Multidimensional Dictionaries: Possible?

Mon Jan 31 11:44:33 EST 2000

G'day,

please check out the code below. Might be what you're looking for.

Cheers,
	Lutz

"""
$Id: GrowDict.py,v 1.7 1999/11/16 17:23:08 ehrlich Exp $

This class implements a growing dictionary that you can index
however you want. It effectively catches all KeyError occurrences.
Some features include saving/loading from a stream (file or url or whatever
looks like a file in Python) and dumping the thing as XML. You can add two dictionaries,
thereby merging their respective trees.
A TimeStampGrowDict can keep a list of modification timestamps and can be sorted according 
to the latest timestamp date.

These classes can come in handy when you need a hierarchical container for your
data but you don't know the layout beforehand. You could even emulate a file system ;-)

Usage:

import GrowDict
reload(GrowDict)
gd = GrowDict.GrowDict()
gd[0]['ppp'][3455]=  [1,2,3]
gd['comment'] = 'test this'

gd.pretty()
out = open('gd.pretty','w')
gd.pretty(out)
out.close()

GrowDict.gd_save(open('gd1.pickle','w'),gd)
gd2= GrowDict.gd_load(open('gd1.pickle'))

gd3 = GrowDict.GrowDict()
gd3[100]['ff'] = 'test data'

# merging of dicts
gd4 = gd2 + gd3

# dumping as XML
gd4.toxml()
f = open('gd.xml','w')
gd.toxml(f)
f.close()

# dicts with timestamps 
t1 = GrowDict.TimeStampGrowDict('first timestamp text')
t1[0]['gaggag'] = [1,2,43]
t1.stamp('added list data')

t2 = GrowDict.TimeStampGrowDict('comparison data')
t2['data'] = 'whatsoever'

if t1 > t2: print 't1 has been modified later than t2'

"""
import sys
import copy
import DictWithDefault

class GrowDict(DictWithDefault.DictWithDefault):
    """
    Dictionary that grows on demand.

    Looking up a missing key never raises KeyError: a new, empty
    instance of ``self.defaultClass`` is stored under that key and
    returned, so ``gd[0]['ppp'][3455] = value`` works without creating
    the intermediate levels first.

    Attributes set by __init__:
      data         -- plain dictionary holding the entries
      defaultClass -- callable used to create the value for a missing key

    Methods not defined here (items(), item assignment and deletion)
    are expected to come from DictWithDefault.
    """

    def __init__(self):
        self.data = {}
        self.defaultClass = GrowDict

    def __getitem__(self, key):
        """
        Return the value stored under 'key'; a missing key is filled
        with a fresh self.defaultClass() instance first.
        """
        try:
            return self.data[key]
        except KeyError:
            item = self.defaultClass()
            self.data[key] = item
            return item

    def clean(self):
        """
        Delete empty GrowDict leaves.

        Children are cleaned before they are tested, so a branch that
        only contained empty leaves is removed in the same pass.
        """
        # iterate over a snapshot: entries are deleted while looping
        for kk, vv in list(self.items()):
            if isinstance(vv, GrowDict):
                vv.clean()
                if len(vv.items()) == 0:
                    del self[kk]

    def depth(self):
        """
        Return how many layers I contain: 0 when empty, otherwise the
        depth of the deepest branch (for a balanced tree every branch
        gives the same answer).
        """
        deepest = 0
        for kk, vv in self.items():
            if isinstance(vv, GrowDict):
                level = vv.depth() + 1
            else:
                level = 1
            if level > deepest:
                deepest = level
        return deepest

    def pretty(self, f=sys.stdout, indent=0):
        """
        Write an indented listing to 'f': each key on its own line,
        followed one tab level deeper by its value (nested GrowDicts
        are listed recursively).
        """
        for kk, vv in self.items():
            f.write('\t' * indent + _encode(kk) + '\n')
            if isinstance(vv, GrowDict):
                vv.pretty(f, indent + 1)
            else:
                f.write('\t' * (indent + 1) + _encode(vv) + '\n')

    def toxml(self, f=sys.stdout):
        """
        Convert myself to a DOM document, keep it in self.doc and dump
        it as XML to 'f'.
        """
        self.doc = convert(self)
        dumpAsXML(self.doc, f)

    def __deepcopy__(self, memo):
        """
        Return a new instance of my own class holding deep copies of
        all keys and values. Only the entries are copied; the new
        instance is otherwise whatever self.__class__() creates.
        """
        res = self.__class__()
        # register the copy first so that references back to 'self'
        # met further down resolve to 'res' instead of recursing forever
        memo[id(self)] = res
        for kk, vv in self.items():
            res[copy.deepcopy(kk, memo)] = copy.deepcopy(vv, memo)
        return res

    def __add__(self, other):
        """
        addition happens on all levels. The addition semantics are:

        0) When a key of 'other' is not found in 'self', the entire
        subtree is inserted. No problem here.

        1) A key of 'other' is found in 'self' as well: Three choices
        here:

          a) Both values are not GrowDicts: Addition is done by catenation
          a[0] = 'rrr'  b[0] = 5 then (a+b)[0] = ['rrr',5]

          b) Both values are GrowDicts: Addition is done recursively for the
          values

          c) One value is a GrowDict (or subclassed), the other is not:
          This raises IOError until I can think of a decent solution.

        Neither operand is modified; everything in the result is a
        deep copy.
        """
        res = copy.deepcopy(self)
        for kk, vv in other.items():
            newkk = copy.deepcopy(kk)
            # test res.data directly: indexing 'res' with a missing key
            # would create the entry as a side effect
            if newkk not in res.data:
                res[newkk] = copy.deepcopy(vv)  # insert the entire subtree
                continue

            mine = res[newkk]
            resGD = isinstance(mine, GrowDict)
            othGD = isinstance(vv, GrowDict)
            if resGD != othGD:
                # IOError is kept because existing callers may catch it
                raise IOError('merging subtree GDs of different depth')
            if resGD:
                # the recursive addition deep-copies both operands itself
                res[newkk] = mine + vv
            else:
                # non-GrowDicts, catenate them
                res[newkk] = [mine, copy.deepcopy(vv)]

        return res

def _encode(it):
    return str(it)

def gd_save(stream, item):
    """
    Pickle 'item' to 'stream' and close the stream.

    stream -- an open, writable file-like object, or a file name
              (the file is then opened for binary writing)
    item   -- the object to save

    The stream is always closed, even when pickling fails.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    if isinstance(stream, str):  # accepting fileNames as well
        stream = open(stream, 'wb')
    try:
        pickle.Pickler(stream).dump(item)
    finally:
        stream.close()

def gd_load(stream):
    """
    Load and return one pickled object from 'stream'.

    stream -- an open, readable file-like object, or a file name
              (the file is then opened for binary reading and closed
              again after loading)

    A stream passed in by the caller is left open.

    WARNING: unpickling can execute arbitrary code; only load data
    from sources you trust.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    openedHere = isinstance(stream, str)  # accepting fileNames as well
    if openedHere:
        stream = open(stream, 'rb')
    try:
        return pickle.Unpickler(stream).load()
    finally:
        if openedHere:
            stream.close()

"""
XML conversion
"""
# Pick a DOM implementation. Whatever is found, two names are bound:
#   localCreateDocument() -- returns a new, empty DOM document
#   PP(document, stream)  -- writes the document as XML to the stream
# Preference order: 4DOM, then xml.dom.core, then the standard
# library's xml.dom.minidom.
try:
    import DOM
    import DOM.Ext
    import DOM.Ext.Factory

    PP = DOM.Ext.PrettyPrint
    localCreateDocument = DOM.Ext.Factory.CreateDocument

except ImportError:

    try:
        from xml.dom.core import createDocument
    except ImportError:
        try:
            # calling the Document class gives an empty document
            from xml.dom.minidom import Document as createDocument
        except ImportError:
            raise ImportError(
                """Couldn't find any XML support for your Python. Please"""
                """ install 4DOM from http://www.fourthought.com/opentech/projects/4DOM/""")

        def minidomPP(a, b=sys.stdout):
            """Write minidom document 'a', pretty-printed, to stream 'b'."""
            b.write(a.toprettyxml())
        PP = minidomPP
    else:
        def dummyPP(a, b=sys.stdout):
            """Write document 'a' to stream 'b' without pretty-printing."""
            # the notice goes to stderr so that it cannot end up inside
            # the XML when 'b' is standard output
            sys.stderr.write(
                """PrettyPrinting not supported right now. Please install 4DOM from """
                """http://www.fourthought.com/opentech/projects/4DOM/""" + '\n')
            b.write(a.toxml())
        PP = dummyPP
    localCreateDocument = createDocument

def convert(growDict):
    """
    given a GrowDict (or a plain dictionary), convert it to a DOM

    Returns a new document whose root element is 'GD'; the content of
    'growDict' is inserted below it by _traverse().
    """
    doc = localCreateDocument()
    root = doc.createElement('GD')
    doc.appendChild(root)
    # the document doubles as the factory for new elements and text nodes
    _traverse(root, growDict, doc)
    return doc

def dumpAsXML(aDOM, f=sys.stdout):
    """
    Write the document object model 'aDOM' as XML to the stream 'f'
    (standard output unless given), using the module's printer PP.
    """
    PP(aDOM, f)

def _traverse(node, growDict, factory):
    """
    traverse the growDict hierarchy and insert the appropriate DOM elements

    node     -- DOM node that receives the new children
    growDict -- a GrowDict or dictionary to descend into; anything else
                is treated as a leaf value
    factory  -- object providing createElement() and createTextNode()

    Every dictionary entry becomes an <entry key="..."> element holding
    the converted value; a leaf becomes a text node with its string form.
    """
    if isinstance(growDict, (dict, GrowDict)):
        for key, value in growDict.items():
            newNode = factory.createElement("entry")
            newNode.setAttribute("key", _encode(key))

            _traverse(newNode, value, factory)
            node.appendChild(newNode)
    else:
        newNode = factory.createTextNode(_encode(growDict))
        node.appendChild(newNode)

import DateTime
class TimeStamp:
    """
    simple class to implement timestamps consisting of a date and a
    comment

    Attributes:
      date    -- DateTime.now() at creation time
      comment -- whatever was passed to the constructor
    """
    def __init__(self, comment=''):
        self.date = DateTime.now()
        self.comment = comment

    def __str__(self):
        """Return '<TimeStamp: DATE COMMENT>'; the comment may be any object."""
        # %-formatting instead of '+': a non-string comment must not
        # make str()/repr() raise TypeError
        return '<TimeStamp: %s %s>' % (self.date, self.comment)

    __repr__ = __str__

    def __cmp__(self, other):
        """Order timestamps by date; the comment is ignored."""
        return cmp(self.date, other.date)

    def toFn(self):
        """
        Return the date as 'YEAR_MM_DD_HH:MM'; month, day, hour and
        minute are zero-padded to two digits, the year is not padded.
        """
        # NOTE(review): the name suggests use in file names -- the ':'
        # is not allowed in file names on every platform; confirm
        d = self.date
        month = str(d.month).zfill(2)
        day = str(d.day).zfill(2)
        hour = str(d.hour).zfill(2)
        minute = str(d.minute).zfill(2)

        return '%s_%s_%s_%s:%s' % (d.year, month, day, hour, minute)

class TimeStampGrowDict(GrowDict):
    """
    GrowDict that also keeps a list of TimeStamps, newest first, in
    self.timeStampL.

    Instances compare by their newest timestamp: one is less than
    another if its latest timestamp is earlier.
    """

    def __init__(self, comment=''):
        GrowDict.__init__(self)
        firstStamp = TimeStamp(comment)
        self.timeStampL = [firstStamp]

    def stamp(self, comment):
        """Record a new TimeStamp with 'comment' as the newest one."""
        newest = TimeStamp(comment)
        self.timeStampL.insert(0, newest)

    def __cmp__(self, other):
        """Compare by the newest timestamp of each instance."""
        myLatest = self.timeStampL[0]
        otherLatest = other.timeStampL[0]
        return cmp(myLatest, otherLatest)

    def pretty(self, f=sys.stdout, indent=0):
        """
        Like GrowDict.pretty(), but at indent 0 all timestamps are
        written first, one per line.
        """
        atTopLevel = (indent == 0)
        if atTopLevel:
            for ts in self.timeStampL:
                line = str(ts) + '\n'
                f.write(line)
        GrowDict.pretty(self, f, indent)

-- 
______________________________________________________________________

Lutz Ehrlich		 	http://www.embl-heidelberg.de/~ehrlich
				mailto:lutz.ehrlich at embl-heidelberg.de

European Molecular Biology Laboratory		phone: +49-6221-387-140
Meyerhofstr. 1					fax  : +49-6221-387-517
D-69012 Heidelberg, Germany