[XML-SIG] DOM toxml() method

Christian Tismer tismer@appliedbiometrics.com
Sun, 23 May 1999 18:53:19 +0200


This is a multi-part message in MIME format.
--------------71AC2DF7ECB30B2725A67297
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit



Jeffrey Chang wrote:
> 
> How are people generating nicely-formatted XML from DOM trees?
> 
> Has anyone written a function that will take a DOM tree and will insert
> whitespace, where necessary, to generate a pretty XML document?  It would
> be almost the reverse of utils.strip_whitespace.

Well, I did a little on this a while ago.
But don't ask me about its current state,
had a lot of other projects meanwhile...

ciao - chris

-- 
Christian Tismer             :^)   <mailto:tismer@appliedbiometrics.com>
Applied Biometrics GmbH      :     Have a break! Take a ride on Python's
Kaiserin-Augusta-Allee 101   :    *Starship* http://starship.python.net
10553 Berlin                 :     PGP key -> http://wwwkeys.pgp.net
PGP Fingerprint       E182 71C7 1A9D 66E9 9D15  D3CC D4D7 93E2 1FAE F6DF
     we're tired of banana software - shipped green, ripens at home
--------------71AC2DF7ECB30B2725A67297
Content-Type: text/plain; charset=us-ascii;
 name="indenter.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="indenter.py"


# pretty printer for SAX
# CT990122
# based upon the saxutils.Canonizer code

# V.0.2 support for sgmlop which doesn't give ignorableWhitespace info

from xml.sax import saxexts, saxlib, saxutils

import string, sys

class Indenter(saxlib.HandlerBase):
    """A SAX document handler that produces indented XML output.

    Every start tag is written on a new line, indented by ``indent``
    spaces per nesting level.  Character data is collected in a buffer
    and written, stripped of surrounding whitespace, just before the
    next piece of markup.  An element without child elements is closed
    on the same line as its start tag; an element with child elements
    gets its end tag on a line of its own.

    Whitespace reported through ignorableWhitespace() is dropped.  The
    buffer is stripped as well because, per the note in this file's
    header, the sgmlop driver does not report ignorable whitespace
    separately.
    """

    def __init__(self, writer=sys.stdout, indent=2):
        """Set up the handler.

        writer -- object with a write(str) method that receives the output
        indent -- number of spaces added per nesting level
        """
        # Nesting depth of the element content currently being processed.
        self.elem_level = 0
        self.writer = writer
        self.indent = indent
        # Depth of the most recently opened element, or -1 once an element
        # has been closed inline; lets endElement() decide whether the end
        # tag goes on the start tag's line or on a new, indented line.
        self.last_level = -1
        # Lazy buffer for whitespace stripping.
        self.buffer = ""

    def processingInstruction(self, target, remainder):
        """Write a processing instruction followed by a newline."""
        # Flush pending character data first; otherwise text that precedes
        # the PI inside an element would be written after it.
        self.write_buffer()
        self.writer.write("<?" + target + " " + remainder + "?>\n")

    def startElement(self, name, amap):
        """Write the start tag on a new, indented line.

        Attributes are written sorted by name, with their values escaped.
        """
        self.write_buffer()
        pad = " " * (self.indent * self.elem_level)
        self.writer.write("\n" + pad + "<" + name)

        for a_name in sorted(amap.keys()):
            self.writer.write(" " + a_name + "=\"")
            self.write_data(amap[a_name], 1)
            self.writer.write("\"")
        self.writer.write(">")
        self.last_level = self.elem_level
        self.elem_level = self.elem_level + 1

    def endElement(self, name):
        """Write the end tag, inline or on a new line as appropriate."""
        self.write_buffer()
        self.elem_level = self.elem_level - 1
        if self.last_level < self.elem_level:
            # Child elements were written since this element was opened:
            # put the end tag on its own line.
            pad = " " * (self.indent * self.elem_level)
            self.writer.write("\n" + pad + "</" + name + ">")
        else:
            # No child elements: close on the same line as the start tag.
            self.writer.write("</" + name + ">")
            self.last_level = -1

    def ignorableWhitespace(self, data, start_ix, length):
        """Drop ignorable whitespace; the indenter generates its own."""
        pass

    def characters(self, data, start_ix, length):
        """Buffer character data; data outside any element is ignored."""
        if self.elem_level > 0:
            self.put_buffer(data[start_ix:start_ix + length])

    def put_buffer(self, txt):
        """Append txt to the pending character data."""
        self.buffer = self.buffer + txt

    def write_buffer(self):
        """Write the pending character data, stripped, and clear the buffer."""
        if self.buffer:
            self.write_data(self.buffer.strip())
            self.buffer = ""

    def write_data(self, data, quotes=0):
        """Write data to the writer with XML special characters escaped.

        "&" is replaced first so the entities produced for the other
        characters are not escaped a second time.  Double quotes are
        escaped only when quotes is true (used for attribute values).
        """
        data = data.replace("&", "&amp;")
        data = data.replace("<", "&lt;")
        if quotes:
            data = data.replace("\"", "&quot;")
        data = data.replace(">", "&gt;")
        self.writer.write(data)

    def endDocument(self):
        """Flush pending text and end the output with a newline.

        The writer is deliberately not closed here.
        """
        self.write_buffer()
        self.writer.write("\n")


"""
Example to format a DOM:

>>> import cStringIO
>>> i=Indenter()
>>> p=saxexts.make_parser()
>>> p.setErrorHandler(saxutils.ErrorPrinter())
>>> p.setDocumentHandler(i)
>>> p.parseFile(cStringIO.StringIO(dom.toxml()))

Example to format a file to a file, with sgmlop as parser:

>>> f=open(r'd:\tmp\test.xml',"w")
>>> i=Indenter(f)
>>> p=saxexts.make_parser("xml.sax.drivers.drv_sgmlop")
>>> p.setErrorHandler(saxutils.ErrorPrinter())
>>> p.setDocumentHandler(i)
>>> p.parseFile(r"h:\pns\projekte\srz\roteli\birgit\sgml\praep.sgm.umgebrochen.xml")
>>> f.close()
"""

# speed comparison:
# a very minimalistic parser which just finds tags.

def indent(infile, outfile=sys.stdout, indent=2):
    """Re-indent XML text using a minimal tag scanner (no real parsing).

    infile  -- the XML text itself, or an object with a read() method
               that returns it
    outfile -- object with a write(str) method that receives the output
    indent  -- number of spaces added per nesting level

    The text is cut at every "<"; in each piece everything up to the
    last ">" is taken as the tag and the remainder as the text that
    follows it.  Each start tag is written on a new, indented line.  An
    end tag directly following its start tag (apart from text) stays on
    the same line; otherwise it is written on a line of its own.  Text
    between tags is stripped of surrounding whitespace; text after the
    last tag is written unchanged.

    Self-closing tags ("<x/>"), processing instructions ("<?x?>") and
    "<!...>" constructs (comments, declarations) do not open a nesting
    level.  A "<" that is not followed by any ">" before the next "<"
    is kept as literal text.
    """
    if hasattr(infile, "read"):
        txt = infile.read()
    else:
        txt = infile
    segments = txt.split("<")
    level = 0
    # Level of the most recently opened element; -1 when the last thing
    # written was not a still-open start tag.
    lastl = -1
    txt = segments[0].strip()
    for segment in segments[1:]:
        if ">" not in segment:
            # Stray "<" with no closing ">": not a tag, keep it as text.
            txt = txt + "<" + segment
            continue
        # Cut at the LAST ">" so that a ">" inside an attribute value
        # stays part of the tag.
        tag, text = segment.rsplit(">", 1)
        if tag[:1] != "/":  # assume start tag
            pad = " " * (indent * level)
            outfile.write(txt.strip() + "\n" + pad + "<" + tag + ">")
            if tag[:1] == "!" or tag[-1:] in ("/", "?"):
                # Empty tag, PI, comment or declaration: nothing is left
                # open, so the enclosing end tag must go on its own line.
                lastl = -1
            else:
                lastl = level
                level = level + 1
        else:
            outfile.write(txt.strip())
            level = level - 1
            if lastl < level:
                pad = " " * (indent * level)
                outfile.write("\n" + pad + "<" + tag + ">")
            else:
                outfile.write("<" + tag + ">")
                lastl = -1
        txt = text
    outfile.write(txt)
    

--------------71AC2DF7ECB30B2725A67297--