[XML-SIG] Structural view of XML files?

Dinu C. Gherman gherman@darwin.in-berlin.de
Sun, 05 Nov 2000 23:43:06 +0100


This is a multi-part message in MIME format.
--------------E297EF79130E2B6151467772
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hello,

I'm using a little tool that I found *somewhere* and attached
to this message in a slightly modified version. It is supposed
to display an indented tree view of XML files. And this is
what it does, but only as long as there are no entities in
the files, like '&uuml;'. Sigh...

Does anybody know of a version that handles this XML 'feature'
as well? I'm not exactly very familiar with all the SAX frame-
work stuff... but I can imagine there is some method that I
need to implement or something like this... Or is this a
versioning problem?

Regards,

Dinu

-- 
Dinu C. Gherman
................................................................
"The only possible values [for quality] are 'excellent' and 'in-
sanely excellent', depending on whether lives are at stake or 
not. Otherwise you don't enjoy your work, you don't work well, 
and the project goes down the drain." 
                    (Kent Beck, "Extreme Programming Explained")
--------------E297EF79130E2B6151467772
Content-Type: text/plain; charset=us-ascii;
 name="xmlpp.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="xmlpp.py"

#! /usr/local/bin/python

# $Id: simple_appl.py 0.3 1999/01/19 20:42:17 simon Exp simon $

"""This simple application writes a structured view of the contents of
an XML file. The line number after the opening tag demonstrates the
use of locator methods. The error and fatalError methods can simply
print the exception because the SAXParseException class has a __str__
method."""

from xml.sax import saxexts, saxlib, saxutils
import sys, urllib, string

# Whitespace written once per nesting level in front of every output line.
indentString = "    "


class DocumentHandler(saxlib.DocumentHandler):
    """SAX document handler that prints an indented tree view.

    A start tag is not printed as soon as it is seen.  It is kept in
    self.start_tag until the next event arrives, so that an element
    which ends before any child element or non-blank character data was
    reported can be written as an empty-element tag ("<name/>") rather
    than as a start/end pair.

    Instance attributes:

    start_tag  -- dict describing the start tag that is still
                  unprinted: 'name' is a list holding the element name
                  followed by one 'attr="value"' string per attribute
                  (an empty list means nothing is pending), 'indent' is
                  the leading whitespace and 'line' is the line number
                  taken from the locator (or a false value).
    level      -- nesting depth of the current element; -1 while
                  outside the root element.
    locator    -- the object passed to setDocumentLocator(), or None.
    show_lines -- when true, " (line N)" is appended to start tags.
    """

    def __init__(self, show_lines=0):
        """Create a handler with no pending start tag.

        show_lines -- if true, every printed start tag is followed by
                      " (line N)" whenever a line number was obtained
                      from the locator.  The default (false) keeps the
                      output free of line numbers.
        """
        self.show_lines = show_lines
        # Initialised here as well as by the parser callbacks, so the
        # other methods never depend on startDocument() or
        # setDocumentLocator() having been called first.
        self.locator = None
        self.level = -1  # we are still below the root element
        self.start_tag = self._new_start_tag()

    def _new_start_tag(self):
        "Return a fresh start_tag record meaning 'nothing pending'."
        return {'name' : [], 'indent': '', 'line' : ''}

    def setDocumentLocator(self, locator):
        "Receive an object for locating the origin of SAX document events."
        self.locator = locator

    def startDocument(self):
        "Handle an event for the beginning of a document."
        self.level = -1  # we are still below the root element

    def startElement(self, name, attrs):
        """Handle an event for the beginning of an element.

        name  -- the element name.
        attrs -- attribute collection; read by index through
                 getLength(), getName(i) and getValue(i).

        The new tag is only recorded in self.start_tag; it is printed by
        whichever event comes next.
        """
        self.output_start_tag('start')  # flush the parent's start tag
        self.level = self.level + 1
        self.start_tag['indent'] = indentString * self.level

        parts = [name]
        for i in range(attrs.getLength()):
            parts.append("%s=\"%s\"" % (attrs.getName(i), attrs.getValue(i)))
        self.start_tag['name'] = parts

        # AttributeError covers both "no locator was supplied"
        # (self.locator is None) and a locator without getLineNumber().
        try:
            self.start_tag['line'] = self.locator.getLineNumber()
        except AttributeError:
            self.start_tag['line'] = None

    def endElement(self, name):
        """Handle an event for the end of an element.

        Prints the pending start tag as an empty-element tag if there is
        one; otherwise prints the end tag at the element's own depth.
        """
        if not self.output_start_tag('end'):
            # print() with one parenthesised argument behaves exactly
            # like the print statement.
            print("%s</%s>" % (indentString * self.level, name))
        self.level = self.level - 1

    def characters(self, all_data, start, length):
        """Handle a character data event.

        Only all_data[start:start+length] belongs to this event.  The
        slice is stripped of surrounding whitespace and, if anything
        remains, printed one level deeper than the enclosing element.
        """
        data = string.strip(all_data[start:start+length])
        if data:
            self.output_start_tag('data')  # flush the parent's start tag
            print("%s%s" % (indentString * (self.level + 1), data))

    def output_start_tag(self, where):
        """Print the pending start tag, if there is one.

        where -- 'start' or 'data' (called from startElement or
                 characters: the tag is closed with ">") or 'end'
                 (called from endElement: the tag is closed with "/>",
                 i.e. printed as an empty element).

        Returns 1 if a tag was printed and None if nothing was pending.
        Raises ValueError for any other value of where, but only when a
        tag is actually pending.
        """
        if not self.start_tag['name']:
            return None

        if where in ('start', 'data'):
            closer = ">"
        elif where == 'end':
            closer = "/>"
        else:
            raise ValueError('output_start_tag("start"|"data"|"end")')

        output = "%s<%s%s" % (self.start_tag['indent'],
                              string.join(self.start_tag['name']),
                              closer)
        if self.show_lines and self.start_tag['line']:
            output = "%s (line %s)" % (output, self.start_tag['line'])
        print(output)

        self.start_tag = self._new_start_tag()
        return 1

class ErrorHandler:
    """Report SAX parse problems on stderr.

    Once an instance is registered with a parser, the parser calls one
    of the three public methods below for every problem it finds,
    passing the exception object that describes it.  The three methods
    correspond to the three severities: warnings, (possibly) recoverable
    errors, and unrecoverable errors.  The first two are only written
    out; an unrecoverable error is written out and then aborts by
    raising SGMLSyntaxError."""

    # "global" makes the assignment below bind a module-level name
    # rather than a class attribute.
    global SGMLSyntaxError
    SGMLSyntaxError = "SGML syntax error"

    def _report(self, label, exception):
        "Write a single 'label: exception' line to stderr."
        sys.stderr.write((label + ": %s\n") % exception)

    def warning(self, exception):
        "Handle a warning."
        self._report("Warning", exception)

    def error(self, exception):
        "Handle a recoverable error."
        self._report("Error", exception)

    def fatalError(self, exception):
        "Handle a non-recoverable error."
        self._report("Fatal error", exception)
        raise SGMLSyntaxError

# Pick a specific parser: the xmlproc SAX driver.
from xml.sax.drivers import drv_xmlproc
SAXparser = drv_xmlproc.SAX_XPParser()

SAXparser.setDocumentHandler(DocumentHandler())
# Register the ErrorHandler defined in this file, so that warnings and
# errors found while parsing are reported through its methods and a
# fatal error raises SGMLSyntaxError.
SAXparser.setErrorHandler(ErrorHandler())

if __name__ == '__main__':
    # Usage: xmlpp.py input [output]
    # Without an input argument there is nothing to parse; say so
    # instead of failing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s input [output]\n" % sys.argv[0])
        sys.exit(2)

    # Redirect stdout into a file, if second argument provided.
    path = ''
    f = None
    saved_stdout = sys.stdout
    if len(sys.argv) > 2:
        path = sys.argv[2]
        f = open(path, 'w')
        sys.stdout = f

    # The outer try/finally also runs on the sys.exit(1) paths below
    # (SystemExit passes through it), so the output file is always
    # closed and stdout restored.
    try:
        try:
            SAXparser.parse(sys.argv[1])
        # catch the 'SGMLSyntaxError's raised by our own ErrorHandler
        except SGMLSyntaxError:
            sys.stderr.write("%s; processing aborted\n" % (SGMLSyntaxError))
            sys.exit(1)
        # catch the SAXParseException errors raised by the SAX parser
        except saxlib.SAXParseException:
            # Report the caught exception instance, not the exception class.
            sys.stderr.write("%s; processing aborted\n"
                             % (sys.exc_info()[1],))
            sys.exit(1)
    finally:
        if f is not None:
            sys.stdout = saved_stdout
            f.close()
--------------E297EF79130E2B6151467772--