[XML-SIG] Structural view of XML files?
Dinu C. Gherman
gherman@darwin.in-berlin.de
Sun, 05 Nov 2000 23:43:06 +0100
This is a multi-part message in MIME format.
--------------E297EF79130E2B6151467772
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Hello,
I'm using a little tool that I found *somewhere* and attached
to this message in a slightly modified version. It is supposed
to display an indented tree view of XML files. And this is
what it does, but only as long as there are no entities in
the files, like 'ü'. Sigh...
Does anybody know of a version that handles this XML 'feature'
as well? I'm not very familiar with all the SAX framework
stuff... but I can imagine there is some method that I need to
implement or something like this... Or is this a versioning
problem?
Regards,
Dinu
--
Dinu C. Gherman
................................................................
"The only possible values [for quality] are 'excellent' and 'in-
sanely excellent', depending on whether lives are at stake or
not. Otherwise you don't enjoy your work, you don't work well,
and the project goes down the drain."
(Kent Beck, "Extreme Programming Explained")
--------------E297EF79130E2B6151467772
Content-Type: text/plain; charset=us-ascii;
name="xmlpp.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="xmlpp.py"
#! /usr/local/bin/python
# $Id: simple_appl.py 0.3 1999/01/19 20:42:17 simon Exp simon $
"""Write a structured, indented view of the contents of an XML file.

The document handler records the line number of each opening tag through
the locator methods, although this version does not print it.  The error
and fatalError methods can simply print the exception because the
SAXParseException class has a __str__ method.
"""
from xml.sax import saxexts, saxlib, saxutils
import sys, urllib, string

# String repeated once per nesting level to indent the printed tree.
indentString = " "
class DocumentHandler(saxlib.DocumentHandler):
"""Handle general document events. This is the main client
interface for SAX: it contains callbacks for the most important
document events, such as the start and end of elements. You need
to create an object that implements this interface, and then
register it with the Parser. If you do not want to implement
the entire interface, you can derive a class from HandlerBase,
which implements the default functionality. You can find the
location of any document event using the Locator interface
supplied by setDocumentLocator()."""
def __init__(self):
self.start_tag = {'name' : [], 'indent': '', 'line' : ''}
def setDocumentLocator(self, locator):
"Receive an object for locating the origin of SAX document events."
self.locator = locator
def startDocument(self):
"Handle an event for the beginning of a document."
self.level = -1 # we are still below the root element
pass
## try:
## print "Document: %s" % (self.locator.getSystemId())
## except AttributeError:
## pass
def startElement(self, name, attrs):
"Handle an event for the beginning of an element."
self.output_start_tag('start') # output start element of parent
self.level = self.level + 1
self.start_tag['indent'] = indentString * self.level
self.start_tag['name'] = [name]
# attrs is an AttributeMap object
# that implements the AttributeList methods.
for i in range(attrs.getLength()):
self.start_tag['name'].append("%s=\"%s\"" % (attrs.getName(i),attrs.getValue(i)))
try:
self.start_tag['line'] = self.locator.getLineNumber()
except AttributeError:
self.start_tag['line'] = None
def endElement(self, name):
"Handle an event for the end of an element."
# output start tag (empty element) or print end tag
if not self.output_start_tag('end'):
print "%s</%s>" % (indentString * self.level, name)
self.level = self.level - 1
def characters(self, all_data, start, length):
"Handle a character data event."
# all_data contains the whole file;
# start:start+length is this part's slice
data = string.strip(all_data[start:start+length])
if data:
self.output_start_tag('data') # output start element of parent
print "%s%s" % (indentString * (self.level + 1), data)
def output_start_tag (self, where):
"""startElement puts its data in self.start_tag;
startElement, characters, and endElement call output_start_tag;
when called by startElement or characters
and the start tag (of the parent) is still unprinted:
print start tag, return 1;
else return None;
when called by endElement
and the start tag is still unprinted:
print empty element tag, return 1;
else return None"""
if self.start_tag['name']: # if still unprinted
if where in ['start', 'data']:
STAGC = ">"
elif where in ['end']:
STAGC = "/>"
else:
raise ValueError, 'output_start_tag("start"|"data"|"end")'
output = "%s<%s%s" % \
(self.start_tag['indent'],
string.join(self.start_tag['name']), STAGC)
if self.start_tag['line']:
#output = "%s (line %s)" % (output, self.start_tag['line'])
output = "%s" % output
print output
self.start_tag = {'name' : [], 'indent': '', 'line' : ''}
return 1
else:
return None
class ErrorHandler:
    """Report SAX warnings and errors on standard error.

    An object of this class is meant to be registered with a parser,
    which then calls warning(), error() or fatalError() with a
    SAXParseException as the only argument.  Warnings and recoverable
    errors are only reported; a fatal error is reported and then aborts
    processing by raising SGMLSyntaxError.
    """

    # Declared global so that the assignment below binds a module-level
    # name instead of a class attribute.
    global SGMLSyntaxError
    SGMLSyntaxError = "SGML syntax error"

    def _report(self, template, exception):
        "Fill exception into template and write the result to stderr."
        sys.stderr.write(template % exception)

    def warning(self, exception):
        "Handle a warning."
        self._report("Warning: %s\n", exception)

    def error(self, exception):
        "Handle a recoverable error."
        self._report("Error: %s\n", exception)

    def fatalError(self, exception):
        "Handle a non-recoverable error."
        self._report("Fatal error: %s\n", exception)
        raise SGMLSyntaxError
# pick a specific parser
from xml.sax.drivers import drv_xmlproc
SAXparser=drv_xmlproc.SAX_XPParser()
SAXparser.setDocumentHandler(DocumentHandler())
if __name__ == '__main__':
# Redirect stdout into a file, if second argument provided.
path = ''
try:
path = sys.argv[2]
f = open(path, 'w')
sys.stdout = f
except IndexError:
pass
try:
SAXparser.parse(sys.argv[1])
# catch the 'SGMLSyntaxError's raised by our own ErrorHandler
except SGMLSyntaxError:
sys.stderr.write("%s; processing aborted\n" % (SGMLSyntaxError))
sys.exit(1)
# catch the SAXParseException errors raised by the SAX parser
# and passed on by ErrorRaiser
except saxlib.SAXParseException:
sys.stderr.write("%s; processing aborted\n"
% (saxlib.SAXParseException))
sys.exit(1)
if path:
f.close()
--------------E297EF79130E2B6151467772--