
Is there a simple example available that demonstrates how to use elements of the new xml.sax package? I'd prefer something that doesn't require me to install anything other than what comes with the Python 2.0 distribution. Thanks, Skip

* Skip Montanaro | | Is there a simple example available that demonstrates how to use | elements of the new xml.sax package? Here are a couple from an internal seminar I recently gave on this. I could make the slides available as well, if there is interest. --Lars M. --- elem_count.py import sys from xml.sax import make_parser, handler class FancyCounter(handler.ContentHandler): def __init__(self): self._elems = 0 self._attrs = 0 self._elem_types = {} self._attr_types = {} def startElement(self, name, attrs): self._elems = self._elems + 1 self._attrs = self._attrs + len(attrs) self._elem_types[name] = self._elem_types.get(name, 0) + 1 for name in attrs.keys(): self._attr_types[name] = self._attr_types.get(name, 0) + 1 def endDocument(self): print "There were", self._elems, "elements." print "There were", self._attrs, "attributes." print "---ELEMENT TYPES" for pair in self._elem_types.items(): print "%20s %d" % pair print "---ATTRIBUTE TYPES" for pair in self._attr_types.items(): print "%20s %d" % pair parser = make_parser() parser.setContentHandler(FancyCounter()) parser.parse(sys.argv[1]) --- roundtrip.py """ A simple demo that reads in an XML document and spits out an equivalent, but not necessarily identical, document. 
""" import sys, string from xml.sax import saxutils, handler, make_parser # --- The ContentHandler class ContentGenerator(handler.ContentHandler): def __init__(self, out = sys.stdout): handler.ContentHandler.__init__(self) self._out = out # ContentHandler methods def startDocument(self): self._out.write('<?xml version="1.0" encoding="iso-8859-1"?>\n') def startElement(self, name, attrs): self._out.write('<' + name) for (name, value) in attrs.items(): self._out.write(' %s="%s"' % (name, saxutils.escape(value))) self._out.write('>') def endElement(self, name): self._out.write('</%s>' % name) def characters(self, content): self._out.write(saxutils.escape(content)) def ignorableWhitespace(self, content): self._out.write(content) def processingInstruction(self, target, data): self._out.write('<?%s %s?>' % (target, data)) # --- The main program parser = make_parser() parser.setContentHandler(ContentGenerator()) parser.parse(sys.argv[1]) --- rss2html.py import sys from xml.sax import make_parser, handler # --- Templates top = \ """ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <HTML> <HEAD> <TITLE>%s</TITLE> </HEAD> <BODY> <H1>%s</H1> """ bottom = \ """ </ul> <HR> <ADDRESS> Converted to HTML by sax_rss2html.py. 
</ADDRESS> </BODY> </HTML> """ # --- The ContentHandler class RSSHandler(handler.ContentHandler): def __init__(self, out = sys.stdout): handler.ContentHandler.__init__(self) self._out = out self._text = "" self._parent = None self._list_started = 0 self._title = None self._link = None self._descr = "" # ContentHandler methods def startElement(self, name, attrs): if name == "channel" or name == "image" or name == "item": self._parent = name self._text = "" def endElement(self, name): if self._parent == "channel": if name == "title": self._out.write(top % (self._text, self._text)) elif name == "description": self._out.write("<p>%s</p>\n" % self._text) elif self._parent == "item": if name == "title": self._title = self._text elif name == "link": self._link = self._text elif name == "description": self._descr = self._text elif name == "item": if not self._list_started: self._out.write("<ul>\n") self._list_started = 1 self._out.write(' <li><a href="%s">%s</a> %s\n' % (self._link, self._title, self._descr)) self._title = None self._link = None self._descr = "" if name == "rss": self._out.write(bottom) def characters(self, content): self._text = self._text + content # --- Main program parser = make_parser() parser.setContentHandler(RSSHandler()) parser.parse(sys.argv[1])

* Lars Marius Garshol | | Here are a couple from an internal seminar I recently gave on this. | I could make the slides available as well, if there is interest. * Fred L. Drake, Jr. | | Could we include this in Demos/xml/? Sure. Do whatever you like with them. --Lars M.

* Skip Montanaro | | Is there a simple example available that demonstrates how to use | elements of the new xml.sax package? Here are a couple from an internal seminar I recently gave on this. I could make the slides available as well, if there is interest. --Lars M. --- elem_count.py import sys from xml.sax import make_parser, handler class FancyCounter(handler.ContentHandler): def __init__(self): self._elems = 0 self._attrs = 0 self._elem_types = {} self._attr_types = {} def startElement(self, name, attrs): self._elems = self._elems + 1 self._attrs = self._attrs + len(attrs) self._elem_types[name] = self._elem_types.get(name, 0) + 1 for name in attrs.keys(): self._attr_types[name] = self._attr_types.get(name, 0) + 1 def endDocument(self): print "There were", self._elems, "elements." print "There were", self._attrs, "attributes." print "---ELEMENT TYPES" for pair in self._elem_types.items(): print "%20s %d" % pair print "---ATTRIBUTE TYPES" for pair in self._attr_types.items(): print "%20s %d" % pair parser = make_parser() parser.setContentHandler(FancyCounter()) parser.parse(sys.argv[1]) --- roundtrip.py """ A simple demo that reads in an XML document and spits out an equivalent, but not necessarily identical, document. 
""" import sys, string from xml.sax import saxutils, handler, make_parser # --- The ContentHandler class ContentGenerator(handler.ContentHandler): def __init__(self, out = sys.stdout): handler.ContentHandler.__init__(self) self._out = out # ContentHandler methods def startDocument(self): self._out.write('<?xml version="1.0" encoding="iso-8859-1"?>\n') def startElement(self, name, attrs): self._out.write('<' + name) for (name, value) in attrs.items(): self._out.write(' %s="%s"' % (name, saxutils.escape(value))) self._out.write('>') def endElement(self, name): self._out.write('</%s>' % name) def characters(self, content): self._out.write(saxutils.escape(content)) def ignorableWhitespace(self, content): self._out.write(content) def processingInstruction(self, target, data): self._out.write('<?%s %s?>' % (target, data)) # --- The main program parser = make_parser() parser.setContentHandler(ContentGenerator()) parser.parse(sys.argv[1]) --- rss2html.py import sys from xml.sax import make_parser, handler # --- Templates top = \ """ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <HTML> <HEAD> <TITLE>%s</TITLE> </HEAD> <BODY> <H1>%s</H1> """ bottom = \ """ </ul> <HR> <ADDRESS> Converted to HTML by sax_rss2html.py. 
</ADDRESS> </BODY> </HTML> """ # --- The ContentHandler class RSSHandler(handler.ContentHandler): def __init__(self, out = sys.stdout): handler.ContentHandler.__init__(self) self._out = out self._text = "" self._parent = None self._list_started = 0 self._title = None self._link = None self._descr = "" # ContentHandler methods def startElement(self, name, attrs): if name == "channel" or name == "image" or name == "item": self._parent = name self._text = "" def endElement(self, name): if self._parent == "channel": if name == "title": self._out.write(top % (self._text, self._text)) elif name == "description": self._out.write("<p>%s</p>\n" % self._text) elif self._parent == "item": if name == "title": self._title = self._text elif name == "link": self._link = self._text elif name == "description": self._descr = self._text elif name == "item": if not self._list_started: self._out.write("<ul>\n") self._list_started = 1 self._out.write(' <li><a href="%s">%s</a> %s\n' % (self._link, self._title, self._descr)) self._title = None self._link = None self._descr = "" if name == "rss": self._out.write(bottom) def characters(self, content): self._text = self._text + content # --- Main program parser = make_parser() parser.setContentHandler(RSSHandler()) parser.parse(sys.argv[1])

* Lars Marius Garshol | | Here are a couple from an internal seminar I recently gave on this. | I could make the slides available as well, if there is interest. * Fred L. Drake, Jr. | | Could we include this in Demos/xml/? Sure. Do whatever you like with them. --Lars M.
participants (3)
-
Fred L. Drake, Jr.
-
Lars Marius Garshol
-
Skip Montanaro