# XML Toolkit
# Copyright (C) 2005 Petko Petkov (GNUCITIZEN) ppetkov@gnucitizen.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA


def _findspace(string):
    """
    _findspace(string) -> index

    Return the index of the first whitespace character in string, or
    len(string) when it contains none.
    """

    for index, char in enumerate(string):
        if char.isspace():
            return index

    return len(string)


# XML PARSER

class Parser(object):
    """
    Parser is responsible for parsing xml content. All methods are
    classmethods by design. This means that there is no need to instantiate
    this class.

    This is a lightweight, non-validating splitter: it does not check
    well-formedness and it does not cope with a literal '>' inside an
    attribute value.
    """

    # UNESCAPE FUNCTIONS

    @classmethod
    def unescape(cls, string, entities=None):
        """
        unescape(string, entities = None) -> unescaped_string

        Replace xml entities with their string representation. This method
        is a reverse of the escape method.

        The optional entities dictionary maps a plain string to the entity
        that represents it (the same layout Composer.escape accepts).
        """

        string = string.replace('&gt;', '>')
        string = string.replace('&lt;', '<')

        if entities:
            for plain, entity in entities.items():
                string = string.replace(entity, plain)

        # '&amp;' is decoded last: decoding it earlier would turn an escaped
        # '&amp;quot;' into '&quot;' and then wrongly into '"'.
        return string.replace('&amp;', '&')

    @classmethod
    def unquote(cls, string, entities=None):
        """
        unquote(string, entities = None) -> unquoted_string

        Remove the surrounding quotes from an attribute value and replace
        xml entities with their string representation. This method is a
        reverse of the quote method.
        """

        table = {}

        if entities:
            table.update(entities)

        table['"'] = '&quot;'
        table["'"] = '&apos;'

        string = string.strip()

        if len(string) >= 2 and string[0] in '"\'' and string[-1] == string[0]:
            # balanced quotes: drop exactly one character from each end
            string = string[1:-1]
        elif string.startswith('"'):
            string = string.strip('"')
        else:
            string = string.strip("'")

        return cls.unescape(string, table)

    # SPLIT FUNCTIONS

    @classmethod
    def splitdtd(cls, dtdstring):
        """
        splitdtd(dtdstring) -> dtdstring

        TODO: to be implemented. The declaration is returned unchanged.
        """

        return dtdstring

    @classmethod
    def splitcdata(cls, cdatastring):
        """
        splitcdata(cdatastring) -> string

        Remove the XML CDATA encapsulation ('<![CDATA[' and ']]>').
        """

        return cdatastring[9:-3]

    @classmethod
    def splitcomment(cls, commentstring):
        """
        splitcomment(commentstring) -> string

        Remove the XML Comment encapsulation ('<!--' and '-->').
        """

        return commentstring[4:-3]

    @classmethod
    def splitpi(cls, pistring):
        """
        splitpi(pistring) -> target, content

        Remove the XML Process Instruction encapsulation ('<?' and '?>').
        The content keeps the whitespace that separated it from the target.
        When the instruction has no content, the content is an empty string.
        """

        body = pistring[2:-2]
        space = _findspace(body)

        return body[:space], body[space:]

    @classmethod
    def splitattributes(cls, attributestring):
        """
        splitattributes(attributestring) -> dict

        Remove the XML Attribute encapsulation and return a dictionary that
        maps attribute names to their corresponding (unquoted) values.

        Raises ValueError when an attribute has no '=' or its value is not
        terminated by a closing quote.
        """

        attributedict = {}
        string = attributestring.strip()

        while string:
            index = string.index('=')
            name = string[:index].strip()
            rest = string[index + 1:].strip()

            if rest.startswith('"'):
                quote = '"'
            else:
                quote = "'"

            # search from 1 so the opening quote itself is skipped
            closing = rest.index(quote, 1)

            attributedict[name] = cls.unquote(rest[:closing + 1])
            string = rest[closing + 1:].strip()

        return attributedict

    @classmethod
    def splitstarttag(cls, tagstring):
        """
        splitstarttag(tagstring) -> qname, attributestring

        Remove the XML tag encapsulation and split to a tuple that contains
        the tag name and the attributestring. Any whitespace (not only a
        space) ends the tag name.
        """

        tag = tagstring[1:-1]
        space = _findspace(tag)

        return tag[:space], tag[space:]

    @classmethod
    def splitendtag(cls, tagstring):
        """
        splitendtag(tagstring) -> qname

        Remove XML end tag encapsulation ('</' and '>').
        """

        return tagstring[2:-1].strip()

    @classmethod
    def splitemptytag(cls, tagstring):
        """
        splitemptytag(tagstring) -> qname, attributestring

        Remove the XML tag encapsulation and split to a tuple that contains
        the tag name and the attributestring.
        """

        tag = tagstring[1:-1]

        # drop the '/' of '/>' before splitting, otherwise '<br/>' would
        # produce the tag name 'br/'
        if tag.endswith('/'):
            tag = tag[:-1]

        space = _findspace(tag)

        return tag[:space], tag[space:]

    @classmethod
    def splitstring(cls, string):
        """
        splitstring(string) -> generator

        Split string to XML Nodes. Every yielded item is either a run of
        text or one complete markup construct (comment, CDATA section,
        processing instruction, DOCTYPE declaration or tag).

        Raises ValueError (while iterating) when a construct is not
        terminated.
        """

        if not string:
            return

        length = len(string)
        position = 0

        # Walk the string with an offset instead of re-slicing the remainder
        # after every node, which would be quadratic on large documents.
        while position < length:
            if string[position] != '<':
                end = string.find('<', position)

                if end == -1:
                    end = length
            elif string.startswith('<!--', position):
                end = string.index('-->', position + 4) + 3
            elif string.startswith('<![CDATA[', position):
                end = string.index(']]>', position + 9) + 3
            elif string.startswith('<?', position):
                end = string.index('?>', position + 2) + 2
            elif string.startswith('<!DOCTYPE', position):
                end = string.index('>', position) + 1

                # A '[' before the first '>' opens an internal subset whose
                # declarations contain '>' themselves; the declaration then
                # ends at ']>'.
                if string.find('[', position, end) != -1:
                    end = string.index(']>', position) + 2
            else:
                end = string.index('>', position) + 1

            yield string[position:end]

            position = end

    @classmethod
    def splitstream(cls, stream):
        """
        splitstream(stream) -> generator

        Split stream to XML Nodes. The stream is read completely before
        this method returns.

        TODO: implement stream parser instead of calling splitstring method
        """

        return cls.splitstring(stream.read())

    @classmethod
    def splitfile(cls, filepath):
        """
        splitfile(filepath) -> generator

        Split file to XML Nodes.
        """

        stream = open(filepath)

        try:
            # splitstream reads the whole file eagerly, so closing the file
            # before the generator is consumed is safe.
            return cls.splitstream(stream)
        finally:
            stream.close()

    @classmethod
    def _parsemisc(cls, node, handler):
        """
        _parsemisc(node, handler) -> bool

        Dispatch a node that is not an element tag (comment, CDATA section,
        processing instruction, DOCTYPE declaration or text). Return True
        when the node was handled and False when it is an element tag.
        """

        if node.startswith('<!--'):
            handler.comment(cls.splitcomment(node))
        elif node.startswith('<![CDATA['):
            handler.cdata(cls.splitcdata(node))
        elif node.startswith('<?'):
            handler.pi(*cls.splitpi(node))
        elif node.startswith('<!DOCTYPE'):
            handler.dtd(cls.splitdtd(node))
        elif not node.startswith('<'):
            handler.text(node)
        else:
            return False

        return True

    @classmethod
    def parsenodes(cls, nodes, handler):
        """
        parsenodes(nodes, handler) -> None

        Dispatch XML Nodes to their corresponding event handler. An empty
        element tag produces a beginelement event immediately followed by
        an endelement event. Text is passed to the handler as found in the
        document (entities are not unescaped).
        """

        for node in nodes:
            if cls._parsemisc(node, handler):
                continue

            if node.startswith('</'):
                handler.endelement(cls.splitendtag(node))
            elif node.endswith('/>'):
                qname, attributes = cls.splitemptytag(node)
                handler.beginelement(qname, cls.splitattributes(attributes))
                handler.endelement(qname)
            else:
                qname, attributes = cls.splitstarttag(node)
                handler.beginelement(qname, cls.splitattributes(attributes))

    @classmethod
    def parsestream(cls, stream, handler):
        """
        parsestream(stream, handler) -> None

        Dispatch XML Nodes from stream to their corresponding event handler.
        """

        cls.parsenodes(cls.splitstream(stream), handler)

    @classmethod
    def parsefile(cls, filepath, handler):
        """
        parsefile(filepath, handler) -> None

        Dispatch XML Nodes from file to their corresponding event handler.
        """

        cls.parsenodes(cls.splitfile(filepath), handler)

    @classmethod
    def parsestring(cls, string, handler):
        """
        parsestring(string, handler) -> None

        Dispatch XML Nodes from string to their corresponding event handler.
        """

        cls.parsenodes(cls.splitstring(string), handler)


# XML PARSER WITH NAMESPACE SUPPORT

class ParserNS(Parser):
    """
    ParserNS extends Parser by adding namespace support. All methods are
    classmethods by design. This means that there is no need to instantiate
    this class.
    """

    @classmethod
    def splitqname(cls, qname):
        """
        splitqname(qname) -> prefix, localName

        Split qualified name to prefix, localName tuple. The prefix is None
        when the name contains no colon.
        """

        index = qname.find(':')

        if index == -1:
            return None, qname

        return qname[:index], qname[index + 1:]

    @classmethod
    def splituname(cls, uname):
        """
        splituname(uname) -> namespace, localName

        Split universal name ('{namespace}localName') to namespace,
        localName tuple. The namespace is None when uname is not in the
        universal form.
        """

        if uname.startswith('{'):
            index = uname.find('}')

            if index != -1:
                return uname[1:index], uname[index + 1:]

        return None, uname

    @classmethod
    def splitnamespaces(cls, attributes):
        """
        splitnamespaces(attributes) -> namespaces, attributes

        Separate namespace declarations from the attribute dictionary. The
        returned namespaces dictionary maps prefixes to namespaces; the
        default namespace (a plain 'xmlns' attribute) is stored under the
        empty string.
        """

        namespacedict = {}
        attributedict = {}

        for name, value in attributes.items():
            prefix, localname = cls.splitqname(name)

            if prefix == 'xmlns':
                namespacedict[localname] = value
            elif prefix is None and localname == 'xmlns':
                # splitqname reports "no prefix" as None, never as ''
                namespacedict[''] = value
            else:
                attributedict[name] = value

        return namespacedict, attributedict

    @classmethod
    def findnamespace(cls, prefix, nslevels):
        """
        findnamespace(prefix, nslevels) -> namespace

        Find namespace by prefix. The nslevels dictionary maps a nesting
        level to the namespace declarations made at that level. Levels are
        searched from the deepest to the shallowest, so inner declarations
        override outer ones. A prefix of None looks up the default
        namespace. None is returned when the prefix is not declared.
        """

        if prefix is None:
            prefix = ''

        # sort explicitly: dictionary key order is not a nesting order
        for level in sorted(nslevels, reverse=True):
            declarations = nslevels[level]

            if prefix in declarations:
                return declarations[prefix]

        return None

    @classmethod
    def rebuildattributes(cls, attributes, nslevels):
        """
        rebuildattributes(attributes, nslevels) -> qualified_attributes

        Rebuild attributes according to nslevels. Every key of the returned
        dictionary is a (namespace, prefix, localName) tuple. Attributes
        without a prefix get the namespace None: the default namespace does
        not apply to attributes.
        """

        qualified = {}

        for qname, value in attributes.items():
            prefix, localname = cls.splitqname(qname)

            if prefix is None:
                namespace = None
            else:
                namespace = cls.findnamespace(prefix, nslevels)

            qualified[namespace, prefix, localname] = value

        return qualified

    @classmethod
    def parsenodes(cls, nodes, handler):
        """
        parsenodes(nodes, handler) -> None

        Dispatch XML Nodes to their corresponding event handler. Elements
        are reported as (localName, prefix, namespace) tuples and attribute
        names as (namespace, prefix, localName) tuples.
        """

        namespaces = {}
        depth = 0

        for node in nodes:
            if cls._parsemisc(node, handler):
                continue

            if node.startswith('</'):
                if depth > 0:
                    depth -= 1

                prefix, name = cls.splitqname(cls.splitendtag(node))
                namespace = cls.findnamespace(prefix, namespaces)

                handler.endelement((name, prefix, namespace))

                # the declarations of the closed element go out of scope
                namespaces.pop(depth, None)

                continue

            empty = node.endswith('/>')

            if empty:
                qname, attributes = cls.splitemptytag(node)
            else:
                qname, attributes = cls.splitstarttag(node)

            prefix, name = cls.splitqname(qname)
            attributes = cls.splitattributes(attributes)
            nsattributes, attributes = cls.splitnamespaces(attributes)

            # Register the declarations first: they are already in scope for
            # the element that carries them and for its own attributes.
            if nsattributes:
                namespaces[depth] = nsattributes

            attributes = cls.rebuildattributes(attributes, namespaces)
            element = (name, prefix, cls.findnamespace(prefix, namespaces))

            handler.beginelement(element, attributes)

            if empty:
                handler.endelement(element)
                namespaces.pop(depth, None)
            else:
                depth += 1


# CONTENT HANDLER

class Handler(object):
    """
    Handle XML Events. Every method is a no-op; subclasses override the
    events they are interested in.
    """

    def beginelement(self, qname, attributes):
        """Called for a start tag (and first for an empty element tag)."""
        pass

    def endelement(self, qname):
        """Called for an end tag (and second for an empty element tag)."""
        pass

    def dtd(self, content):
        """Called for a DOCTYPE declaration."""
        pass

    def text(self, content):
        """Called for a run of character data between markup."""
        pass

    def cdata(self, content):
        """Called with the content of a CDATA section."""
        pass

    def comment(self, content):
        """Called with the content of a comment."""
        pass

    def pi(self, target, content):
        """Called with the target and content of a processing instruction."""
        pass


# XML COMPOSER

class Composer(object):
    """
    Composer is responsible for composing xml content. Some methods are
    classmethods by design. This means that there is no need to instantiate
    this class in order to call them.
    """

    # ESCAPE FUNCTIONS

    @classmethod
    def escape(cls, string, entities=None):
        """
        escape(string, entities = None) -> escaped_string

        Replace special strings with their xml representation. The optional
        entity dictionary (plain string -> entity) is there if additional
        string substitutions are required.
        """

        # '&' goes first so the ampersands of the entities introduced below
        # are not escaped a second time
        string = string.replace('&', '&amp;')
        string = string.replace('<', '&lt;')
        string = string.replace('>', '&gt;')

        if entities:
            for plain, entity in entities.items():
                string = string.replace(plain, entity)

        return string

    @classmethod
    def quote(cls, string, entities=None):
        """
        quote(string, entities = None) -> quoted_string

        Replace special strings with their xml representation and wrap the
        result in double quotes. This function is useful when dealing with
        attributes. The optional entity dictionary is there if additional
        string substitutions are required.
        """

        table = {}

        if entities:
            table.update(entities)

        table['"'] = '&quot;'
        table["'"] = '&apos;'

        return '"%s"' % cls.escape(string, table)

    # JOIN FUNCTIONS

    @classmethod
    def joindtd(cls, dtdstring):
        """
        joindtd(dtdstring) -> dtdstring

        TODO: to be implemented. The declaration is returned unchanged.
        """

        return dtdstring

    @classmethod
    def joincdata(cls, string):
        """
        joincdata(string) -> cdatastring

        Encapsulate string into CDATA.
        """

        return '<![CDATA[' + string + ']]>'

    @classmethod
    def joincomment(cls, commentstring):
        """
        joincomment(string) -> commentstring

        Encapsulate string into comment.
        """

        return '<!--' + commentstring + '-->'

    @classmethod
    def joinpi(cls, target, content):
        """
        joinpi(target, content) -> pistring

        Encapsulate target and content into Process Instruction. Exactly
        one separator is placed between target and content: content that
        already starts with whitespace (as returned by Parser.splitpi) is
        appended as is.
        """

        if not target:
            return '<?' + content + '?>'

        if not content:
            return '<?' + target + '?>'

        if content[0].isspace():
            return '<?' + target + content + '?>'

        return '<?' + target + ' ' + content + '?>'

    @classmethod
    def joinattributes(cls, dict):
        """
        joinattributes(dict) -> attributestring

        Encapsulate dict into attributes. Values are quoted and escaped;
        attributes appear in the iteration order of the dictionary.
        """

        return ' '.join(['%s=%s' % (name, cls.quote(value))
                         for name, value in dict.items()])

    @classmethod
    def joinstarttag(cls, qname, attributestring):
        """
        joinstarttag(qname, attributestring) -> tagstring

        Encapsulate qname and attributestring into XML start tag.
        """

        if attributestring:
            return '<%s %s>' % (qname, attributestring)

        return '<' + qname + '>'

    @classmethod
    def joinendtag(cls, qname):
        """
        joinendtag(qname) -> tagstring

        Encapsulate qname into XML end tag.
        """

        return '</' + qname + '>'

    @classmethod
    def joinemptytag(cls, qname, attributestring):
        """
        joinemptytag(qname, attributestring) -> tagstring

        Encapsulate qname and attributestring into XML empty element tag.
        """

        if attributestring:
            return '<%s %s/>' % (qname, attributestring)

        return '<' + qname + '/>'

    @classmethod
    def joinstring(cls, generator):
        """
        joinstring(generator) -> string

        Concatenate XML Nodes into a single string.
        """

        return ''.join(generator)

    @classmethod
    def joinstream(cls, generator, stream):
        """
        joinstream(generator, stream) -> None

        Write XML Nodes to stream, one write call per node.
        """

        for node in generator:
            stream.write(node)

    @classmethod
    def joinfile(cls, generator, filepath):
        """
        joinfile(generator, filepath) -> None

        Write XML Nodes to the file at filepath, replacing its content.
        """

        stream = open(filepath, 'w')

        try:
            cls.joinstream(generator, stream)
        finally:
            stream.close()

    @classmethod
    def composenodes(cls, nodes, handler):
        """
        composenodes(nodes, handler) -> None

        Pass every node to handler.write.
        """

        for node in nodes:
            handler.write(node)

    @classmethod
    def composestring(cls, generator):
        """
        composestring(generator) -> None

        TODO: to be implemented
        """

        pass

    @classmethod
    def composestream(cls, stream, generator):
        """
        composestream(stream, generator) -> None

        TODO: to be implemented
        """

        pass

    @classmethod
    def composefile(cls, filepath, generator):
        """
        composefile(filepath, generator) -> None

        TODO: to be implemented
        """

        pass