#    XML Toolkit
#    Copyright (C) 2005  Petko Petkov (GNUCITIZEN) ppetkov@gnucitizen.org
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# XML PARSER

class Parser(object):
	"""
	Parser is responsible for parsing xml content. All methods are classmethods
	by design. This means that there is no need to instantiate this class.
	"""

	# UNESCAPE FUNCTIONS

	@classmethod
	def unescape(self, string, entities = None):
		"""
		unescape(string, entities = None) -> unescaped_string

		Replace xml entities with their string representation. This method is
		the reverse of the escape method.

		The optional entities dictionary maps each replacement string to the
		entity that stands for it, e.g. {'"': '&quot;'}.
		"""

		import re

		# entity -> replacement lookup; the three markup entities are always
		# known
		table = {'&gt;': '>', '&lt;': '<', '&amp;': '&'}

		if entities:
			for value, entity in entities.items():
				# an empty entity would match at every position
				if entity:
					table[entity] = value

		# Every entity is replaced in a single pass so that text produced by
		# one replacement is never unescaped a second time ('&amp;lt;' has to
		# become '&lt;', not '<'). Longer entities are tried first.
		names = sorted(table, key = len, reverse = True)
		pattern = '|'.join([re.escape(name) for name in names])

		return re.sub(pattern, lambda match: table[match.group(0)], string)

	@classmethod
	def unquote(self, string, entities = None):
		"""
		unquote(string, entities = None) -> unquoted_string

		Remove the surrounding quotes from an attribute value and replace xml
		entities with their string representation. This method is the reverse
		of the quote method.
		"""

		_entities = {}

		if entities:
			_entities.update(entities)

		# '&quot;' is the predefined XML entity for the double quote
		_entities['"'] = '&quot;'
		_entities["'"] = '&apos;'

		string = string.strip()

		# the first character decides which kind of quote delimits the value
		if string.startswith('"'):
			string = string.strip('"')

		else:
			string = string.strip("'")

		return self.unescape(string, _entities)

	# SPLIT FUNCTIONS

	@classmethod
	def _firstwhitespace(self, string, default):
		"""
		_firstwhitespace(string, default) -> index

		Return the index of the first whitespace character in string or
		default when string contains no whitespace at all.
		"""

		for index, char in enumerate(string):
			if char.isspace():
				return index

		return default

	@classmethod
	def _doctypeend(self, string, start):
		"""
		_doctypeend(string, start) -> index

		Return the index just past the DOCTYPE declaration that begins at
		start. Raises ValueError when the declaration is not terminated.
		"""

		close = string.index('>', start)
		subset = string.find('[', start, close)

		if subset == -1:
			# no internal subset: the declaration ends at the first '>'
			return close + 1

		# the internal subset contains markup of its own, so the declaration
		# ends at the closing ']>'
		return string.index(']>', subset) + 2

	@classmethod
	def splitdtd(self, dtdstring):
		"""
		splitdtd(dtdstring) -> dtdstring

		TODO: to be implemented. The declaration is returned unchanged.
		"""

		return dtdstring

	@classmethod
	def splitcdata(self, cdatastring):
		"""
		splitcdata(cdatastring) -> string

		Remove the XML CDATA encapsulation ('<![CDATA[' and ']]>').
		"""

		return cdatastring[9:-3]

	@classmethod
	def splitcomment(self, commentstring):
		"""
		splitcomment(commentstring) -> string

		Remove the XML Comment encapsulation ('<!--' and '-->').
		"""

		return commentstring[4:-3]

	@classmethod
	def splitpi(self, pistring):
		"""
		splitpi(pistring) -> target, content

		Remove the XML Process Instruction encapsulation. The content keeps
		the whitespace that separates it from the target. When the instruction
		contains no whitespace the target is empty and everything is returned
		as content.
		"""

		pistring = pistring[2:-2]
		space = self._firstwhitespace(pistring, 0)

		return pistring[:space], pistring[space:]

	@classmethod
	def splitattributes(self, attributestring):
		"""
		splitattributes(attributestring) -> dict

		Remove the XML Attribute encapsulation and return a dictionary that
		maps attribute names to their corresponding unquoted values. Raises
		ValueError when the attribute string is malformed.
		"""

		string = attributestring.strip()
		attributedict = {}

		while string:
			index = string.index('=')
			name = string[:index].strip()
			rest = string[index + 1:].strip()

			if rest.startswith('"'):
				quote = '"'

			elif rest.startswith("'"):
				quote = "'"

			else:
				raise ValueError('attribute value is not quoted: %r' % name)

			# position of the quote that closes the value
			close = rest.index(quote, 1)

			attributedict[name] = self.unquote(rest[:close + 1])
			string = rest[close + 1:].strip()

		return attributedict

	@classmethod
	def splitstarttag(self, tagstring):
		"""
		splitstarttag(tagstring) -> qname, attributestring

		Remove the XML tag encapsulation and split to a tuple that contains the
		tag name and the attributestring. The name ends at the first
		whitespace character.
		"""

		tag = tagstring[1:-1]
		space = self._firstwhitespace(tag, len(tag))

		return tag[:space], tag[space:]

	@classmethod
	def splitendtag(self, tagstring):
		"""
		splitendtag(tagstring) -> qname

		Remove XML tag encapsulation.
		"""

		return tagstring[2:-1].strip()

	@classmethod
	def splitemptytag(self, tagstring):
		"""
		splitemptytag(tagstring) -> qname, attributestring

		Remove the XML tag encapsulation and split to a tuple that contains the
		tag name and the attributestring.
		"""

		tag = tagstring[1:-1]

		# drop the slash of the closing '/>' before looking for the name,
		# otherwise '<br/>' would be reported as 'br/'
		if tag.endswith('/'):
			tag = tag[:-1]

		space = self._firstwhitespace(tag, len(tag))

		return tag[:space], tag[space:]

	@classmethod
	def splitstring(self, string):
		"""
		splitstring(string) -> generator

		Split string to XML Nodes. Each node is either a piece of markup or
		the text found between two pieces of markup. Raises ValueError (while
		iterating) when a piece of markup is not terminated.
		"""

		if not string:
			return

		# walk the string with an offset instead of re-slicing the remainder
		position = 0
		length = len(string)

		while position < length:
			if not string.startswith('<', position):
				# text runs up to the next markup or to the end of the string
				end = string.find('<', position)

				if end == -1:
					end = length

			elif string.startswith('<![CDATA[', position):
				end = string.index(']]>', position) + 3

			elif string.startswith('<?', position):
				end = string.index('?>', position) + 2

			elif string.startswith('<!DOCTYPE', position):
				end = self._doctypeend(string, position)

			elif string.startswith('<!--', position):
				end = string.index('-->', position) + 3

			else:
				end = string.index('>', position) + 1

			yield string[position:end]
			position = end

	@classmethod
	def splitstream(self, stream):
		"""
		splitstream(stream) -> generator

		Split stream to XML Nodes. The stream is read completely before the
		generator is returned.

		TODO: implement stream parser instead of calling splitstring method
		"""

		return self.splitstring(stream.read())

	@classmethod
	def splitfile(self, filepath):
		"""
		splitfile(filepath) -> generator

		Split file to XML Nodes.
		"""

		# splitstream reads the whole content before it returns, so the file
		# can be closed right away; 'with' also closes it when reading fails
		with open(filepath) as stream:
			return self.splitstream(stream)

	@classmethod
	def parsenodes(self, nodes, handler):
		"""
		parsenodes(nodes, handler) -> None

		Dispatch XML Nodes to their corresponding event handler. The handler
		has to provide the cdata, pi, dtd, comment, beginelement, endelement
		and text methods.
		"""

		for node in nodes:
			if node.startswith('<![CDATA['):
				handler.cdata(self.splitcdata(node))

			elif node.startswith('<?'):
				handler.pi(*self.splitpi(node))

			elif node.startswith('<!DOCTYPE'):
				# splitdtd returns a single string, it must not be unpacked
				handler.dtd(self.splitdtd(node))

			elif node.startswith('<!--'):
				handler.comment(self.splitcomment(node))

			elif node.startswith('</'):
				handler.endelement(self.splitendtag(node))

			elif not node.startswith('<'):
				# checked before the empty tag test because text is allowed to
				# end with '/>'
				handler.text(node)

			elif node.endswith('/>'):
				qname, attributes = self.splitemptytag(node)
				handler.beginelement(qname, self.splitattributes(attributes))
				handler.endelement(qname)

			else:
				qname, attributes = self.splitstarttag(node)
				handler.beginelement(qname, self.splitattributes(attributes))

	@classmethod
	def parsestream(self, stream, handler):
		"""
		parsestream(stream, handler) -> None

		Dispatch XML Nodes from stream to their corresponding event handler.
		"""

		self.parsenodes(self.splitstream(stream), handler)

	@classmethod
	def parsefile(self, filepath, handler):
		"""
		parsefile(filepath, handler) -> None

		Dispatch XML Nodes from file to their corresponding event handler.
		"""

		self.parsenodes(self.splitfile(filepath), handler)

	@classmethod
	def parsestring(self, string, handler):
		"""
		parsestring(string, handler) -> None

		Dispatch XML Nodes from string to their corresponding event handler.
		"""

		self.parsenodes(self.splitstring(string), handler)

# XML PARSER WITH NAMESPACE SUPPORT

class ParserNS(Parser):
	"""
	ParserNS extends Parser by adding namespace support. All methods are
	classmethods by design. This means that there is no need to instantiate
	this class.

	Elements are reported to the handler as (name, prefix, namespace) tuples
	and attribute dictionaries are keyed by (namespace, prefix, name) tuples.
	"""

	@classmethod
	def splitqname(self, qname):
		"""
		splitqname(qname) -> prefix, localName

		Split qualified name to prefix, localName tuple. The prefix is None
		when the name contains no colon.
		"""

		prefix, colon, name = qname.partition(':')

		if not colon:
			return None, qname

		return prefix, name

	@classmethod
	def splituname(self, uname):
		"""
		splituname(uname) -> namespace, localName

		Split universal name ('{namespace}localName') to namespace, localName
		tuple. The namespace is None when uname is not in that form.
		"""

		if uname.startswith('{'):
			index = uname.find('}')

			if index != -1:
				return uname[1:index], uname[index + 1:]

		return None, uname

	@classmethod
	def splitnamespaces(self, attributes):
		"""
		splitnamespaces(attributes) -> namespaces, attributes

		Separate namespace declarations from the attribute dictionary. The
		namespaces dictionary maps prefixes to namespaces; the default
		namespace (xmlns="...") is stored under the empty string.
		"""

		namespacedict = {}
		attributedict = {}

		for name, value in attributes.items():
			prefix, _name = self.splitqname(name)

			if prefix == 'xmlns':
				namespacedict[_name] = value

			elif not prefix and _name == 'xmlns':
				# splitqname reports a missing prefix as None, so a plain
				# 'xmlns' attribute is the default namespace declaration
				namespacedict[''] = value

			else:
				attributedict[name] = value

		return namespacedict, attributedict

	@classmethod
	def findnamespace(self, prefix, nslevels):
		"""
		findnamespace(prefix, nslevels) -> namespace

		Find namespace by prefix. The nslevels dictionary maps the nesting
		level at which namespace declarations were found to the dictionary of
		these declarations. The deepest level that declares the prefix wins.
		A prefix of None looks up the default namespace. None is returned when
		the prefix is not declared.
		"""

		# the default namespace is stored under the empty string
		if prefix is None:
			prefix = ''

		# dictionaries do not promise any key order, so sort the levels to
		# visit the innermost declaration first
		for level in sorted(nslevels, reverse = True):
			declarations = nslevels[level]

			if prefix in declarations:
				return declarations[prefix]

		return None

	@classmethod
	def rebuildattributes(self, attributes, nslevels):
		"""
		rebuildattributes(attributes, nslevels) -> qualified_attributes

		Rebuild attributes according to nslevels. The nslevels dictionary is
		used by the findnamespace method to find the corresponding namespace
		for each attribute. The result is keyed by (namespace, prefix, name)
		tuples.
		"""

		_attributes = {}

		for qname, value in attributes.items():
			prefix, name = self.splitqname(qname)

			if prefix is None:
				# the default namespace does not apply to attributes
				namespace = None

			else:
				namespace = self.findnamespace(prefix, nslevels)

			_attributes[namespace, prefix, name] = value

		return _attributes

	@classmethod
	def _resolveelement(self, qname, attributestring, nslevels, level):
		"""
		_resolveelement(qname, attributestring, nslevels, level)
			-> (name, prefix, namespace), qualified_attributes

		Resolve the namespaces of an element and of its attributes. Namespace
		declarations carried by the element are stored in nslevels[level].
		"""

		prefix, name = self.splitqname(qname)
		attributes = self.splitattributes(attributestring)
		nsattributes, attributes = self.splitnamespaces(attributes)

		# register the declarations first: they are already in scope for the
		# element that carries them and for its own attributes
		if nsattributes:
			nslevels[level] = nsattributes

		attributes = self.rebuildattributes(attributes, nslevels)
		namespace = self.findnamespace(prefix, nslevels)

		return (name, prefix, namespace), attributes

	@classmethod
	def parsenodes(self, nodes, handler):
		"""
		parsenodes(nodes, handler) -> None

		Dispatch XML Nodes to their corresponding event handler. Elements are
		passed as (name, prefix, namespace) tuples.
		"""

		# nesting level -> namespace declarations made at that level
		namespaces = {}
		count = 0

		for node in nodes:
			if node.startswith('<![CDATA['):
				handler.cdata(self.splitcdata(node))

			elif node.startswith('<?'):
				handler.pi(*self.splitpi(node))

			elif node.startswith('<!DOCTYPE'):
				handler.dtd(self.splitdtd(node))

			elif node.startswith('<!--'):
				handler.comment(self.splitcomment(node))

			elif node.startswith('</'):
				count -= 1

				prefix, name = self.splitqname(self.splitendtag(node))
				namespace = self.findnamespace(prefix, namespaces)

				# declarations of the closed element go out of scope
				namespaces.pop(count, None)

				handler.endelement((name, prefix, namespace))

			elif not node.startswith('<'):
				# checked before the empty tag test because text is allowed to
				# end with '/>'
				handler.text(node)

			elif node.endswith('/>'):
				qname, attributes = self.splitemptytag(node)
				element, attributes = self._resolveelement(
					qname, attributes, namespaces, count)

				# an empty element closes immediately, so do its declarations
				namespaces.pop(count, None)

				handler.beginelement(element, attributes)
				handler.endelement(element)

			else:
				qname, attributes = self.splitstarttag(node)
				element, attributes = self._resolveelement(
					qname, attributes, namespaces, count)

				handler.beginelement(element, attributes)

				count += 1

# CONTENT HANDLER

class Handler(object):
	"""
	Handle XML Events. Every method of this base class does nothing and
	returns None; subclasses override the events they are interested in.
	"""

	def beginelement(self, qname, attributes):
		"""
		beginelement(qname, attributes) -> None

		Called for a start tag and for an empty tag.
		"""

	def endelement(self, qname):
		"""
		endelement(qname) -> None

		Called for an end tag and, right after beginelement, for an empty tag.
		"""

	def dtd(self, content):
		"""
		dtd(content) -> None

		Called for a DOCTYPE declaration.
		"""

	def text(self, content):
		"""
		text(content) -> None

		Called for the text found between two pieces of markup.
		"""

	def cdata(self, content):
		"""
		cdata(content) -> None

		Called with the content of a CDATA section.
		"""

	def comment(self, content):
		"""
		comment(content) -> None

		Called with the content of a comment.
		"""

	def pi(self, target, content):
		"""
		pi(target, content) -> None

		Called with the target and the content of a Process Instruction.
		"""

# XML COMPOSER

class Composer(object):
	"""
	Composer is responsible for composing xml content. Some methods are
	classmethods by design. This means that there is no need to instantiate
	this class in order to call them.
	"""

	# ESCAPE FUNCTIONS

	@classmethod
	def escape(self, string, entities = None):
		"""
		escape(string, entities = None) -> escaped_string

		Replace special strings with their xml representation. The optional
		entity dictionary maps additional strings to their substitutions if
		more replacements are required.
		"""

		import re

		# string -> substitution lookup; the three markup characters are
		# always escaped
		table = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}

		if entities:
			table.update(entities)

		# an empty string would match at every position; longer strings are
		# tried first
		specials = sorted([key for key in table if key], key = len,
			reverse = True)
		pattern = '|'.join([re.escape(special) for special in specials])

		# Everything is substituted in a single pass so that the output of one
		# substitution is never rewritten by another one.
		return re.sub(pattern, lambda match: table[match.group(0)], string)

	@classmethod
	def quote(self, string, entities = None):
		"""
		quote(string, entities = None) -> quoted_string

		Replace special strings with their xml representation and wrap the
		result in double quotes. This function is useful when dealing with
		attributes. The optional entity dictionary is there if additional
		string substitutions are required.
		"""

		_entities = {}

		if entities:
			_entities.update(entities)

		# '&quot;' is the predefined XML entity for the double quote
		_entities['"'] = '&quot;'
		_entities["'"] = '&apos;'

		return '"%s"' % self.escape(string, _entities)

	# JOIN FUNCTIONS

	@classmethod
	def joindtd(self, dtdstring):
		"""
		joindtd(dtdstring) -> dtdstring

		TODO: to be implemented. The declaration is returned unchanged.
		"""

		return dtdstring

	@classmethod
	def joincdata(self, string):
		"""
		joincdata(string) -> cdatastring

		Encapsulate string into CDATA.
		"""

		return '<![CDATA[' + string + ']]>'

	@classmethod
	def joincomment(self, commentstring):
		"""
		joincomment(string) -> commentstring

		Encapsulate string into comment.
		"""

		return '<!--' + commentstring + '-->'

	@classmethod
	def joinpi(self, target, content):
		"""
		joinpi(target, content) -> pistring

		Encapsulate target and content into Process Instruction. An empty
		target produces an instruction that consists of the content only.
		"""

		if not target:
			return '<?' + content + '?>'

		return '<?' + target + ' ' + content + '?>'

	@classmethod
	def joinattributes(self, dict):
		"""
		joinattributes(dict) -> attributestring

		Encapsulate dict into attributes. Every value is quoted and the
		name=value pairs are separated by a single space, in the iteration
		order of the dictionary.
		"""

		return ' '.join(['%s=%s' % (name, self.quote(value))
			for name, value in dict.items()])

	@classmethod
	def joinstarttag(self, qname, attributestring):
		"""
		joinstarttag(qname, attributestring) -> tagstring

		Encapsulate qname and attributestring into XML start tag.
		"""

		if attributestring:
			return '<%s %s>' % (qname, attributestring)

		return '<' + qname + '>'

	@classmethod
	def joinendtag(self, qname):
		"""
		joinendtag(qname) -> tagstring

		Encapsulate qname into XML end tag.
		"""

		return '</' + qname + '>'

	@classmethod
	def joinemptytag(self, qname, attributestring):
		"""
		joinemptytag(qname, attributestring) -> tagstring

		Encapsulate qname and attributestring into XML empty tag.
		"""

		if attributestring:
			return '<%s %s/>' % (qname, attributestring)

		return '<' + qname + '/>'

	@classmethod
	def joinstring(self, generator):
		"""
		joinstring(generator) -> string

		Concatenate the XML Nodes produced by generator into one string.
		"""

		# join builds the result in one go instead of copying the growing
		# string for every node
		return ''.join(generator)

	@classmethod
	def joinstream(self, generator, stream):
		"""
		joinstream(generator, stream) -> None

		Write the XML Nodes produced by generator to stream.
		"""

		for node in generator:
			stream.write(node)

	@classmethod
	def joinfile(self, generator, filepath):
		"""
		joinfile(generator, filepath) -> None

		Write the XML Nodes produced by generator to the file at filepath. The
		file is opened for writing, so existing content is replaced.
		"""

		# 'with' closes the file even when writing a node fails
		with open(filepath, 'w') as stream:
			self.joinstream(generator, stream)

	@classmethod
	def composenodes(self, nodes, handler):
		"""
		composenodes(nodes, handler) -> None

		Pass every node to the write method of handler.
		"""

		for node in nodes:
			handler.write(node)

	@classmethod
	def composestring(self, generator):
		"""
		composestring(generator) -> None

		Not implemented: does nothing and returns None.
		"""

	@classmethod
	def composestream(self, stream, generator):
		"""
		composestream(stream, generator) -> None

		Not implemented: does nothing and returns None.
		"""

	@classmethod
	def composefile(self, filepath, generator):
		"""
		composefile(filepath, generator) -> None

		Not implemented: does nothing and returns None.
		"""