[Python-checkins] CVS: python/dist/src/Doc/tools/sgmlconv esistools.py,1.4,1.5
Fred L. Drake
fdrake@users.sourceforge.net
Fri, 23 Mar 2001 08:42:11 -0800
Update of /cvsroot/python/python/dist/src/Doc/tools/sgmlconv
In directory usw-pr-cvs1:/tmp/cvs-serv26376
Modified Files:
esistools.py
Log Message:
Rewrite so that this module no longer depends on an old version of PyXML.
It now implements the SAX XMLReader interface instead of the old Builder
interface used with PyDOM (now obsolete).
The module depends only on the standard library, not on PyXML.
Index: esistools.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/tools/sgmlconv/esistools.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** esistools.py 1999/08/26 18:04:32 1.4
--- esistools.py 2001/03/23 16:42:08 1.5
***************
*** 4,21 ****
import re
import string
- import sys
- import xml.dom.core
- import xml.dom.esis_builder
! _data_rx = re.compile(r"[^\\][^\\]*")
def decode(s):
r = ''
while s:
! m = _data_rx.match(s)
if m:
r = r + m.group()
! s = s[len(m.group()):]
elif s[1] == "\\":
r = r + "\\"
--- 4,24 ----
import re
import string
+ import xml.dom.pulldom
! import xml.sax
! import xml.sax.handler
! import xml.sax.xmlreader
+
+ _data_match = re.compile(r"[^\\][^\\]*").match
+
def decode(s):
r = ''
while s:
! m = _data_match(s)
if m:
r = r + m.group()
! s = s[m.end():]
elif s[1] == "\\":
r = r + "\\"
***************
*** 24,27 ****
--- 27,34 ----
r = r + "\n"
s = s[2:]
+ elif s[1] == "%":
+ s = s[2:]
+ n, s = s.split(";", 1)
+ r = r + unichr(int(n))
else:
raise ValueError, "can't handle " + `s`
***************
*** 36,83 ****
del c
def encode(s):
! return string.join(map(_charmap.get, s), '')
! class ExtendedEsisBuilder(xml.dom.esis_builder.EsisBuilder):
! def __init__(self, *args, **kw):
! self.__empties = {}
! self.__is_empty = 0
! apply(xml.dom.esis_builder.EsisBuilder.__init__, (self,) + args, kw)
! self.buildFragment()
! def feed(self, data):
! for line in string.split(data, '\n'):
! if not line:
! break
! event = line[0]
! text = line[1:]
! if event == '(':
! element = self.document.createElement(text, self.attr_store)
! self.attr_store = {}
! self.push(element)
! if self.__is_empty:
! self.__empties[text] = text
! self.__is_empty = 0
! elif event == ')':
! self.pop()
! elif event == 'A':
! l = re.split(' ', text, 2)
! name = l[0]
! value = decode(l[2])
! self.attr_store[name] = value
! elif event == '-':
! text = self.document.createText(decode(text))
! self.push(text)
! elif event == 'C':
! return
! elif event == 'e':
! self.__is_empty = 1
! elif event == '&':
! eref = self.document.createEntityReference(text)
! self.push(eref)
! else:
! sys.stderr.write('Unknown event: %s\n' % line)
def get_empties(self):
! return self.__empties.keys()
--- 43,310 ----
del c
+ _null_join = ''.join
def encode(s):
! return _null_join(map(_charmap.get, s))
! class ESISReader(xml.sax.xmlreader.XMLReader):
! """SAX Reader which reads from an ESIS stream.
! No verification of the document structure is performed by the
! reader; a general verifier could be used as the target
! ContentHandler instance.
!
! """
! _decl_handler = None
! _lexical_handler = None
+ _public_id = None
+ _system_id = None
+
+ _buffer = ""
+ _is_empty = 0
+ _lineno = 0
+ _started = 0
+
+ def __init__(self, contentHandler=None, errorHandler=None):
+ xml.sax.xmlreader.XMLReader.__init__(self)
+ self._attrs = {}
+ self._attributes = Attributes(self._attrs)
+ self._locator = Locator()
+ self._empties = {}
+ if contentHandler:
+ self.setContentHandler(contentHandler)
+ if errorHandler:
+ self.setErrorHandler(errorHandler)
+
def get_empties(self):
! return self._empties.keys()
!
! #
! # XMLReader interface
! #
!
! def parse(self, source):
! raise RuntimeError
! self._locator._public_id = source.getPublicId()
! self._locator._system_id = source.getSystemId()
! fp = source.getByteStream()
! handler = self.getContentHandler()
! if handler:
! handler.startDocument()
! lineno = 0
! while 1:
! token, data = self._get_token(fp)
! if token is None:
! break
! lineno = lineno + 1
! self._locator._lineno = lineno
! self._handle_token(token, data)
! handler = self.getContentHandler()
! if handler:
! handler.startDocument()
!
! def feed(self, data):
! if not self._started:
! handler = self.getContentHandler()
! if handler:
! handler.startDocument()
! self._started = 1
! data = self._buffer + data
! self._buffer = None
! lines = data.split("\n")
! if lines:
! for line in lines[:-1]:
! self._lineno = self._lineno + 1
! self._locator._lineno = self._lineno
! if not line:
! e = xml.sax.SAXParseException(
! "ESIS input line contains no token type mark",
! None, self._locator)
! self.getErrorHandler().error(e)
! else:
! self._handle_token(line[0], line[1:])
! self._buffer = lines[-1]
! else:
! self._buffer = ""
!
! def close(self):
! handler = self.getContentHandler()
! if handler:
! handler.endDocument()
! self._buffer = ""
!
! def _get_token(self, fp):
! try:
! line = fp.readline()
! except IOError, e:
! e = SAXException("I/O error reading input stream", e)
! self.getErrorHandler().fatalError(e)
! return
! if not line:
! return None, None
! if line[-1] == "\n":
! line = line[:-1]
! if not line:
! e = xml.sax.SAXParseException(
! "ESIS input line contains no token type mark",
! None, self._locator)
! self.getErrorHandler().error(e)
! return
! return line[0], line[1:]
!
! def _handle_token(self, token, data):
! handler = self.getContentHandler()
! if token == '-':
! if data and handler:
! handler.characters(decode(data))
! elif token == ')':
! if handler:
! handler.endElement(decode(data))
! elif token == '(':
! if self._is_empty:
! self._empties[data] = 1
! if handler:
! handler.startElement(data, self._attributes)
! self._attrs.clear()
! self._is_empty = 0
! elif token == 'A':
! name, value = data.split(' ', 1)
! if value != "IMPLIED":
! type, value = value.split(' ', 1)
! self._attrs[name] = (decode(value), type)
! elif token == '&':
! # entity reference in SAX?
! pass
! elif token == '?':
! if handler:
! if ' ' in data:
! target, data = string.split(data, None, 1)
! else:
! target, data = data, ""
! handler.processingInstruction(target, decode(data))
! elif token == 'N':
! handler = self.getDTDHandler()
! if handler:
! handler.notationDecl(data, self._public_id, self._system_id)
! self._public_id = None
! self._system_id = None
! elif token == 'p':
! self._public_id = decode(data)
! elif token == 's':
! self._system_id = decode(data)
! elif token == 'e':
! self._is_empty = 1
! elif token == 'C':
! pass
! else:
! e = SAXParseException("unknown ESIS token in event stream",
! None, self._locator)
! self.getErrorHandler().error(e)
!
! def setContentHandler(self, handler):
! old = self.getContentHandler()
! if old:
! old.setDocumentLocator(None)
! if handler:
! handler.setDocumentLocator(self._locator)
! xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)
!
! def getProperty(self, property):
! if property == xml.sax.handler.property_lexical_handler:
! return self._lexical_handler
!
! elif property == xml.sax.handler.property_declaration_handler:
! return self._decl_handler
!
! else:
! raise xml.sax.SAXNotRecognizedException("unknown property %s"
! % `property`)
!
! def setProperty(self, property, value):
! if property == xml.sax.handler.property_lexical_handler:
! if self._lexical_handler:
! self._lexical_handler.setDocumentLocator(None)
! if value:
! value.setDocumentLocator(self._locator)
! self._lexical_handler = value
!
! elif property == xml.sax.handler.property_declaration_handler:
! if self._decl_handler:
! self._decl_handler.setDocumentLocator(None)
! if value:
! value.setDocumentLocator(self._locator)
! self._decl_handler = value
!
! else:
! raise xml.sax.SAXNotRecognizedException()
!
! def getFeature(self, feature):
! if feature == xml.sax.handler.feature_namespaces:
! return 1
! else:
! return xml.sax.xmlreader.XMLReader.getFeature(self, feature)
!
! def setFeature(self, feature, enabled):
! if feature == xml.sax.handler.feature_namespaces:
! pass
! else:
! xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
!
!
! class Attributes(xml.sax.xmlreader.AttributesImpl):
! # self._attrs has the form {name: (value, type)}
!
! def getType(self, name):
! return self._attrs[name][1]
!
! def getValue(self, name):
! return self._attrs[name][0]
!
! def getValueByQName(self, name):
! return self._attrs[name][0]
!
! def __getitem__(self, name):
! return self._attrs[name][0]
!
! def get(self, name, default=None):
! if self._attrs.has_key(name):
! return self._attrs[name][0]
! return default
!
! def items(self):
! L = []
! for name, (value, type) in self._attrs.items():
! L.append((name, value))
! return L
!
! def values(self):
! L = []
! for value, type in self._attrs.values():
! L.append(value)
! return L
!
!
! class Locator(xml.sax.xmlreader.Locator):
! _lineno = -1
! _public_id = None
! _system_id = None
!
! def getLineNumber(self):
! return self._lineno
!
! def getPublicId(self):
! return self._public_id
!
! def getSystemId(self):
! return self._system_id
!
!
! def parse(stream_or_string, parser=None):
! if type(stream_or_string) in [type(""), type(u"")]:
! stream = open(stream_or_string)
! else:
! stream = stream_or_string
! if not parser:
! parser = ESISReader()
! return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)