[XML-SIG] 0.6.4 problem with reading DOM tree from XML with validation
scott snyder
scott snyder <snyder@fnal.gov>
Fri, 02 Mar 2001 19:39:59 CST
hi -
Reading a DOM tree from XML with validation seems to have broken between
0.6.2 and 0.6.4. For example, if I run the following program:
-------------------------------------------------------------
from xml.dom.ext.reader.Sax2 import FromXmlFile
f = open ('test.xml', 'w')
f.write ("""<?xml version="1.0"?>
<!DOCTYPE configuration SYSTEM "NONEXISTENT.dtd">
<configuration/>""")
f.close()
doc = FromXmlFile ('test.xml', None, 1)
print doc
-------------------------------------------------------------
With 0.6.4, it runs without error, even though the DTD the document
refers to does not exist.
$ python read.py
<XML Document at 82026f8>
0.6.2, on the other hand, does give me the error I expect:
[sss@karma xmltest]$ python read.py
Traceback (innermost last):
File "read.py", line 9, in ?
doc = FromXmlFile ('test.xml', None, 1)
... (traceback trimmed) ...
File "xml/dom/ext/reader/Sax2.py", line 240, in fatalError
raise exception
xml.sax._exceptions.SAXParseException: Unknown:2:50: Couldn't open resource 'NONEXISTENT.dtd'
The immediate problem is fixed by this change:
*** xml/dom/ext/reader/Sax2.py-orig Tue Feb 20 00:47:40 2001
--- xml/dom/ext/reader/Sax2.py Fri Mar 2 18:29:21 2001
***************
*** 274,279 ****
--- 274,281 ----
def __init__(self, validate=0, keepAllWs=0, catName=None,
saxHandlerClass=XmlDomGenerator, parser=None):
self.parser = parser or (validate and sax2exts.XMLValParserFactory.make_parser()) or sax2exts.XMLParserFactory.make_parser()
+ if validate:
+ self.parser.setFeature (saxlib.feature_validation, 1)
if catName:
#set up the catalog, if there is one
from xml.parsers.xmlproc import catalog
However, with this change, I run into another bug:
$ python read.py
Traceback (innermost last):
File "read.py", line 9, in ?
doc = FromXmlFile ('test.xml', None, 1)
File "xml/dom/ext/reader/Sax2.py", line 330, in FromXmlFile
saxHandlerClass, parser)
File "xml/dom/ext/reader/Sax2.py", line 315, in FromXmlStream
return reader.fromStream(stream, ownerDocument)
File "xml/dom/ext/reader/Sax2.py", line 301, in fromStream
self.parser.parse(s)
File "xml/sax/drivers2/drv_xmlproc.py", line 90, in parse
parser.read_from(source.getByteStream(), bufsize)
TypeError: too many arguments; expected 2, got 3
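Pooh. The interfaces for the validating and non-validating parsers are
not compatible: the validating parser's read_from() does not accept the
bufsize argument that the SAX driver passes. Patched thusly: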
*** xml/parsers/xmlproc/xmlval.py-orig Fri Mar 2 18:26:47 2001
--- xml/parsers/xmlproc/xmlval.py Fri Mar 2 18:26:53 2001
***************
*** 98,105 ****
def parseEnd(self):
self.parser.parseEnd()
! def read_from(self,file):
! self.parser.read_from(file)
def flush(self):
self.parser.flush()
--- 98,105 ----
def parseEnd(self):
self.parser.parseEnd()
! def read_from(self,file,bufsize=16384):
! self.parser.read_from(file,bufsize)
def flush(self):
self.parser.flush()
With these changes, the example above works (i.e., gives the expected error).
However, the following program then fails:
----------------------------------------------------------------------
from xml.dom.ext.reader.Sax2 import FromXmlFile
f = open ('test2.xml', 'w')
f.write ("""<?xml version="1.0"?>
<!DOCTYPE configuration SYSTEM "test2.dtd">
<configuration>
</configuration>
""")
f.close()
f = open ('test2.dtd', 'w')
f.write ("<!ELEMENT configuration EMPTY>\n")
f.close ()
doc = FromXmlFile ('test2.xml', None, 1)
print doc
----------------------------------------------------------------------
$ python read2.py
Traceback (innermost last):
File "read2.py", line 15, in ?
doc = FromXmlFile ('test2.xml', None, 1)
File "xml/dom/ext/reader/Sax2.py", line 330, in FromXmlFile
saxHandlerClass, parser)
File "xml/dom/ext/reader/Sax2.py", line 315, in FromXmlStream
return reader.fromStream(stream, ownerDocument)
File "xml/dom/ext/reader/Sax2.py", line 301, in fromStream
self.parser.parse(s)
File "xml/sax/drivers2/drv_xmlproc.py", line 90, in parse
parser.read_from(source.getByteStream(), bufsize)
File "xml/parsers/xmlproc/xmlval.py", line 102, in read_from
self.parser.read_from(file,bufsize)
File "xml/parsers/xmlproc/xmlutils.py", line 137, in read_from
self.feed(buf)
File "xml/parsers/xmlproc/xmlutils.py", line 185, in feed
self.do_parse()
File "xml/parsers/xmlproc/xmlproc.py", line 115, in do_parse
self.parse_data()
File "xml/parsers/xmlproc/xmlproc.py", line 377, in parse_data
self.app.handle_data(self.data,start,end)
File "xml/parsers/xmlproc/xmlval.py", line 213, in handle_data
self.realapp.handle_ignorable_data(data,start,end)
File "xml/sax/drivers2/drv_xmlproc.py", line 355, in handle_ignorable_data
self._cont_handler.ignorableWhitespace(data, start, end) # FIXME?
TypeError: too many arguments; expected 2, got 4
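This patch, which makes the DOM handler's ignorableWhitespace() accept the
extra arguments passed by the driver, seems to fix this: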
*** xml/dom/ext/reader/Sax2.py-orig Tue Feb 20 00:47:40 2001
--- xml/dom/ext/reader/Sax2.py Fri Mar 2 18:59:31 2001
***************
*** 199,205 ****
self._nodeStack[-1].appendChild(new_element)
return
! def ignorableWhitespace(self, chars):
"""
If 'keepAllWs' permits, add ignorable white-space as a text node.
A Document node cannot contain text nodes directly.
--- 199,205 ----
self._nodeStack[-1].appendChild(new_element)
return
! def ignorableWhitespace(self, chars, start, length):
"""
If 'keepAllWs' permits, add ignorable white-space as a text node.
A Document node cannot contain text nodes directly.
***************
*** 207,213 ****
for it in the DOM and it must be discarded.
"""
if self._keepAllWs and self._nodeStack[-1].nodeType != Node.DOCUMENT_NODE:
! self._currText = self._currText + chars
return
def characters(self, chars):
--- 207,213 ----
for it in the DOM and it must be discarded.
"""
if self._keepAllWs and self._nodeStack[-1].nodeType != Node.DOCUMENT_NODE:
! self._currText = self._currText + chars[start:start+length]
return
def characters(self, chars):