[XML-SIG] 0.6.4 problem with reading DOM tree from XML with validation

scott snyder <snyder@fnal.gov>
Fri, 02 Mar 2001 19:39:59 CST


hi -

Reading a DOM tree from XML with validation seems to have broken between
0.6.2 and 0.6.4.  For example, if I run the following program:

-------------------------------------------------------------
from xml.dom.ext.reader.Sax2         import FromXmlFile

f = open ('test.xml', 'w')
f.write ("""<?xml version="1.0"?>
<!DOCTYPE configuration SYSTEM "NONEXISTENT.dtd">
<configuration/>""")
f.close()

doc = FromXmlFile ('test.xml', None, 1)
print doc
-------------------------------------------------------------

With 0.6.4, it runs without error, even though the DTD referred to
does not exist.

$ python read.py
<XML Document at 82026f8>


0.6.2, on the other hand, does give me the error I expect:

[sss@karma xmltest]$ python read.py
Traceback (innermost last):
  File "read.py", line 9, in ?
    doc = FromXmlFile ('test.xml', None, 1)
  ... (traceback trimmed) ...
  File "xml/dom/ext/reader/Sax2.py", line 240, in fatalError
    raise exception
xml.sax._exceptions.SAXParseException: Unknown:2:50: Couldn't open resource 'NONEXISTENT.dtd'


The immediate problem is fixed by this change:


*** xml/dom/ext/reader/Sax2.py-orig	Tue Feb 20 00:47:40 2001
--- xml/dom/ext/reader/Sax2.py	Fri Mar  2 18:29:21 2001
***************
*** 274,279 ****
--- 274,281 ----
      def __init__(self, validate=0, keepAllWs=0, catName=None,
                   saxHandlerClass=XmlDomGenerator, parser=None):
          self.parser = parser or (validate and sax2exts.XMLValParserFactory.make_parser()) or sax2exts.XMLParserFactory.make_parser()
+         if validate:
+             self.parser.setFeature (saxlib.feature_validation, 1)
          if catName:
              #set up the catalog, if there is one
              from xml.parsers.xmlproc import catalog


However, with this change, I run into another bug:

$ python read.py
Traceback (innermost last):
  File "read.py", line 9, in ?
    doc = FromXmlFile ('test.xml', None, 1)
  File "xml/dom/ext/reader/Sax2.py", line 330, in FromXmlFile
    saxHandlerClass, parser)
  File "xml/dom/ext/reader/Sax2.py", line 315, in FromXmlStream
    return reader.fromStream(stream, ownerDocument)
  File "xml/dom/ext/reader/Sax2.py", line 301, in fromStream
    self.parser.parse(s)
  File "xml/sax/drivers2/drv_xmlproc.py", line 90, in parse
    parser.read_from(source.getByteStream(), bufsize)
TypeError: too many arguments; expected 2, got 3


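Pooh.  The interfaces for the validating and non-validating parsers are
not compatible: the validating parser's read_from() does not accept the
bufsize argument that drv_xmlproc passes to it.  Patched thusly: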


*** xml/parsers/xmlproc/xmlval.py-orig	Fri Mar  2 18:26:47 2001
--- xml/parsers/xmlproc/xmlval.py	Fri Mar  2 18:26:53 2001
***************
*** 98,105 ****
      def parseEnd(self):
          self.parser.parseEnd()
  
!     def read_from(self,file):
!         self.parser.read_from(file)
  
      def flush(self):
          self.parser.flush()
--- 98,105 ----
      def parseEnd(self):
          self.parser.parseEnd()
  
!     def read_from(self,file,bufsize=16384):
!         self.parser.read_from(file,bufsize)
  
      def flush(self):
          self.parser.flush()



With these changes, the example above works (i.e., it raises the expected
error for the missing DTD).

However, the following program then fails:

----------------------------------------------------------------------
from xml.dom.ext.reader.Sax2         import FromXmlFile

f = open ('test2.xml', 'w')
f.write ("""<?xml version="1.0"?>
<!DOCTYPE configuration SYSTEM "test2.dtd">
<configuration>
</configuration>
""")
f.close()

f = open ('test2.dtd', 'w')
f.write ("<!ELEMENT configuration EMPTY>\n")
f.close ()

doc = FromXmlFile ('test2.xml', None, 1)
print doc
----------------------------------------------------------------------

$ python read2.py
Traceback (innermost last):
  File "read2.py", line 15, in ?
    doc = FromXmlFile ('test2.xml', None, 1)
  File "xml/dom/ext/reader/Sax2.py", line 330, in FromXmlFile
    saxHandlerClass, parser)
  File "xml/dom/ext/reader/Sax2.py", line 315, in FromXmlStream
    return reader.fromStream(stream, ownerDocument)
  File "xml/dom/ext/reader/Sax2.py", line 301, in fromStream
    self.parser.parse(s)
  File "xml/sax/drivers2/drv_xmlproc.py", line 90, in parse
    parser.read_from(source.getByteStream(), bufsize)
  File "xml/parsers/xmlproc/xmlval.py", line 102, in read_from
    self.parser.read_from(file,bufsize)
  File "xml/parsers/xmlproc/xmlutils.py", line 137, in read_from
    self.feed(buf)
  File "xml/parsers/xmlproc/xmlutils.py", line 185, in feed
    self.do_parse()
  File "xml/parsers/xmlproc/xmlproc.py", line 115, in do_parse
    self.parse_data()
  File "xml/parsers/xmlproc/xmlproc.py", line 377, in parse_data
    self.app.handle_data(self.data,start,end)
  File "xml/parsers/xmlproc/xmlval.py", line 213, in handle_data
    self.realapp.handle_ignorable_data(data,start,end)
  File "xml/sax/drivers2/drv_xmlproc.py", line 355, in handle_ignorable_data
    self._cont_handler.ignorableWhitespace(data, start, end) # FIXME?
TypeError: too many arguments; expected 2, got 4


This patch seems to fix this:

*** xml/dom/ext/reader/Sax2.py-orig	Tue Feb 20 00:47:40 2001
--- xml/dom/ext/reader/Sax2.py	Fri Mar  2 18:59:31 2001
***************
*** 199,205 ****
              self._nodeStack[-1].appendChild(new_element)
          return
  
!     def ignorableWhitespace(self, chars):
          """
          If 'keepAllWs' permits, add ignorable white-space as a text node.
          A Document node cannot contain text nodes directly.
--- 199,205 ----
              self._nodeStack[-1].appendChild(new_element)
          return
  
!     def ignorableWhitespace(self, chars, start, length):
          """
          If 'keepAllWs' permits, add ignorable white-space as a text node.
          A Document node cannot contain text nodes directly.
***************
*** 207,213 ****
          for it in the DOM and it must be discarded.
          """
          if self._keepAllWs and self._nodeStack[-1].nodeType !=  Node.DOCUMENT_NODE:
!             self._currText = self._currText + chars
          return
  
      def characters(self, chars):
--- 207,213 ----
          for it in the DOM and it must be discarded.
          """
          if self._keepAllWs and self._nodeStack[-1].nodeType !=  Node.DOCUMENT_NODE:
!             self._currText = self._currText + chars[start:start+length]
          return
  
      def characters(self, chars):