[Python-checkins] cpython: Issue #14007: accept incomplete TreeBuilder objects (missing start/end/data/close)

Mon Mar 5 10:43:03 CET 2012

http://hg.python.org/cpython/rev/47016103185f
changeset:   75411:47016103185f
user:        Florent Xicluna <florent.xicluna at gmail.com>
date:        Mon Mar 05 10:42:19 2012 +0100
summary:
  Issue #14007: accept incomplete TreeBuilder objects (missing start/end/data/close) for the Python implementation as well. Add disabled tests for the doctype() method.

files:
  Lib/test/test_xml_etree.py   |  165 +++++++++++++++-------
  Lib/xml/etree/ElementTree.py |   81 +++++-----
  Misc/NEWS                    |    4 +
  3 files changed, 155 insertions(+), 95 deletions(-)

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1855,58 +1855,16 @@
 # --------------------------------------------------------------------
 
 
-class CleanContext(object):
-    """Provide default namespace mapping and path cache."""
-    checkwarnings = None
+class ElementTreeTest(unittest.TestCase):
 
-    def __init__(self, quiet=False):
-        if sys.flags.optimize >= 2:
-            # under -OO, doctests cannot be run and therefore not all warnings
-            # will be emitted
-            quiet = True
-        deprecations = (
-            # Search behaviour is broken if search path starts with "/".
-            ("This search is broken in 1.3 and earlier, and will be fixed "
-             "in a future version.  If you rely on the current behaviour, "
-             "change it to '.+'", FutureWarning),
-            # Element.getchildren() and Element.getiterator() are deprecated.
-            ("This method will be removed in future versions.  "
-             "Use .+ instead.", DeprecationWarning),
-            ("This method will be removed in future versions.  "
-             "Use .+ instead.", PendingDeprecationWarning),
-            # XMLParser.doctype() is deprecated.
-            ("This method of XMLParser is deprecated.  Define doctype.. "
-             "method on the TreeBuilder target.", DeprecationWarning))
-        self.checkwarnings = support.check_warnings(*deprecations, quiet=quiet)
-
-    def __enter__(self):
-        from xml.etree import ElementPath
-        self._nsmap = ET.register_namespace._namespace_map
-        # Copy the default namespace mapping
-        self._nsmap_copy = self._nsmap.copy()
-        # Copy the path cache (should be empty)
-        self._path_cache = ElementPath._cache
-        ElementPath._cache = self._path_cache.copy()
-        self.checkwarnings.__enter__()
-
-    def __exit__(self, *args):
-        from xml.etree import ElementPath
-        # Restore mapping and path cache
-        self._nsmap.clear()
-        self._nsmap.update(self._nsmap_copy)
-        ElementPath._cache = self._path_cache
-        self.checkwarnings.__exit__(*args)
-
-
-class TestAcceleratorNotImported(unittest.TestCase):
-    # Test that the C accelerator was not imported for pyET
-    def test_correct_import_pyET(self):
-        self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
-
-
-class TestElementClass(unittest.TestCase):
-    def test_Element_is_a_type(self):
+    def test_istype(self):
+        self.assertIsInstance(ET.ParseError, type)
+        self.assertIsInstance(ET.QName, type)
+        self.assertIsInstance(ET.ElementTree, type)
         self.assertIsInstance(ET.Element, type)
+        # XXX issue 14128 with C ElementTree
+        # self.assertIsInstance(ET.TreeBuilder, type)
+        # self.assertIsInstance(ET.XMLParser, type)
 
     def test_Element_subclass_trivial(self):
         class MyElement(ET.Element):
@@ -1936,16 +1894,115 @@
         self.assertEqual(mye.newmethod(), 'joe')
 
 
+class TreeBuilderTest(unittest.TestCase):
+
+    sample1 = ('<!DOCTYPE html PUBLIC'
+        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
+        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
+        '<html>text</html>')
+
+    def test_dummy_builder(self):
+        class BaseDummyBuilder:
+            def close(self):
+                return 42
+
+        class DummyBuilder(BaseDummyBuilder):
+            data = start = end = lambda *a: None
+
+        parser = ET.XMLParser(target=DummyBuilder())
+        parser.feed(self.sample1)
+        self.assertEqual(parser.close(), 42)
+
+        parser = ET.XMLParser(target=BaseDummyBuilder())
+        parser.feed(self.sample1)
+        self.assertEqual(parser.close(), 42)
+
+        parser = ET.XMLParser(target=object())
+        parser.feed(self.sample1)
+        self.assertIsNone(parser.close())
+
+
+    @unittest.expectedFailure   # XXX issue 14007 with C ElementTree
+    def test_doctype(self):
+        class DoctypeParser:
+            _doctype = None
+
+            def doctype(self, name, pubid, system):
+                self._doctype = (name, pubid, system)
+
+            def close(self):
+                return self._doctype
+
+        parser = ET.XMLParser(target=DoctypeParser())
+        parser.feed(self.sample1)
+
+        self.assertEqual(parser.close(),
+            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
+             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
+
+
+class NoAcceleratorTest(unittest.TestCase):
+
+    # Test that the C accelerator was not imported for pyET
+    def test_correct_import_pyET(self):
+        self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
+        self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
+
+# --------------------------------------------------------------------
+
+
+class CleanContext(object):
+    """Provide default namespace mapping and path cache."""
+    checkwarnings = None
+
+    def __init__(self, quiet=False):
+        if sys.flags.optimize >= 2:
+            # under -OO, doctests cannot be run and therefore not all warnings
+            # will be emitted
+            quiet = True
+        deprecations = (
+            # Search behaviour is broken if search path starts with "/".
+            ("This search is broken in 1.3 and earlier, and will be fixed "
+             "in a future version.  If you rely on the current behaviour, "
+             "change it to '.+'", FutureWarning),
+            # Element.getchildren() and Element.getiterator() are deprecated.
+            ("This method will be removed in future versions.  "
+             "Use .+ instead.", DeprecationWarning),
+            ("This method will be removed in future versions.  "
+             "Use .+ instead.", PendingDeprecationWarning))
+        self.checkwarnings = support.check_warnings(*deprecations, quiet=quiet)
+
+    def __enter__(self):
+        from xml.etree import ElementPath
+        self._nsmap = ET.register_namespace._namespace_map
+        # Copy the default namespace mapping
+        self._nsmap_copy = self._nsmap.copy()
+        # Copy the path cache (should be empty)
+        self._path_cache = ElementPath._cache
+        ElementPath._cache = self._path_cache.copy()
+        self.checkwarnings.__enter__()
+
+    def __exit__(self, *args):
+        from xml.etree import ElementPath
+        # Restore mapping and path cache
+        self._nsmap.clear()
+        self._nsmap.update(self._nsmap_copy)
+        ElementPath._cache = self._path_cache
+        self.checkwarnings.__exit__(*args)
+
+
 def test_main(module=pyET):
     from test import test_xml_etree
 
-    # Run the tests specific to the Python implementation
-    support.run_unittest(TestAcceleratorNotImported)
-
     # The same doctests are used for both the Python and the C implementations
     test_xml_etree.ET = module
 
-    support.run_unittest(TestElementClass)
+    test_classes = [ElementTreeTest, TreeBuilderTest]
+    if module is pyET:
+        # Run the tests specific to the Python implementation
+        test_classes += [NoAcceleratorTest]
+
+    support.run_unittest(*test_classes)
 
     # XXX the C module should give the same warnings as the Python module
     with CleanContext(quiet=(module is not pyET)):
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1511,24 +1511,30 @@
         self.target = self._target = target
         self._error = expat.error
         self._names = {} # name memo cache
-        # callbacks
+        # main callbacks
         parser.DefaultHandlerExpand = self._default
-        parser.StartElementHandler = self._start
-        parser.EndElementHandler = self._end
-        parser.CharacterDataHandler = self._data
-        # optional callbacks
-        parser.CommentHandler = self._comment
-        parser.ProcessingInstructionHandler = self._pi
+        if hasattr(target, 'start'):
+            parser.StartElementHandler = self._start
+        if hasattr(target, 'end'):
+            parser.EndElementHandler = self._end
+        if hasattr(target, 'data'):
+            parser.CharacterDataHandler = target.data
+        # miscellaneous callbacks
+        if hasattr(target, 'comment'):
+            parser.CommentHandler = target.comment
+        if hasattr(target, 'pi'):
+            parser.ProcessingInstructionHandler = target.pi
         # let expat do the buffering, if supported
         try:
-            self._parser.buffer_text = 1
+            parser.buffer_text = 1
         except AttributeError:
             pass
         # use new-style attribute handling, if supported
         try:
-            self._parser.ordered_attributes = 1
-            self._parser.specified_attributes = 1
-            parser.StartElementHandler = self._start_list
+            parser.ordered_attributes = 1
+            parser.specified_attributes = 1
+            if hasattr(target, 'start'):
+                parser.StartElementHandler = self._start_list
         except AttributeError:
             pass
         self._doctype = None
@@ -1572,44 +1578,29 @@
                 attrib[fixname(attrib_in[i])] = attrib_in[i+1]
         return self.target.start(tag, attrib)
 
-    def _data(self, text):
-        return self.target.data(text)
-
     def _end(self, tag):
         return self.target.end(self._fixname(tag))
 
-    def _comment(self, data):
-        try:
-            comment = self.target.comment
-        except AttributeError:
-            pass
-        else:
-            return comment(data)
-
-    def _pi(self, target, data):
-        try:
-            pi = self.target.pi
-        except AttributeError:
-            pass
-        else:
-            return pi(target, data)
-
     def _default(self, text):
         prefix = text[:1]
         if prefix == "&":
             # deal with undefined entities
             try:
-                self.target.data(self.entity[text[1:-1]])
+                data_handler = self.target.data
+            except AttributeError:
+                return
+            try:
+                data_handler(self.entity[text[1:-1]])
             except KeyError:
                 from xml.parsers import expat
                 err = expat.error(
                     "undefined entity %s: line %d, column %d" %
-                    (text, self._parser.ErrorLineNumber,
-                    self._parser.ErrorColumnNumber)
+                    (text, self.parser.ErrorLineNumber,
+                    self.parser.ErrorColumnNumber)
                     )
                 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
-                err.lineno = self._parser.ErrorLineNumber
-                err.offset = self._parser.ErrorColumnNumber
+                err.lineno = self.parser.ErrorLineNumber
+                err.offset = self.parser.ErrorColumnNumber
                 raise err
         elif prefix == "<" and text[:9] == "<!DOCTYPE":
             self._doctype = [] # inside a doctype declaration
@@ -1636,7 +1627,7 @@
                     pubid = pubid[1:-1]
                 if hasattr(self.target, "doctype"):
                     self.target.doctype(name, pubid, system[1:-1])
-                elif self.doctype is not self._XMLParser__doctype:
+                elif self.doctype != self._XMLParser__doctype:
                     # warn about deprecated call
                     self._XMLParser__doctype(name, pubid, system[1:-1])
                     self.doctype(name, pubid, system[1:-1])
@@ -1667,7 +1658,7 @@
 
     def feed(self, data):
         try:
-            self._parser.Parse(data, 0)
+            self.parser.Parse(data, 0)
         except self._error as v:
             self._raiseerror(v)
 
@@ -1679,12 +1670,20 @@
 
     def close(self):
         try:
-            self._parser.Parse("", 1) # end of data
+            self.parser.Parse("", 1) # end of data
         except self._error as v:
             self._raiseerror(v)
-        tree = self.target.close()
-        del self.target, self._parser # get rid of circular references
-        return tree
+        try:
+            try:
+                close_handler = self.target.close
+            except AttributeError:
+                pass
+            else:
+                return close_handler()
+        finally:
+            # get rid of circular references
+            del self.parser, self._parser
+            del self.target, self._target
 
 
 # Import the C accelerators
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@
 Library
 -------
 
+- Issue #14007: Accept incomplete TreeBuilder objects (missing start, end,
+  data or close method) for the Python implementation as well.
+  Drop the no-op TreeBuilder().xml() method from the C implementation.
+
 
 What's New in Python 3.3.0 Alpha 1?
 ===================================

-- 
Repository URL: http://hg.python.org/cpython