[Python-checkins] cpython: Close #14377: Add a new parameter to ElementTree.write and some module-level

eli.bendersky python-checkins at python.org
Sun Jan 13 15:05:10 CET 2013


http://hg.python.org/cpython/rev/58168d69b496
changeset:   81488:58168d69b496
user:        Eli Bendersky <eliben at gmail.com>
date:        Sun Jan 13 06:04:43 2013 -0800
summary:
  Close #14377: Add a new parameter to ElementTree.write and some module-level
serialization functions - short_empty_elements. It controls how elements
without contents are emitted.

Patch by Serhiy Storchaka. Feature initially proposed by Ariel Poliak.

files:
  Doc/library/xml.etree.elementtree.rst |  23 +++++++++-
  Lib/test/test_xml_etree.py            |  12 +++++
  Lib/xml/etree/ElementTree.py          |  31 +++++++++-----
  Misc/NEWS                             |   4 +
  4 files changed, 56 insertions(+), 14 deletions(-)


diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -428,29 +428,39 @@
    arguments.  Returns an element instance.
 
 
-.. function:: tostring(element, encoding="us-ascii", method="xml")
+.. function:: tostring(element, encoding="us-ascii", method="xml", *, \
+                       short_empty_elements=True)
 
    Generates a string representation of an XML element, including all
    subelements.  *element* is an :class:`Element` instance.  *encoding* [1]_ is
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
+   *short_empty_elements* has the same meaning as in :meth:`ElementTree.write`.
    Returns an (optionally) encoded string containing the XML data.
 
+   .. versionadded:: 3.4
+      The *short_empty_elements* parameter.
 
-.. function:: tostringlist(element, encoding="us-ascii", method="xml")
+
+.. function:: tostringlist(element, encoding="us-ascii", method="xml", *, \
+                           short_empty_elements=True)
 
    Generates a string representation of an XML element, including all
    subelements.  *element* is an :class:`Element` instance.  *encoding* [1]_ is
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
+   *short_empty_elements* has the same meaning as in :meth:`ElementTree.write`.
    Returns a list of (optionally) encoded strings containing the XML data.
    It does not guarantee any specific sequence, except that
    ``"".join(tostringlist(element)) == tostring(element)``.
 
    .. versionadded:: 3.2
 
+   .. versionadded:: 3.4
+      The *short_empty_elements* parameter.
+
 
 .. function:: XML(text, parser=None)
 
@@ -742,7 +752,7 @@
 
 
    .. method:: write(file, encoding="us-ascii", xml_declaration=None, \
-                     method="xml")
+                     method="xml", *, short_empty_elements=True)
 
       Writes the element tree to a file, as XML.  *file* is a file name, or a
       :term:`file object` opened for writing.  *encoding* [1]_ is the output
@@ -752,6 +762,10 @@
       for only if not US-ASCII or UTF-8 or Unicode (default is ``None``).
       *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is
       ``"xml"``).
+      The keyword-only *short_empty_elements* parameter controls the formatting
+      of elements that contain no content.  If ``True`` (the default), they are
+      emitted as a single self-closed tag, otherwise they are emitted as a pair
+      of start/end tags.
 
       The output is either a string (:class:`str`) or binary (:class:`bytes`).
       This is controlled by the *encoding* argument.  If *encoding* is
@@ -760,6 +774,9 @@
       :term:`file object`; make sure you do not try to write a string to a
       binary stream and vice versa.
 
+      .. versionadded:: 3.4
+         The *short_empty_elements* parameter.
+
 
 This is the XML file that is going to be manipulated::
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2380,6 +2380,18 @@
             ET.tostring(root, 'utf-16'),
             b''.join(ET.tostringlist(root, 'utf-16')))
 
+    def test_short_empty_elements(self):
+        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
+        self.assertEqual(
+            ET.tostring(root, 'unicode'),
+            '<tag>a<x />b<y />c</tag>')
+        self.assertEqual(
+            ET.tostring(root, 'unicode', short_empty_elements=True),
+            '<tag>a<x />b<y />c</tag>')
+        self.assertEqual(
+            ET.tostring(root, 'unicode', short_empty_elements=False),
+            '<tag>a<x></x>b<y></y>c</tag>')
+
 
 class ParseErrorTest(unittest.TestCase):
     def test_subclass(self):
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -797,7 +797,8 @@
               encoding=None,
               xml_declaration=None,
               default_namespace=None,
-              method=None):
+              method=None, *,
+              short_empty_elements=True):
         if not method:
             method = "xml"
         elif method not in _serialize:
@@ -825,7 +826,8 @@
             else:
                 qnames, namespaces = _namespaces(self._root, default_namespace)
                 serialize = _serialize[method]
-                serialize(write, self._root, qnames, namespaces)
+                serialize(write, self._root, qnames, namespaces,
+                          short_empty_elements=short_empty_elements)
 
     def write_c14n(self, file):
         # lxml.etree compatibility.  use output method instead
@@ -947,7 +949,8 @@
             add_qname(text.text)
     return qnames, namespaces
 
-def _serialize_xml(write, elem, qnames, namespaces):
+def _serialize_xml(write, elem, qnames, namespaces,
+                   short_empty_elements, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
@@ -960,7 +963,8 @@
             if text:
                 write(_escape_cdata(text))
             for e in elem:
-                _serialize_xml(write, e, qnames, None)
+                _serialize_xml(write, e, qnames, None,
+                               short_empty_elements=short_empty_elements)
         else:
             write("<" + tag)
             items = list(elem.items())
@@ -982,12 +986,13 @@
                     else:
                         v = _escape_attrib(v)
                     write(" %s=\"%s\"" % (qnames[k], v))
-            if text or len(elem):
+            if text or len(elem) or not short_empty_elements:
                 write(">")
                 if text:
                     write(_escape_cdata(text))
                 for e in elem:
-                    _serialize_xml(write, e, qnames, None)
+                    _serialize_xml(write, e, qnames, None,
+                                   short_empty_elements=short_empty_elements)
                 write("</" + tag + ">")
             else:
                 write(" />")
@@ -1002,7 +1007,7 @@
 except NameError:
     pass
 
-def _serialize_html(write, elem, qnames, namespaces):
+def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
@@ -1166,9 +1171,11 @@
 # @return An (optionally) encoded string containing the XML data.
 # @defreturn string
 
-def tostring(element, encoding=None, method=None):
+def tostring(element, encoding=None, method=None, *,
+             short_empty_elements=True):
     stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
-    ElementTree(element).write(stream, encoding, method=method)
+    ElementTree(element).write(stream, encoding, method=method,
+                               short_empty_elements=short_empty_elements)
     return stream.getvalue()
 
 ##
@@ -1202,10 +1209,12 @@
     def tell(self):
         return len(self.lst)
 
-def tostringlist(element, encoding=None, method=None):
+def tostringlist(element, encoding=None, method=None, *,
+                 short_empty_elements=True):
     lst = []
     stream = _ListDataStream(lst)
-    ElementTree(element).write(stream, encoding, method=method)
+    ElementTree(element).write(stream, encoding, method=method,
+                               short_empty_elements=short_empty_elements)
     return lst
 
 ##
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -559,6 +559,10 @@
 - Issue #16123: IDLE - deprecate running without a subprocess.
   Patch by Roger Serwy.
 
+- Issue #14377: ElementTree.write and some of the module-level functions have
+  a new parameter - *short_empty_elements*. It controls how elements with no
+  contents are emitted.
+
 - Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element
   element_factory (fixes a regression in SimpleTAL).
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list