[Python-checkins] cpython: Issue 14814: Add namespaces keyword arg to find(*) methods in _elementtree.

Tue May 29 05:03:50 CEST 2012

http://hg.python.org/cpython/rev/7d252dbfbee3
changeset:   77217:7d252dbfbee3
user:        Eli Bendersky <eliben at gmail.com>
date:        Tue May 29 06:02:56 2012 +0300
summary:
  Issue 14814: Add namespaces keyword arg to find(*) methods in _elementtree.
Add attrib keyword to Element and SubElement in _elementtree.
Patch developed with Ezio Melotti.

files:
  Doc/library/xml.etree.elementtree.rst |   29 ++-
  Lib/test/test_xml_etree.py            |   66 ++++++++-
  Lib/xml/etree/ElementTree.py          |    3 +
  Modules/_elementtree.c                |  113 ++++++++++---
  4 files changed, 173 insertions(+), 38 deletions(-)

diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -476,27 +476,30 @@
       .. versionadded:: 3.2
 
 
-   .. method:: find(match)
+   .. method:: find(match, namespaces=None)
 
       Finds the first subelement matching *match*.  *match* may be a tag name
       or a :ref:`path <elementtree-xpath>`.  Returns an element instance
-      or ``None``.
+      or ``None``.  *namespaces* is an optional mapping from namespace prefix
+      to full name.
 
 
-   .. method:: findall(match)
+   .. method:: findall(match, namespaces=None)
 
       Finds all matching subelements, by tag name or
       :ref:`path <elementtree-xpath>`.  Returns a list containing all matching
-      elements in document order.
+      elements in document order.  *namespaces* is an optional mapping from
+      namespace prefix to full name.
 
 
-   .. method:: findtext(match, default=None)
+   .. method:: findtext(match, default=None, namespaces=None)
 
       Finds text for the first subelement matching *match*.  *match* may be
       a tag name or a :ref:`path <elementtree-xpath>`.  Returns the text content
       of the first matching element, or *default* if no element was found.
       Note that if the matching element has no text content an empty string
-      is returned.
+      is returned. *namespaces* is an optional mapping from namespace prefix
+      to full name.
 
 
    .. method:: getchildren()
@@ -528,11 +531,13 @@
       .. versionadded:: 3.2
 
 
-   .. method:: iterfind(match)
+   .. method:: iterfind(match, namespaces=None)
 
       Finds all matching subelements, by tag name or
       :ref:`path <elementtree-xpath>`.  Returns an iterable yielding all
-      matching elements in document order.
+      matching elements in document order. *namespaces* is an optional mapping
+      from namespace prefix to full name.
+
 
       .. versionadded:: 3.2
 
@@ -597,17 +602,17 @@
       care.  *element* is an element instance.
 
 
-   .. method:: find(match)
+   .. method:: find(match, namespaces=None)
 
       Same as :meth:`Element.find`, starting at the root of the tree.
 
 
-   .. method:: findall(match)
+   .. method:: findall(match, namespaces=None)
 
       Same as :meth:`Element.findall`, starting at the root of the tree.
 
 
-   .. method:: findtext(match, default=None)
+   .. method:: findtext(match, default=None, namespaces=None)
 
       Same as :meth:`Element.findtext`, starting at the root of the tree.
 
@@ -630,7 +635,7 @@
       to look for (default is to return all elements)
 
 
-   .. method:: iterfind(match)
+   .. method:: iterfind(match, namespaces=None)
 
       Same as :meth:`Element.iterfind`, starting at the root of the tree.
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -62,6 +62,22 @@
 </body>
 """
 
+SAMPLE_XML_NS_ELEMS = """
+<root>
+<h:table xmlns:h="hello">
+  <h:tr>
+    <h:td>Apples</h:td>
+    <h:td>Bananas</h:td>
+  </h:tr>
+</h:table>
+
+<f:table xmlns:f="foo">
+  <f:name>African Coffee Table</f:name>
+  <f:width>80</f:width>
+  <f:length>120</f:length>
+</f:table>
+</root>
+"""
 
 def sanity():
     """
@@ -1995,6 +2011,17 @@
         self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
 
 
+class NamespaceParseTest(unittest.TestCase):
+    def test_find_with_namespace(self):
+        nsmap = {'h': 'hello', 'f': 'foo'}
+        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)
+
+        self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1)
+        self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2)
+        self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
+
+
+
 class ElementSlicingTest(unittest.TestCase):
     def _elem_tags(self, elemlist):
         return [e.tag for e in elemlist]
@@ -2102,6 +2129,41 @@
                 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
 
 
+class KeywordArgsTest(unittest.TestCase):
+    # Test various issues with keyword arguments passed to ET.Element
+    # constructor and methods
+    def test_issue14818(self):
+        x = ET.XML("<a>foo</a>")
+        self.assertEqual(x.find('a', None),
+                         x.find(path='a', namespaces=None))
+        self.assertEqual(x.findtext('a', None, None),
+                         x.findtext(path='a', default=None, namespaces=None))
+        self.assertEqual(x.findall('a', None),
+                         x.findall(path='a', namespaces=None))
+        self.assertEqual(list(x.iterfind('a', None)),
+                         list(x.iterfind(path='a', namespaces=None)))
+
+        self.assertEqual(ET.Element('a').attrib, {})
+        elements = [
+            ET.Element('a', dict(href="#", id="foo")),
+            ET.Element('a', attrib=dict(href="#", id="foo")),
+            ET.Element('a', dict(href="#"), id="foo"),
+            ET.Element('a', href="#", id="foo"),
+            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
+        ]
+        for e in elements:
+            self.assertEqual(e.tag, 'a')
+            self.assertEqual(e.attrib, dict(href="#", id="foo"))
+
+        e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
+        self.assertEqual(e2.attrib['key1'], 'value1')
+
+        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
+            ET.Element('a', "I'm not a dict")
+        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
+            ET.Element('a', attrib="I'm not a dict")
+
+
 # --------------------------------------------------------------------
 
 
@@ -2157,7 +2219,9 @@
         StringIOTest,
         ParseErrorTest,
         ElementTreeTest,
-        TreeBuilderTest]
+        NamespaceParseTest,
+        TreeBuilderTest,
+        KeywordArgsTest]
     if module is pyET:
         # Run the tests specific to the Python implementation
         test_classes += [NoAcceleratorTest]
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -205,6 +205,9 @@
     # constructor
 
     def __init__(self, tag, attrib={}, **extra):
+        if not isinstance(attrib, dict):
+            raise TypeError("attrib must be dict, not %s" % (
+                attrib.__class__.__name__,))
         attrib = attrib.copy()
         attrib.update(extra)
         self.tag = tag
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -347,6 +347,41 @@
     return (PyObject *)e;
 }
 
+/* Helper function for building the attrib dictionary from a keywords dict.
+ * This is required by some constructors/functions in this module that can
+ * accept attrib as a keyword argument and/or attributes passed directly in
+ * *kwds.  An 'attrib' entry, if present, must be a dict; it is copied and
+ * removed from kwds.  The remaining kwds are then merged into the result.
+ */
+static PyObject*
+get_attrib_from_keywords(PyObject *kwds)
+{
+    PyObject *attrib_str = PyUnicode_FromString("attrib");
+    PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
+
+    if (attrib) {
+        /* If attrib was found in kwds, copy its value and remove it from
+         * kwds
+         */
+        if (!PyDict_Check(attrib)) {
+            Py_DECREF(attrib_str);
+            PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
+                         Py_TYPE(attrib)->tp_name);
+            return NULL;
+        }
+        attrib = PyDict_Copy(attrib);
+        PyDict_DelItem(kwds, attrib_str);
+    } else {
+        attrib = PyDict_New();
+    }
+
+    Py_DECREF(attrib_str);
+
+    if (attrib)
+        PyDict_Update(attrib, kwds);
+    return attrib;
+}
+
 static int
 element_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
@@ -358,13 +393,23 @@
     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
         return -1;
 
-    if (attrib || kwds) {
-        attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
+    if (attrib) {
+        /* attrib passed as positional arg */
+        attrib = PyDict_Copy(attrib);
         if (!attrib)
             return -1;
-        if (kwds)
-            PyDict_Update(attrib, kwds);
+        if (kwds) {
+            if (PyDict_Update(attrib, kwds) < 0) {
+                return -1;
+            }
+        }
+    } else if (kwds) {
+        /* have keyword args */
+        attrib = get_attrib_from_keywords(kwds);
+        if (!attrib)
+            return -1;
     } else {
+        /* no attrib arg, no kwds, so no attributes */
         Py_INCREF(Py_None);
         attrib = Py_None;
     }
@@ -536,7 +581,7 @@
 }
 
 static PyObject*
-subelement(PyObject* self, PyObject* args, PyObject* kw)
+subelement(PyObject *self, PyObject *args, PyObject *kwds)
 {
     PyObject* elem;
 
@@ -548,13 +593,23 @@
                           &PyDict_Type, &attrib))
         return NULL;
 
-    if (attrib || kw) {
-        attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
+    if (attrib) {
+        /* attrib passed as positional arg */
+        attrib = PyDict_Copy(attrib);
         if (!attrib)
             return NULL;
-        if (kw)
-            PyDict_Update(attrib, kw);
+        if (kwds) {
+            if (PyDict_Update(attrib, kwds) < 0) {
+                return NULL;
+            }
+        }
+    } else if (kwds) {
+        /* have keyword args */
+        attrib = get_attrib_from_keywords(kwds);
+        if (!attrib)
+            return NULL;
     } else {
+        /* no attrib arg, no kwds, so no attributes */
         Py_INCREF(Py_None);
         attrib = Py_None;
     }
@@ -881,13 +936,15 @@
 }
 
 static PyObject*
-element_find(ElementObject* self, PyObject* args)
+element_find(ElementObject *self, PyObject *args, PyObject *kwds)
 {
     int i;
     PyObject* tag;
     PyObject* namespaces = Py_None;
-
-    if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
+    static char *kwlist[] = {"path", "namespaces", 0};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
+                                     &tag, &namespaces))
         return NULL;
 
     if (checkpath(tag) || namespaces != Py_None) {
@@ -913,15 +970,17 @@
 }
 
 static PyObject*
-element_findtext(ElementObject* self, PyObject* args)
+element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
 {
     int i;
     PyObject* tag;
     PyObject* default_value = Py_None;
     PyObject* namespaces = Py_None;
     _Py_IDENTIFIER(findtext);
-
-    if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
+    static char *kwlist[] = {"path", "default", "namespaces", 0};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
+                                     &tag, &default_value, &namespaces))
         return NULL;
 
     if (checkpath(tag) || namespaces != Py_None)
@@ -951,14 +1010,16 @@
 }
 
 static PyObject*
-element_findall(ElementObject* self, PyObject* args)
+element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
 {
     int i;
     PyObject* out;
     PyObject* tag;
     PyObject* namespaces = Py_None;
-
-    if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
+    static char *kwlist[] = {"path", "namespaces", 0};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
+                                     &tag, &namespaces))
         return NULL;
 
     if (checkpath(tag) || namespaces != Py_None) {
@@ -990,13 +1051,15 @@
 }
 
 static PyObject*
-element_iterfind(ElementObject* self, PyObject* args)
+element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
 {
     PyObject* tag;
     PyObject* namespaces = Py_None;
     _Py_IDENTIFIER(iterfind);
-
-    if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
+    static char *kwlist[] = {"path", "namespaces", 0};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
+                                     &tag, &namespaces))
         return NULL;
 
     return _PyObject_CallMethodId(
@@ -1567,9 +1630,9 @@
     {"get", (PyCFunction) element_get, METH_VARARGS},
     {"set", (PyCFunction) element_set, METH_VARARGS},
 
-    {"find", (PyCFunction) element_find, METH_VARARGS},
-    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
-    {"findall", (PyCFunction) element_findall, METH_VARARGS},
+    {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
+    {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
+    {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
 
     {"append", (PyCFunction) element_append, METH_VARARGS},
     {"extend", (PyCFunction) element_extend, METH_VARARGS},
@@ -1578,7 +1641,7 @@
 
     {"iter", (PyCFunction) element_iter, METH_VARARGS},
     {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
-    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
+    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
 
     {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
     {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

-- 
Repository URL: http://hg.python.org/cpython