[Python-checkins] bpo-31648: Improve ElementPath (#3835)

Serhiy Storchaka webhook-mailer at python.org
Sat Sep 30 09:35:26 EDT 2017


https://github.com/python/cpython/commit/101a5e84acbab9d880e150195f23185dfb5449a9
commit: 101a5e84acbab9d880e150195f23185dfb5449a9
branch: master
author: scoder <stefan_ml at behnel.de>
committer: Serhiy Storchaka <storchaka at gmail.com>
date: 2017-09-30T16:35:21+03:00
summary:

bpo-31648: Improve ElementPath (#3835)

* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".

files:
A Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
M Doc/library/xml.etree.elementtree.rst
M Doc/whatsnew/3.7.rst
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementPath.py

diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 7d814ad406e..61808596a55 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -437,6 +437,11 @@ Supported XPath syntax
 | ``[tag]``             | Selects all elements that have a child named         |
 |                       | ``tag``.  Only immediate children are supported.     |
 +-----------------------+------------------------------------------------------+
+| ``[.='text']``        | Selects all elements whose complete text content,    |
+|                       | including descendants, equals the given ``text``.    |
+|                       |                                                      |
+|                       | .. versionadded:: 3.7                                |
++-----------------------+------------------------------------------------------+
 | ``[tag='text']``      | Selects all elements that have a child named         |
 |                       | ``tag`` whose complete text content, including       |
 |                       | descendants, equals the given ``text``.              |
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index a474e767529..845ed643f97 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
 keyword argument.  When it's true, zeros are represented by ``'`'``
 instead of spaces.  (Contributed by Xiang Zhang in :issue:`30103`.)
 
+xml.etree
+---------
+
+:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
+methods can now compare text of the current node with ``[. = "text"]``,
+not only text in children.  Predicates also allow adding spaces for
+better readability.  (Contributed by Stefan Behnel in :issue:`31648`.)
+
 zipapp
 ------
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 661ad8b9d4d..02812f32bc9 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2237,6 +2237,39 @@ def test_findall(self):
             ['tag'] * 2)
         self.assertEqual(e.findall('section//'), e.findall('section//*'))
 
+        self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
+            ['section'])
+
+        self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
+                         [])
+        self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
+                         [])
+
+        # duplicate section => 2x tag matches
+        e[1] = e[2]
+        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+                         ['section', 'section'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+                         ['tag', 'tag'])
+
     def test_test_find_with_ns(self):
         e = ET.XML(SAMPLE_XML_NS)
         self.assertEqual(summarize_list(e.findall('tag')), [])
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index 361f6d54fa5..c9d6ef345b9 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -157,6 +157,9 @@ def prepare_predicate(next, token):
             return
         if token[0] == "]":
             break
+        if token == ('', ''):
+            # ignore whitespace
+            continue
         if token[0] and token[0][:1] in "'\"":
             token = "'", token[0][1:-1]
         signature.append(token[0] or "-")
@@ -188,16 +191,22 @@ def select(context, result):
                 if elem.find(tag) is not None:
                     yield elem
         return select
-    if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
-        # [tag='value']
+    if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
+        # [.='value'] or [tag='value']
         tag = predicate[0]
         value = predicate[-1]
-        def select(context, result):
-            for elem in result:
-                for e in elem.findall(tag):
-                    if "".join(e.itertext()) == value:
+        if tag:
+            def select(context, result):
+                for elem in result:
+                    for e in elem.findall(tag):
+                        if "".join(e.itertext()) == value:
+                            yield elem
+                            break
+        else:
+            def select(context, result):
+                for elem in result:
+                    if "".join(elem.itertext()) == value:
                         yield elem
-                        break
         return select
     if signature == "-" or signature == "-()" or signature == "-()-":
         # [index] or [last()] or [last()-index]
diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
new file mode 100644
index 00000000000..8b39ce92fee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
@@ -0,0 +1,6 @@
+Improvements to path predicates in ElementTree:
+
+* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
+* Add support for text comparison of the current node, like "[.='text']".
+
+Patch by Stefan Behnel.



More information about the Python-checkins mailing list