[Python-checkins] bpo-31648: Improve ElementPath (#3835)
Serhiy Storchaka
webhook-mailer at python.org
Sat Sep 30 09:35:26 EDT 2017
https://github.com/python/cpython/commit/101a5e84acbab9d880e150195f23185dfb5449a9
commit: 101a5e84acbab9d880e150195f23185dfb5449a9
branch: master
author: scoder <stefan_ml at behnel.de>
committer: Serhiy Storchaka <storchaka at gmail.com>
date: 2017-09-30T16:35:21+03:00
summary:
bpo-31648: Improve ElementPath (#3835)
* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".
files:
A Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
M Doc/library/xml.etree.elementtree.rst
M Doc/whatsnew/3.7.rst
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementPath.py
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 7d814ad406e..61808596a55 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -437,6 +437,11 @@ Supported XPath syntax
| ``[tag]`` | Selects all elements that have a child named |
| | ``tag``. Only immediate children are supported. |
+-----------------------+------------------------------------------------------+
+| ``[.='text']`` | Selects all elements whose complete text content, |
+| | including descendants, equals the given ``text``. |
+| | |
+| | .. versionadded:: 3.7 |
++-----------------------+------------------------------------------------------+
| ``[tag='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including |
| | descendants, equals the given ``text``. |
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index a474e767529..845ed643f97 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
keyword argument. When it's true, zeros are represented by ``'`'``
instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.)
+xml.etree
+---------
+
+:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
+methods can now compare text of the current node with ``[. = "text"]``,
+not only text in children. Predicates also allow adding spaces for
+better readability. (Contributed by Stefan Behnel in :issue:`31648`.)
+
zipapp
------
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 661ad8b9d4d..02812f32bc9 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2237,6 +2237,39 @@ def test_findall(self):
['tag'] * 2)
self.assertEqual(e.findall('section//'), e.findall('section//*'))
+ self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
+ ['section'])
+
+ self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
+ [])
+ self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
+ [])
+
+ # duplicate section => 2x tag matches
+ e[1] = e[2]
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section', 'section'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag', 'tag'])
+
def test_test_find_with_ns(self):
e = ET.XML(SAMPLE_XML_NS)
self.assertEqual(summarize_list(e.findall('tag')), [])
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index 361f6d54fa5..c9d6ef345b9 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -157,6 +157,9 @@ def prepare_predicate(next, token):
return
if token[0] == "]":
break
+ if token == ('', ''):
+ # ignore whitespace
+ continue
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
@@ -188,16 +191,22 @@ def select(context, result):
if elem.find(tag) is not None:
yield elem
return select
- if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
- # [tag='value']
+ if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
+ # [.='value'] or [tag='value']
tag = predicate[0]
value = predicate[-1]
- def select(context, result):
- for elem in result:
- for e in elem.findall(tag):
- if "".join(e.itertext()) == value:
+ if tag:
+ def select(context, result):
+ for elem in result:
+ for e in elem.findall(tag):
+ if "".join(e.itertext()) == value:
+ yield elem
+ break
+ else:
+ def select(context, result):
+ for elem in result:
+ if "".join(elem.itertext()) == value:
yield elem
- break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
new file mode 100644
index 00000000000..8b39ce92fee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
@@ -0,0 +1,6 @@
+Improvements to path predicates in ElementTree:
+
+* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
+* Add support for text comparison of the current node, like "[.='text']".
+
+Patch by Stefan Behnel.
More information about the Python-checkins
mailing list