[Python-checkins] bpo-36831: Do not apply default namespace to unprefixed attributes in ElementPath. (#13201)
Stefan Behnel
webhook-mailer at python.org
Thu May 9 01:22:54 EDT 2019
https://github.com/python/cpython/commit/88db8bd0648588c67eeab16d0bc72ec5c206e3ad
commit: 88db8bd0648588c67eeab16d0bc72ec5c206e3ad
branch: master
author: Stefan Behnel <stefan_ml at behnel.de>
committer: GitHub <noreply at github.com>
date: 2019-05-09T07:22:47+02:00
summary:
bpo-36831: Do not apply default namespace to unprefixed attributes in ElementPath. (#13201)
Also provide better grouping of the tokenizer tests.
files:
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementPath.py
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index ca6862cae44a..61737493a904 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1144,14 +1144,9 @@ def check(p, expected, namespaces=None):
# tests from the xml specification
check("*", ['*'])
- check("{ns}*", ['{ns}*'])
- check("{}*", ['{}*'])
- check("{*}tag", ['{*}tag'])
- check("{*}*", ['{*}*'])
check("text()", ['text', '()'])
check("@name", ['@', 'name'])
check("@*", ['@', '*'])
- check("@{ns}attr", ['@', '{ns}attr'])
check("para[1]", ['para', '[', '1', ']'])
check("para[last()]", ['para', '[', 'last', '()', ']'])
check("*/para", ['*', '/', 'para'])
@@ -1163,7 +1158,6 @@ def check(p, expected, namespaces=None):
check("//olist/item", ['//', 'olist', '/', 'item'])
check(".", ['.'])
check(".//para", ['.', '//', 'para'])
- check(".//{*}tag", ['.', '//', '{*}tag'])
check("..", ['..'])
check("../@lang", ['..', '/', '@', 'lang'])
check("chapter[title]", ['chapter', '[', 'title', ']'])
@@ -1171,11 +1165,32 @@ def check(p, expected, namespaces=None):
'[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
# additional tests
+ check("@{ns}attr", ['@', '{ns}attr'])
check("{http://spam}egg", ['{http://spam}egg'])
check("./spam.egg", ['.', '/', 'spam.egg'])
check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
+
+ # wildcard tags
+ check("{ns}*", ['{ns}*'])
+ check("{}*", ['{}*'])
+ check("{*}tag", ['{*}tag'])
+ check("{*}*", ['{*}*'])
+ check(".//{*}tag", ['.', '//', '{*}tag'])
+
+ # namespace prefix resolution
check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
{'xsd': 'http://www.w3.org/2001/XMLSchema'})
+ check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
+ {'': 'http://www.w3.org/2001/XMLSchema'})
+ check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
+ {'xsd': 'http://www.w3.org/2001/XMLSchema'})
+ check("@type", ['@', 'type'],
+ {'': 'http://www.w3.org/2001/XMLSchema'})
+ check("@{*}type", ['@', '{*}type'],
+ {'': 'http://www.w3.org/2001/XMLSchema'})
+ check("@{ns}attr", ['@', '{ns}attr'],
+ {'': 'http://www.w3.org/2001/XMLSchema',
+ 'ns': 'http://www.w3.org/2001/XMLSchema'})
def test_processinginstruction(self):
# Test ProcessingInstruction directly
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index cfe72f2f9d42..d318e65d84a4 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -72,23 +72,27 @@
def xpath_tokenizer(pattern, namespaces=None):
default_namespace = namespaces.get('') if namespaces else None
+ parsing_attribute = False
for token in xpath_tokenizer_re.findall(pattern):
- tag = token[1]
+ ttype, tag = token
if tag and tag[0] != "{":
if ":" in tag:
prefix, uri = tag.split(":", 1)
try:
if not namespaces:
raise KeyError
- yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+ yield ttype, "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
- elif default_namespace:
- yield token[0], "{%s}%s" % (default_namespace, tag)
+ elif default_namespace and not parsing_attribute:
+ yield ttype, "{%s}%s" % (default_namespace, tag)
else:
yield token
+ parsing_attribute = False
else:
yield token
+ parsing_attribute = ttype == '@'
+
def get_parent_map(context):
parent_map = context.parent_map
@@ -100,7 +104,6 @@ def get_parent_map(context):
return parent_map
-
def _is_wildcard_tag(tag):
return tag[:3] == '{*}' or tag[-2:] == '}*'
More information about the Python-checkins mailing list