[Python-checkins] bpo-36831: Do not apply default namespace to unprefixed attributes in ElementPath. (#13201)

Thu May 9 01:22:54 EDT 2019

https://github.com/python/cpython/commit/88db8bd0648588c67eeab16d0bc72ec5c206e3ad
commit: 88db8bd0648588c67eeab16d0bc72ec5c206e3ad
branch: master
author: Stefan Behnel <stefan_ml at behnel.de>
committer: GitHub <noreply at github.com>
date: 2019-05-09T07:22:47+02:00
summary:

bpo-36831: Do not apply default namespace to unprefixed attributes in ElementPath. (#13201)

Also provide better grouping of the tokenizer tests.

files:
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementPath.py

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index ca6862cae44a..61737493a904 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1144,14 +1144,9 @@ def check(p, expected, namespaces=None):
 
         # tests from the xml specification
         check("*", ['*'])
-        check("{ns}*", ['{ns}*'])
-        check("{}*", ['{}*'])
-        check("{*}tag", ['{*}tag'])
-        check("{*}*", ['{*}*'])
         check("text()", ['text', '()'])
         check("@name", ['@', 'name'])
         check("@*", ['@', '*'])
-        check("@{ns}attr", ['@', '{ns}attr'])
         check("para[1]", ['para', '[', '1', ']'])
         check("para[last()]", ['para', '[', 'last', '()', ']'])
         check("*/para", ['*', '/', 'para'])
@@ -1163,7 +1158,6 @@ def check(p, expected, namespaces=None):
         check("//olist/item", ['//', 'olist', '/', 'item'])
         check(".", ['.'])
         check(".//para", ['.', '//', 'para'])
-        check(".//{*}tag", ['.', '//', '{*}tag'])
         check("..", ['..'])
         check("../@lang", ['..', '/', '@', 'lang'])
         check("chapter[title]", ['chapter', '[', 'title', ']'])
@@ -1171,11 +1165,32 @@ def check(p, expected, namespaces=None):
               '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
 
         # additional tests
+        check("@{ns}attr", ['@', '{ns}attr'])
         check("{http://spam}egg", ['{http://spam}egg'])
         check("./spam.egg", ['.', '/', 'spam.egg'])
         check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
+
+        # wildcard tags
+        check("{ns}*", ['{ns}*'])
+        check("{}*", ['{}*'])
+        check("{*}tag", ['{*}tag'])
+        check("{*}*", ['{*}*'])
+        check(".//{*}tag", ['.', '//', '{*}tag'])
+
+        # namespace prefix resolution
         check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
               {'xsd': 'http://www.w3.org/2001/XMLSchema'})
+        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
+              {'': 'http://www.w3.org/2001/XMLSchema'})
+        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
+              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
+        check("@type", ['@', 'type'],
+              {'': 'http://www.w3.org/2001/XMLSchema'})
+        check("@{*}type", ['@', '{*}type'],
+              {'': 'http://www.w3.org/2001/XMLSchema'})
+        check("@{ns}attr", ['@', '{ns}attr'],
+              {'': 'http://www.w3.org/2001/XMLSchema',
+               'ns': 'http://www.w3.org/2001/XMLSchema'})
 
     def test_processinginstruction(self):
         # Test ProcessingInstruction directly
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index cfe72f2f9d42..d318e65d84a4 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -72,23 +72,27 @@
 
 def xpath_tokenizer(pattern, namespaces=None):
     default_namespace = namespaces.get('') if namespaces else None
+    parsing_attribute = False
     for token in xpath_tokenizer_re.findall(pattern):
-        tag = token[1]
+        ttype, tag = token
         if tag and tag[0] != "{":
             if ":" in tag:
                 prefix, uri = tag.split(":", 1)
                 try:
                     if not namespaces:
                         raise KeyError
-                    yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+                    yield ttype, "{%s}%s" % (namespaces[prefix], uri)
                 except KeyError:
                     raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
-            elif default_namespace:
-                yield token[0], "{%s}%s" % (default_namespace, tag)
+            elif default_namespace and not parsing_attribute:
+                yield ttype, "{%s}%s" % (default_namespace, tag)
             else:
                 yield token
+            parsing_attribute = False
         else:
             yield token
+            parsing_attribute = ttype == '@'
+
 
 def get_parent_map(context):
     parent_map = context.parent_map
@@ -100,7 +104,6 @@ def get_parent_map(context):
     return parent_map
 
 
-
 def _is_wildcard_tag(tag):
     return tag[:3] == '{*}' or tag[-2:] == '}*'