I'm trying to use getpath() to get the absolute XPath for an element in a tree. This works fine when every namespace has a single, consistent prefix throughout the document. However, when I deal with a document in which the same namespace is bound to multiple prefixes, I get the wrong path. I wrote this little script to reproduce the error.
#!/usr/bin/env python
from lxml import etree
from StringIO import StringIO
def generate_sample_tree_with_multiple_prefixes():
    """Build a sample tree whose children bind one namespace to many prefixes.

    Creates a root element with ten ``l1`` children. Every child lives in
    the same namespace, but each one declares that namespace under its own
    prefix (``l_0`` ... ``l_9``) and carries an ``id`` attribute
    (``id_0`` ... ``id_9``).

    Returns:
        The tree obtained by serializing the root and parsing it back.
    """
    # ``ns`` and ``root`` were referenced but never defined in the script as
    # posted (NameError), so they are created here to keep the function
    # self-contained.
    # NOTE(review): the namespace URI and root tag are placeholders; confirm
    # against the original script if specific values matter.
    ns = 'http://example.com/ns'
    root = etree.Element('root')
    for x in range(10):
        prefix = 'l_%s' % x
        node = etree.SubElement(root, "{%s}l1" % ns, nsmap={prefix: ns})
        node.set('id', 'id_%s' % x)
    # Round-trip through serialization so the result is a freshly parsed tree.
    return etree.parse(StringIO(etree.tostring(root)))
def generate_sample_tree_with_single_prefixes():
    """Build a sample tree whose children all use the same namespace prefix.

    Creates a root element with ten ``l1`` children. Every child lives in
    the same namespace and declares it under the single prefix ``ns``; each
    child carries an ``id`` attribute (``id_0`` ... ``id_9``).

    Returns:
        The tree obtained by serializing the root and parsing it back.
    """
    # ``ns`` and ``root`` were referenced but never defined in the script as
    # posted (NameError), so they are created here to keep the function
    # self-contained.
    # NOTE(review): the namespace URI and root tag are placeholders; confirm
    # against the original script if specific values matter.
    ns = 'http://example.com/ns'
    root = etree.Element('root')
    # The prefix is the same for every child, so it is set once up front.
    prefix = 'ns'
    for x in range(10):
        node = etree.SubElement(root, "{%s}l1" % ns, nsmap={prefix: ns})
        node.set('id', 'id_%s' % x)
    # Round-trip through serialization so the result is a freshly parsed tree.
    return etree.parse(StringIO(etree.tostring(root)))
def test_multiple():
    """Check getpath() round-trips on the multiple-prefix sample tree.

    For every child of the root, the path reported by ``getpath()`` is
    evaluated with ``xpath()`` using that child's own ``nsmap``; the result
    must be exactly one element, equal to the child itself.
    """
    doc = generate_sample_tree_with_multiple_prefixes()
    for child in doc.getroot().iterchildren():
        located = doc.xpath(doc.getpath(child), namespaces=child.nsmap)
        assert len(located) == 1, "Multiple elements returned: %s" % located
        assert child == located[0], "It's not the same"
def test_single():
    """Check getpath() round-trips on the single-prefix sample tree.

    For every child of the root, the path reported by ``getpath()`` is
    evaluated with ``xpath()`` using that child's own ``nsmap``; the result
    must be exactly one element, equal to the child itself.
    """
    doc = generate_sample_tree_with_single_prefixes()
    for child in doc.getroot().iterchildren():
        located = doc.xpath(doc.getpath(child), namespaces=child.nsmap)
        assert len(located) == 1, "Multiple elements returned"
        assert child == located[0], "It's not the same"
if __name__ == '__main__':
    # Run the single-prefix check first, then the multiple-prefix one.
    for check in (test_single, test_multiple):
        check()
thanks
--
-Ahmed