still don't get unicode and xml - help!

jmdeschamps at gmail.com jmdeschamps at gmail.com
Tue May 16 17:13:54 EDT 2006


I have to work with XML data containing accented characters (ISO-8859-1
encoding).
Using ElementTree, the only way I found to get the text attribute of a
node was to encode each one individually. That doubles the processing
time :-(
I am surely doing this wrong...
What is the right way to do it? I didn't see an obvious way to do it
this time...
Thanks


############## My XML Encoding Program
#! c:/python24/python -u
# -*- coding: iso-8859-1 -*-
import elementtree.ElementTree as ET
import time

def getMyXML(accentedTest):
    """Parse the generated XML file and print the text of its last child.

    The tree is walked once, keeping the text of each child of the root in
    turn.  Only the value that is finally printed is encoded, and only
    when accentedTest is true; the per-node encode of the earlier version
    produced results that were thrown away on the next iteration.

    accentedTest -- true to encode the printed text as ISO-8859-1 bytes,
                    false to print the text exactly as parsed.

    Prints None when the root has no children or the last child has no
    text.
    """
    # Same spelling as the name used by writeMyXML; the lower-case variant
    # only resolves to that file on case-insensitive file systems.
    filename = "pageMembre.xml"
    tree = ET.parse(filename)
    elem = tree.getroot()
    f = None  # stays None when the root element has no children
    for i in elem:
        f = i.text
    # Encode once, at the output boundary, rather than once per node.
    if accentedTest and f is not None:
        f = f.encode("iso-8859-1")
    print(f)

def writeMyXML(myrange,accentedTest):
    """Write pageMembre.xml holding myrange identical <monchapitre> nodes.

    myrange      -- number of <monchapitre> elements created under the
                    <mondoc> root.
    accentedTest -- true to store the accented sentence, false to store
                    the plain ASCII one.

    The tree is serialized with the ISO-8859-1 encoding.
    """
    # The sentence does not depend on the loop, so pick it once.
    if accentedTest:
        # Unicode literal: ElementTree accepts 8-bit strings only when
        # they are pure ASCII, so accented text must be handed over as
        # Unicode and left to tree.write() to encode.
        text = u"bel été et je serai la prêmière de la classe"
    else:
        text = "bel ete et je serai la premiere de la classe"
    root = ET.Element("mondoc")
    for _ in range(myrange):
        ET.SubElement(root, "monchapitre").text = text
    tree = ET.ElementTree(root)
    tree.write("pageMembre.xml","iso-8859-1")

if __name__ =="__main__":
    # Benchmark driver: time one write of 20000 nodes, then one read of
    # the resulting file, in either accented or plain-ASCII mode.
    accentedTest=int(raw_input("set 1 for accented test, 0 for ascii"))
    print("First, writing")
    t1=time.clock()
    writeMyXML(20000,accentedTest)
    t2=time.clock()
    print("Now, reading")
    t3=time.clock()
    getMyXML(accentedTest)
    t4=time.clock()
    # One formatted string on one line (the literal had been split in two
    # by mail line-wrapping); %s renders each value with str().
    print("accents are %s writing= %s reading= %s"
          % (accentedTest, t2-t1, t4-t3))
    # Wait for Enter before exiting (presumably to keep the console
    # window open); the typed value is not used.
    raw_input("END XML TEST")
######### End XML Encoding Program




More information about the Python-list mailing list