Python parsing XML file problem with SAX

jia li lijia.jasperlee at gmail.com
Wed Jul 28 06:10:00 EDT 2010


I have an XML file with hundreds of <error> elements.

What's strange is only one of there elements could not be parsed correctly:
<error>
<checker>REVERSE_INULL</checker>
<function>Dispose_ParameterList</function>
<unmangled_function>Dispose_ParameterList</unmangled_function>
<status>UNINSPECTED</status>
<num>146</num>
<home>1/146MMSLib_LinkedList.c</home>
</error>

I printed the data in "characters(self, data)" and after parsing. The result
is one "\r\n" is inserted after "1/" and "146MMSLib_LinkedList.c" for the
latter.

But if I make my XML file only this element left, it could parse correctly.

My script below:
class CoverityErrorHandler(ContentHandler):
    def __init__(self):
        self.is_num = False
        self.num = ""
        self.is_func = False
        self.function = ""
        self.is_file = False
        self.filename = ""
        self.is_Report = False
        self.report = ""
        self.is_Checker = False
        self.checker = ""
        self.is_unmangled_func = False
        self.unmangled_func= ""
        self.is_Status = False
        self.Status = ""
        self.mapping = {}

    def startElement(self, name, attributes):
        if name == "num":
            self.is_num = True
        elif name == "unmangled_function":
            self.is_unmangled_func = True
        elif name == "checker":
            self.is_Checker = True
        elif name == "file":
            self.is_file = True
        elif name == "home":
            self.is_Report = True
        elif name == "function":
            self.is_func = True
        elif name == "status":
            self.is_Status = True

    def characters(self, data):
        if self.is_num:
            self.num = data
        elif self.is_func:
            self.function = data
        elif self.is_Checker:
            self.checker = data
        elif self.is_file:
            self.filename = data
        elif self.is_Report:
            self.report = data;
            print self.report;
        elif self.is_unmangled_func:
            self.unmangled_func = data
        elif self.is_Status:
            self.Status = data

    def endElement(self, name):
        if name == "error":
            self.mapping[self.num] = CoverityError(self.checker,
self.filename, self.function, self.report)
        elif name == "num":
            self.is_num = False
        elif name == "unmangled_function":
            self.is_unmangled_func = False
        elif name == "checker":
            self.is_Checker = False
        elif name == "file":
            self.is_file = False
        elif name == "home":
            self.is_Report = False
        elif name == "function":
            self.is_func = False
        elif name == "status":
            self.is_Status = False


Please any expert help to have a look. Thanks!
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20100728/175ede01/attachment.html>


More information about the Python-list mailing list