[XML-SIG] XML Parsing problem

kumar s ps_python@yahoo.com
Wed, 25 Sep 2002 15:53:55 -0700 (PDT)


Dear Group, 

I am trying to parse some 1200 XML files. 

I am using XMLParser.py script.

I wrote a shell script to pass files to this script. 

However when I try to execute my shell script I get
the following error:

$ ./format.sh
: bad interpreter: Permission deniedn/python2.1
: bad interpreter: Permission deniedn/python2.1

My script is :

#!/bin/sh

# Rebuild the combined output file from scratch; -f keeps the first run
# (when "entries" does not exist yet) from printing an error.
rm -f entries

# NOTE(review): this path is relative to the current directory; the post
# says the script lives in /home/files with the XML files in its xml
# directory, so ./xml/* may be what is intended -- confirm.
for file in ./home/files/xml/*
do
    # Quote "$file" so names containing spaces reach the parsers intact.
    ./XMLParse1.py "$file" >> entries
    ./XMLParse2.py "$file" >> entries
done

I have all my XML files in the xml directory, and my shell script
resides in /home/files/.

Can anyone please help me understand why I am getting this
problem?

When I execute XMLParse1.py directly from the command line, I get
the result.

$ python XMLParse1.py 10245.xml 

works for me. 

Is there any way I can parse all 1024 XML files?
Please help me. 
thanks
PS




My XMLparse.py file



#!/usr/bin/python2.1

from xml.dom import minidom
import sys
from xml.sax._exceptions import SAXParseException
import StringIO

class ParseError(Exception):
    """Raised by XMLParse when a document is not well-formed XML."""


class XMLParse:
    """Convenience wrapper around ``xml.dom.minidom`` for a single document.

    ``parseFile`` and ``parseString`` load a document and remember its root
    element in ``self.xmldoc``; the other methods query that element or
    work on nodes obtained from it.
    """

    def _load(self, source):
        """Parse *source* and return the root element of the document.

        *source* may be anything ``openAnything`` accepts: an open stream,
        ``'-'`` for standard input, a URL, a pathname, or a string of XML.

        Raises ParseError if the document cannot be parsed.
        """
        # Local import: keeps this block independent of the file's
        # top-level import list.
        from xml.parsers.expat import ExpatError

        sock = self.openAnything(source)
        try:
            try:
                xmld = minidom.parse(sock).documentElement
            except (SAXParseException, ExpatError):
                # Depending on the parser backend, malformed XML surfaces
                # as either exception; report both the same way.
                raise ParseError("Check tags")
        finally:
            # Release the stream even when parsing fails.
            sock.close()
        return xmld

    # The following method assumes the user has uploaded a file instead
    # of giving a URL pointing to the file on the Internet.
    def parseFile(self, file):
        """Open the XML document *file*, parse it and return its root.

        The root element is also stored in ``self.xmldoc``.
        """
        self.xmldoc = self._load(file)
        return self.xmldoc

    def parseString(self, str):
        """Parse the XML-formatted string *str* and return its root.

        The root element is also stored in ``self.xmldoc``.
        """
        self.xmldoc = self._load(str)
        return self.xmldoc

    def getTag(self, name):
        """Return every element called *name* in the loaded document.

        The returned nodes are the elements themselves, i.e. the tags
        together with everything they contain.
        """
        return self.xmldoc.getElementsByTagName(name)

    def getWithinTag(self, tag_name, name):
        """Return every element called *name* found beneath *tag_name*.

        Unlike ``getTag``, the search is limited to the node *tag_name*
        rather than covering the whole loaded document.
        """
        return tag_name.getElementsByTagName(name)

    def getText(self, object, name):
        """Return the text found directly inside every *name* element.

        The text-node children of each matching element are concatenated
        in document order; an empty string is returned when there are none.

        *object* is accepted for backward compatibility and is not used:
        the search always covers the currently loaded document.
        """
        parts = []
        for element in self.getTag(name):
            # The text lives in the element's children, not in the element
            # node itself.
            for child in element.childNodes:
                if child.nodeType == child.TEXT_NODE:
                    parts.append(child.data)
        return "".join(parts)

    def openAnything(self, source):
        """URI, filename, or string --> stream

        This function lets you define parsers that take any input source
        (URL, pathname to local or network file, or actual data as a
        string) and deal with it in a uniform manner.  Just .close() the
        returned object when you're done with it.

        Examples:
        >>> from xml.dom import minidom
        >>> sock = openAnything("http://localhost/myfile.xml")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        >>> sock = openAnything("c:\\inetpub\\wwwroot\\myfile.xml")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        >>> sock = openAnything("<ref id='conjunction'>"
        ...                     "<text>and</text><text>or</text></ref>")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        """
        # Already a stream: hand it back untouched.
        if hasattr(source, "read"):
            return source

        if source == '-':
            import sys
            return sys.stdin

        # Try to open with urlopen (if source is an http, ftp, or file
        # URL).  The function lives in a different module on Python 3.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen
        try:
            return urlopen(source)
        except (IOError, OSError, ValueError):
            # ValueError: Python 3 rejects strings that are not URLs.
            pass

        # Try to open with the native open function (source is a
        # pathname).  Binary mode lets the XML parser handle the encoding.
        try:
            return open(source, "rb")
        except (IOError, OSError, ValueError, TypeError):
            pass

        # Fall back to treating source as the XML data itself.
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        return StringIO(str(source))

    def sanitize(self, data):
        """Return *data* stripped, with whitespace runs collapsed.

        Leading and trailing whitespace is removed, then every run of
        spaces, tabs and newlines becomes a single space.
        """
        import re
        delchars = "[ \t\n]+"
        return re.sub(delchars, " ", data.strip())

    def xmlProcess(self, ele):
        """Return ``(text, attributes)`` for the element *ele*.

        *text* is the sanitized content of the first run of consecutive
        text nodes among the children of *ele* (an empty string when it
        has none).  *attributes* is the element's attribute map, or None
        when the element carries no attributes.
        """
        nodeAttr = None
        if ele.hasAttributes():
            nodeAttr = ele.attributes

        # Skip leading children that are not text.  Starting from the
        # first child (None for an empty element) keeps the walk inside
        # ele instead of wandering over ele's own siblings.
        cNode = ele.firstChild
        while cNode is not None and cNode.nodeType != cNode.TEXT_NODE:
            cNode = cNode.nextSibling

        # Collect the run of adjacent text nodes.
        parts = []
        while cNode is not None and cNode.nodeType == cNode.TEXT_NODE:
            parts.append(cNode.data)
            cNode = cNode.nextSibling

        return self.sanitize("".join(parts)), nodeAttr

    def create_set(self, attr):
        """Turn a node's attribute map into a plain list.

        *attr* is the attribute map of a node as returned by
        ``xmlProcess``; None (an element without attributes) yields an
        empty list.

        Return value: attr_set[]
        Usage:
            attr_set[0].name  = name of attribute
            attr_set[0].value = value of attribute
        """
        if attr is None:
            return []
        return [attr[key] for key in attr.keys()]

if __name__ == '__main__':
    # The script needs one argument: the XML document to process.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
        sys.exit(1)

    doc = XMLParse()
    doc.parseFile(sys.argv[1])

    # Print the cleaned-up text of every <entry_cDNA> element, one per
    # line.  The attributes returned alongside the text are not used here.
    for entry in doc.getTag("entry_cDNA"):
        text, _attrs = doc.xmlProcess(entry)
        # Parenthesised so the line is valid on both Python 2 and 3.
        print(text)



__________________________________________________
Do you Yahoo!?
New DSL Internet Access from SBC & Yahoo!
http://sbc.yahoo.com