[XML-SIG] XML Parsing problem
kumar s
ps_python@yahoo.com
Wed, 25 Sep 2002 15:53:55 -0700 (PDT)
Dear Group,
I am trying to parse some 1200 XML files.
I am using XMLParser.py script.
I wrote a shell script to pass files to this script.
However when I try to execute my shell script I get
the following error:
$ ./format.sh
: bad interpreter: Permission deniedn/python2.1
: bad interpreter: Permission deniedn/python2.1
My script is :
#!/bin/sh
# Run both parser scripts over every XML file and append their output to
# the file "entries" in the current directory.

# -f: do not complain when no output file from a previous run exists.
rm -f entries

# NOTE(review): this path is relative to the current directory. The message
# says the script lives in /home/files and the data in its xml directory, so
# confirm whether this should be ./xml/* (or /home/files/xml/*) instead.
for file in ./home/files/xml/*
do
    # An unmatched glob is left as a literal string; skip it rather than
    # handing a non-existent file name to the parsers.
    [ -f "$file" ] || continue

    # Start the parsers through the interpreter explicitly so that a damaged
    # "#!" line in them (for example one ending in a carriage return, which
    # produces ": bad interpreter") cannot prevent them from running.
    python ./XMLParse1.py "$file" >> entries
    python ./XMLParse2.py "$file" >> entries
done
All my XML files are in the xml directory,
and my shell script resides in /home/files/.
Can anyone please help me understand why I am getting this
problem?
When I execute my XMLParse1.py from the command line, I get
the result.
$ python XMLParse1.py 10245.xml
works for me.
Is there any way I can parse all 1024 XML files?
Please help me.
thanks
PS
My XMLparse.py file
#!/usr/bin/python2.1
from xml.dom import minidom
import sys
from xml.sax._exceptions import SAXParseException
import StringIO
class ParseError(Exception):
    """Raised by XMLParse when a document is not well-formed XML."""


class XMLParse:
    """Small convenience wrapper around xml.dom.minidom.

    A document is loaded with parseFile() or parseString(); its root
    element is kept in self.xmldoc and used by getTag() and getText().
    """

    def _load(self, source):
        """
        Function to load an XML document from disk/Internet/standard
        input/XML document as a string.

        Returns the root element of the parsed document.  Raises
        ParseError when the document is not well-formed.
        """
        # Imported here so the method does not depend on the module header.
        from xml.parsers import expat

        sock = self.openAnything(source)
        # Nested try blocks (rather than try/except/finally) keep the syntax
        # valid on old interpreters; the stream is closed on every path.
        try:
            try:
                return minidom.parse(sock).documentElement
            except (SAXParseException, expat.error):
                # minidom reports malformed input through SAX or through
                # expat, depending on which parser backs it.
                raise ParseError("Check tags")
        finally:
            sock.close()

    # Following function assumes user has uploaded a file instead of
    # giving a URL pointing to the file on the Internet
    def parseFile(self, file):
        """
        Opens specified XML document and parses it

        Stores the root element in self.xmldoc and returns it.
        """
        self.xmldoc = self._load(file)
        return self.xmldoc

    def parseString(self, str):
        """
        Parses XML formatted string -str-

        Stores the root element in self.xmldoc and returns it.
        """
        self.xmldoc = self._load(str)
        return self.xmldoc

    def getTag(self, name):
        """
        Given a tag with name "name", getTag returns the matching
        elements of the loaded document (the tags together with their
        contents).
        """
        return self.xmldoc.getElementsByTagName(name)

    def getWithinTag(self, tag_name, name):
        """
        Given an element -tag_name-, this function only returns the
        elements called -name- found within that element (NOT within the
        entire XML document, like getTag).
        """
        return tag_name.getElementsByTagName(name)

    def getText(self, object, name):
        """
        Returns the concatenated text found directly inside every element
        called -name-.

        The elements are looked up within -object- when it is a DOM node
        (anything offering getElementsByTagName); otherwise the whole
        loaded document is searched.  Returns "" when nothing matches.
        """
        if object is not None and hasattr(object, "getElementsByTagName"):
            elements = self.getWithinTag(object, name)
        else:
            elements = self.getTag(name)
        parts = []
        for element in elements:
            # The text lives in the element's child nodes; the element node
            # itself is never a text node.
            for child in element.childNodes:
                if child.nodeType == child.TEXT_NODE:
                    parts.append(child.data)
        return "".join(parts)

    def openAnything(self, source):
        """URI, filename, or string --> stream

        This function lets you define parsers that take any input
        source (URL, pathname to local or network file, or actual data
        as a string) and deal with it in a uniform manner.  Returned
        object is guaranteed to have all the basic stdio read methods
        (read, readline, readlines).  Just .close() the object when
        you're done with it.

        Examples:
        >>> from xml.dom import minidom
        >>> sock = openAnything("http://localhost/myfile.xml")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        >>> sock = openAnything("c:\\inetpub\\wwwroot\\myfile.xml")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        >>> sock = openAnything("<ref id='conjunction'><text>and</text><text>or</text></ref>")
        >>> doc = minidom.parse(sock)
        >>> sock.close()
        """
        if hasattr(source, "read"):
            return source

        if source == '-':
            import sys
            return sys.stdin

        # try to open with urllib (if source is http, ftp, or file URL)
        try:
            from urllib import urlopen
        except ImportError:
            # Newer interpreters keep urlopen in urllib.request.
            from urllib.request import urlopen
        try:
            return urlopen(source)
        except (IOError, OSError, ValueError):
            # ValueError: the text does not look like a URL at all.
            pass

        # try to open with native open function (pathname); binary mode
        # leaves the handling of the document's encoding to the XML parser
        try:
            return open(source, "rb")
        except (IOError, OSError, ValueError):
            pass

        # treat source as string
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        return StringIO(str(source))

    def sanitize(self, data):
        """
        Cleans up XML data into a string: leading and trailing whitespace
        is removed and every run of spaces, tabs and newlines becomes a
        single space.
        """
        import re
        return re.sub("[ \t\n]+", " ", data.strip())

    def xmlProcess(self, ele):
        """
        Extracts the leading text of element -ele-.

        Returns a tuple (text, attributes): text is the sanitized content
        of the first run of consecutive text nodes among the children of
        -ele- ("" when there is none) and attributes is the element's
        NamedNodeMap, or None when the element has no attributes.
        """
        nodeAttr = None
        if ele.hasAttributes():
            nodeAttr = ele.attributes

        # Skip anything in front of the first text node; the None test
        # stops the walk when the element has no text child at all.
        node = ele.firstChild
        while node is not None and node.nodeType != node.TEXT_NODE:
            node = node.nextSibling

        # Collect the run of adjacent text nodes that starts there.
        parts = []
        while node is not None and node.nodeType == node.TEXT_NODE:
            parts.append(node.data)
            node = node.nextSibling

        # Always hand back a pair so callers can unpack the result.
        return self.sanitize("".join(parts)), nodeAttr

    def create_set(self, attr):
        """
        Given a node's attributes (a NamedNodeMap object), creates
        a list that is easy to use.  None (an element without
        attributes) gives an empty list.

        Return value: attr_set[]
        Usage:
        attr_set[0].name = name of attribute
        attr_set[0].value = value of attribute
        """
        if attr is None:
            return []
        # Iterate the keys directly: they are not always an indexable list.
        return [attr[key] for key in attr.keys()]
if __name__ == '__main__':
    # Command-line use: print the text of every <entry_cDNA> element of the
    # XML file named by the first argument, one element per line.
    if len(sys.argv) < 2:
        # Explain the problem instead of failing with an IndexError.
        sys.stderr.write("usage: %s FILE.xml\n" % sys.argv[0])
        sys.exit(2)

    doc = XMLParse()
    doc.parseFile(sys.argv[1])
    for entry in doc.getTag("entry_cDNA"):
        res, attr = doc.xmlProcess(entry)
        # A parenthesised single value prints identically whether print is
        # a statement or a function.
        print(res)
        # attr holds the element's attributes (None when it has none);
        # doc.create_set(attr) turns them into a list whose items expose
        # .name and .value, should they need to be printed as well.
__________________________________________________
Do you Yahoo!?
New DSL Internet Access from SBC & Yahoo!
http://sbc.yahoo.com