[Tutor] about counting files

Abdirizak abdi a_abdi406@yahoo.com
Tue Apr 29 09:40:25 2003


--0-1171494775-1051590787=:79765
Content-Type: text/plain; charset=us-ascii

Hi, can anyone help me with this program? I am reading multiple files and I want to count each of these files. I tried different things but I couldn't get it right. Here is the program:

import glob, getopt
import fileinput,re,shelve,linecache,sys
#from TextSplitter import TextSplitter

# Token pattern: either a complete markup tag ("<...>") or a run of word
# characters/hyphens delimited by word boundaries.
aword = re.compile(r'<[^<>]*>|\b[\w-]+\b')  #using xml as well.

# Module-level index: word -> list of (filename, line number) locations.
# It is never cleared here, so entries accumulate across genIndex() calls.
index = {}


# Generate an index in file indexFileName
def genIndex(indexFileName, extension):
    """Index the words of every ``*.<extension>`` file and save the index.

    Each line of every file matching the pattern is lower-cased and
    tokenised with ``aword``; tokens starting with ``<`` (markup tags) are
    skipped. Every remaining word is recorded in the module-level ``index``
    together with the (filename, line number) where it occurred, and the
    whole index is then written to a newly created shelf.

    indexFileName -- name of the shelf to create (opened with flag 'n').
    extension     -- file extension, without the dot, of the files to scan.
    """
    fname = '*.' + extension
    filenames = glob.glob(fname)

    # fileinput.input() falls back to reading stdin when it is handed an
    # empty list, so only start scanning when at least one file matched.
    if filenames:
        try:
            for line in fileinput.input(filenames):
                location = fileinput.filename(), fileinput.filelineno()
                for word in aword.findall(line.lower()):
                    if word[0] != '<':
                        index.setdefault(word, []).append(location)
        finally:
            # Reset fileinput's global state even if reading a file failed,
            # so a later fileinput.input() call is not refused.
            fileinput.close()

    # Parenthesised so the line is valid both as a print statement and as a
    # print() call; the output is the same either way.
    print(index)  # for testing

    shelf = shelve.open(indexFileName, 'n')
    try:
        for word in index:
            shelf[word] = index[word]
    finally:
        # Always release the shelf, even if storing an entry fails.
        shelf.close()
if __name__ == '__main__':
    import sys
    for arg in sys.argv[1:]:
        genIndex(arg,'txt')

thanks in advance
 

---------------------------------
Do you Yahoo!?
The New Yahoo! Search - Faster. Easier. Bingo.
--0-1171494775-1051590787=:79765
Content-Type: text/html; charset=us-ascii

<DIV>Hi,</DIV>
<DIV>&nbsp;</DIV>
<DIV>can anyone help me with this program, I am reading multiple files and I want to count each of these files,&nbsp;I tried different things but I couldn't get it right.</DIV>
<DIV>&nbsp;</DIV>
<DIV>here is the program:</DIV>
<DIV>&nbsp;</DIV>
<DIV>&nbsp;</DIV>
<DIV>import glob, getopt<BR>import fileinput,re,shelve,linecache,sys<BR>#from TextSplitter import TextSplitter</DIV>
<DIV>&nbsp;</DIV>
<DIV>aword =re.compile (r'&lt;[^&lt;&gt;]*&gt;|\b[\w-]+\b') #using xml as well.<BR>index={}</DIV>
<DIV>&nbsp;</DIV>
<DIV># Generate an index in file indexFileName</DIV>
<DIV>def genIndex(indexFileName, extension):<BR>&nbsp;&nbsp;&nbsp;<BR>&nbsp;&nbsp; fname='*.'+extension<BR>&nbsp;&nbsp;&nbsp;&nbsp;<BR>&nbsp;&nbsp; for line in fileinput.input(glob.glob(fname)):<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; location = fileinput.filename(), fileinput.filelineno()<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; for word in aword.findall(line.lower()):<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if word[0] != '&lt;':<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; index.setdefault(word,[]).append(location)</DIV>
<DIV><BR>&nbsp;&nbsp; print index&nbsp; # for testing<BR>&nbsp;&nbsp; <BR>&nbsp;&nbsp; shelf = shelve.open(indexFileName,'n')<BR>&nbsp;&nbsp; for word in index:<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; shelf[word] = index[word]<BR>&nbsp;&nbsp; shelf.close()</DIV>
<DIV><BR>if __name__ == '__main__':<BR>&nbsp;&nbsp;&nbsp; import sys<BR>&nbsp;&nbsp;&nbsp; for arg in sys.argv[1:]:</DIV>
<DIV>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; genIndex(arg,'txt')</DIV>
<DIV>&nbsp;</DIV>
<DIV>&nbsp;</DIV>
<DIV>thanks in advance</DIV>
<DIV><BR>&nbsp;</DIV><p><hr SIZE=1>
Do you Yahoo!?<br>
<a href="http://us.rd.yahoo.com/search/mailsig/*http://search.yahoo.com">The New Yahoo! Search</a> - Faster. Easier. Bingo.
--0-1171494775-1051590787=:79765--