[Tutor] Should I use generators here?
Tony C
cappy2112 at gmail.com
Mon May 8 03:39:27 CEST 2006
I wrote a small Python program to count some simple statistics on a Visual
Basic program that I am maintaining.
The Python program counts total lines, whitespace lines, comment lines,
Public & Private Subroutines, Public and Private Functions.
The Python program takes about 20-40 seconds to count all these stats since
I started using Psyco, but I am wondering if I can
eliminate Psyco and improve the program performance using generators (or
some other technique).
The running time is quick enough, I'm just wondering if there are other
simple performance tweaks to use.
I've already eliminated all . (dot) references inside the loops.
I haven't quite got my head around generators yet, or when to use /not use
them, even though I have seen tutorials and examples.
I'll only include the higher level calling functions, for brevity..
ProcessAllFiles() and ProcessFiletype() are the functions whose performance
I am interested in improving.
Here is what the output "summary" looks like
Total Lines= 54932 in 45 Files
Total Comment Lines = 7408, Total Whitespace = 33679
Total Private Subs = 608, Total Public Subs = 145
Total Private Funcs = 86, Total Public Funcs = 165
Thanks for any advice!
Tony
def ProcessFiletype(Thesefiles, Summary, Stats):
    """Collect line statistics for every file named in 'Thesefiles'.

    Each file is read in full, registered in 'Summary' under its display
    name (the file name lower-cased, then capitalized) with a fresh copy
    of 'Stats', and every line is handed to the Is* classifier helpers.

    Parameters:
        Thesefiles -- iterable of file names to process.
        Summary    -- dict updated in place: display name -> stats dict.
        Stats      -- template stats dict; deep-copied for each file, so
                      the template itself is never modified.

    Returns the length of the longest display name seen, or 0 when
    'Thesefiles' is empty.

    Side effects: adds the number of lines read to the module-level
    TotalAllLines counter.  A file that cannot be opened or read is
    reported on stdout and skipped; it gets no entry in 'Summary'.
    """
    global TotalAllLines

    LongestFilenameLen = 0
    for path in Thesefiles:
        # The normalized name is only used for display and as the Summary
        # key.  The file itself is opened under its real name, because the
        # case-altered name does not exist on case-sensitive filesystems.
        display_name = path.lower().capitalize()
        LongestFilenameLen = max(LongestFilenameLen, len(display_name))

        try:
            fh = open(path, "r")
        except IOError:
            print("\nFATAL ERROR ocurred opening %s for input" % display_name)
            continue

        try:
            try:
                # These files are very small (less than 100k each), so
                # reading an entire file at once isn't a problem.
                lines = fh.readlines()
            finally:
                # Close the handle even when the read fails.
                fh.close()
        except IOError:
            print("\nFatal error occurred reading from %s\n\n" % display_name)
            continue

        # Associate each file with its own stats dict, starting from the
        # counts in the template.
        file_stats = deepcopy(Stats)
        file_stats["TotalLines"] = len(lines)
        Summary[display_name] = file_stats

        # Every line read counts towards the grand total, so add them in
        # one step instead of once per loop iteration.
        TotalAllLines += len(lines)

        for line in lines:
            # The classifiers are tried in a fixed order and the first one
            # that returns a true value ends the checks for this line.
            # NOTE(review): each Is* helper presumably bumps the matching
            # counter in file_stats itself -- confirm against the helpers.
            if IsCommentLine(line, file_stats):
                continue
            if IsWhitespaceLine(line, file_stats):
                continue
            if IsPrivateSub(line, file_stats):
                continue
            if IsPrivateFunc(line, file_stats):
                continue
            if IsPublicSub(line, file_stats):
                continue
            IsPublicFunc(line, file_stats)

    return LongestFilenameLen
#/////////////////////////////////////////////////////////
def ProcessAllFiles(Summary, Stats, FileTypes, FiletypeStats):
    """Process the files in the current directory, one extension at a time.

    For every extension in 'FileTypes' the matching file names are
    globbed ("*" + extension), sorted, counted into
    FiletypeStats[extension] and passed to ProcessFiletype() together
    with 'Summary' and 'Stats'.

    Returns the largest value returned by any ProcessFiletype() call
    (treated here as the longest filename length), or 0 when 'FileTypes'
    is empty.
    """
    import glob

    widest = 0
    for extension in FileTypes:
        matches = sorted(glob.glob("*" + extension))
        FiletypeStats[extension] = len(matches)
        widest = max(widest, ProcessFiletype(matches, Summary, Stats))
    return widest
#/////////////////////////////////////////////////////////
def main(args):
    """Print line statistics for the Visual Basic sources in the current
    directory.

    Scans the .frm, .bas and .cls files through ProcessAllFiles(), then
    prints the number of files per extension, one statistics line per
    file, and the grand totals.

    Parameters:
        args -- accepted for the caller's convenience; not used here.

    Returns None.

    Side effects: resets the module-level TotalAllLines to 0 before
    processing and sets TotalFilecount to the number of files summarized.
    NOTE(review): the remaining Total* globals are only read here; they
    are presumably initialized and updated elsewhere in the module --
    confirm.
    """
    global TotalAllLines, TotalFilecount
    global TotalCommentLines, TotalWhitespaceLines
    global TotalPrivateSubs, TotalPublicSubs
    global TotalPrivateFuncs, TotalPublicFuncs

    # Psyco is only an optional accelerator; the program must still run
    # on interpreters where it is not installed.
    try:
        import psyco
    except ImportError:
        pass
    else:
        psyco.full()

    TotalAllLines = 0
    FileTypes = [".frm", ".bas", ".cls"]  # Visual Basic source file extensions
    FiletypeStats = {}
    FileStats = {
        "TotalLines": 0,
        "WhitespaceLines": 0,
        "CommentLines": 0,
        "PrivateSubCount": 0,
        "PublicSubCount": 0,
        "PrivateFuncCount": 0,
        "PublicFuncCount": 0,
    }
    FileSummary = {}

    # The returned longest-filename length is not needed by this report.
    ProcessAllFiles(FileSummary, FileStats, FileTypes, FiletypeStats)

    # items() exists on every Python version (iteritems() is Python 2
    # only); sorting makes the report order deterministic.
    for Type, Count in sorted(FiletypeStats.items()):
        print("\nThere are %3lu files with the %s extension"
              % (Count, Type.upper()))
    print("\n")

    TotalFilecount = len(FileSummary)
    for File, Stats in sorted(FileSummary.items()):
        print("%s - %4lu Lines, %3lu Whitespace lines, %4lu Comments, "
              "%4lu Private Subs, %4lu Public Subs, "
              "%4lu Private Functions, %4lu Public Functions\n"
              % (File, Stats["TotalLines"], Stats["WhitespaceLines"],
                 Stats["CommentLines"], Stats["PrivateSubCount"],
                 Stats["PublicSubCount"], Stats["PrivateFuncCount"],
                 Stats["PublicFuncCount"]))

    print("\nTotal Lines= %5lu in %lu Files\n"
          % (TotalAllLines, TotalFilecount))
    print("\nTotal Comment Lines = %5lu, Total Whitespace = %lu"
          % (TotalCommentLines, TotalWhitespaceLines))
    print("Total Private Subs = %5lu, Total Public Subs = %5lu"
          % (TotalPrivateSubs, TotalPublicSubs))
    print("Total Private Funcs = %5lu, Total Public Funcs = %5lu\n\n\n"
          % (TotalPrivateFuncs, TotalPublicFuncs))
    return None
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/tutor/attachments/20060507/5ea33db6/attachment.htm
More information about the Tutor
mailing list