[Tutor] Should I use generators here?

Tony C cappy2112 at gmail.com
Mon May 8 03:39:27 CEST 2006


I wrote a small Python program to count some simple statistics on a Visual
Basic program that I am maintaining.

The Python program counts total lines, whitespace lines, comment lines,
Public & Private Subroutines, Public and Private Functions.
The Python program takes about 20-40 seconds to count all these stats since
I started using Psyco, but I am wondering if I can
eliminate Psyco and improve the program performance using generators (or
some other technique).

The running time is quick enough, I'm just wondering  if there are other
simple performance tweaks to use.
I've already eliminated all . (dot) references inside the loops.

I haven't quite got my head around generators yet, or when to use /not use
them, even though I have seen tutorials and examples.

I'll only include the higher level calling functions, for brevity..

ProcessAllFiles() and ProcessFiletype() are the functions whose performance
I am interested in improving.

Here is what the output "summary" looks like

Total Lines= 54932 in 45 Files

Total Comment Lines =  7408, Total Whitespace = 33679
Total Private Subs  =   608, Total Public Subs  =   145
Total Private Funcs =    86, Total Public Funcs =   165

Thanks for any advice!

Tony


def ProcessFiletype(Thesefiles, Summary, Stats):
    """Collect per-file statistics for every file named in 'Thesefiles'.

    Each filename is normalised with lower().capitalize(); the normalised
    name is both the path that is opened and the key stored in 'Summary'.

    Thesefiles -- iterable of filenames to process
    Summary    -- dict, updated in place: normalised filename -> stats dict
    Stats      -- template stats dict; a deep copy is made for every file,
                  so the template itself is never modified

    Returns the length of the longest normalised filename seen, or 0 when
    'Thesefiles' is empty.  Files that cannot be opened or read are
    reported on stdout and skipped; they still count towards the returned
    length.

    Side effect: the module-level TotalAllLines counter grows by the number
    of lines read from each file.
    """

    global TotalAllLines

    LongestFilenameLen = 0
    for Onefile in Thesefiles:
        Onefile = Onefile.lower().capitalize()
        LongestFilenameLen = max(LongestFilenameLen, len(Onefile))

        try:
            fh = open(Onefile, "r")
        except IOError:
            print("\nFATAL ERROR ocurred opening %s for input" % Onefile)
            continue

        try:
            try:
                # These files are very small (less than 100k each), so
                # reading a whole file into memory isn't a problem.
                Filecontents = fh.readlines()
            finally:
                # Always release the handle, even when the read fails.
                fh.close()
        except IOError:
            print("\nFatal error occurred reading from %s\n\n" % Onefile)
            continue

        # Associate each filename with a fresh stats dict that starts from
        # the zero counts held in the template.
        Filestats = deepcopy(Stats)
        Filestats["TotalLines"] = len(Filecontents)
        Summary[Onefile] = Filestats

        # One addition per file instead of one per line.
        TotalAllLines += len(Filestats and Filecontents or Filecontents)

        # Try the classifiers in priority order and stop at the first one
        # that reports a match.  The Is* helpers are defined elsewhere in
        # this file; presumably each one updates the relevant counter in
        # 'Filestats' itself -- verify against their definitions.
        for line in Filecontents:
            if IsCommentLine(line, Filestats):
                continue
            if IsWhitespaceLine(line, Filestats):
                continue
            if IsPrivateSub(line, Filestats):
                continue
            if IsPrivateFunc(line, Filestats):
                continue
            if IsPublicSub(line, Filestats):
                continue
            IsPublicFunc(line, Filestats)

    return LongestFilenameLen

#/////////////////////////////////////////////////////////

def ProcessAllFiles(Summary, Stats, FileTypes, FiletypeStats):
    """Process every file in the current directory whose name ends with one
    of the extensions in 'FileTypes'.

    Summary       -- dict passed through to ProcessFiletype, which fills it
    Stats         -- template stats dict passed through to ProcessFiletype
    FileTypes     -- iterable of extensions, each appended to "*" to build
                     the glob pattern (e.g. ".bas" -> "*.bas")
    FiletypeStats -- dict, updated in place: extension -> number of
                     matching files (0 is recorded when nothing matches)

    Returns the largest value returned by ProcessFiletype across all
    extensions, or 0 when no files were found at all.
    """

    from glob import glob

    LongestFilenameLen = 0
    for Filetype in FileTypes:
        TheseFiles = sorted(glob("*" + Filetype))
        FiletypeStats[Filetype] = len(TheseFiles)

        # Nothing to process for this extension: the count of 0 is already
        # recorded, so don't hand an empty list to ProcessFiletype.
        if not TheseFiles:
            continue

        Longest = ProcessFiletype(TheseFiles, Summary, Stats)
        LongestFilenameLen = max(LongestFilenameLen, Longest)

    return LongestFilenameLen

#/////////////////////////////////////////////////////////

def main(args):
    """Count statistics for the Visual Basic sources in the current
    directory and print a per-extension, per-file and overall summary.

    args -- accepted but not used by this function

    Always returns None.

    The Total* names other than TotalAllLines and TotalFilecount are only
    read here, never assigned.  NOTE(review): presumably the Is* line
    classifiers defined elsewhere in this file maintain them -- confirm,
    otherwise the final summary raises NameError.
    """

    global TotalAllLines, TotalFilecount, TotalCommentLines, \
        TotalWhitespaceLines, TotalPrivateSubs, TotalPublicSubs, \
        TotalPrivateFuncs, TotalPublicFuncs

    # psyco is an optional third-party accelerator: use it when it is
    # installed, but don't refuse to run without it.
    try:
        import psyco
    except ImportError:
        pass
    else:
        psyco.full()

    TotalAllLines = 0

    FileTypes = [".frm", ".bas", ".cls"]  # Visual Basic source file extensions
    FiletypeStats = {}
    FileStats = {
        "TotalLines": 0,
        "WhitespaceLines": 0,
        "CommentLines": 0,
        "PrivateSubCount": 0,
        "PublicSubCount": 0,
        "PrivateFuncCount": 0,
        "PublicFuncCount": 0,
    }
    FileSummary = {}

    # The return value (longest filename length) is kept but not used by
    # anything below.
    LongestFilenameLen = ProcessAllFiles(FileSummary, FileStats, FileTypes,
                                         FiletypeStats)

    # items() rather than iteritems() so the loops run on Python 2 and 3.
    for Type, Count in FiletypeStats.items():
        print("\nThere are %3lu files with the %s extension"
              % (Count, Type.upper()))

    print("\n")

    TotalFilecount = len(FileSummary)

    PerFileFormat = ("%s - %4lu Lines, %3lu Whitespace lines, %4lu Comments, "
                     "%4lu Private Subs, %4lu Public Subs, "
                     "%4lu Private Functions, %4lu Public Functions\n")
    for File, Stats in FileSummary.items():
        print(PerFileFormat % (File,
                               Stats["TotalLines"],
                               Stats["WhitespaceLines"],
                               Stats["CommentLines"],
                               Stats["PrivateSubCount"],
                               Stats["PublicSubCount"],
                               Stats["PrivateFuncCount"],
                               Stats["PublicFuncCount"]))

    print("\nTotal Lines= %5lu in %lu Files\n"
          % (TotalAllLines, TotalFilecount))
    print("\nTotal Comment Lines = %5lu, Total Whitespace = %lu"
          % (TotalCommentLines, TotalWhitespaceLines))
    print("Total Private Subs  = %5lu, Total Public Subs  = %5lu"
          % (TotalPrivateSubs, TotalPublicSubs))
    print("Total Private Funcs = %5lu, Total Public Funcs = %5lu\n\n\n"
          % (TotalPrivateFuncs, TotalPublicFuncs))

    return None
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/tutor/attachments/20060507/5ea33db6/attachment.htm 


More information about the Tutor mailing list