quick question

Mon Nov 18 19:53:09 EST 2002

John ...

Thanks for your nice explanation and posting your script
for counting votes from record values in a file ...

I created a test input file ...

    # Records ..... ~ 260,000
    # Flavors ..... 12 Unique Record Values
      File Size ... 2.54 MB

Using an edited version of your script
with both a normal Python and a Psyco bound function,
I came up with the following results ...

   Count    Percent    Record

    75168      28.92    vanilla
    36288      13.96    peach
    33696      12.96    chocolate
    31968      12.30    strawberry
    26784      10.30    Granny's Best
    16416       6.32    snow
    15552       5.98    butter pecan
    12960       4.99    rocky road
     7776       2.99    kiwi
     1728       0.66    raspberry sorbet
      864       0.33    Partridge in a Pear Tree
      714       0.27    pistachio
    ======    ======
    259914    100.00

    Elapsed Time :    8.73 Seconds ... Normal
    Elapsed Time :    5.05 Seconds ... Psyco

I think 5 seconds for categorizing and counting
a couple of floppy disks worth of votes ain't too bad ...

My version of your script follows ...

Thanks again ...

Cousin Stanley

----------------------------------------------------------

'''
    NewsGroup :  comp.lang.python
         Date :  2002-11-16
    Posted_By :  John Hunter
    Edited_By :  Stanley C. Kitching

     FileName :  file_rec_count.py

     Function :

       Count records in a file
       according to the record's value

       A dictionary is used with unique record values as keys
       and their respective record counts as values

       For reporting, the dictionary is converted
       to a sorted list of ( count , record ) tuples

       Both normal Python and Psyco bound tests
       are run for comparison

'''

import psyco
import sys
import time

# Announce which script is running
#
# The text is formatted into a single string first
# so that the Python 2 print statement and the Python 3
# print function both write exactly the same characters

print( '\n    %s' % sys.argv[ 0 ] )

def rec_count( aFile ) :
    '''
    Count the records in aFile by value and print a report

    Each item yielded by aFile is stripped of leading and
    trailing whitespace and tallied in a dictionary keyed by
    the stripped text, so blank lines are tallied under the
    empty string

    The report lists every distinct record with its count and
    its percentage of all records read, highest count first,
    records with equal counts in descending record order,
    followed by a line of column totals

    aFile : any iterable that yields the records as strings,
            for example an open text file

    Returns None ... the report is written to standard output

    Every print below receives one pre-formatted string so that
    the Python 2 print statement and the Python 3 print function
    both write exactly the same text
    '''

    dict_rec = {}
    num_rec  = 0

    # Load Dictionary with Records from File

    for aLine in aFile :

        aRec = aLine.strip()

        dict_rec[ aRec ] = dict_rec.get( aRec , 0 ) + 1

        num_rec += 1

    # Convert Dictionary to a List of ( count , record ) Tuples
    # so that a plain sort orders by count before record text

    list_rec = [ ( aValue , aKey ) for ( aKey , aValue ) in dict_rec.items() ]

    list_rec.sort()

    list_rec.reverse()    # Highest Counts Listed First

    # Print Report

    # A float divisor keeps the percentage from being truncated
    # by integer division under Python 2
    #
    # With no records read the loop below never runs,
    # so the zero divisor is never used

    num_rec = float( num_rec )

    count_tot   = 0
    percent_tot = 0.00

    print( '\n    Count    Percent    Record \n' )

    for ( count , aRec ) in list_rec :

        percent = ( count / num_rec ) * 100.00

        print( '   %6d     %6.2f    %s ' % ( count , percent , aRec ) )

        count_tot   += count

        percent_tot += percent

    print( '\n    ======    ======' )
    print( '    %6d    %6.2f \n' % ( count_tot , percent_tot ) )

def _timed_rec_count( aPath ) :
    '''
    Run rec_count over the file at aPath
    and return the elapsed wall clock time in seconds

    The timer covers only the rec_count call,
    opening and closing the file are not included

    The file is closed in a finally clause so that the handle
    is released even when rec_count raises an exception
    '''

    aFile = open( aPath )

    try :

        start = time.time()

        rec_count( aFile )

        return time.time() - start

    finally :

        aFile.close()

def xMain() :
    '''
    Time rec_count over the same input file twice,
    first as it stands and then after psyco.bind( rec_count )
    has been applied, and print both elapsed times
    '''

    aPath = 'file_rec_IceCream.txt'

    # Normal Test

    print( '\n    Normal Python Test ... ' )

    et_1 = _timed_rec_count( aPath )

    # Psyco Test

    print( '\n    Psyco Test ...' )

    # Bind before the second run so that only this run is affected

    psyco.bind( rec_count )

    et_2 = _timed_rec_count( aPath )

    print( '\n    Elapsed Time :  %6.2f Seconds ... Normal ' % ( et_1 ) )

    print( '\n    Elapsed Time :  %6.2f Seconds ... Psyco  ' % ( et_2 ) )

# Script Entry Point ... run the timing comparison
# only when this file is executed directly

if '__main__' == __name__ :

    xMain()