
import pylab
from itertools import imap

# Converters probed by string_to_cvt, in order.  The first one that accepts a
# value wins, so the strictest parser (int) must come before the looser ones.
string_conversions=[int,float,pylab.datestr2num]
def string_to_cvt(s):
    """Return the first converter in ``string_conversions`` that accepts ``s``.

    Each converter is called on ``s`` purely as a probe; the converted value
    is discarded and the converter itself is returned.  If every converter
    rejects ``s`` the built-in ``str`` is returned as the fallback type.
    """
    for fn in string_conversions:
        try:
            fn(s)
        except Exception:
            # Best-effort probe: any ordinary failure just means "not this
            # type".  Catching Exception (rather than a bare ``except``)
            # lets KeyboardInterrupt and SystemExit propagate.
            continue
        return fn
    return str

def int_to_float_upcast(x):
    """Rewrite an integer literal as a float literal; leave anything else alone.

    ``"3"`` becomes ``"3.0"``.  Text that ``int`` rejects is returned
    unchanged: values that already contain a ``"."``, exponent forms such as
    ``"1e5"``, and the ``"nan"`` placeholder produced by ``replace_missing``.
    Blindly appending ``".0"`` to those would produce unparsable text such as
    ``"nan.0"``.  Whitespace around the number (including a trailing line
    terminator) is preserved and ``".0"`` is inserted before any trailing
    whitespace.
    """
    try:
        int(x)
    except ValueError:
        return x
    number = x.rstrip()
    return number + ".0" + x[len(number):]


# (type seen so far, type of the new value) -> text rewrite that makes every
# value in the column parse as the wider type.
upcast_functions={
    (int,float): int_to_float_upcast,
    (float,int): int_to_float_upcast,
    }

# Field values treated as "missing".  The variants carrying a line terminator
# cover an empty last field whose terminator is still attached to it.
missing_items=set(['','.','\n','.\n','\r\n','.\r\n'])


def replace_missing(x):
    """Return ``'nan'`` when ``x`` is a missing-value marker, else ``x``."""
    return 'nan' if x in missing_items else x

def find_upcast(state,row):
    """Fold one row into the running (upcasts, converters) state.

    Written to be used as the function argument of ``reduce``.

    Parameters:
        state: pair ``(upcasts, cvt)``.  ``upcasts`` is a list with one set of
            text-rewrite functions per column; ``cvt`` is the list of
            converters detected for the previously processed row.
        row: list of field strings for the current row.

    Returns:
        ``(merged_upcasts, new_cvt)``: the per-column union of the rewrites
        collected so far with those required by this row, and the converters
        detected for this row.  Note that ``new_cvt`` describes the current
        row only; it is not a running "widest type".

    Raises:
        ValueError: ``row`` does not have the same number of columns as the
            state.
        TypeError: a column changes between two types that have no entry in
            ``upcast_functions``.
    """
    # NOTE(review): the loop variables are published as module globals, which
    # looks like a debugging aid; kept so the module-level side effect of
    # calling this function is unchanged.
    global old_fn,new_fn
    upcasts,cvt=state
    if len(row)!=len(cvt):
        # Fail loudly instead of padding or truncating ragged rows.
        raise ValueError("Row has %d columns, expected %d" % (len(row),len(cvt)))
    new_cvt=[string_to_cvt(x) for x in row]
    new_upcasts=[]
    for i,(old_fn,new_fn) in enumerate(zip(cvt,new_cvt)):
        if old_fn==new_fn:
            upcast=set()
        elif (old_fn,new_fn) in upcast_functions:
            upcast=set([upcast_functions[(old_fn,new_fn)]])
        else:
            # A real exception object is required: raising a bare string is
            # itself rejected with a TypeError on Python >= 2.6, which hid
            # this message.
            raise TypeError("Unable to upcast %s to %s for column %d" % (old_fn.__name__,new_fn.__name__,i))
        new_upcasts.append(upcast)
    return [seen|needed for seen,needed in zip(upcasts,new_upcasts)],new_cvt

def assimilate_csv_file(fpath, delim=',', has_varnm=True ):
    """Write ``fpath + "_fixed"`` with every column's values in one text form.

    The file is read twice.  The first pass folds every data row through
    ``find_upcast`` to collect, per column, the text rewrites needed so that
    all values of the column parse as the same type.  If no column needs a
    rewrite (or the file has no data rows) ``"Nothing done to file."`` is
    printed and no output file is created.  Otherwise the second pass applies
    the rewrites and writes the rows to ``fpath + "_fixed"``, one row per
    line, terminated by ``"\\n"``.

    Parameters:
        fpath: path of the delimited text file to read.
        delim: field separator used for both reading and writing.
        has_varnm: when true, the first line is treated as a header and
            skipped.

    Missing-value markers are replaced through ``replace_missing`` in both
    passes, so the output contains the replacement text.

    NOTE(review): the skipped header line is not copied to the output file;
    confirm whether that is intended.
    """
    # NOTE(review): the collected upcasts are published as a module global;
    # kept so existing code that inspects it after the call keeps working.
    global upcasts

    def get_row_iter(f):
        # Yield each data line of ``f`` as a list of fields.  The line
        # terminator is stripped first so that it neither sticks to the last
        # field nor gets doubled when the row is written back out.
        skip_header=has_varnm
        for line in f:
            if skip_header:
                skip_header=False
                continue
            yield [replace_missing(x) for x in line.rstrip("\r\n").split(delim)]

    # Pass 1: the first data row seeds the state, the rest are folded in.
    state=None
    fr=open(fpath,'r')
    try:
        for row in get_row_iter(fr):
            if state is None:
                state=([set() for x in row],[string_to_cvt(x) for x in row])
            else:
                state=find_upcast(state,row)
    finally:
        fr.close()

    if state is None:
        upcasts=[]
    else:
        upcasts=state[0]
    if not any(upcasts):
        print("Nothing done to file.")
        return

    # Pass 2: apply each column's rewrites and write the result.
    fr=open(fpath,'r')
    try:
        fw=open(fpath+"_fixed",'w')
        try:
            for row in get_row_iter(fr):
                fixed=[]
                for rewrites,field in zip(upcasts,row):
                    for rewrite in rewrites:
                        field=rewrite(field)
                    fixed.append(field)
                fw.write(delim.join(fixed)+"\n")
        finally:
            fw.close()
    finally:
        fr.close()

if __name__ == '__main__':

    import csv
    import sys

    # Sample table: a header row followed by five records.  The first column
    # mixes integer and decimal text so that running the script exercises an
    # upcast.
    data = [
        ['col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7'],
        ['1', '3', '1/97', '1.12', '2.11', '001', 'bla1'],
        ['1.3', '2', '3/97', '1.21', '3.12', '002', 'bla2'],
        ['2', '1', '2/97', '1.12', '2.11', '003', 'bla3'],
        ['2', '2', '4/97', '1.33', '2.26', '004', 'bla4'],
        ['2', '2', '5/97', '1.73', '2.42', '005', 'bla15'],
    ]

    # Write the sample table to disk as CSV.
    sample_file = open('testdata_with_varnm.csv', 'wb')
    csv.writer(sample_file).writerows(data)
    sample_file.close()

    # Run the conversion on the file just written.
    assimilate_csv_file('testdata_with_varnm.csv')
    
