Recursive csv import functions for Pandas

andrea.botti at gmail.com andrea.botti at gmail.com
Tue Aug 2 05:50:12 EDT 2016


I have put together the following code:

    ## DEFINES INPUT FILES
    inputcsvT = ['./input_csv/A08_KI_T*.csv',		'./input_csv/A08_LR_T*.csv',		'./input_csv/A08_B1_T*.csv',]
    			#'./input_csv/A10_KI_T*.csv',		'./input_csv/A10_LR_T*.csv',		'./input_csv/A10_B1_T*.csv',
    			#'./input_csv/A11_KI_T*.csv',		'./input_csv/A11_LR_T*.csv',		'./input_csv/A11_B1_T*.csv',
    			#'./input_csv/A16_KI_T*.csv',		'./input_csv/A16_LR_T*.csv',		'./input_csv/A16_B1_T*.csv']
    
    inputcsvR = ['./input_csv/A08_KI_R*.csv',		'./input_csv/A08_LR_R*.csv',		'./input_csv/A08_B1_R*.csv',]
    
    ## DEFINES FUNCTIONS FOR CSV INPUT, MERGE AND HOURLY RESAMPLING
    def csv_import_merge_T(f):
    	dfsT = [pd.read_csv(fp,  index_col=[0], parse_dates=[0], dayfirst=True, names=['datetime','temp','rh'], header=0) for fp in files] 
    	dfT = pd.concat(dfsT)
    	dfT = dfT.drop('rh', 1)
    	#dfT[~dfT.index.duplicated()]	# replaced with function below
    	dfT_clean = dfT.reset_index().drop_duplicates('datetime').set_index('datetime')
    	dfTH = dfT_clean.resample('H').bfill()
    	return dfTH
    
    
    def csv_import_merge_R(f):
    	dfsR = [pd.read_csv(fp,  index_col=[0], parse_dates=[0], dayfirst=True, names=['datetime','rad'], header=0) for fp in files] 
    	dfR = pd.concat(dfsR)
    	#dfR[~dfR.index.duplicated()]	# replaced with function below
    	dfR_clean = dfR.reset_index().drop_duplicates('datetime').set_index('datetime')
    	dfRH = dfR_clean.resample('H').mean()
    	return dfRH
    
    
    ## PERFORMS FUNCTIONS FOR ALL Ts AND Rs AND CALCULATES HEATING DEGREE HOURS (HDH) AS R-T WHEN XX_XX_R = XX_XX_T
    for csvnameT in inputcsvT:
    	files = glob.glob(csvnameT)
    	print ('___'); print (files)
    	csvT = csvnameT[12:20]
    	print csvT
    	t = csv_import_merge_T(files)
    	t.to_csv('./output_csv/'+ csvT + '.csv')
    
    	for csvnameR in inputcsvR:
    		files = glob.glob(csvnameR)
    		print ('___'); print (files)
    		csvR = csvnameR[12:20]
    		r = csv_import_merge_R(files)
    		print csvR
    
    		while csvnameR[12:18] == csvnameT[12:18]:
    			print csvR + "=" + csvT
    			
    			r.to_csv('./output_csv/'+ csvR +'.csv')
    		
    			hdh = r.sub(t,axis=0).dropna()
    			hdh[hdh['temp']<=1] = 0
    			csvHDH = csvnameT[12:18]
    			hdh_week = hdh.temp.resample('W-MON').sum().round(decimals=0)
    			hdh_week.to_csv('./output_csv/HDH/' + csvHDH + '_HDH.csv')

The sequence of action should be:
for A08_KI
 - import A08_KI_T1,T2,...Tn, merge them into A08_KI_T and resample them by the hour (room temp.)
 - import A08_KI_R1,R2,...Rn, merge them into A08_KI_R and resample them by the hour (radiator temp.)
 - every time XX_XX_R = XX_XX_T (i.e. A08_KI_T and A08_KI_R, or A10_LR_T and A10_LR_R) calculate HDH = R - T.

The code I developed has something wrong and the command:

    while csvnameR[12:18] == csvnameT[12:18]:
seems to cause the loop to go on for ever.



More information about the Python-list mailing list