[Numpy-discussion] Help making better use of numpy array functions

Vincent Schut schut at sarvision.nl
Thu Nov 26 04:55:51 EST 2009


Hi mdekauwe,

as long as your data size is small enough to fit a 8x array in memory 
and use it, why not just skip the running total and average 8 data 
arrays each 8day period? And skip the x and y loops; these are real 
performance killers. As a bonus, your code gets a lot smaller and more 
readeable... :)
Please correct me is I'm wrong: the ultimate goal is to have a average 
of the valid pixels 8 days of data, each 8 days, correct?

I'd do it this way (more or less pythonic pseudocode, untested, but your 
should get the idea):

# read filenames
filenames = glob.glob.....
filenames.sort() # to be sure they are in proper order
filenamesChunked = [filenames[n:n+8] for n in range(0, len(filenames, 8)]
# this will produce a list of lists, with each sublist containing the
# next 8 filenames

nodatavalue =  -999

for fchunk in filenamesChunked:
	data8days = numpy.ones((8, ncols, nrows)) * -999 # fill with nodata 
values, in case there are less than 8 days
	for di in range(len(fchunk)):
		f = fchunk[di]
		data8days[di] = <read f>
	weights = data8days > nodatavalue
	# this way all nodata values get a weight of 0
	avg8days = numpy.average(data8days, axis=0, weights=weights)
	<save avg8days>

uhm... well, that's it, really :)

best regards,
Vincent.

mdekauwe wrote:
> Hi I have written some code and I would appreciate any suggestions to make
> better use of the numpy arrays functions to make it a bit more efficient and
> less of a port from C. Any tricks are thoughts would be much appreciated.
> 
> The code reads in a series of images, collects a running total if the value
> is valid and averages everything every 8 images.
> 
> Code below...
> 
> Thanks in advance...
> 
> #!/usr/bin/env python
> 
> """
> Average the daily LST values to make an 8 day product...
> 
> Martin De Kauwe
> 18th November 2009
> """
> import sys, os, glob
> import numpy as np
> import matplotlib.pyplot as plt
> 
> 
> def averageEightDays(files, lst, count, numrows, numcols):
> 	
>     day_count = 0
>     # starting day - tag for output file
>     doy = 122
>     
>     # loop over all the images and average all the information
>     # every 8 days...
>     for fname in glob.glob(os.path.join(path, files)):
>         current_file = fname.split('/')[8]
>         year = current_file.split('_')[2][0:4]
> 		
>         try:
>             f = open(fname, 'rb')
>         except IOError:
>             print "Cannot open outfile for read", fname
>             sys.exit(1)
> 	
>         data = np.fromfile(f, dtype=np.float32).reshape(numrows, numcols)
>         f.close()
> 		
>         # if it is day 1 then we need to fill up the holding array...
>         # copy the first file into the array...
>         if day_count == 0:
>             lst = data
>             for i in xrange(numrows):
>                  for j in xrange(numcols):
>                      # increase the pixel count if we got vaild data
> (undefined = -999.0
>                      if lst[i,j] > -900.:
>                          count[i,j] = 1	
>                          day_count += 1
> 		
>         # keep a running total of valid lst values in an 8 day sequence
>         elif day_count > 0 and day_count <= 7:	
>             for i in xrange(numrows):
>                 for j in xrange(numcols):
>                     # lst valid pixel?
>                     if data[i,j] > -900.:
>                         # was the existing pixel valid?
>                         if lst[i,j] < -900.:
>                             lst[i,j] = data[i,j]
>                             count[i,j] = 1
>                         else:
>                             lst[i,j] += data[i,j]
>                             count[i,j] += 1					
>                 day_count += 1
> 		
>         # need to average previous 8 days and write to a file...
>         if day_count == 8:
>             for i in xrange(numrows):
>                 for j in xrange(numcols):
>                     if count[i,j] > 0:
>                         lst[i,j] = lst[i,j] / count[i,j]
>                     else:
>                         lst[i,j] = -999.0
> 			
>             day_count = 0
>             doy += 8
> 			
>             lst, count = write_outputs(lst, count, year, doy)
> 			
>     # it is possible we didn't have enough slices to do the last 8day avg...
>     # but if we have more than one day we shall use it	
>     # need to average previous 8 days and write to a file...
>     if day_count > 1:
>         for i in xrange(numrows):
>             for j in xrange(numcols):       
>                 if count[i,j] > 0:
>                     lst[i,j] = lst[i,j] / count[i,j]
>                 else:
>                     lst[i,j] = -999.0
> 			
>         day_count = 0
>         doy += 8
> 		
>         lst, count = write_outputs(lst, count, year, doy)
> 	
> def write_outputs(lst, count, year, doy):
>     path = "/users/eow/mgdk/research/HOFF_plots/LST/8dayLST"
>     outfile = "lst_8day1030am_" + str(year) + str(doy) + ".gra"
> 	
>     try:
>         of = open(os.path.join(path, outfile), 'wb')
>     except IOError:
>         print "Cannot open outfile for write", outfile
>         sys.exit(1)
> 	
>     outfile2 = "pixelcount_8day1030am_" + str(year) + str(doy) + ".gra"
>     try:
>         of2 = open(os.path.join(path, outfile2), 'wb')
>     except IOError:
>         print "Cannot open outfile for write", outfile2
>         sys.exit(1)
> 	
>     # empty stuff and zero 8day counts
>     lst.tofile(of)
>     count.tofile(of2)	
>     of.close()
>     of2.close()
>     lst = 0.0
>     count = 0.0
> 	
>     return lst, count
> 	
> 	
> if __name__ == "__main__":
> 	
>     numrows = 332
>     numcols = 667
> 	
>     path = "/users/eow/mgdk/research/HOFF_plots/LST/gridded_03/"
>     lst = np.zeros((numrows, numcols), dtype=np.float32)
>     count = np.zeros((numrows, numcols), dtype=np.int)
>     averageEightDays('lst_scr_2006*.gra', lst, count, numrows, numcols)		
> 	
>     lst = 0.0
>     count = 0.0
>     averageEightDays('lst_scr_2007*.gra', lst, count, numrows, numcols)	
> 		
> 		
>  




More information about the NumPy-Discussion mailing list