[Numpy-discussion] Help making better use of numpy array functions
Vincent Schut
schut at sarvision.nl
Thu Nov 26 04:55:51 EST 2009
Hi mdekauwe,
as long as your data size is small enough to fit a 8x array in memory
and use it, why not just skip the running total and average 8 data
arrays each 8day period? And skip the x and y loops; these are real
performance killers. As a bonus, your code gets a lot smaller and more
readeable... :)
Please correct me is I'm wrong: the ultimate goal is to have a average
of the valid pixels 8 days of data, each 8 days, correct?
I'd do it this way (more or less pythonic pseudocode, untested, but your
should get the idea):
# read filenames
filenames = glob.glob.....
filenames.sort() # to be sure they are in proper order
filenamesChunked = [filenames[n:n+8] for n in range(0, len(filenames, 8)]
# this will produce a list of lists, with each sublist containing the
# next 8 filenames
nodatavalue = -999
for fchunk in filenamesChunked:
data8days = numpy.ones((8, ncols, nrows)) * -999 # fill with nodata
values, in case there are less than 8 days
for di in range(len(fchunk)):
f = fchunk[di]
data8days[di] = <read f>
weights = data8days > nodatavalue
# this way all nodata values get a weight of 0
avg8days = numpy.average(data8days, axis=0, weights=weights)
<save avg8days>
uhm... well, that's it, really :)
best regards,
Vincent.
mdekauwe wrote:
> Hi I have written some code and I would appreciate any suggestions to make
> better use of the numpy arrays functions to make it a bit more efficient and
> less of a port from C. Any tricks are thoughts would be much appreciated.
>
> The code reads in a series of images, collects a running total if the value
> is valid and averages everything every 8 images.
>
> Code below...
>
> Thanks in advance...
>
> #!/usr/bin/env python
>
> """
> Average the daily LST values to make an 8 day product...
>
> Martin De Kauwe
> 18th November 2009
> """
> import sys, os, glob
> import numpy as np
> import matplotlib.pyplot as plt
>
>
> def averageEightDays(files, lst, count, numrows, numcols):
>
> day_count = 0
> # starting day - tag for output file
> doy = 122
>
> # loop over all the images and average all the information
> # every 8 days...
> for fname in glob.glob(os.path.join(path, files)):
> current_file = fname.split('/')[8]
> year = current_file.split('_')[2][0:4]
>
> try:
> f = open(fname, 'rb')
> except IOError:
> print "Cannot open outfile for read", fname
> sys.exit(1)
>
> data = np.fromfile(f, dtype=np.float32).reshape(numrows, numcols)
> f.close()
>
> # if it is day 1 then we need to fill up the holding array...
> # copy the first file into the array...
> if day_count == 0:
> lst = data
> for i in xrange(numrows):
> for j in xrange(numcols):
> # increase the pixel count if we got vaild data
> (undefined = -999.0
> if lst[i,j] > -900.:
> count[i,j] = 1
> day_count += 1
>
> # keep a running total of valid lst values in an 8 day sequence
> elif day_count > 0 and day_count <= 7:
> for i in xrange(numrows):
> for j in xrange(numcols):
> # lst valid pixel?
> if data[i,j] > -900.:
> # was the existing pixel valid?
> if lst[i,j] < -900.:
> lst[i,j] = data[i,j]
> count[i,j] = 1
> else:
> lst[i,j] += data[i,j]
> count[i,j] += 1
> day_count += 1
>
> # need to average previous 8 days and write to a file...
> if day_count == 8:
> for i in xrange(numrows):
> for j in xrange(numcols):
> if count[i,j] > 0:
> lst[i,j] = lst[i,j] / count[i,j]
> else:
> lst[i,j] = -999.0
>
> day_count = 0
> doy += 8
>
> lst, count = write_outputs(lst, count, year, doy)
>
> # it is possible we didn't have enough slices to do the last 8day avg...
> # but if we have more than one day we shall use it
> # need to average previous 8 days and write to a file...
> if day_count > 1:
> for i in xrange(numrows):
> for j in xrange(numcols):
> if count[i,j] > 0:
> lst[i,j] = lst[i,j] / count[i,j]
> else:
> lst[i,j] = -999.0
>
> day_count = 0
> doy += 8
>
> lst, count = write_outputs(lst, count, year, doy)
>
> def write_outputs(lst, count, year, doy):
> path = "/users/eow/mgdk/research/HOFF_plots/LST/8dayLST"
> outfile = "lst_8day1030am_" + str(year) + str(doy) + ".gra"
>
> try:
> of = open(os.path.join(path, outfile), 'wb')
> except IOError:
> print "Cannot open outfile for write", outfile
> sys.exit(1)
>
> outfile2 = "pixelcount_8day1030am_" + str(year) + str(doy) + ".gra"
> try:
> of2 = open(os.path.join(path, outfile2), 'wb')
> except IOError:
> print "Cannot open outfile for write", outfile2
> sys.exit(1)
>
> # empty stuff and zero 8day counts
> lst.tofile(of)
> count.tofile(of2)
> of.close()
> of2.close()
> lst = 0.0
> count = 0.0
>
> return lst, count
>
>
> if __name__ == "__main__":
>
> numrows = 332
> numcols = 667
>
> path = "/users/eow/mgdk/research/HOFF_plots/LST/gridded_03/"
> lst = np.zeros((numrows, numcols), dtype=np.float32)
> count = np.zeros((numrows, numcols), dtype=np.int)
> averageEightDays('lst_scr_2006*.gra', lst, count, numrows, numcols)
>
> lst = 0.0
> count = 0.0
> averageEightDays('lst_scr_2007*.gra', lst, count, numrows, numcols)
>
>
>
More information about the NumPy-Discussion
mailing list