Hi All,
I just posted this on the SciPy forum but realised it might be more
appropriate here?
I have a separate text file for daily rainfall data that covers the whole
country. I would like to calculate the monthly mean, min, max and the mean
of the sum for one state.
The mean, max and min are just the mean, max and min for all data in that
month however the sum data needs to work out the total for the month across
the array and then sum that value.
I use gdal tools to mask out the rest of the country and I use numpy tools
for the summary stats.
I can get the max, min and mean for the state, but the mean of the sum
keeps giving me a result for the whole country rather than just the state,
even though I am performing the analysis on the state only data. I am not
sure if this is a masking issue or a numpy calculation issue. The mask
works fine for the other summary statistics.
Any feedback will be greatly appreciated!
import numpy as np
import matplotlib.pyplot as plt
from numpy import ma as MA
from mpl_toolkits.basemap import Basemap
from datetime import datetime as dt
from datetime import timedelta
import os
from StringIO import StringIO
from osgeo import gdal, gdalnumeric, ogr, osr
import glob
import matplotlib.dates as mdates
import sys
shapefile=r"/Users/state.shp"
## Create masked array from shapefile
xmin,ymin,xmax,ymax=[111.975,-
9.975, 156.275,-44.525]
ncols,nrows=[886, 691] #Your rows/cols
maskvalue = 1
xres=(xmax-xmin)/float(ncols)
yres=(ymax-ymin)/float(nrows)
geotransform=(xmin,xres,0,ymax,0, -yres)
0
src_ds = ogr.Open(shapefile)
src_lyr=src_ds.GetLayer()
dst_ds = gdal.GetDriverByName('MEM').Create('',ncols, nrows, 1
,gdal.GDT_Byte)
dst_rb = dst_ds.GetRasterBand(1)
dst_rb.Fill(0) #initialise raster with zeros
dst_rb.SetNoDataValue(0)
dst_ds.SetGeoTransform(geotransform)
err = gdal.RasterizeLayer(dst_ds, [maskvalue], src_lyr)
dst_ds.FlushCache()
mask_arr=dst_ds.GetRasterBand(1).ReadAsArray()
np.set_printoptions(threshold='nan')
mask_arr[mask_arr == 255] = 1
newmask=MA.masked_equal(mask_arr,0)
### calculate monthly summary stats for state Only
rainmax=[]
rainmin=[]
rainmean=[]
rainsum=[]
yearmonthlist=[]
yearmonth_int=[]
errors=[]
OutputFolder=r"/outputfolder"
GLOBTEMPLATE =
r"/daily-rainfall/combined/rainfall-{year}/r{year}{month:02}??.txt"
def accumulate_month(year, month):
files = glob.glob(GLOBTEMPLATE.format(year=year, month=month))
monthlyrain=[]
for ifile in files:
try:
f=np.genfromtxt(ifile,skip_header=6)
except:
print "ERROR with file:", ifile
errors.append(ifile)
f=np.flipud(f)
stateonly_f=np.ma.masked_array(f, mask=newmask.mask) # this masks
data to state
print "stateonly_f:", stateonly_f.max(), stateonly_f.mean(),
stateonly_f.sum()
monthlyrain.append(stateonly_f)
yearmonth=dt(year,month,1)
yearmonthlist.append(yearmonth)
yearmonthint=str(year)+str(month)
d=dt.strptime(yearmonthint, '%Y%m')
print d
date_string=dt.strftime(d,'%Y%m')
yearmonthint=int(date_string)
yearmonth_int.append(yearmonthint)
r_sum=np.sum(monthlyrain, axis=0)
r_mean_of_sum=MA.mean(r_sum)
r_max, r_mean, r_min=MA.max(monthlyrain), MA.mean(monthlyrain),
MA.min(monthlyrain)
rainmax.append(r_max)
rainmean.append(r_mean)
rainmin.append(r_min)
rainsum.append(r_mean_of_sum)
print " state only:", yearmonthint,r_max, r_mean, r_min, r_mean_of_sum