Hello All,<br>I am still having trouble with memory errors when I try to process many netcdf files.<br>Originally I would get the memory error mentioned in my previous post, but since I added gc.collect() after each for loop I receive this error instead:<br>
GEOS_ERROR: bad allocation<br>with no additional information!<br>The error used to occur at the point when a new netcdf file was to be opened and plotted, but with the things I have 'fixed' thanks to suggestions from this list it now seems to happen while processing the second file. <br>
I am just trying to plot 3-hourly data from each file; each file contains hourly data for one month, and I am trying to do this for many months. <br>It seems that I cannot close the previous file properly, so the computer does not have clean memory when it starts the next one.<br>
Any feedback will be greatly appreciated.<br>My latest version of the code:<br><br>######################<br><br>from netCDF4 import Dataset<br>import numpy as N<br>import matplotlib.pyplot as plt<br>from numpy import ma as MA<br>
from mpl_toolkits.basemap import Basemap<br>from netcdftime import utime<br>from datetime import datetime<br>import os<br><br><br>shapefile1="E:/DSE_BushfireClimatologyProject/griddeddatasamples/test_GIS/DSE_REGIONS"<br>
OutputFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/OutputsforValidation"<br><br>def plotrawdata(variable):<br>        if variable=='TSFC':<br>                ncvariablename='T_SFC'<br>
                MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/"<br>                ticks=[-5,0,5,10,15,20,25,30,35,40,45,50]<br>                Title='Surface Temperature'<br>
                cmap=plt.cm.jet<br>                <br>        elif variable=='RHSFC':<br>                ncvariablename='RH_SFC'<br>                MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/RH_SFC/"<br>
                ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]<br>                Title='Surface RH'<br>                cmap=plt.cm.jet_r<br><br>        fileforlatlon=Dataset("E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/TSFC_1974_01/IDZ00026_VIC_ADFD_T_SFC.nc", 'r+', 'NETCDF4')<br>
        LAT=fileforlatlon.variables['latitude'][:]<br>        LON=fileforlatlon.variables['longitude'][:] <br><br>        startperiod=raw_input("Start slice (e.g. 1 ): ")<br>        endperiod=raw_input("End slice (e.g. 2): ")<br>
        skipperiod=raw_input("skip slice (e.g. 1): ")<br>        if startperiod == "":<br>                startperiod = None<br>        else:<br>                startperiod = int(startperiod)<br>        if endperiod == "":<br>
                endperiod = None<br>        else:<br>                endperiod = int(endperiod)<br>        if skipperiod == "":<br>                skipperiod = None<br>        else:<br>                skipperiod= int(skipperiod)       <br>
<br>        for (path, dirs, files) in os.walk(MainFolder):<br>                        for dir in dirs:<br>                                print dir<br>                        path=path+'/'<br>                 <br>
                        for ncfile in files:<br>                                if ncfile[-3:]=='.nc':<br>                                    print "dealing with ncfiles:", path+ncfile<br>                                    ncfile=os.path.join(path,ncfile)<br>
                                    ncfile=Dataset(ncfile, 'r+', 'NETCDF4')<br>                                    #global TSFC<br>                                    variable=ncfile.variables[ncvariablename][startperiod:endperiod:skipperiod]<br>
                                    TIME=ncfile.variables['time'][startperiod:endperiod:skipperiod]<br>                                    fillvalue=ncfile.variables[ncvariablename]._FillValue<br>                                    ncfile.close()<br>
<br>                                    for variable, TIME in zip((variable[:]),(TIME[:])):<br>                                    #for variable, TIME in zip((variable[sliceperiod]),(TIME[sliceperiod])):<br><br>                                            cdftime=utime('seconds since 1970-01-01 00:00:00')<br>
                                            ncfiletime=cdftime.num2date(TIME)<br>                                            print ncfiletime<br>                                            timestr=str(ncfiletime)<br>                                            d = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')<br>
                                            date_string = d.strftime('%Y%m%d_%H%M')<br>                                            #Set up basemap using mercator projection <a href="http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html">http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html</a><br>
                                            map = Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33,<br>                                                              llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i')<br>
                                            x,y=map(*N.meshgrid(LON,LAT))<br>                                            map.drawcoastlines(linewidth=0.5)<br>                                            map.readshapefile(shapefile1, 'DSE_REGIONS')<br>
                                            map.drawstates()<br>            <br>                                            plt.title(Title+' %s UTC'%ncfiletime)<br>                                                <br>
                                            CS = map.contourf(x,y,variable, ticks, cmap=cmap)<br>                                            l,b,w,h =0.1,0.1,0.8,0.8<br>                                            cax = plt.axes([l+w+0.025, b, 0.025, h], )<br>
                                            cbar=plt.colorbar(CS, cax=cax, drawedges=True)<br>                    <br>                                            #save map as *.png and plot netcdf file<br>                                            plt.savefig((os.path.join(OutputFolder, ncvariablename+date_string+'UTC.png')))<br>
                                            #plt.show()<br>                                            plt.close() <br><br><br>######################<br><br><br><br><div class="gmail_quote">On Wed, Sep 14, 2011 at 4:08 PM, questions anon <span dir="ltr"><<a href="mailto:questions.anon@gmail.com">questions.anon@gmail.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;">Hello All,<br>I keep coming across a memory error when processing many netcdf files. I assume it has something to do with how I loop things and maybe need to close things off properly.<br>
In the code below I am looping through a bunch of netcdf files (each file is hourly data for one month) and within each netcdf file I am outputting a *png file every three hours. <br>
This works for one netcdf file but when it begins to process the next netcdf file I receive this memory error:<br><br><i>Traceback (most recent call last):<br>  File "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py", line 44, in <module><br>

    TSFC=ncfile.variables['T_SFC'][:]<br>  File "netCDF4.pyx", line 2473, in netCDF4.Variable.__getitem__ (netCDF4.c:23094)<br>MemoryError</i><br><br>To reduce processing requirements I have tried making LAT and LON use only [0], but then I receive a different error:<br>

<br><i>Traceback (most recent call last):<br>  File "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py", line 75, in <module><br>    x,y=map(*N.meshgrid(LON,LAT))<br>  File "C:\Python27\lib\site-packages\numpy\lib\function_base.py", line 3256, in meshgrid<br>

    numRows, numCols = len(y), len(x)  # yes, reversed<br>TypeError: len() of unsized object</i><br><br>Finally, I have added gc.collect() in a couple of places, but that doesn't seem to help.<br>I am using: <i>Python 2.7.2 |EPD 7.1-2 (32-bit)| (default, Jul  3 2011, 15:13:59) [MSC v.1500 32 bit (Intel)] on win32</i><br>

Any feedback will be greatly appreciated!<br><br><br>from netCDF4 import Dataset<br>import numpy<br>import numpy as N<br>import matplotlib.pyplot as plt<br>from numpy import ma as MA<br>from mpl_toolkits.basemap import Basemap<br>

from netcdftime import utime<br>from datetime import datetime<br>import os<br>import gc<br><br>print "start processing...."<br><br>inputpath=r'E:/GriddedData/Input/'<br>outputpath=r'E:/GriddedData/Validation/'<br>

shapefile1="E:/test_GIS/DSE_REGIONS"<br>for (path, dirs, files) in os.walk(inputpath):<br>    for dir in dirs:<br>        print dir<br>        sourcepath=os.path.join(path,dir)<br>        relativepath=os.path.relpath(sourcepath,inputpath)<br>

        newdir=os.path.join(outputpath,relativepath)<br>        if not os.path.exists(newdir):<br>            os.makedirs(newdir)<br>            <br>    for ncfile in files:<br>        if ncfile[-3:]=='.nc':<br>            print "dealing with ncfiles:", ncfile<br>

            ncfile=os.path.join(sourcepath,ncfile)<br>            #print ncfile<br>            ncfile=Dataset(ncfile, 'r+', 'NETCDF4')<br>            TSFC=ncfile.variables['T_SFC'][:,:,:]<br>            TIME=ncfile.variables['time'][:]<br>

            LAT=ncfile.variables['latitude'][:]<br>            LON=ncfile.variables['longitude'][:]<br>            fillvalue=ncfile.variables['T_SFC']._FillValue<br>            TSFC=MA.masked_values(TSFC, fillvalue)<br>

            ncfile.close()<br>            gc.collect()<br>            print "garbage collected"<br>    <br>      <br>            for TSFC, TIME in zip((TSFC[1::3]),(TIME[1::3])):<br>                print TSFC, TIME<br>

            #convert time from numbers to date and prepare it to have no symbols for saving to filename<br>                cdftime=utime('seconds since 1970-01-01 00:00:00')<br>                ncfiletime=cdftime.num2date(TIME)<br>

                print ncfiletime<br>                timestr=str(ncfiletime)<br>                d = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')<br>                date_string = d.strftime('%Y%m%d_%H%M')<br>

               <br>                #Set up basemap using mercator projection <a href="http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html" target="_blank">http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html</a><br>

                map = Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33,<br>                              llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i')<br><br>            # compute map projection coordinates for lat/lon grid.<br>

                x,y=map(*N.meshgrid(LON,LAT))<br>                map.drawcoastlines(linewidth=0.5)<br>                map.readshapefile(shapefile1, 'DSE_REGIONS')<br>                map.drawstates()<br>    <br>                plt.title('Surface temperature at %s UTC'%ncfiletime)<br>

                ticks=[-5,0,5,10,15,20,25,30,35,40,45,50]<br>                CS = map.contourf(x,y,TSFC, ticks, cmap=plt.cm.jet)<br>                l,b,w,h =0.1,0.1,0.8,0.8<br>                cax = plt.axes([l+w+0.025, b, 0.025, h], )<br>

                cbar=plt.colorbar(CS, cax=cax, drawedges=True)<br>            <br>            #save map as *.png and plot netcdf file<br>                plt.savefig((os.path.join(newdir,'TSFC'+date_string+'UTC.png')))<br>

                plt.close()<br>                gc.collect()<br>                print "garbage collected again"<br>print "end of processing"<br><br><br>
</blockquote></div><br>