Hi numpy list!
I am trying to do some image processing on a number of images, 72 to be specific. I am seeing the python memory usage continually increase. I will say this is being done also using arcpy so that COULD be an issue, but want to rule out scipy
first since I am only using arcpy to grab a set of polygons and convert them to a binary image, and to save images to disk. I pass the raster to my numpy/scipy processing. The code is organized as:
1)
Main loop iterating over features
2)
Function which does image processing using scipy
3)
Class which implements some of the image processing features
I placed 2 sleep calls in the code.. one at the end of the image processing function and one at the end of each iteration in the main loop. I am seeing a 20MB memory increase each iteration while looking at my processes in Task Manager:
Iteration 1: 204,312K and 209,908K, respectively
Iteration 2: 233,728K and 230,192K, respectively
Iteration 3: 255,676K and 250,600K, respectively
Any ideas?
Thanks so much!
Joe
1)
Main loop
#for each feature class, convert to raster by "Value" and reclass to 1 and 0
temp_raster = os.path.join(scratchWorkspace,"temp_1.tif")
temp_raster_elev = os.path.join(scratchWorkspace,"temp_elev.tif")
temp_reclass = os.path.join(scratchWorkspace,"temp_reclass.tif")
remap = arcpy.sa.RemapValue([[1,1], ["NoData",0]])
for i,fc in enumerate(potWaterFeatNames):
#try to delete the temp rasters in case they exist
try:
#delete temp rasters
arcpy.Delete_management(temp_raster)
arcpy.Delete_management(temp_raster_elev)
arcpy.Delete_management(temp_reclass)
except e:
arcpy.AddMessage(e.message())
#clear in memory workspace"
arcpy.Delete_management("in_memory/temp_table")
#print "on feature {} of {}".format(i+1, len(potWaterFeatNames))
arcpy.AddMessage("on feature {} of {}".format(i+1, len(potWaterFeatNames)))
arcpy.AddMessage("fc = {}".format(fc))
arcpy.PolygonToRaster_conversion(fc, "Value", temp_raster, "", "", 1)
outReclass = arcpy.sa.Reclassify(temp_raster,"Value",remap)
outReclass.save(temp_reclass)
del outReclass
#call function to process connected components
try:
proc_im = processBinaryImage(temp_reclass, scratchWorkspace, sm_pixels)
except Exception as e:
print "FAILED! scipy image processing"
arcpy.AddMessage(e.message())
#convert processed raster to polygons
try:
out_fc = os.path.join(outWorkspace,fc + "_cleaned")
arcpy.RasterToPolygon_conversion(proc_im, out_fc)
arcpy.Delete_management(proc_im)
except Exception as e:
print "FAILED! converting cleaned binary raster to polygon"
arcpy.AddMessage(e.message())
#delete zero value polyons, gridcode = 0
try:
uc = arcpy.UpdateCursor(out_fc, "gridcode=0",arcpy.Describe(out_fc).spatialReference)
#loop through 0 rows and delete
for row in uc:
uc.deleteRow(row)
del uc
except Exception as e:
print "FAILED! deleting rows from table"
arcpy.AddMessage(e.message())
#check that number of polygons with gridcode = 1 is greater than 0
count = arcpy.GetCount_management(out_fc)
#add elevation field back in
if (count > 0):
arcpy.PolygonToRaster_conversion(fc, "z_val", temp_raster_elev, "", "", 1)
arcpy.sa.ZonalStatisticsAsTable(out_fc, "Id", temp_raster_elev, "in_memory/temp_table","#","MEAN")
arcpy.JoinField_management(out_fc, "Id", "in_memory/temp_table", "Id", ["MEAN"])
else:
arcpy.Delete_management(out_fc)
#delete temp rasters
arcpy.Delete_management(temp_raster)
arcpy.Delete_management(temp_raster_elev)
arcpy.Delete_management(temp_reclass)
#python garbage collection
#collected = gc.collect()
#print "Garbage collector: collected %d objects." % (collected)
print "sleeping for 10 seconds"
time.sleep(10)
2)
Image processing function
def processBinaryImage(imageName, save_dir, sm_pixels):
fname = os.path.basename(imageName)
imRaster = arcpy.Raster(imageName)
#Grab AGS info from image
#use describe module to grab info
descData = arcpy.Describe(imRaster)
cellSize = descData.meanCellHeight
extent = descData.Extent
spatialReference = descData.spatialReference
pnt = arcpy.Point(extent.XMin, extent.YMin)
del imRaster
converter = ConvertRasterNumpy();
imArray = converter.rasterToNumpyArray(imageName)
imMin = np.min(imArray)
imMax = np.max(imArray)
print imMin, imMax
arcpy.AddMessage("image min: " + str(imMin))
arcpy.AddMessage("image max: " + str(imMax));
#other flags
show_image = False #verbose
save_flag = False #under verbose
gray = False #threshold but keep gray levels
make_binary = False #flag if grayscale is true, to binarize final image
save_AGS_rasters = True
#create filter object
filter = thresholdMedianFilter()
lowValue = 0
tImage = filter.thresholdImage(imArray, lowValue, gray)
#median filter image
filtImage1 = filter.medianFiltImage(tImage,1)
#create structuring element for morphological operations
sElem = np.array([[0., 1., 0.],
[1., 1., 1.],
[0., 1., 0.]])
#open filtered image
gray = False
mImage = filter.mOpenImage(filtImage1, sElem, gray)
#set list to store info change
num_its = 100
the_change = np.zeros(num_its)
for it in range(100):
prev = mImage
filtImage = filter.medianFiltImage(mImage,1)
mImage = filter.mOpenImage(filtImage, sElem, gray)
#calculate difference
(m_info, z_info) = filter.calcInfoChange(prev, mImage)
the_change[it] = z_info
del filtImage
del prev
#if the change is less than 5% of the initial change, exit
cur_perc = the_change[it]/the_change[0]*100
if cur_perc < 5.0:
print "exiting filter on iteration " + str(it)
print "change is less than 5% (this iteration: " + str(cur_perc) + ")"
break
############################################################################
# now we have a binary mask. Let's find and label the connected components #
############################################################################
#clear some space
del tImage
del filtImage1
del m_info
label_im_init, nb_labels = ndimage.label(mImage)
#Compute size, mean_value, etc. of each region:
label_im = label_im_init.copy()
del label_im_init
sizes = ndimage.sum(mImage, label_im, range(nb_labels + 1))
mean_vals = ndimage.sum(imArray, label_im, range(1, nb_labels + 1))
#clean up small components
mask_size = sizes < sm_pixels
remove_pixel = mask_size[label_im]
label_im[remove_pixel] = 0;
labels = np.unique(label_im)
label_im = np.searchsorted(labels, label_im)
#make label image to a binary image, and convert to arcgis raster
label_im[label_im > 0] = 1
label_im = np.array(label_im, dtype = 'float32')
print label_im.dtype
saveit = False
if ~saveit:
outRaster = save_dir + "\\" + fname[:-4] + "_CC_" + str(sm_pixels) + ".tif"
temp = arcpy.NumPyArrayToRaster(label_im,pnt,cellSize,cellSize)
arcpy.DefineProjection_management(temp, spatialReference)
arcpy.CopyRaster_management(temp, outRaster, "DEFAULTS","0","","","","8_BIT_UNSIGNED")
#clear more space
del mImage
del nb_labels
del sizes
del mean_vals
del mask_size
del remove_pixel
del label_im
del labels
del temp
del the_change
del sElem
del filter
del imArray
print 'sleeping'
time.sleep(20)
return outRaster
3)
Image processing class
class thresholdMedianFilter():
def thresholdImage(self, imArray, thresh, binary = True):
""" threshold the image. values equal or below thresh are 0, above are 1"""
tImage = imArray
tImage[tImage <= thresh] = 0
if binary:
tImage[tImage > thresh] = 1
return tImage
def medianFiltImage(self,imArray,n=1):
"""median filter the image n amount of times. a single time is the default"""
for n in range(n):
prev = imArray
imArray = signal.medfilt2d(prev)
del prev
return imArray
def mOpenImage(self,imArray, sElem, gray = False):
""" morphological opening """
#Mnp = np.array(morph.binary_erosion(imArray, sElem))
#Mnp = np.array(morph.binary_dilation(Mnp, sElem))
if gray:
imArray1 = np.array(morph.grey_dilation(imArray, structure = sElem))
imArray2 = np.array(morph.grey_erosion(imArray1, structure = sElem), dtype = 'float32')
del imArray1
return imArray2
else:
Mnp1 = np.array(morph.binary_dilation(imArray, sElem))
Mnp2 = np.array(morph.binary_erosion(Mnp1, sElem), dtype = 'float32')
del Mnp1
return Mnp2
def calcInfoChange(self,imArray1, imArray2):
"""calculate entropy of an image"""
diff = imArray1 - imArray2
m_norm = sum(abs(diff)) #Manhattan Norm
z_norm = norm(diff.ravel(), 0) #Zero norm
del diff
return (m_norm, z_norm)
Joe McGlinchy | Imagery Scientist
Database Services – 3D and Imagery Team
ESRI
|| 380 New York St.
|| Redlands, CA
92373 || USA
T 909-793-2853, ext. 4783