# nearest neighbor in 2D

Colin J. Williams cjw at sympatico.ca
Thu Nov 6 13:48:13 CET 2003

```This enquiry has generated a lot of interest.  I thought it might be
useful to try numarray on the problem.  numarray has a compress
function, which is used to discard points which are not of interest.

The code is below.

As would be expected, each scan over the points in the neigbourhood
discards, on average, a little more than half the points.

I have not tried it, but it should be possible, with numarray, to
generalize this from 2D to multimimensional space.

Colin W.

# Neighbour.py
''' To find the closest neighbour, in a neghbourhood P,
to a point p.
'''
import math
import numarray as N
import numarray.numerictypes as _nt
import numarray.random_array as R

trial= 0
lengthRemaining= []
def find(P, p):
''' In the 2D neighbourhood P, find the point closest to p.
The approach is based on the selection of a trial value Pt, from
P, and
discarding all values further than Pt from p.
To avoid repeated sqrt calculations the discard is based on an
enclosing square.
'''
global lengthRemaining, trial
lengthRemaining+= [[P.shape[0]]]
Pz= P - p                             # zero based neighbourhood
while len(Pz):
Pt= Pz[0]                           # trial value
Pta= N.abs(Pt)
Pz= Pz[1:]
pd= math.sqrt(N.dot(Pta, Pta))      # distance of p from the trial
value
goodCases= N.logical_and((Pz < pd),
(Pz > -pd))# use the enclosing square
goodCases= N.logical_and.reduce(goodCases, 1)
Pz= N.compress(goodCases, Pz)  # discard points outside the square
if len(Pz) == 1:
Pt= Pz[0]                         # We have found the closest
Pz= []
lengthRemaining[trial]+= [len(Pz)]
z= 100
trial+= 1
return Pt + p

if __name__ == '__main__':
for sampleSize in range(100, 5000, 100):
P= R.random(shape= (sampleSize, 2))
for i in range(20):
p= R.random((1, 2))                   # point
a= find(P, p)
##      print 'Closest neighbour:', a[0]
##      print 'Check - Point(p):', p[0]
##  check= []
##  for i in range(len(P)):
##    check+= [(math.sqrt((P[i, 0]-p[0, 0])**2 + (P[i, 1]-p[0,
1])**2), P[i, 0], P[i, 1])]
##    print P[i], math.sqrt((P[i, 0]-p[0, 0])**2 + (P[i, 1]-p[0, 1])**2)
##  check.sort()
##  print check[0]
##  print check
print 'Number of scans:', sum([len(lst) for lst in lengthRemaining])
print 'Sample size:', P.shape[0], ' Average numner of scans:',
sum([len(lst) for lst in lengthRemaining])/float(len(lengthRemaining))

WEINHANDL Herbert wrote:
> John Hunter wrote:
>
>> I have a list of two tuples containing x and y coord
>>    (x0, y0)
>>  (x1, y1)
>>  ...
>>  (xn, yn)
>>
>> Given a new point x,y, I would like to find the point in the list
>> closest to x,y.  I have to do this a lot, in an inner loop, and then I
>> add each new point x,y to the list.  I know the range of x and y in
>
>
>> Can anyone point me to some code or module that provides the
>> appropriate data structures and algorithms to handle this task
>> efficiently?  The size of the list will likely be in the range of
>> 10-1000 elements.
>
>
> The plotting-library dislin (http://www.dislin.com)
> contains a really fast triangulation subroutine
> (~1 hour for 700000 points on an 1.5 GHz Athlon).
>
> For an example triangulation/nearest-neighbor-search see
> the attached python-script.
>
> Dislin is available for many platforms (Linux, Winxxx, ...)
> and it can be used for free if you are using free languages like
> Python, Perl, etc.
>
> Happy pythoning
>
> Herbert
>
>
> ------------------------------------------------------------------------
>
> #!/usr/bin/python
>
> # (c) by H. Weinhandl 04.Nov.2003
>
> import math
> import dislin
>
>
> def dist( ia,ib ) :
>     return math.sqrt( (X1[ia]-X1[ib])**2 + (Y1[ia]-Y1[ib])**2 )
>
>
> def find_nearest_neighb() :
>
>     print 'extracting neighbours ... ',
>
>     for i in range( nr_dat+3 ) :
> 	# initualize list wit the point i itself
>         neighb.append( [i] )
>
>     for i in range( nr_tri ) :
> 	# get a indes-triple, dislin seems to start indices with 1,
> 	# so I'm subtracting 1 to get a zero-based index
> 	U,V,W = I1[i]-1, I2[i]-1, I3[i]-1
>
> 	# add indices from all triangles, which contain the point itself
>         if (	     U in neighb[U] ) :
>     	    if  not (V in neighb[U] ) : neighb[U].append( V ) # do not append V if already in the list
> 	    if  not (W in neighb[U] ) : neighb[U].append( W ) # do not append W if already in the list
>
> 	if (         V in neighb[V] ) :
> 	    if  not (U in neighb[V] ) : neighb[V].append( U ) # do not append U if already in the list
> 	    if  not (W in neighb[V] ) : neighb[V].append( W ) # do not append W if already in the list
>
> 	if (	     W in neighb[W] ) :
> 	    if  not (U in neighb[W] ) : neighb[W].append( U ) # do not append U if already in the list
> 	    if  not (V in neighb[W] ) : neighb[W].append( V ) # do not append V if already in the list
>
>     print ' OK'
>     for i in range( nr_dat ) :
> 	neighb[i].remove( i ) # remove the point i itself
> 	neighb[i].sort()
> 	r_mi = 9.9e9
> 	i_mi = i
>
> 	# search for the nearest neighbor of point i
> 	for j in neighb[i]  :
> 	    r = dist( i, j )
> 	    if r < r_mi :
> 		r_mi = r
> 		i_mi = j
> 	print 'NB %2d : r_mi=%5.3f, i_mi=%2d  '% (i, r_mi, i_mi), neighb[i]
>
>
>
>
> if __name__ == '__main__' :
>
>     X1     = [ 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0 ]
>     Y1     = [ 1.0, 6.0, 3.0, 3.0, 2.0, 6.0, 0.0 ]
>     nr_dat = len( X1 )
>     nr_max = 2 * nr_dat + 1
>     I1     = [ 0 ] * nr_max
>     I2     = [ 0 ] * nr_max
>     I3     = [ 0 ] * nr_max
>     neighb = [ ]
>
>     # padding the 2 input-lists with 3 additional elements is required by dislin
>     X1.append( 0 )
>     X1.append( 0 )
>     X1.append( 0 )
>     Y1.append( 0 )
>     Y1.append( 0 )
>     Y1.append( 0 )
>
>
>     # delaunay triangulation of the input lists
>     # I1,I2,I3 are the indices of the triangular edge-points
>     nr_tri = dislin.triang( X1,Y1, nr_dat, I1,I2,I3, nr_max )
>
>     print X1
>     print Y1
>     print nr_dat, nr_tri, nr_max
>     print I1
>     print I2
>     print I3
>
>     find_nearest_neighb()
>
> #---- end ----

```