nearest neighbor in 2D

Jack Diederich jack at performancedrivers.com
Tue Nov 4 15:11:32 EST 2003


On Tue, Nov 04, 2003 at 01:02:36PM -0600, John Hunter wrote:
> >>>>> "Ron" == Ron Adam <radam2 at tampabay.rr.com> writes:
> 
>     Ron> I really don't know how you can make this faster.  There
>     Ron> might be a library that has a distance between two points
>     Ron> function that could speed it up.
> 
> If you only had to compare one point to all the other points, then the
> brute force approach -- check every point -- will work great.  This is
> O(N) and I don't think you can beat it.  The idea is that I will be
> repeatedly checking and adding points to the list, so it is worthwhile
> at the outset to set up a data structure which allows a more efficient
> search.
> 
> The analogy is a binary search in 1D.  If you plan to repeatedly
> search a (possibly growing) list of numbers to see whether it contains
> some number or find the nearest neighbor to a number, it is worthwhile
> at the outset to put them in a data structure that allows a binary
> search.  Setting up the initial data structure costs you some time,
> but subsequent searches are O(log2(N)).  See google for 'binary
> search' and the python module bisect.
> 
> So roughly, for a list with 1,000,000 elements, your brute force
> approach requires a million comparisons per search.  If the data is
> setup for binary search, on average only 13-14 comparisons will be
> required.  Well worth the effort if you need to do a lot of searches,
> as I do.
> 
> John Hunter

Breaking into the thread late, I've been busy enough to put down c.l.py
for a couple weeks.

If you only need to compare 10-1000 points, try this approach below.  I
wrote it for the ICFP programming contest where I was sorting lots and
lots of points lots and lots of times.  It sorts a list of points for
their manhattan distance from a particular point.  I tweaked it until
it was just fast enough to do what I wanted.  I won't pretend it is
optimal for all N, just that it was good enough for _my_ N.  The speed
trick is that the point we are sorting around is stored in an object that
is created only once, and then we make the object's __call__ be the
comparison function.

Since it uses the list.sort() method we don't have to do anything more
clever than write our comparison function in C.  I trust the Timbot
has written a better list.sort() than I ever will, so let's use it.

it is used thusly:

import icfp

l = [your big list of point tuples like (1, 10)]
p = (99, 22) # find nearest point in l to p

sort_ob = icfp.manhattan_sort(p) # creates a callable Manhat object that remembers p

l.sort(sort_ob) # sorts around p
print "closest", l[0]
print "farthest", l[-1]

The below code is from my CVS tree, so it should probably work.  It was
written in a rush for a contest (JDH, you may recognize parts that are 
cut-n-pasted from "probstat") so YMMV.

-jack

NB, if there is a boost or pyrex guy listening you might want to
throw in an easilly derived class that makes this [ab]use of __call__ easy.
It was a big performance win for me for very little effort.


""" icfp_module.c """

#include "Python.h"
#include <stdio.h>
#include <stdlib.h>

/*
 * stats module interface
 */

static PyObject *ErrorObject;

PyObject *manhat_new(PyObject *self, PyObject *args);

static PyTypeObject Manhat_Type;

static PyObject *
heur1(PyObject *self, PyObject *args)
{
  PyObject *tup1;
  PyObject *tup2;
  long x,y;

  if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &tup1, &PyTuple_Type, &tup2)) {
    return NULL;
  }
  x = PyInt_AsLong(PyTuple_GET_ITEM(tup1,0)) - PyInt_AsLong(PyTuple_GET_ITEM(tup2,0));
  y = PyInt_AsLong(PyTuple_GET_ITEM(tup1,1)) - PyInt_AsLong(PyTuple_GET_ITEM(tup2,1));
  return PyInt_FromLong(labs(x * x) + labs(y * y));
}

static PyMethodDef stats_methods[] = {
        {"manhattan", heur1, METH_VARARGS},
        {"manhattan_sort", manhat_new, METH_VARARGS},
        {NULL,          NULL}           /* sentinel */
};

DL_EXPORT(void)
initicfp(void)
{
        PyObject *m, *d;

        PyPQueue_Type.ob_type = &PyType_Type;
        Manhat_Type.ob_type = &PyType_Type;

        /* Create the module and add the functions */
        m = Py_InitModule("icfp", stats_methods);

        /* Add some symbolic constants to the module */
        d = PyModule_GetDict(m);
        ErrorObject = PyErr_NewException("icfp.error", NULL, NULL);
}

""" manhattan.c """
#include "Python.h"
#include <stdio.h>
 
#define ManhatObject_Check(v)    ((v)->ob_type == &Manhat_Type)
 
staticforward PyTypeObject Manhat_Type;
 
typedef struct {
        PyObject_HEAD
        long x;
        long y;
} ManhatObject;


static void
Manhat_dealloc(ManhatObject *self) {
  PyObject_Del(self);
}

static PyObject *
Manhat_call(ManhatObject *self, PyObject *args)
{
  PyObject *tup1;
  PyObject *tup2;
  long a;
  long b;
  long x;
  long y;

  if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &tup1, &PyTuple_Type, &tup2)) {
    return NULL;
  }
  x = PyInt_AsLong(PyTuple_GET_ITEM(tup1,0)) - self->x;
  y = PyInt_AsLong(PyTuple_GET_ITEM(tup1,1)) - self->y;
  a = labs(x * x) + labs(y * y);

  x = PyInt_AsLong(PyTuple_GET_ITEM(tup2,0)) - self->x;
  y = PyInt_AsLong(PyTuple_GET_ITEM(tup2,1)) - self->y;
  b = labs(x * x) + labs(y * y);

  if (a == b)
    return PyInt_FromLong(0);
  else if (a < b)
    return PyInt_FromLong(-1);
  else
    return PyInt_FromLong(1);
}

static PyMethodDef Manhat_methods[] = {
  {NULL,                NULL}           /* sentinel */
};

statichere PyTypeObject Manhat_Type = {
        /* The ob_type field must be initialized in the module init function
         * to be portable to Windows without using C++. */
        PyObject_HEAD_INIT(&PyType_Type)
        0,                      /*ob_size*/
        "Manhat",               /*tp_name*/
        sizeof(ManhatObject),   /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        /* methods */
        (destructor)Manhat_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0, /*tp_getattr*/
        0, //(setattrfunc)Permute_setattr, /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        (ternaryfunc)Manhat_call,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,                      /*tp_getattro*/
        0,                      /*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT,     /*tp_flags*/
        0,                      /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        Manhat_methods,         /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        0,                      /*tp_init*/
        0,                      /*tp_alloc*/
        0,                      /*tp_new*/
        0,                      /*tp_free*/
        0,                      /*tp_is_gc*/
};

// not static so ifcp_module.c can see it
PyObject *
manhat_new(PyObject *self, PyObject *args)
{
        ManhatObject *mh;
        PyObject *tup1;
        
        if (!PyArg_ParseTuple(args, "O!", &PyTuple_Type, &tup1)) {
          return NULL;
        }
         // call object create function and return
        mh = PyObject_New(ManhatObject, &Manhat_Type);
        if (mh == NULL)
          return NULL;
 
        mh->x = PyInt_AsLong(PyTuple_GET_ITEM(tup1,0));
        mh->y = PyInt_AsLong(PyTuple_GET_ITEM(tup1,1));
        return (PyObject *)mh;
}

""" setup.py """

from distutils.core import setup, Extension

files = [
           'manhattan.c',
           'icfp_module.c',
        ]

libraries = []
#libraries = ["efence"] # uncomment to use ElectricFence
includes = []
if (__name__ == '__main__'):
  setup(name = "icfp", version = "0.2",
        ext_modules = [Extension("icfp", files,
                                 libraries = libraries,
                                 include_dirs =  includes,
                                )
                      ]
       )






More information about the Python-list mailing list