[Python-Dev] "groupby" iterator
Hye-Shik Chang
perky at i18n.org
Fri Nov 28 14:54:20 EST 2003
On Thu, Nov 27, 2003 at 01:49:40PM -0800, Guido van Rossum wrote:
>
> Thanks! Here's a class version of the same, which strikes me as
> slightly easier to understand (though probably slower due to all the
> instance variable access). It may serve as an easier model for a C
> implementation. I decided not to deal explicitly with out-of-order
> access; if the caller doesn't play by the rules, some of their groups
> will be split and jumbled, but each split group will still have
> matching keys.
Here's yet another implementation for itertoolsmodule.c. (see
attachment)
I wrote it after the shower (really!) :)
Regards,
Hye-Shik
-------------- next part --------------
Index: Modules/itertoolsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v
retrieving revision 1.26
diff -u -r1.26 itertoolsmodule.c
--- Modules/itertoolsmodule.c 12 Nov 2003 14:32:26 -0000 1.26
+++ Modules/itertoolsmodule.c 28 Nov 2003 19:46:43 -0000
@@ -2081,6 +2081,272 @@
};
+/* groupby object ***********************************************************/
+
+typedef struct {
+ PyObject_HEAD
+ PyObject *it;
+ PyObject *key;
+ PyObject *oldvalue;
+ PyObject *oldkey;
+} groupbyobject;
+
+static PyTypeObject groupby_type;
+static PyObject *_itergroup_create(groupbyobject *);
+
+static PyObject *
+groupby_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ groupbyobject *gbo;
+ PyObject *it, *key;
+
+ if (!PyArg_ParseTuple(args, "OO:groupby", &key, &it))
+ return NULL;
+
+ if (!PyCallable_Check(key)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Key argument must be a callable object.");
+ return NULL;
+ }
+
+ gbo = (groupbyobject *)type->tp_alloc(type, 0);
+ if (gbo == NULL)
+ return NULL;
+ gbo->oldvalue = NULL;
+ gbo->oldkey = NULL;
+ gbo->key = key;
+ Py_INCREF(key);
+ gbo->it = PyObject_GetIter(it);
+ if (it == NULL) {
+ Py_DECREF(gbo);
+ return NULL;
+ }
+ return (PyObject *)gbo;
+}
+
+static void
+groupby_dealloc(groupbyobject *gbo)
+{
+ PyObject_GC_UnTrack(gbo);
+ Py_XDECREF(gbo->it);
+ Py_XDECREF(gbo->key);
+ Py_XDECREF(gbo->oldvalue);
+ Py_XDECREF(gbo->oldkey);
+ gbo->ob_type->tp_free(gbo);
+}
+
+static int
+groupby_traverse(groupbyobject *gbo, visitproc visit, void *arg)
+{
+ int err;
+
+ if (gbo->it) {
+ err = visit(gbo->it, arg);
+ if (err)
+ return err;
+ }
+
+ if (gbo->key) {
+ err = visit(gbo->key, arg);
+ if (err)
+ return err;
+ }
+
+ if (gbo->oldvalue) {
+ err = visit(gbo->oldvalue, arg);
+ if (err)
+ return err;
+ }
+
+ if (gbo->oldkey) {
+ err = visit(gbo->oldkey, arg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static PyObject *
+groupby_next(groupbyobject *gbo)
+{
+ if (gbo->oldvalue == NULL) {
+ gbo->oldvalue = PyIter_Next(gbo->it);
+ if (gbo->oldvalue == NULL)
+ return NULL;
+ }
+
+ return _itergroup_create(gbo);
+}
+
+PyDoc_STRVAR(groupby_doc,
+"groupby(key, iterable) -> create an iterator which returns sub-iterators\n\
+grouped by key(value).\n");
+
+static PyTypeObject groupby_type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "itertools.groupby", /* tp_name */
+ sizeof(groupbyobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)groupby_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+ Py_TPFLAGS_BASETYPE, /* tp_flags */
+ groupby_doc, /* tp_doc */
+ (traverseproc)groupby_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)groupby_next, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ groupby_new, /* tp_new */
+ PyObject_GC_Del, /* tp_free */
+};
+
+
+/* _itergroup object (internal) **********************************************/
+
+typedef struct {
+ PyObject_HEAD
+ PyObject *parent;
+} _itergroupobject;
+
+static PyTypeObject _itergroup_type;
+
+static PyObject *
+_itergroup_create(groupbyobject *parent)
+{
+ _itergroupobject *igo;
+
+ igo = PyObject_New(_itergroupobject, &_itergroup_type);
+ if (igo == NULL)
+ return PyErr_NoMemory();
+ igo->parent = (PyObject *)parent;
+ Py_INCREF(parent);
+
+ return (PyObject *)igo;
+}
+
+static void
+_itergroup_dealloc(_itergroupobject *igo)
+{
+ Py_XDECREF(igo->parent);
+ PyObject_Del(igo);
+}
+
+static PyObject *
+_itergroup_next(_itergroupobject *igo)
+{
+ groupbyobject *gbo = (groupbyobject *)igo->parent;
+ PyObject *value, *newkey;
+ int rcmp;
+
+ if (gbo->oldvalue != NULL) {
+ value = gbo->oldvalue;
+ gbo->oldvalue = NULL;
+ } else {
+ value = PyIter_Next(gbo->it);
+ if (value == NULL)
+ return NULL;
+ }
+
+ newkey = PyObject_CallFunctionObjArgs(gbo->key, value, NULL);
+ if (newkey == NULL) {
+ /* throw the value away because it may fail on next iteration
+ * trial again. */
+ Py_DECREF(value);
+ return NULL;
+ }
+
+ if (gbo->oldkey == NULL) {
+ gbo->oldkey = newkey;
+ return value;
+ } else if (PyObject_Cmp(gbo->oldkey, newkey, &rcmp) == -1) {
+ Py_DECREF(newkey);
+ return NULL;
+ }
+
+ if (rcmp == 0) {
+ Py_DECREF(newkey);
+ return value;
+ } else {
+ Py_DECREF(gbo->oldkey);
+ gbo->oldkey = newkey;
+ gbo->oldvalue = value;
+ return NULL;
+ }
+}
+
+static PyTypeObject _itergroup_type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "itertools._itergroup", /* tp_name */
+ sizeof(_itergroupobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)_itergroup_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)_itergroup_next, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+ _PyObject_Del, /* tp_free */
+};
+
+
/* module level code ********************************************************/
PyDoc_STRVAR(module_doc,
@@ -2103,6 +2369,7 @@
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
+groupby(key, iterable) --> iterates iteraters by group\n\
");
@@ -2130,6 +2397,7 @@
&count_type,
&izip_type,
&repeat_type,
+ &groupby_type,
NULL
};
More information about the Python-Dev
mailing list