[Python-Dev] "groupby" iterator

Hye-Shik Chang perky at i18n.org
Fri Nov 28 14:54:20 EST 2003


On Thu, Nov 27, 2003 at 01:49:40PM -0800, Guido van Rossum wrote:
> 
> Thanks!  Here's a class version of the same, which strikes me as
> slightly easier to understand (though probably slower due to all the
> instance variable access).  It may serve as an easier model for a C
> implementation.  I decided not to deal explicitly with out-of-order
> access; if the caller doesn't play by the rules, some of their groups
> will be split and jumbled, but each split group will still have
> matching keys.

Here's yet another implementation for itertoolsmodule.c. (see
attachment)
I wrote it after the shower (really!) :)


Regards,
  Hye-Shik
-------------- next part --------------
Index: Modules/itertoolsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v
retrieving revision 1.26
diff -u -r1.26 itertoolsmodule.c
--- Modules/itertoolsmodule.c	12 Nov 2003 14:32:26 -0000	1.26
+++ Modules/itertoolsmodule.c	28 Nov 2003 19:46:43 -0000
@@ -2081,6 +2081,272 @@
 };
 
 
+/* groupby object ***********************************************************/
+
+typedef struct {
+	PyObject_HEAD
+	PyObject *it;
+	PyObject *key;
+	PyObject *oldvalue;
+	PyObject *oldkey;
+} groupbyobject;
+
+static PyTypeObject groupby_type;
+static PyObject *_itergroup_create(groupbyobject *);
+
+static PyObject *
+groupby_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	groupbyobject *gbo;
+	PyObject *it, *key;
+
+	if (!PyArg_ParseTuple(args, "OO:groupby", &key, &it))
+		return NULL;
+
+	if (!PyCallable_Check(key)) {
+		PyErr_SetString(PyExc_ValueError,
+		   "Key argument must be a callable object.");
+		return NULL;
+	}
+
+	gbo = (groupbyobject *)type->tp_alloc(type, 0);
+	if (gbo == NULL)
+		return NULL;
+	gbo->oldvalue = NULL;
+	gbo->oldkey = NULL;
+	gbo->key = key;
+	Py_INCREF(key);
+	gbo->it = PyObject_GetIter(it);
+	if (it == NULL) {
+		Py_DECREF(gbo);
+		return NULL;
+	}
+	return (PyObject *)gbo;
+}
+
+static void
+groupby_dealloc(groupbyobject *gbo)
+{
+	PyObject_GC_UnTrack(gbo);
+	Py_XDECREF(gbo->it);
+	Py_XDECREF(gbo->key);
+	Py_XDECREF(gbo->oldvalue);
+	Py_XDECREF(gbo->oldkey);
+	gbo->ob_type->tp_free(gbo);
+}
+
+static int
+groupby_traverse(groupbyobject *gbo, visitproc visit, void *arg)
+{
+	int err;
+
+	if (gbo->it) {
+		err = visit(gbo->it, arg);
+		if (err)
+			return err;
+	}
+
+	if (gbo->key) {
+		err = visit(gbo->key, arg);
+		if (err)
+			return err;
+	}
+
+	if (gbo->oldvalue) {
+		err = visit(gbo->oldvalue, arg);
+		if (err)
+			return err;
+	}
+
+	if (gbo->oldkey) {
+		err = visit(gbo->oldkey, arg);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static PyObject *
+groupby_next(groupbyobject *gbo)
+{
+	if (gbo->oldvalue == NULL) {
+		gbo->oldvalue = PyIter_Next(gbo->it);
+		if (gbo->oldvalue == NULL)
+			return NULL;
+	}
+
+	return _itergroup_create(gbo);
+}
+
+PyDoc_STRVAR(groupby_doc,
+"groupby(key, iterable) -> create an iterator which returns sub-iterators\n\
+grouped by key(value).\n");
+
+static PyTypeObject groupby_type = {
+	PyObject_HEAD_INIT(NULL)
+	0,				/* ob_size */
+	"itertools.groupby",		/* tp_name */
+	sizeof(groupbyobject),		/* tp_basicsize */
+	0,				/* tp_itemsize */
+	/* methods */
+	(destructor)groupby_dealloc,	/* tp_dealloc */
+	0,				/* tp_print */
+	0,				/* tp_getattr */
+	0,				/* tp_setattr */
+	0,				/* tp_compare */
+	0,				/* tp_repr */
+	0,				/* tp_as_number */
+	0,				/* tp_as_sequence */
+	0,				/* tp_as_mapping */
+	0,				/* tp_hash */
+	0,				/* tp_call */
+	0,				/* tp_str */
+	PyObject_GenericGetAttr,	/* tp_getattro */
+	0,				/* tp_setattro */
+	0,				/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+		Py_TPFLAGS_BASETYPE,	/* tp_flags */
+	groupby_doc,			/* tp_doc */
+	(traverseproc)groupby_traverse,	/* tp_traverse */
+	0,				/* tp_clear */
+	0,				/* tp_richcompare */
+	0,				/* tp_weaklistoffset */
+	PyObject_SelfIter,		/* tp_iter */
+	(iternextfunc)groupby_next,	/* tp_iternext */
+	0,				/* tp_methods */
+	0,				/* tp_members */
+	0,				/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	groupby_new,			/* tp_new */
+	PyObject_GC_Del,		/* tp_free */
+};
+
+
+/* _itergroup object (internal) **********************************************/
+
+typedef struct {
+	PyObject_HEAD
+	PyObject *parent;
+} _itergroupobject;
+
+static PyTypeObject _itergroup_type;
+
+static PyObject *
+_itergroup_create(groupbyobject *parent)
+{
+	_itergroupobject *igo;
+
+	igo = PyObject_New(_itergroupobject, &_itergroup_type);
+	if (igo == NULL)
+		return PyErr_NoMemory();
+	igo->parent = (PyObject *)parent;
+	Py_INCREF(parent);
+
+	return (PyObject *)igo;
+}
+
+static void
+_itergroup_dealloc(_itergroupobject *igo)
+{
+	Py_XDECREF(igo->parent);
+	PyObject_Del(igo);
+}
+
+static PyObject *
+_itergroup_next(_itergroupobject *igo)
+{
+	groupbyobject *gbo = (groupbyobject *)igo->parent;
+	PyObject *value, *newkey;
+	int rcmp;
+
+	if (gbo->oldvalue != NULL) {
+		value = gbo->oldvalue;
+		gbo->oldvalue = NULL;
+	} else {
+		value = PyIter_Next(gbo->it);
+		if (value == NULL)
+			return NULL;
+	}
+
+	newkey = PyObject_CallFunctionObjArgs(gbo->key, value, NULL);
+	if (newkey == NULL) {
+		/* throw the value away because it may fail on next iteration
+		 * trial again. */
+		Py_DECREF(value);
+		return NULL;
+	}
+
+	if (gbo->oldkey == NULL) {
+		gbo->oldkey = newkey;
+		return value;
+	} else if (PyObject_Cmp(gbo->oldkey, newkey, &rcmp) == -1) {
+		Py_DECREF(newkey);
+		return NULL;
+	}
+
+	if (rcmp == 0) {
+		Py_DECREF(newkey);
+		return value;
+	} else {
+		Py_DECREF(gbo->oldkey);
+		gbo->oldkey = newkey;
+		gbo->oldvalue = value;
+		return NULL;
+	}
+}
+
+static PyTypeObject _itergroup_type = {
+	PyObject_HEAD_INIT(NULL)
+	0,				/* ob_size */
+	"itertools._itergroup",		/* tp_name */
+	sizeof(_itergroupobject),	/* tp_basicsize */
+	0,				/* tp_itemsize */
+	/* methods */
+	(destructor)_itergroup_dealloc,	/* tp_dealloc */
+	0,				/* tp_print */
+	0,				/* tp_getattr */
+	0,				/* tp_setattr */
+	0,				/* tp_compare */
+	0,				/* tp_repr */
+	0,				/* tp_as_number */
+	0,				/* tp_as_sequence */
+	0,				/* tp_as_mapping */
+	0,				/* tp_hash */
+	0,				/* tp_call */
+	0,				/* tp_str */
+	PyObject_GenericGetAttr,	/* tp_getattro */
+	0,				/* tp_setattro */
+	0,				/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,		/* tp_flags */
+	0,				/* tp_doc */
+	0, 				/* tp_traverse */
+	0,				/* tp_clear */
+	0,				/* tp_richcompare */
+	0,				/* tp_weaklistoffset */
+	PyObject_SelfIter,		/* tp_iter */
+	(iternextfunc)_itergroup_next,	/* tp_iternext */
+	0,				/* tp_methods */
+	0,				/* tp_members */
+	0,				/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	0,				/* tp_new */
+	_PyObject_Del,			/* tp_free */
+};
+
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
@@ -2103,6 +2369,7 @@
 chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
 takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
 dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
+groupby(key, iterable) --> iterates iteraters by group\n\
 ");
 
 
@@ -2130,6 +2397,7 @@
 		&count_type,
 		&izip_type,
 		&repeat_type,
+		&groupby_type,
 		NULL
 	};
 


More information about the Python-Dev mailing list