[Python-checkins] python/dist/src/Modules itertoolsmodule.c, 1.26, 1.27

rhettinger at users.sourceforge.net rhettinger at users.sourceforge.net
Sat Dec 6 11:23:09 EST 2003


Update of /cvsroot/python/python/dist/src/Modules
In directory sc8-pr-cvs1:/tmp/cvs-serv31126/Modules

Modified Files:
	itertoolsmodule.c 
Log Message:
Implement itertools.groupby()

Original idea by Guido van Rossum.
Idea for skipable inner iterators by Raymond Hettinger.
Idea for argument order and identity function default by Alex Martelli.
Implementation by Hye-Shik Chang (with tweaks by Raymond Hettinger).



Index: itertoolsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** itertoolsmodule.c	12 Nov 2003 14:32:26 -0000	1.26
--- itertoolsmodule.c	6 Dec 2003 16:23:06 -0000	1.27
***************
*** 8,11 ****
--- 8,328 ----
  */
  
+ 
+ /* groupby object ***********************************************************/
+ 
+ typedef struct {
+ 	PyObject_HEAD
+ 	PyObject *it;
+ 	PyObject *keyfunc;
+ 	PyObject *tgtkey;
+ 	PyObject *currkey;
+ 	PyObject *currvalue;
+ } groupbyobject;
+ 
+ static PyTypeObject groupby_type;
+ static PyObject *_grouper_create(groupbyobject *, PyObject *);
+ 
+ static PyObject *
+ groupby_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+ {
+ 	static char *kwargs[] = {"iterable", "key", NULL};
+ 	groupbyobject *gbo;
+  	PyObject *it, *keyfunc = Py_None;
+  
+  	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:groupby", kwargs,
+ 					 &it, &keyfunc))
+ 		return NULL;
+ 
+ 	gbo = (groupbyobject *)type->tp_alloc(type, 0);
+ 	if (gbo == NULL)
+ 		return NULL;
+ 	gbo->tgtkey = NULL;
+ 	gbo->currkey = NULL;
+ 	gbo->currvalue = NULL;
+ 	gbo->keyfunc = keyfunc;
+ 	Py_INCREF(keyfunc);
+ 	gbo->it = PyObject_GetIter(it);
+ 	if (gbo->it == NULL) {
+ 		Py_DECREF(gbo);
+ 		return NULL;
+ 	}
+ 	return (PyObject *)gbo;
+ }
+ 
+ static void
+ groupby_dealloc(groupbyobject *gbo)
+ {
+ 	PyObject_GC_UnTrack(gbo);
+ 	Py_XDECREF(gbo->it);
+ 	Py_XDECREF(gbo->keyfunc);
+ 	Py_XDECREF(gbo->tgtkey);
+ 	Py_XDECREF(gbo->currkey);
+ 	Py_XDECREF(gbo->currvalue);
+ 	gbo->ob_type->tp_free(gbo);
+ }
+ 
+ static int
+ groupby_traverse(groupbyobject *gbo, visitproc visit, void *arg)
+ {
+ 	int err;
+ 
+ 	if (gbo->it) {
+ 		err = visit(gbo->it, arg);
+ 		if (err)
+ 			return err;
+ 	}
+ 	if (gbo->keyfunc) {
+ 		err = visit(gbo->keyfunc, arg);
+ 		if (err)
+ 			return err;
+ 	}
+ 	if (gbo->tgtkey) {
+ 		err = visit(gbo->tgtkey, arg);
+ 		if (err)
+ 			return err;
+ 	}
+ 	if (gbo->currkey) {
+ 		err = visit(gbo->currkey, arg);
+ 		if (err)
+ 			return err;
+ 	}
+ 	if (gbo->currvalue) {
+ 		err = visit(gbo->currvalue, arg);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return 0;
+ }
+ 
+ static PyObject *
+ groupby_next(groupbyobject *gbo)
+ {
+ 	PyObject *newvalue, *newkey, *r, *grouper;
+ 
+ 	/* skip to next iteration group */
+ 	for (;;) {
+ 		if (gbo->currkey == NULL)
+ 			/* pass */;
+ 		else if (gbo->tgtkey == NULL)
+ 			break;
+ 		else {
+ 			int rcmp;
+ 
+ 			rcmp = PyObject_RichCompareBool(gbo->tgtkey,
+ 							gbo->currkey, Py_EQ);
+ 			if (rcmp == -1)
+ 				return NULL;
+ 			else if (rcmp == 0)
+ 				break;
+ 		}
+ 
+ 		newvalue = PyIter_Next(gbo->it);
+ 		if (newvalue == NULL)
+ 			return NULL;
+ 
+ 		if (gbo->keyfunc == Py_None) {
+ 			newkey = newvalue;
+ 			Py_INCREF(newvalue);
+ 		} else {
+ 			newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc,
+ 							      newvalue, NULL);
+ 			if (newkey == NULL) {
+ 				Py_DECREF(newvalue);
+ 				return NULL;
+ 			}
+ 		}
+ 
+ 		Py_XDECREF(gbo->currkey);
+ 		gbo->currkey = newkey;
+ 		Py_XDECREF(gbo->currvalue);
+ 		gbo->currvalue = newvalue;
+ 	}
+ 
+ 	Py_XDECREF(gbo->tgtkey);
+ 	gbo->tgtkey = gbo->currkey;
+ 	Py_INCREF(gbo->currkey);
+ 
+ 	grouper = _grouper_create(gbo, gbo->tgtkey);
+ 	if (grouper == NULL)
+ 		return NULL;
+ 
+ 	r = PyTuple_Pack(2, gbo->currkey, grouper);
+ 	Py_DECREF(grouper);
+ 	return r;
+ }
+ 
+ PyDoc_STRVAR(groupby_doc,
+ "groupby(iterable[, keyfunc]) -> create an iterator which returns\n\
+ (key, sub-iterator) grouped by each value of key(value).\n");
+ 
+ static PyTypeObject groupby_type = {
+ 	PyObject_HEAD_INIT(NULL)
+ 	0,				/* ob_size */
+ 	"itertools.groupby",		/* tp_name */
+ 	sizeof(groupbyobject),		/* tp_basicsize */
+ 	0,				/* tp_itemsize */
+ 	/* methods */
+ 	(destructor)groupby_dealloc,	/* tp_dealloc */
+ 	0,				/* tp_print */
+ 	0,				/* tp_getattr */
+ 	0,				/* tp_setattr */
+ 	0,				/* tp_compare */
+ 	0,				/* tp_repr */
+ 	0,				/* tp_as_number */
+ 	0,				/* tp_as_sequence */
+ 	0,				/* tp_as_mapping */
+ 	0,				/* tp_hash */
+ 	0,				/* tp_call */
+ 	0,				/* tp_str */
+ 	PyObject_GenericGetAttr,	/* tp_getattro */
+ 	0,				/* tp_setattro */
+ 	0,				/* tp_as_buffer */
+ 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+ 		Py_TPFLAGS_BASETYPE,	/* tp_flags */
+ 	groupby_doc,			/* tp_doc */
+ 	(traverseproc)groupby_traverse,	/* tp_traverse */
+ 	0,				/* tp_clear */
+ 	0,				/* tp_richcompare */
+ 	0,				/* tp_weaklistoffset */
+ 	PyObject_SelfIter,		/* tp_iter */
+ 	(iternextfunc)groupby_next,	/* tp_iternext */
+ 	0,				/* tp_methods */
+ 	0,				/* tp_members */
+ 	0,				/* tp_getset */
+ 	0,				/* tp_base */
+ 	0,				/* tp_dict */
+ 	0,				/* tp_descr_get */
+ 	0,				/* tp_descr_set */
+ 	0,				/* tp_dictoffset */
+ 	0,				/* tp_init */
+ 	0,				/* tp_alloc */
+ 	groupby_new,			/* tp_new */
+ 	PyObject_GC_Del,		/* tp_free */
+ };
+ 
+ 
+ /* _grouper object (internal) ************************************************/
+ 
+ typedef struct {
+ 	PyObject_HEAD
+ 	PyObject *parent;
+ 	PyObject *tgtkey;
+ } _grouperobject;
+ 
+ static PyTypeObject _grouper_type;
+ 
+ static PyObject *
+ _grouper_create(groupbyobject *parent, PyObject *tgtkey)
+ {
+ 	_grouperobject *igo;
+ 
+ 	igo = PyObject_New(_grouperobject, &_grouper_type);
+ 	if (igo == NULL)
+ 		return NULL;
+ 	igo->parent = (PyObject *)parent;
+ 	Py_INCREF(parent);
+ 	igo->tgtkey = tgtkey;
+ 	Py_INCREF(tgtkey);
+ 
+ 	return (PyObject *)igo;
+ }
+ 
+ static void
+ _grouper_dealloc(_grouperobject *igo)
+ {
+ 	Py_DECREF(igo->parent);
+ 	Py_DECREF(igo->tgtkey);
+ 	PyObject_Del(igo);
+ }
+ 
+ static PyObject *
+ _grouper_next(_grouperobject *igo)
+ {
+ 	groupbyobject *gbo = (groupbyobject *)igo->parent;
+ 	PyObject *newvalue, *newkey, *r;
+ 	int rcmp;
+ 
+ 	if (gbo->currvalue == NULL) {
+ 		newvalue = PyIter_Next(gbo->it);
+ 		if (newvalue == NULL)
+ 			return NULL;
+ 
+ 		if (gbo->keyfunc == Py_None) {
+ 			newkey = newvalue;
+ 			Py_INCREF(newvalue);
+ 		} else {
+ 			newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc,
+ 							      newvalue, NULL);
+ 			if (newkey == NULL) {
+ 				Py_DECREF(newvalue);
+ 				return NULL;
+ 			}
+ 		}
+ 
+ 		assert(gbo->currkey == NULL);
+ 		gbo->currkey = newkey;
+ 		gbo->currvalue = newvalue;
+ 	}
+ 
+ 	assert(gbo->currkey != NULL);
+ 	rcmp = PyObject_RichCompareBool(igo->tgtkey, gbo->currkey, Py_EQ);
+ 	if (rcmp <= 0)
+ 		/* got any error or current group is end */
+ 		return NULL;
+ 
+ 	r = gbo->currvalue;
+ 	gbo->currvalue = NULL;
+ 	Py_DECREF(gbo->currkey);
+ 	gbo->currkey = NULL;
+ 
+ 	return r;
+ }
+ 
+ static PyTypeObject _grouper_type = {
+ 	PyObject_HEAD_INIT(NULL)
+ 	0,				/* ob_size */
+ 	"itertools._grouper",		/* tp_name */
+ 	sizeof(_grouperobject),		/* tp_basicsize */
+ 	0,				/* tp_itemsize */
+ 	/* methods */
+ 	(destructor)_grouper_dealloc,	/* tp_dealloc */
+ 	0,				/* tp_print */
+ 	0,				/* tp_getattr */
+ 	0,				/* tp_setattr */
+ 	0,				/* tp_compare */
+ 	0,				/* tp_repr */
+ 	0,				/* tp_as_number */
+ 	0,				/* tp_as_sequence */
+ 	0,				/* tp_as_mapping */
+ 	0,				/* tp_hash */
+ 	0,				/* tp_call */
+ 	0,				/* tp_str */
+ 	PyObject_GenericGetAttr,	/* tp_getattro */
+ 	0,				/* tp_setattro */
+ 	0,				/* tp_as_buffer */
+ 	Py_TPFLAGS_DEFAULT,		/* tp_flags */
+ 	0,				/* tp_doc */
+ 	0, 				/* tp_traverse */
+ 	0,				/* tp_clear */
+ 	0,				/* tp_richcompare */
+ 	0,				/* tp_weaklistoffset */
+ 	PyObject_SelfIter,		/* tp_iter */
+ 	(iternextfunc)_grouper_next,	/* tp_iternext */
+ 	0,				/* tp_methods */
+ 	0,				/* tp_members */
+ 	0,				/* tp_getset */
+ 	0,				/* tp_base */
+ 	0,				/* tp_dict */
+ 	0,				/* tp_descr_get */
+ 	0,				/* tp_descr_set */
+ 	0,				/* tp_dictoffset */
+ 	0,				/* tp_init */
+ 	0,				/* tp_alloc */
+ 	0,				/* tp_new */
+ 	PyObject_Del,			/* tp_free */
+ };
+ 
+  
+ 
  /* tee object and with supporting function and objects ***************/
  
***************
*** 2104,2107 ****
--- 2421,2425 ----
  takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
  dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
+ groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
  ");
  
***************
*** 2131,2134 ****
--- 2449,2453 ----
  		&izip_type,
  		&repeat_type,
+ 		&groupby_type,
  		NULL
  	};
***************
*** 2149,2152 ****
  	if (PyType_Ready(&tee_type) < 0)
  		return;
! 
  }
--- 2468,2472 ----
  	if (PyType_Ready(&tee_type) < 0)
  		return;
! 	if (PyType_Ready(&_grouper_type) < 0)
! 		return;
  }





More information about the Python-checkins mailing list