[Python-checkins] CVS: python/dist/src/Objects stringobject.c,2.97,2.98

Tim Peters tim_one@users.sourceforge.net
Thu, 18 Jan 2001 19:03:49 -0800


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv10746/python/dist/src/objects

Modified Files:
	stringobject.c 
Log Message:
Derivative of patch #102549, "simpler, faster(!) implementation of string.join".
Also fixes two long-standing bugs (present in 2.0):
1. .join() didn't check that the result size fit in an int.
2. sep.join(s) when len(s)==1 returned s[0] unchanged, regardless of
   s[0]'s type; e.g., "".join([3]) returned 3 instead of raising
   TypeError (overly optimistic optimization).
I resisted a keen temptation to make .join() apply str() automagically.


Index: stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.97
retrieving revision 2.98
diff -C2 -r2.97 -r2.98
*** stringobject.c	2001/01/16 11:54:12	2.97
--- stringobject.c	2001/01/19 03:03:47	2.98
***************
*** 795,805 ****
  {
  	char *sep = PyString_AS_STRING(self);
! 	int seplen = PyString_GET_SIZE(self);
  	PyObject *res = NULL;
- 	int reslen = 0;
  	char *p;
  	int seqlen = 0;
! 	int sz = 100;
! 	int i, slen, sz_incr;
  	PyObject *orig, *seq, *item;
  
--- 795,804 ----
  {
  	char *sep = PyString_AS_STRING(self);
! 	const int seplen = PyString_GET_SIZE(self);
  	PyObject *res = NULL;
  	char *p;
  	int seqlen = 0;
! 	size_t sz = 0;
! 	int i;
  	PyObject *orig, *seq, *item;
  
***************
*** 807,811 ****
  		return NULL;
  
! 	if (!(seq = PySequence_Fast(orig, ""))) {
  		if (PyErr_ExceptionMatches(PyExc_TypeError))
  			PyErr_Format(PyExc_TypeError,
--- 806,811 ----
  		return NULL;
  
! 	seq = PySequence_Fast(orig, "");
! 	if (seq == NULL) {
  		if (PyErr_ExceptionMatches(PyExc_TypeError))
  			PyErr_Format(PyExc_TypeError,
***************
*** 814,823 ****
  		return NULL;
  	}
! 	/* From here on out, errors go through finally: for proper
! 	 * reference count manipulations.
! 	 */
  	seqlen = PySequence_Size(seq);
  	if (seqlen == 1) {
  		item = PySequence_Fast_GET_ITEM(seq, 0);
  		Py_INCREF(item);
  		Py_DECREF(seq);
--- 814,833 ----
  		return NULL;
  	}
! 
  	seqlen = PySequence_Size(seq);
+ 	if (seqlen == 0) {
+ 		Py_DECREF(seq);
+ 		return PyString_FromString("");
+ 	}
  	if (seqlen == 1) {
  		item = PySequence_Fast_GET_ITEM(seq, 0);
+ 		if (!PyString_Check(item) && !PyUnicode_Check(item)) {
+ 			PyErr_Format(PyExc_TypeError,
+ 				     "sequence item 0: expected string,"
+ 				     " %.80s found",
+ 				     item->ob_type->tp_name);
+ 			Py_DECREF(seq);
+ 			return NULL;
+ 		}
  		Py_INCREF(item);
  		Py_DECREF(seq);
***************
*** 825,838 ****
  	}
  
! 	if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
! 		goto finally;
! 
! 	p = PyString_AS_STRING(res);
! 
  	for (i = 0; i < seqlen; i++) {
  		item = PySequence_Fast_GET_ITEM(seq, i);
  		if (!PyString_Check(item)){
  			if (PyUnicode_Check(item)) {
- 				Py_DECREF(res);
  				Py_DECREF(seq);
  				return PyUnicode_Join((PyObject *)self, orig);
--- 835,847 ----
  	}
  
! 	/* There are at least two things to join.  Do a pre-pass to figure out
! 	 * the total amount of space we'll need (sz), see whether any argument
! 	 * is absurd, and defer to the Unicode join if appropriate.
! 	 */
  	for (i = 0; i < seqlen; i++) {
+ 		const size_t old_sz = sz;
  		item = PySequence_Fast_GET_ITEM(seq, i);
  		if (!PyString_Check(item)){
  			if (PyUnicode_Check(item)) {
  				Py_DECREF(seq);
  				return PyUnicode_Join((PyObject *)self, orig);
***************
*** 842,878 ****
  				     " %.80s found",
  				     i, item->ob_type->tp_name);
! 			goto finally;
  		}
! 		slen = PyString_GET_SIZE(item);
! 		while (reslen + slen + seplen >= sz) {
! 			/* at least double the size of the string */
! 			sz_incr = slen + seplen > sz ? slen + seplen : sz;
! 			if (_PyString_Resize(&res, sz + sz_incr)) {
! 				goto finally;
! 			}
! 			sz += sz_incr;
! 			p = PyString_AS_STRING(res) + reslen;
  		}
! 		if (i > 0) {
  			memcpy(p, sep, seplen);
  			p += seplen;
- 			reslen += seplen;
  		}
- 		memcpy(p, PyString_AS_STRING(item), slen);
- 		p += slen;
- 		reslen += slen;
  	}
- 	if (_PyString_Resize(&res, reslen))
- 		goto finally;
- 	Py_DECREF(seq);
- 	return res;
  
-   finally:
  	Py_DECREF(seq);
! 	Py_XDECREF(res);
! 	return NULL;
  }
- 
- 
  
  static long
--- 851,892 ----
  				     " %.80s found",
  				     i, item->ob_type->tp_name);
! 			Py_DECREF(seq);
! 			return NULL;
  		}
! 		sz += PyString_GET_SIZE(item);
! 		if (i != 0)
! 			sz += seplen;
! 		if (sz < old_sz || sz > INT_MAX) {
! 			PyErr_SetString(PyExc_OverflowError,
! 				"join() is too long for a Python string");
! 			Py_DECREF(seq);
! 			return NULL;
  		}
! 	}
! 
! 	/* Allocate result space. */
! 	res = PyString_FromStringAndSize((char*)NULL, (int)sz);
! 	if (res == NULL) {
! 		Py_DECREF(seq);
! 		return NULL;
! 	}
! 
! 	/* Catenate everything. */
! 	p = PyString_AS_STRING(res);
! 	for (i = 0; i < seqlen; ++i) {
! 		size_t n;
! 		item = PySequence_Fast_GET_ITEM(seq, i);
! 		n = PyString_GET_SIZE(item);
! 		memcpy(p, PyString_AS_STRING(item), n);
! 		p += n;
! 		if (i < seqlen - 1) {
  			memcpy(p, sep, seplen);
  			p += seplen;
  		}
  	}
  
  	Py_DECREF(seq);
! 	return res;
  }
  
  static long