[Python-checkins] CVS: python/dist/src/Objects stringobject.c,2.97,2.98
Tim Peters
tim_one@users.sourceforge.net
Thu, 18 Jan 2001 19:03:49 -0800
Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv10746/python/dist/src/objects
Modified Files:
stringobject.c
Log Message:
Derivative of patch #102549, "simpler, faster(!) implementation of string.join".
Also fixes two long-standing bugs (present in 2.0):
1. .join() didn't check that the result size fit in an int.
2. string.join(s) when len(s)==1 returned s[0] regardless of s[0]'s
type; e.g., "".join([3]) returned 3 instead of raising TypeError (an overly optimistic optimization).
I resisted a keen temptation to make .join() apply str() automagically.
Index: stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.97
retrieving revision 2.98
diff -C2 -r2.97 -r2.98
*** stringobject.c 2001/01/16 11:54:12 2.97
--- stringobject.c 2001/01/19 03:03:47 2.98
***************
*** 795,805 ****
{
char *sep = PyString_AS_STRING(self);
! int seplen = PyString_GET_SIZE(self);
PyObject *res = NULL;
- int reslen = 0;
char *p;
int seqlen = 0;
! int sz = 100;
! int i, slen, sz_incr;
PyObject *orig, *seq, *item;
--- 795,804 ----
{
char *sep = PyString_AS_STRING(self);
! const int seplen = PyString_GET_SIZE(self);
PyObject *res = NULL;
char *p;
int seqlen = 0;
! size_t sz = 0;
! int i;
PyObject *orig, *seq, *item;
***************
*** 807,811 ****
return NULL;
! if (!(seq = PySequence_Fast(orig, ""))) {
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
--- 806,811 ----
return NULL;
! seq = PySequence_Fast(orig, "");
! if (seq == NULL) {
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
***************
*** 814,823 ****
return NULL;
}
! /* From here on out, errors go through finally: for proper
! * reference count manipulations.
! */
seqlen = PySequence_Size(seq);
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
Py_INCREF(item);
Py_DECREF(seq);
--- 814,833 ----
return NULL;
}
!
seqlen = PySequence_Size(seq);
+ if (seqlen == 0) {
+ Py_DECREF(seq);
+ return PyString_FromString("");
+ }
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (!PyString_Check(item) && !PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item 0: expected string,"
+ " %.80s found",
+ item->ob_type->tp_name);
+ Py_DECREF(seq);
+ return NULL;
+ }
Py_INCREF(item);
Py_DECREF(seq);
***************
*** 825,838 ****
}
! if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
! goto finally;
!
! p = PyString_AS_STRING(res);
!
for (i = 0; i < seqlen; i++) {
item = PySequence_Fast_GET_ITEM(seq, i);
if (!PyString_Check(item)){
if (PyUnicode_Check(item)) {
- Py_DECREF(res);
Py_DECREF(seq);
return PyUnicode_Join((PyObject *)self, orig);
--- 835,847 ----
}
! /* There are at least two things to join. Do a pre-pass to figure out
! * the total amount of space we'll need (sz), see whether any argument
! * is absurd, and defer to the Unicode join if appropriate.
! */
for (i = 0; i < seqlen; i++) {
+ const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
if (!PyString_Check(item)){
if (PyUnicode_Check(item)) {
Py_DECREF(seq);
return PyUnicode_Join((PyObject *)self, orig);
***************
*** 842,878 ****
" %.80s found",
i, item->ob_type->tp_name);
! goto finally;
}
! slen = PyString_GET_SIZE(item);
! while (reslen + slen + seplen >= sz) {
! /* at least double the size of the string */
! sz_incr = slen + seplen > sz ? slen + seplen : sz;
! if (_PyString_Resize(&res, sz + sz_incr)) {
! goto finally;
! }
! sz += sz_incr;
! p = PyString_AS_STRING(res) + reslen;
}
! if (i > 0) {
memcpy(p, sep, seplen);
p += seplen;
- reslen += seplen;
}
- memcpy(p, PyString_AS_STRING(item), slen);
- p += slen;
- reslen += slen;
}
- if (_PyString_Resize(&res, reslen))
- goto finally;
- Py_DECREF(seq);
- return res;
- finally:
Py_DECREF(seq);
! Py_XDECREF(res);
! return NULL;
}
-
-
static long
--- 851,892 ----
" %.80s found",
i, item->ob_type->tp_name);
! Py_DECREF(seq);
! return NULL;
}
! sz += PyString_GET_SIZE(item);
! if (i != 0)
! sz += seplen;
! if (sz < old_sz || sz > INT_MAX) {
! PyErr_SetString(PyExc_OverflowError,
! "join() is too long for a Python string");
! Py_DECREF(seq);
! return NULL;
}
! }
!
! /* Allocate result space. */
! res = PyString_FromStringAndSize((char*)NULL, (int)sz);
! if (res == NULL) {
! Py_DECREF(seq);
! return NULL;
! }
!
! /* Catenate everything. */
! p = PyString_AS_STRING(res);
! for (i = 0; i < seqlen; ++i) {
! size_t n;
! item = PySequence_Fast_GET_ITEM(seq, i);
! n = PyString_GET_SIZE(item);
! memcpy(p, PyString_AS_STRING(item), n);
! p += n;
! if (i < seqlen - 1) {
memcpy(p, sep, seplen);
p += seplen;
}
}
Py_DECREF(seq);
! return res;
}
static long