[Python-3000-checkins] r58779 - python/branches/py3k-pep3137/Objects/bytesobject.c python/branches/py3k-pep3137/Objects/stringobject.c

Fri Nov 2 18:01:32 CET 2007

Author: guido.van.rossum
Date: Fri Nov  2 18:01:32 2007
New Revision: 58779

Modified:
   python/branches/py3k-pep3137/Objects/bytesobject.c
   python/branches/py3k-pep3137/Objects/stringobject.c
Log:
Quickly fixed byts_join() and string_join() to be more like each other,
and to accept mixed bytes/buffer items.
The code should be unified into a single function though.


Modified: python/branches/py3k-pep3137/Objects/bytesobject.c
==============================================================================

--- python/branches/py3k-pep3137/Objects/bytesobject.c	(original)
+++ python/branches/py3k-pep3137/Objects/bytesobject.c	Fri Nov  2 18:01:32 2007
@@ -2800,9 +2800,10 @@
     items = PySequence_Fast_ITEMS(seq);
 
     /* Compute the total size, and check that they are all bytes */
+    /* XXX Shouldn't we use _getbuffer() on these items instead? */
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        if (!PyBytes_Check(obj)) {
+        if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
             PyErr_Format(PyExc_TypeError,
                          "can only join an iterable of bytes "
                          "(item %ld has type '%.100s')",
@@ -2812,7 +2813,7 @@
         }
         if (i > 0)
             totalsize += mysize;
-        totalsize += PyBytes_GET_SIZE(obj);
+        totalsize += Py_Size(obj);
         if (totalsize < 0) {
             PyErr_NoMemory();
             goto error;
@@ -2826,12 +2827,17 @@
     dest = PyBytes_AS_STRING(result);
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        Py_ssize_t size = PyBytes_GET_SIZE(obj);
-        if (i > 0) {
+        Py_ssize_t size = Py_Size(obj);
+        char *buf;
+        if (PyBytes_Check(obj))
+           buf = PyBytes_AS_STRING(obj);
+        else
+           buf = PyString_AS_STRING(obj);
+        if (i) {
             memcpy(dest, self->ob_bytes, mysize);
             dest += mysize;
         }
-        memcpy(dest, PyBytes_AS_STRING(obj), size);
+        memcpy(dest, buf, size);
         dest += size;
     }
 

Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c	(original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c	Fri Nov  2 18:01:32 2007
@@ -1,5 +1,8 @@
 /* String object implementation */
 
+/* XXX This is now called 'bytes' as far as the user is concerned.
+   Many docstrings and error messages need to be cleaned up. */
+
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
@@ -1410,7 +1413,7 @@
 sequence.  The separator between elements is S.");
 
 static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
 {
 	char *sep = PyString_AS_STRING(self);
 	const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1433,7 +1436,7 @@
 	}
 	if (seqlen == 1) {
 		item = PySequence_Fast_GET_ITEM(seq, 0);
-		if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+		if (PyString_CheckExact(item)) {
 			Py_INCREF(item);
 			Py_DECREF(seq);
 			return item;
@@ -1443,32 +1446,21 @@
 	/* There are at least two things to join, or else we have a subclass
 	 * of the builtin types in the sequence.
 	 * Do a pre-pass to figure out the total amount of space we'll
-	 * need (sz), see whether any argument is absurd, and defer to
-	 * the Unicode join if appropriate.
+	 * need (sz), and see whether all argument are bytes.
 	 */
+	/* XXX Shouldn't we use _getbuffer() on these items instead? */
 	for (i = 0; i < seqlen; i++) {
 		const size_t old_sz = sz;
 		item = PySequence_Fast_GET_ITEM(seq, i);
-		if (!PyString_Check(item)){
-			if (PyUnicode_Check(item)) {
-				/* Defer to Unicode join.
-				 * CAUTION:  There's no gurantee that the
-				 * original sequence can be iterated over
-				 * again, so we must pass seq here.
-				 */
-				PyObject *result;
-				result = PyUnicode_Join((PyObject *)self, seq);
-				Py_DECREF(seq);
-				return result;
-			}
+		if (!PyString_Check(item) && !PyBytes_Check(item)) {
 			PyErr_Format(PyExc_TypeError,
-				     "sequence item %zd: expected string,"
+				     "sequence item %zd: expected bytes,"
 				     " %.80s found",
 				     i, Py_Type(item)->tp_name);
 			Py_DECREF(seq);
 			return NULL;
 		}
-		sz += PyString_GET_SIZE(item);
+		sz += Py_Size(item);
 		if (i != 0)
 			sz += seplen;
 		if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -1487,17 +1479,24 @@
 	}
 
 	/* Catenate everything. */
+	/* I'm not worried about a PyBytes item growing because there's
+	   nowhere in this function where we release the GIL. */
 	p = PyString_AS_STRING(res);
 	for (i = 0; i < seqlen; ++i) {
 		size_t n;
-		item = PySequence_Fast_GET_ITEM(seq, i);
-		n = PyString_GET_SIZE(item);
-		Py_MEMCPY(p, PyString_AS_STRING(item), n);
-		p += n;
-		if (i < seqlen - 1) {
+                char *q;
+		if (i) {
 			Py_MEMCPY(p, sep, seplen);
 			p += seplen;
 		}
+		item = PySequence_Fast_GET_ITEM(seq, i);
+		n = Py_Size(item);
+                if (PyString_Check(item))
+			q = PyString_AS_STRING(item);
+		else
+			q = PyBytes_AS_STRING(item);
+		Py_MEMCPY(p, q, n);
+		p += n;
 	}
 
 	Py_DECREF(seq);
@@ -1509,7 +1508,7 @@
 {
 	assert(sep != NULL && PyString_Check(sep));
 	assert(x != NULL);
-	return string_join((PyStringObject *)sep, x);
+	return string_join(sep, x);
 }
 
 Py_LOCAL_INLINE(void)