[Python-3000-checkins] r58779 - python/branches/py3k-pep3137/Objects/bytesobject.c python/branches/py3k-pep3137/Objects/stringobject.c
guido.van.rossum
python-3000-checkins at python.org
Fri Nov 2 18:01:32 CET 2007
Author: guido.van.rossum
Date: Fri Nov 2 18:01:32 2007
New Revision: 58779
Modified:
python/branches/py3k-pep3137/Objects/bytesobject.c
python/branches/py3k-pep3137/Objects/stringobject.c
Log:
Quickly fixed bytes_join() and string_join() to be more like each other,
and to accept mixed bytes/buffer items.
The code should be unified into a single function though.
Modified: python/branches/py3k-pep3137/Objects/bytesobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/bytesobject.c (original)
+++ python/branches/py3k-pep3137/Objects/bytesobject.c Fri Nov 2 18:01:32 2007
@@ -2800,9 +2800,10 @@
items = PySequence_Fast_ITEMS(seq);
/* Compute the total size, and check that they are all bytes */
+ /* XXX Shouldn't we use _getbuffer() on these items instead? */
for (i = 0; i < n; i++) {
PyObject *obj = items[i];
- if (!PyBytes_Check(obj)) {
+ if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
PyErr_Format(PyExc_TypeError,
"can only join an iterable of bytes "
"(item %ld has type '%.100s')",
@@ -2812,7 +2813,7 @@
}
if (i > 0)
totalsize += mysize;
- totalsize += PyBytes_GET_SIZE(obj);
+ totalsize += Py_Size(obj);
if (totalsize < 0) {
PyErr_NoMemory();
goto error;
@@ -2826,12 +2827,17 @@
dest = PyBytes_AS_STRING(result);
for (i = 0; i < n; i++) {
PyObject *obj = items[i];
- Py_ssize_t size = PyBytes_GET_SIZE(obj);
- if (i > 0) {
+ Py_ssize_t size = Py_Size(obj);
+ char *buf;
+ if (PyBytes_Check(obj))
+ buf = PyBytes_AS_STRING(obj);
+ else
+ buf = PyString_AS_STRING(obj);
+ if (i) {
memcpy(dest, self->ob_bytes, mysize);
dest += mysize;
}
- memcpy(dest, PyBytes_AS_STRING(obj), size);
+ memcpy(dest, buf, size);
dest += size;
}
Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c (original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c Fri Nov 2 18:01:32 2007
@@ -1,5 +1,8 @@
/* String object implementation */
+/* XXX This is now called 'bytes' as far as the user is concerned.
+ Many docstrings and error messages need to be cleaned up. */
+
#define PY_SSIZE_T_CLEAN
#include "Python.h"
@@ -1410,7 +1413,7 @@
sequence. The separator between elements is S.");
static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
{
char *sep = PyString_AS_STRING(self);
const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1433,7 +1436,7 @@
}
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
- if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+ if (PyString_CheckExact(item)) {
Py_INCREF(item);
Py_DECREF(seq);
return item;
@@ -1443,32 +1446,21 @@
/* There are at least two things to join, or else we have a subclass
* of the builtin types in the sequence.
* Do a pre-pass to figure out the total amount of space we'll
- * need (sz), see whether any argument is absurd, and defer to
- * the Unicode join if appropriate.
+ * need (sz), and see whether all arguments are bytes.
*/
+ /* XXX Shouldn't we use _getbuffer() on these items instead? */
for (i = 0; i < seqlen; i++) {
const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
- if (!PyString_Check(item)){
- if (PyUnicode_Check(item)) {
- /* Defer to Unicode join.
- * CAUTION: There's no gurantee that the
- * original sequence can be iterated over
- * again, so we must pass seq here.
- */
- PyObject *result;
- result = PyUnicode_Join((PyObject *)self, seq);
- Py_DECREF(seq);
- return result;
- }
+ if (!PyString_Check(item) && !PyBytes_Check(item)) {
PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected string,"
+ "sequence item %zd: expected bytes,"
" %.80s found",
i, Py_Type(item)->tp_name);
Py_DECREF(seq);
return NULL;
}
- sz += PyString_GET_SIZE(item);
+ sz += Py_Size(item);
if (i != 0)
sz += seplen;
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -1487,17 +1479,24 @@
}
/* Catenate everything. */
+ /* I'm not worried about a PyBytes item growing because there's
+ nowhere in this function where we release the GIL. */
p = PyString_AS_STRING(res);
for (i = 0; i < seqlen; ++i) {
size_t n;
- item = PySequence_Fast_GET_ITEM(seq, i);
- n = PyString_GET_SIZE(item);
- Py_MEMCPY(p, PyString_AS_STRING(item), n);
- p += n;
- if (i < seqlen - 1) {
+ char *q;
+ if (i) {
Py_MEMCPY(p, sep, seplen);
p += seplen;
}
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ n = Py_Size(item);
+ if (PyString_Check(item))
+ q = PyString_AS_STRING(item);
+ else
+ q = PyBytes_AS_STRING(item);
+ Py_MEMCPY(p, q, n);
+ p += n;
}
Py_DECREF(seq);
@@ -1509,7 +1508,7 @@
{
assert(sep != NULL && PyString_Check(sep));
assert(x != NULL);
- return string_join((PyStringObject *)sep, x);
+ return string_join(sep, x);
}
Py_LOCAL_INLINE(void)
More information about the Python-3000-checkins
mailing list