[pypy-commit] pypy py3k: Fix various segfaults and internal error while testing the cpyext module.

amauryfa noreply at buildbot.pypy.org
Fri Feb 3 00:34:54 CET 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r52043:d718067d0780
Date: 2012-02-02 23:17 +0100
http://bitbucket.org/pypy/pypy/changeset/d718067d0780/

Log:	Fix various segfaults and internal error while testing the cpyext
	module. Many failures remain though.

diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -910,7 +910,7 @@
                                source_dir / "pyerrors.c",
                                source_dir / "modsupport.c",
                                source_dir / "getargs.c",
-                               source_dir / "stringobject.c",
+                               source_dir / "unicodeobject.c",
                                source_dir / "mysnprintf.c",
                                source_dir / "pythonrun.c",
                                source_dir / "sysmodule.c",
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -126,6 +126,7 @@
     version since the definition of the bytecode changes often."""
     return space.wrap(PyCode(space,
                              argcount=rffi.cast(lltype.Signed, argcount),
+                             kwonlyargcount = 0,  # XXX fix signature
                              nlocals=rffi.cast(lltype.Signed, nlocals),
                              stacksize=rffi.cast(lltype.Signed, stacksize),
                              flags=rffi.cast(lltype.Signed, flags),
diff --git a/pypy/module/cpyext/include/pyconfig.h b/pypy/module/cpyext/include/pyconfig.h
--- a/pypy/module/cpyext/include/pyconfig.h
+++ b/pypy/module/cpyext/include/pyconfig.h
@@ -25,6 +25,10 @@
 #define Py_UNICODE_SIZE 2
 #endif
 
+#ifndef _WIN32
+#define VA_LIST_IS_ARRAY
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/pyport.h b/pypy/module/cpyext/include/pyport.h
--- a/pypy/module/cpyext/include/pyport.h
+++ b/pypy/module/cpyext/include/pyport.h
@@ -64,4 +64,14 @@
 #   error "Python needs a typedef for Py_uintptr_t in pyport.h."
 #endif /* HAVE_UINTPTR_T */
 
+#ifdef VA_LIST_IS_ARRAY
+#define Py_VA_COPY(x, y) Py_MEMCPY((x), (y), sizeof(va_list))
+#else
+#ifdef __va_copy
+#define Py_VA_COPY __va_copy
+#else
+#define Py_VA_COPY(x, y) (x) = (y)
+#endif
+#endif
+
 #endif /* Py_PYPORT_H */
diff --git a/pypy/module/cpyext/include/stringobject.h b/pypy/module/cpyext/include/stringobject.h
--- a/pypy/module/cpyext/include/stringobject.h
+++ b/pypy/module/cpyext/include/stringobject.h
@@ -18,9 +18,6 @@
     Py_ssize_t size;
 } PyStringObject;
 
-PyObject *PyString_FromFormatV(const char *format, va_list vargs);
-PyObject *PyString_FromFormat(const char *format, ...);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -26,6 +26,9 @@
 } PyUnicodeObject;
 
 
+PyObject *PyUnicode_FromFormatV(const char *format, va_list vargs);
+PyObject *PyUnicode_FromFormat(const char *format, ...);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -229,6 +229,15 @@
     return space.repr(w_obj)
 
 @cpython_api([PyObject], PyObject)
+def PyObject_ASCII(space, w_obj):
+    r"""As PyObject_Repr(), compute a string representation of object
+    o, but escape the non-ASCII characters in the string returned by
+    PyObject_Repr() with \x, \u or \U escapes.  This generates a
+    string similar to that returned by PyObject_Repr() in Python 2.
+    Called by the ascii() built-in function."""
+    return operation.ascii(space, w_obj)
+
+@cpython_api([PyObject], PyObject)
 def PyObject_Unicode(space, w_obj):
     """Compute a Unicode string representation of object o.  Returns the Unicode
     string representation on success, NULL on failure. This is the equivalent of
diff --git a/pypy/module/cpyext/src/bufferobject.c b/pypy/module/cpyext/src/bufferobject.c
--- a/pypy/module/cpyext/src/bufferobject.c
+++ b/pypy/module/cpyext/src/bufferobject.c
@@ -13,207 +13,207 @@
 
 static int
 get_buf(PyBufferObject *self, void **ptr, Py_ssize_t *size,
-	enum buffer_t buffer_type)
+    enum buffer_t buffer_type)
 {
-	if (self->b_base == NULL) {
-		assert (ptr != NULL);
-		*ptr = self->b_ptr;
-		*size = self->b_size;
-	}
-	else {
-		Py_ssize_t count, offset;
-		readbufferproc proc = 0;
-		PyBufferProcs *bp = self->b_base->ob_type->tp_as_buffer;
-		if ((*bp->bf_getsegcount)(self->b_base, NULL) != 1) {
-			PyErr_SetString(PyExc_TypeError,
-				"single-segment buffer object expected");
-			return 0;
-		}
-		if ((buffer_type == READ_BUFFER) ||
-			((buffer_type == ANY_BUFFER) && self->b_readonly))
-		    proc = bp->bf_getreadbuffer;
-		else if ((buffer_type == WRITE_BUFFER) ||
-			(buffer_type == ANY_BUFFER))
-    		    proc = (readbufferproc)bp->bf_getwritebuffer;
-		else if (buffer_type == CHAR_BUFFER) {
+    if (self->b_base == NULL) {
+        assert (ptr != NULL);
+        *ptr = self->b_ptr;
+        *size = self->b_size;
+    }
+    else {
+        Py_ssize_t count, offset;
+        readbufferproc proc = 0;
+        PyBufferProcs *bp = self->b_base->ob_type->tp_as_buffer;
+        if ((*bp->bf_getsegcount)(self->b_base, NULL) != 1) {
+            PyErr_SetString(PyExc_TypeError,
+                "single-segment buffer object expected");
+            return 0;
+        }
+        if ((buffer_type == READ_BUFFER) ||
+            ((buffer_type == ANY_BUFFER) && self->b_readonly))
+            proc = bp->bf_getreadbuffer;
+        else if ((buffer_type == WRITE_BUFFER) ||
+            (buffer_type == ANY_BUFFER))
+                proc = (readbufferproc)bp->bf_getwritebuffer;
+        else if (buffer_type == CHAR_BUFFER) {
             if (!PyType_HasFeature(self->ob_type,
-				Py_TPFLAGS_HAVE_GETCHARBUFFER)) {
+                Py_TPFLAGS_HAVE_GETCHARBUFFER)) {
                 PyErr_SetString(PyExc_TypeError,
                     "Py_TPFLAGS_HAVE_GETCHARBUFFER needed");
                 return 0;
-		    }
-		    proc = (readbufferproc)bp->bf_getcharbuffer;
-		}
-		if (!proc) {
-		    char *buffer_type_name;
-		    switch (buffer_type) {
-			case READ_BUFFER:
-			    buffer_type_name = "read";
-			    break;
-			case WRITE_BUFFER:
-			    buffer_type_name = "write";
-			    break;
-			case CHAR_BUFFER:
-			    buffer_type_name = "char";
-			    break;
-			default:
-			    buffer_type_name = "no";
-			    break;
-		    }
-		    PyErr_Format(PyExc_TypeError,
-			    "%s buffer type not available",
-			    buffer_type_name);
-		    return 0;
-		}
-		if ((count = (*proc)(self->b_base, 0, ptr)) < 0)
-			return 0;
-		/* apply constraints to the start/end */
-		if (self->b_offset > count)
-			offset = count;
-		else
-			offset = self->b_offset;
-		*(char **)ptr = *(char **)ptr + offset;
-		if (self->b_size == Py_END_OF_BUFFER)
-			*size = count;
-		else
-			*size = self->b_size;
-		if (offset + *size > count)
-			*size = count - offset;
-	}
-	return 1;
+            }
+            proc = (readbufferproc)bp->bf_getcharbuffer;
+        }
+        if (!proc) {
+            char *buffer_type_name;
+            switch (buffer_type) {
+            case READ_BUFFER:
+                buffer_type_name = "read";
+                break;
+            case WRITE_BUFFER:
+                buffer_type_name = "write";
+                break;
+            case CHAR_BUFFER:
+                buffer_type_name = "char";
+                break;
+            default:
+                buffer_type_name = "no";
+                break;
+            }
+            PyErr_Format(PyExc_TypeError,
+                "%s buffer type not available",
+                buffer_type_name);
+            return 0;
+        }
+        if ((count = (*proc)(self->b_base, 0, ptr)) < 0)
+            return 0;
+        /* apply constraints to the start/end */
+        if (self->b_offset > count)
+            offset = count;
+        else
+            offset = self->b_offset;
+        *(char **)ptr = *(char **)ptr + offset;
+        if (self->b_size == Py_END_OF_BUFFER)
+            *size = count;
+        else
+            *size = self->b_size;
+        if (offset + *size > count)
+            *size = count - offset;
+    }
+    return 1;
 }
 
 
 static PyObject *
 buffer_from_memory(PyObject *base, Py_ssize_t size, Py_ssize_t offset, void *ptr,
-		   int readonly)
+           int readonly)
 {
-	PyBufferObject * b;
+    PyBufferObject * b;
 
-	if (size < 0 && size != Py_END_OF_BUFFER) {
-		PyErr_SetString(PyExc_ValueError,
-				"size must be zero or positive");
-		return NULL;
-	}
-	if (offset < 0) {
-		PyErr_SetString(PyExc_ValueError,
-				"offset must be zero or positive");
-		return NULL;
-	}
+    if (size < 0 && size != Py_END_OF_BUFFER) {
+        PyErr_SetString(PyExc_ValueError,
+                "size must be zero or positive");
+        return NULL;
+    }
+    if (offset < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "offset must be zero or positive");
+        return NULL;
+    }
 
-	b = PyObject_NEW(PyBufferObject, &PyBuffer_Type);
-	if ( b == NULL )
-		return NULL;
+    b = PyObject_NEW(PyBufferObject, &PyBuffer_Type);
+    if ( b == NULL )
+        return NULL;
 
-	Py_XINCREF(base);
-	b->b_base = base;
-	b->b_ptr = ptr;
-	b->b_size = size;
-	b->b_offset = offset;
-	b->b_readonly = readonly;
-	b->b_hash = -1;
+    Py_XINCREF(base);
+    b->b_base = base;
+    b->b_ptr = ptr;
+    b->b_size = size;
+    b->b_offset = offset;
+    b->b_readonly = readonly;
+    b->b_hash = -1;
 
-	return (PyObject *) b;
+    return (PyObject *) b;
 }
 
 static PyObject *
 buffer_from_object(PyObject *base, Py_ssize_t size, Py_ssize_t offset, int readonly)
 {
-	if (offset < 0) {
-		PyErr_SetString(PyExc_ValueError,
-				"offset must be zero or positive");
-		return NULL;
-	}
-	if ( PyBuffer_Check(base) && (((PyBufferObject *)base)->b_base) ) {
-		/* another buffer, refer to the base object */
-		PyBufferObject *b = (PyBufferObject *)base;
-		if (b->b_size != Py_END_OF_BUFFER) {
-			Py_ssize_t base_size = b->b_size - offset;
-			if (base_size < 0)
-				base_size = 0;
-			if (size == Py_END_OF_BUFFER || size > base_size)
-				size = base_size;
-		}
-		offset += b->b_offset;
-		base = b->b_base;
-	}
-	return buffer_from_memory(base, size, offset, NULL, readonly);
+    if (offset < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "offset must be zero or positive");
+        return NULL;
+    }
+    if ( PyBuffer_Check(base) && (((PyBufferObject *)base)->b_base) ) {
+        /* another buffer, refer to the base object */
+        PyBufferObject *b = (PyBufferObject *)base;
+        if (b->b_size != Py_END_OF_BUFFER) {
+            Py_ssize_t base_size = b->b_size - offset;
+            if (base_size < 0)
+                base_size = 0;
+            if (size == Py_END_OF_BUFFER || size > base_size)
+                size = base_size;
+        }
+        offset += b->b_offset;
+        base = b->b_base;
+    }
+    return buffer_from_memory(base, size, offset, NULL, readonly);
 }
 
 
 PyObject *
 PyBuffer_FromObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size)
 {
-	PyBufferProcs *pb = base->ob_type->tp_as_buffer;
+    PyBufferProcs *pb = base->ob_type->tp_as_buffer;
 
-	if ( pb == NULL ||
-	     pb->bf_getreadbuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_SetString(PyExc_TypeError, "buffer object expected");
-		return NULL;
-	}
+    if ( pb == NULL ||
+         pb->bf_getreadbuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_SetString(PyExc_TypeError, "buffer object expected");
+        return NULL;
+    }
 
-	return buffer_from_object(base, size, offset, 1);
+    return buffer_from_object(base, size, offset, 1);
 }
 
 PyObject *
 PyBuffer_FromReadWriteObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size)
 {
-	PyBufferProcs *pb = base->ob_type->tp_as_buffer;
+    PyBufferProcs *pb = base->ob_type->tp_as_buffer;
 
-	if ( pb == NULL ||
-	     pb->bf_getwritebuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_SetString(PyExc_TypeError, "buffer object expected");
-		return NULL;
-	}
+    if ( pb == NULL ||
+         pb->bf_getwritebuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_SetString(PyExc_TypeError, "buffer object expected");
+        return NULL;
+    }
 
-	return buffer_from_object(base, size,  offset, 0);
+    return buffer_from_object(base, size,  offset, 0);
 }
 
 PyObject *
 PyBuffer_FromMemory(void *ptr, Py_ssize_t size)
 {
-	return buffer_from_memory(NULL, size, 0, ptr, 1);
+    return buffer_from_memory(NULL, size, 0, ptr, 1);
 }
 
 PyObject *
 PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size)
 {
-	return buffer_from_memory(NULL, size, 0, ptr, 0);
+    return buffer_from_memory(NULL, size, 0, ptr, 0);
 }
 
 PyObject *
 PyBuffer_New(Py_ssize_t size)
 {
-	PyObject *o;
-	PyBufferObject * b;
+    PyObject *o;
+    PyBufferObject * b;
 
-	if (size < 0) {
-		PyErr_SetString(PyExc_ValueError,
-				"size must be zero or positive");
-		return NULL;
-	}
-	if (sizeof(*b) > PY_SSIZE_T_MAX - size) {
-		/* unlikely */
-		return PyErr_NoMemory();
-	}
-	/* Inline PyObject_New */
-	o = (PyObject *)PyObject_MALLOC(sizeof(*b) + size);
-	if ( o == NULL )
-		return PyErr_NoMemory();
-	b = (PyBufferObject *) PyObject_INIT(o, &PyBuffer_Type);
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "size must be zero or positive");
+        return NULL;
+    }
+    if (sizeof(*b) > PY_SSIZE_T_MAX - size) {
+        /* unlikely */
+        return PyErr_NoMemory();
+    }
+    /* Inline PyObject_New */
+    o = (PyObject *)PyObject_MALLOC(sizeof(*b) + size);
+    if ( o == NULL )
+        return PyErr_NoMemory();
+    b = (PyBufferObject *) PyObject_INIT(o, &PyBuffer_Type);
 
-	b->b_base = NULL;
-	b->b_ptr = (void *)(b + 1);
-	b->b_size = size;
-	b->b_offset = 0;
-	b->b_readonly = 0;
-	b->b_hash = -1;
+    b->b_base = NULL;
+    b->b_ptr = (void *)(b + 1);
+    b->b_size = size;
+    b->b_offset = 0;
+    b->b_readonly = 0;
+    b->b_hash = -1;
 
-	return o;
+    return o;
 }
 
 /* Methods */
@@ -221,19 +221,19 @@
 static PyObject *
 buffer_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
-	PyObject *ob;
-	Py_ssize_t offset = 0;
-	Py_ssize_t size = Py_END_OF_BUFFER;
+    PyObject *ob;
+    Py_ssize_t offset = 0;
+    Py_ssize_t size = Py_END_OF_BUFFER;
 
-	/*if (PyErr_WarnPy3k("buffer() not supported in 3.x", 1) < 0)
-		return NULL;*/
-	
-	if (!_PyArg_NoKeywords("buffer()", kw))
-		return NULL;
+    /*if (PyErr_WarnPy3k("buffer() not supported in 3.x", 1) < 0)
+        return NULL;*/
+    
+    if (!_PyArg_NoKeywords("buffer()", kw))
+        return NULL;
 
-	if (!PyArg_ParseTuple(args, "O|nn:buffer", &ob, &offset, &size))
-	    return NULL;
-	return PyBuffer_FromObject(ob, offset, size);
+    if (!PyArg_ParseTuple(args, "O|nn:buffer", &ob, &offset, &size))
+        return NULL;
+    return PyBuffer_FromObject(ob, offset, size);
 }
 
 PyDoc_STRVAR(buffer_doc,
@@ -248,99 +248,100 @@
 static void
 buffer_dealloc(PyBufferObject *self)
 {
-	Py_XDECREF(self->b_base);
-	PyObject_DEL(self);
+    Py_XDECREF(self->b_base);
+    PyObject_DEL(self);
 }
 
 static int
 buffer_compare(PyBufferObject *self, PyBufferObject *other)
 {
-	void *p1, *p2;
-	Py_ssize_t len_self, len_other, min_len;
-	int cmp;
+    void *p1, *p2;
+    Py_ssize_t len_self, len_other, min_len;
+    int cmp;
 
-	if (!get_buf(self, &p1, &len_self, ANY_BUFFER))
-		return -1;
-	if (!get_buf(other, &p2, &len_other, ANY_BUFFER))
-		return -1;
-	min_len = (len_self < len_other) ? len_self : len_other;
-	if (min_len > 0) {
-		cmp = memcmp(p1, p2, min_len);
-		if (cmp != 0)
-			return cmp < 0 ? -1 : 1;
-	}
-	return (len_self < len_other) ? -1 : (len_self > len_other) ? 1 : 0;
+    if (!get_buf(self, &p1, &len_self, ANY_BUFFER))
+        return -1;
+    if (!get_buf(other, &p2, &len_other, ANY_BUFFER))
+        return -1;
+    min_len = (len_self < len_other) ? len_self : len_other;
+    if (min_len > 0) {
+        cmp = memcmp(p1, p2, min_len);
+        if (cmp != 0)
+            return cmp < 0 ? -1 : 1;
+    }
+    return (len_self < len_other) ? -1 : (len_self > len_other) ? 1 : 0;
 }
 
 static PyObject *
 buffer_repr(PyBufferObject *self)
 {
-	const char *status = self->b_readonly ? "read-only" : "read-write";
+    const char *status = self->b_readonly ? "read-only" : "read-write";
 
     if ( self->b_base == NULL )
-		return PyString_FromFormat("<%s buffer ptr %p, size %zd at %p>",
-					   status,
-					   self->b_ptr,
-					   self->b_size,
-					   self);
-	else
-		return PyString_FromFormat(
-			"<%s buffer for %p, size %zd, offset %zd at %p>",
-			status,
-			self->b_base,
-			self->b_size,
-			self->b_offset,
-			self);
+        return PyUnicode_FromFormat(
+            "<%s buffer ptr %p, size %zd at %p>",
+            status,
+            self->b_ptr,
+            self->b_size,
+            self);
+    else
+        return PyUnicode_FromFormat(
+            "<%s buffer for %p, size %zd, offset %zd at %p>",
+            status,
+            self->b_base,
+            self->b_size,
+            self->b_offset,
+            self);
 }
 
 static long
 buffer_hash(PyBufferObject *self)
 {
-	void *ptr;
-	Py_ssize_t size;
-	register Py_ssize_t len;
-	register unsigned char *p;
-	register long x;
+    void *ptr;
+    Py_ssize_t size;
+    register Py_ssize_t len;
+    register unsigned char *p;
+    register long x;
 
-	if ( self->b_hash != -1 )
-		return self->b_hash;
+    if ( self->b_hash != -1 )
+        return self->b_hash;
 
-	/* XXX potential bugs here, a readonly buffer does not imply that the
-	 * underlying memory is immutable.  b_readonly is a necessary but not
-	 * sufficient condition for a buffer to be hashable.  Perhaps it would
-	 * be better to only allow hashing if the underlying object is known to
-	 * be immutable (e.g. PyString_Check() is true).  Another idea would
-	 * be to call tp_hash on the underlying object and see if it raises
-	 * an error. */
-	if ( !self->b_readonly )
-	{
-		PyErr_SetString(PyExc_TypeError,
-				"writable buffers are not hashable");
-		return -1;
-	}
+    /* XXX potential bugs here, a readonly buffer does not imply that the
+     * underlying memory is immutable.  b_readonly is a necessary but not
+     * sufficient condition for a buffer to be hashable.  Perhaps it would
+     * be better to only allow hashing if the underlying object is known to
+     * be immutable (e.g. PyString_Check() is true).  Another idea would
+     * be to call tp_hash on the underlying object and see if it raises
+     * an error. */
+    if ( !self->b_readonly )
+    {
+        PyErr_SetString(PyExc_TypeError,
+                "writable buffers are not hashable");
+        return -1;
+    }
 
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return -1;
-	p = (unsigned char *) ptr;
-	len = size;
-	x = *p << 7;
-	while (--len >= 0)
-		x = (1000003*x) ^ *p++;
-	x ^= size;
-	if (x == -1)
-		x = -2;
-	self->b_hash = x;
-	return x;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return -1;
+    p = (unsigned char *) ptr;
+    len = size;
+    x = *p << 7;
+    while (--len >= 0)
+        x = (1000003*x) ^ *p++;
+    x ^= size;
+    if (x == -1)
+        x = -2;
+    self->b_hash = x;
+    return x;
 }
 
 static PyObject *
 buffer_str(PyBufferObject *self)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return NULL;
-	return PyString_FromStringAndSize((const char *)ptr, size);
+    void *ptr;
+    Py_ssize_t size;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return NULL;
+    return PyString_FromStringAndSize((const char *)ptr, size);
 }
 
 /* Sequence methods */
@@ -348,374 +349,374 @@
 static Py_ssize_t
 buffer_length(PyBufferObject *self)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return -1;
-	return size;
+    void *ptr;
+    Py_ssize_t size;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return -1;
+    return size;
 }
 
 static PyObject *
 buffer_concat(PyBufferObject *self, PyObject *other)
 {
-	PyBufferProcs *pb = other->ob_type->tp_as_buffer;
-	void *ptr1, *ptr2;
-	char *p;
-	PyObject *ob;
-	Py_ssize_t size, count;
+    PyBufferProcs *pb = other->ob_type->tp_as_buffer;
+    void *ptr1, *ptr2;
+    char *p;
+    PyObject *ob;
+    Py_ssize_t size, count;
 
-	if ( pb == NULL ||
-	     pb->bf_getreadbuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_BadArgument();
-		return NULL;
-	}
-	if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
-	{
-		/* ### use a different exception type/message? */
-		PyErr_SetString(PyExc_TypeError,
-				"single-segment buffer object expected");
-		return NULL;
-	}
+    if ( pb == NULL ||
+         pb->bf_getreadbuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_BadArgument();
+        return NULL;
+    }
+    if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
+    {
+        /* ### use a different exception type/message? */
+        PyErr_SetString(PyExc_TypeError,
+                "single-segment buffer object expected");
+        return NULL;
+    }
 
- 	if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
- 		return NULL;
+     if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
+         return NULL;
  
-	/* optimize special case */
-	if ( size == 0 )
-	{
-	    Py_INCREF(other);
-	    return other;
-	}
+    /* optimize special case */
+    if ( size == 0 )
+    {
+        Py_INCREF(other);
+        return other;
+    }
 
-	if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
-		return NULL;
+    if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
+        return NULL;
 
-	assert(count <= PY_SIZE_MAX - size);
+    assert(count <= PY_SIZE_MAX - size);
 
- 	ob = PyString_FromStringAndSize(NULL, size + count);
-	if ( ob == NULL )
-		return NULL;
- 	p = PyString_AS_STRING(ob);
- 	memcpy(p, ptr1, size);
- 	memcpy(p + size, ptr2, count);
+     ob = PyString_FromStringAndSize(NULL, size + count);
+    if ( ob == NULL )
+        return NULL;
+     p = PyString_AS_STRING(ob);
+     memcpy(p, ptr1, size);
+     memcpy(p + size, ptr2, count);
 
-	/* there is an extra byte in the string object, so this is safe */
-	p[size + count] = '\0';
+    /* there is an extra byte in the string object, so this is safe */
+    p[size + count] = '\0';
 
-	return ob;
+    return ob;
 }
 
 static PyObject *
 buffer_repeat(PyBufferObject *self, Py_ssize_t count)
 {
-	PyObject *ob;
-	register char *p;
-	void *ptr;
-	Py_ssize_t size;
+    PyObject *ob;
+    register char *p;
+    void *ptr;
+    Py_ssize_t size;
 
-	if ( count < 0 )
-		count = 0;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return NULL;
-	if (count > PY_SSIZE_T_MAX / size) {
-		PyErr_SetString(PyExc_MemoryError, "result too large");
-		return NULL;
-	}
-	ob = PyString_FromStringAndSize(NULL, size * count);
-	if ( ob == NULL )
-		return NULL;
+    if ( count < 0 )
+        count = 0;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return NULL;
+    if (count > PY_SSIZE_T_MAX / size) {
+        PyErr_SetString(PyExc_MemoryError, "result too large");
+        return NULL;
+    }
+    ob = PyString_FromStringAndSize(NULL, size * count);
+    if ( ob == NULL )
+        return NULL;
 
-	p = PyString_AS_STRING(ob);
-	while ( count-- )
-	{
-	    memcpy(p, ptr, size);
-	    p += size;
-	}
+    p = PyString_AS_STRING(ob);
+    while ( count-- )
+    {
+        memcpy(p, ptr, size);
+        p += size;
+    }
 
-	/* there is an extra byte in the string object, so this is safe */
-	*p = '\0';
+    /* there is an extra byte in the string object, so this is safe */
+    *p = '\0';
 
-	return ob;
+    return ob;
 }
 
 static PyObject *
 buffer_item(PyBufferObject *self, Py_ssize_t idx)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return NULL;
-	if ( idx < 0 || idx >= size ) {
-		PyErr_SetString(PyExc_IndexError, "buffer index out of range");
-		return NULL;
-	}
-	return PyString_FromStringAndSize((char *)ptr + idx, 1);
+    void *ptr;
+    Py_ssize_t size;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return NULL;
+    if ( idx < 0 || idx >= size ) {
+        PyErr_SetString(PyExc_IndexError, "buffer index out of range");
+        return NULL;
+    }
+    return PyString_FromStringAndSize((char *)ptr + idx, 1);
 }
 
 static PyObject *
 buffer_slice(PyBufferObject *self, Py_ssize_t left, Py_ssize_t right)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return NULL;
-	if ( left < 0 )
-		left = 0;
-	if ( right < 0 )
-		right = 0;
-	if ( right > size )
-		right = size;
-	if ( right < left )
-		right = left;
-	return PyString_FromStringAndSize((char *)ptr + left,
-					  right - left);
+    void *ptr;
+    Py_ssize_t size;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return NULL;
+    if ( left < 0 )
+        left = 0;
+    if ( right < 0 )
+        right = 0;
+    if ( right > size )
+        right = size;
+    if ( right < left )
+        right = left;
+    return PyString_FromStringAndSize((char *)ptr + left,
+                      right - left);
 }
 
 static PyObject *
 buffer_subscript(PyBufferObject *self, PyObject *item)
 {
-	void *p;
-	Py_ssize_t size;
-	
-	if (!get_buf(self, &p, &size, ANY_BUFFER))
-		return NULL;
+    void *p;
+    Py_ssize_t size;
+    
+    if (!get_buf(self, &p, &size, ANY_BUFFER))
+        return NULL;
     
     if (PyIndex_Check(item)) {
-		Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
-		if (i == -1 && PyErr_Occurred())
-			return NULL;
-		if (i < 0)
-			i += size;
-		return buffer_item(self, i);
-	}
-	else if (PySlice_Check(item)) {
-		Py_ssize_t start, stop, step, slicelength, cur, i;
+        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+        if (i == -1 && PyErr_Occurred())
+            return NULL;
+        if (i < 0)
+            i += size;
+        return buffer_item(self, i);
+    }
+    else if (PySlice_Check(item)) {
+        Py_ssize_t start, stop, step, slicelength, cur, i;
 
-		if (PySlice_GetIndicesEx((PySliceObject*)item, size,
-				 &start, &stop, &step, &slicelength) < 0) {
-			return NULL;
-		}
+        if (PySlice_GetIndicesEx((PySliceObject*)item, size,
+                 &start, &stop, &step, &slicelength) < 0) {
+            return NULL;
+        }
 
-		if (slicelength <= 0)
-			return PyString_FromStringAndSize("", 0);
-		else if (step == 1)
-			return PyString_FromStringAndSize((char *)p + start,
-							  stop - start);
-		else {
-			PyObject *result;
-			char *source_buf = (char *)p;
-			char *result_buf = (char *)PyMem_Malloc(slicelength);
+        if (slicelength <= 0)
+            return PyString_FromStringAndSize("", 0);
+        else if (step == 1)
+            return PyString_FromStringAndSize((char *)p + start,
+                              stop - start);
+        else {
+            PyObject *result;
+            char *source_buf = (char *)p;
+            char *result_buf = (char *)PyMem_Malloc(slicelength);
 
-			if (result_buf == NULL)
-				return PyErr_NoMemory();
+            if (result_buf == NULL)
+                return PyErr_NoMemory();
 
-			for (cur = start, i = 0; i < slicelength;
-			     cur += step, i++) {
-				result_buf[i] = source_buf[cur];
-			}
+            for (cur = start, i = 0; i < slicelength;
+                 cur += step, i++) {
+                result_buf[i] = source_buf[cur];
+            }
 
-			result = PyString_FromStringAndSize(result_buf,
-							    slicelength);
-			PyMem_Free(result_buf);
-			return result;
-		}
-	}
-	else {
-		PyErr_SetString(PyExc_TypeError,
-				"sequence index must be integer");
-		return NULL;
-	}
+            result = PyString_FromStringAndSize(result_buf,
+                                slicelength);
+            PyMem_Free(result_buf);
+            return result;
+        }
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                "sequence index must be integer");
+        return NULL;
+    }
 }
 
 static int
 buffer_ass_item(PyBufferObject *self, Py_ssize_t idx, PyObject *other)
 {
-	PyBufferProcs *pb;
-	void *ptr1, *ptr2;
-	Py_ssize_t size;
-	Py_ssize_t count;
+    PyBufferProcs *pb;
+    void *ptr1, *ptr2;
+    Py_ssize_t size;
+    Py_ssize_t count;
 
-	if ( self->b_readonly ) {
-		PyErr_SetString(PyExc_TypeError,
-				"buffer is read-only");
-		return -1;
-	}
+    if ( self->b_readonly ) {
+        PyErr_SetString(PyExc_TypeError,
+                "buffer is read-only");
+        return -1;
+    }
 
-	if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
-		return -1;
+    if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
+        return -1;
 
-	if (idx < 0 || idx >= size) {
-		PyErr_SetString(PyExc_IndexError,
-				"buffer assignment index out of range");
-		return -1;
-	}
+    if (idx < 0 || idx >= size) {
+        PyErr_SetString(PyExc_IndexError,
+                "buffer assignment index out of range");
+        return -1;
+    }
 
-	pb = other ? other->ob_type->tp_as_buffer : NULL;
-	if ( pb == NULL ||
-	     pb->bf_getreadbuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_BadArgument();
-		return -1;
-	}
-	if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
-	{
-		/* ### use a different exception type/message? */
-		PyErr_SetString(PyExc_TypeError,
-				"single-segment buffer object expected");
-		return -1;
-	}
+    pb = other ? other->ob_type->tp_as_buffer : NULL;
+    if ( pb == NULL ||
+         pb->bf_getreadbuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_BadArgument();
+        return -1;
+    }
+    if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
+    {
+        /* ### use a different exception type/message? */
+        PyErr_SetString(PyExc_TypeError,
+                "single-segment buffer object expected");
+        return -1;
+    }
 
-	if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
-		return -1;
-	if ( count != 1 ) {
-		PyErr_SetString(PyExc_TypeError,
-				"right operand must be a single byte");
-		return -1;
-	}
+    if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
+        return -1;
+    if ( count != 1 ) {
+        PyErr_SetString(PyExc_TypeError,
+                "right operand must be a single byte");
+        return -1;
+    }
 
-	((char *)ptr1)[idx] = *(char *)ptr2;
-	return 0;
+    ((char *)ptr1)[idx] = *(char *)ptr2;
+    return 0;
 }
 
 static int
 buffer_ass_slice(PyBufferObject *self, Py_ssize_t left, Py_ssize_t right, PyObject *other)
 {
-	PyBufferProcs *pb;
-	void *ptr1, *ptr2;
-	Py_ssize_t size;
-	Py_ssize_t slice_len;
-	Py_ssize_t count;
+    PyBufferProcs *pb;
+    void *ptr1, *ptr2;
+    Py_ssize_t size;
+    Py_ssize_t slice_len;
+    Py_ssize_t count;
 
-	if ( self->b_readonly ) {
-		PyErr_SetString(PyExc_TypeError,
-				"buffer is read-only");
-		return -1;
-	}
+    if ( self->b_readonly ) {
+        PyErr_SetString(PyExc_TypeError,
+                "buffer is read-only");
+        return -1;
+    }
 
-	pb = other ? other->ob_type->tp_as_buffer : NULL;
-	if ( pb == NULL ||
-	     pb->bf_getreadbuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_BadArgument();
-		return -1;
-	}
-	if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
-	{
-		/* ### use a different exception type/message? */
-		PyErr_SetString(PyExc_TypeError,
-				"single-segment buffer object expected");
-		return -1;
-	}
-	if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
-		return -1;
-	if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
-		return -1;
+    pb = other ? other->ob_type->tp_as_buffer : NULL;
+    if ( pb == NULL ||
+         pb->bf_getreadbuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_BadArgument();
+        return -1;
+    }
+    if ( (*pb->bf_getsegcount)(other, NULL) != 1 )
+    {
+        /* ### use a different exception type/message? */
+        PyErr_SetString(PyExc_TypeError,
+                "single-segment buffer object expected");
+        return -1;
+    }
+    if (!get_buf(self, &ptr1, &size, ANY_BUFFER))
+        return -1;
+    if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
+        return -1;
 
-	if ( left < 0 )
-		left = 0;
-	else if ( left > size )
-		left = size;
-	if ( right < left )
-		right = left;
-	else if ( right > size )
-		right = size;
-	slice_len = right - left;
+    if ( left < 0 )
+        left = 0;
+    else if ( left > size )
+        left = size;
+    if ( right < left )
+        right = left;
+    else if ( right > size )
+        right = size;
+    slice_len = right - left;
 
-	if ( count != slice_len ) {
-		PyErr_SetString(
-			PyExc_TypeError,
-			"right operand length must match slice length");
-		return -1;
-	}
+    if ( count != slice_len ) {
+        PyErr_SetString(
+            PyExc_TypeError,
+            "right operand length must match slice length");
+        return -1;
+    }
 
-	if ( slice_len )
-	    memcpy((char *)ptr1 + left, ptr2, slice_len);
+    if ( slice_len )
+        memcpy((char *)ptr1 + left, ptr2, slice_len);
 
-	return 0;
+    return 0;
 }
 
 static int
 buffer_ass_subscript(PyBufferObject *self, PyObject *item, PyObject *value)
 {
-	PyBufferProcs *pb;
-	void *ptr1, *ptr2;
-	Py_ssize_t selfsize;
-	Py_ssize_t othersize;
+    PyBufferProcs *pb;
+    void *ptr1, *ptr2;
+    Py_ssize_t selfsize;
+    Py_ssize_t othersize;
 
-	if ( self->b_readonly ) {
-		PyErr_SetString(PyExc_TypeError,
-				"buffer is read-only");
-		return -1;
-	}
+    if ( self->b_readonly ) {
+        PyErr_SetString(PyExc_TypeError,
+                "buffer is read-only");
+        return -1;
+    }
 
-	pb = value ? value->ob_type->tp_as_buffer : NULL;
-	if ( pb == NULL ||
-	     pb->bf_getreadbuffer == NULL ||
-	     pb->bf_getsegcount == NULL )
-	{
-		PyErr_BadArgument();
-		return -1;
-	}
-	if ( (*pb->bf_getsegcount)(value, NULL) != 1 )
-	{
-		/* ### use a different exception type/message? */
-		PyErr_SetString(PyExc_TypeError,
-				"single-segment buffer object expected");
-		return -1;
-	}
-	if (!get_buf(self, &ptr1, &selfsize, ANY_BUFFER))
-		return -1;
+    pb = value ? value->ob_type->tp_as_buffer : NULL;
+    if ( pb == NULL ||
+         pb->bf_getreadbuffer == NULL ||
+         pb->bf_getsegcount == NULL )
+    {
+        PyErr_BadArgument();
+        return -1;
+    }
+    if ( (*pb->bf_getsegcount)(value, NULL) != 1 )
+    {
+        /* ### use a different exception type/message? */
+        PyErr_SetString(PyExc_TypeError,
+                "single-segment buffer object expected");
+        return -1;
+    }
+    if (!get_buf(self, &ptr1, &selfsize, ANY_BUFFER))
+        return -1;
     
     if (PyIndex_Check(item)) {
-		Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
-		if (i == -1 && PyErr_Occurred())
-			return -1;
-		if (i < 0)
-			i += selfsize;
-		return buffer_ass_item(self, i, value);
-	}
-	else if (PySlice_Check(item)) {
-		Py_ssize_t start, stop, step, slicelength;
-		
-		if (PySlice_GetIndicesEx((PySliceObject *)item, selfsize,
-				&start, &stop, &step, &slicelength) < 0)
-			return -1;
+        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+        if (i == -1 && PyErr_Occurred())
+            return -1;
+        if (i < 0)
+            i += selfsize;
+        return buffer_ass_item(self, i, value);
+    }
+    else if (PySlice_Check(item)) {
+        Py_ssize_t start, stop, step, slicelength;
+        
+        if (PySlice_GetIndicesEx((PySliceObject *)item, selfsize,
+                &start, &stop, &step, &slicelength) < 0)
+            return -1;
 
-		if ((othersize = (*pb->bf_getreadbuffer)(value, 0, &ptr2)) < 0)
-			return -1;
+        if ((othersize = (*pb->bf_getreadbuffer)(value, 0, &ptr2)) < 0)
+            return -1;
 
-		if (othersize != slicelength) {
-			PyErr_SetString(
-				PyExc_TypeError,
-				"right operand length must match slice length");
-			return -1;
-		}
+        if (othersize != slicelength) {
+            PyErr_SetString(
+                PyExc_TypeError,
+                "right operand length must match slice length");
+            return -1;
+        }
 
-		if (slicelength == 0)
-			return 0;
-		else if (step == 1) {
-			memcpy((char *)ptr1 + start, ptr2, slicelength);
-			return 0;
-		}
-		else {
-			Py_ssize_t cur, i;
-			
-			for (cur = start, i = 0; i < slicelength;
-			     cur += step, i++) {
-				((char *)ptr1)[cur] = ((char *)ptr2)[i];
-			}
+        if (slicelength == 0)
+            return 0;
+        else if (step == 1) {
+            memcpy((char *)ptr1 + start, ptr2, slicelength);
+            return 0;
+        }
+        else {
+            Py_ssize_t cur, i;
+            
+            for (cur = start, i = 0; i < slicelength;
+                 cur += step, i++) {
+                ((char *)ptr1)[cur] = ((char *)ptr2)[i];
+            }
 
-			return 0;
-		}
-	} else {
-		PyErr_SetString(PyExc_TypeError,
-				"buffer indices must be integers");
-		return -1;
-	}
+            return 0;
+        }
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+                "buffer indices must be integers");
+        return -1;
+    }
 }
 
 /* Buffer methods */
@@ -723,64 +724,64 @@
 static Py_ssize_t
 buffer_getreadbuf(PyBufferObject *self, Py_ssize_t idx, void **pp)
 {
-	Py_ssize_t size;
-	if ( idx != 0 ) {
-		PyErr_SetString(PyExc_SystemError,
-				"accessing non-existent buffer segment");
-		return -1;
-	}
-	if (!get_buf(self, pp, &size, READ_BUFFER))
-		return -1;
-	return size;
+    Py_ssize_t size;
+    if ( idx != 0 ) {
+        PyErr_SetString(PyExc_SystemError,
+                "accessing non-existent buffer segment");
+        return -1;
+    }
+    if (!get_buf(self, pp, &size, READ_BUFFER))
+        return -1;
+    return size;
 }
 
 static Py_ssize_t
 buffer_getwritebuf(PyBufferObject *self, Py_ssize_t idx, void **pp)
 {
-	Py_ssize_t size;
+    Py_ssize_t size;
 
-	if ( self->b_readonly )
-	{
-		PyErr_SetString(PyExc_TypeError, "buffer is read-only");
-		return -1;
-	}
+    if ( self->b_readonly )
+    {
+        PyErr_SetString(PyExc_TypeError, "buffer is read-only");
+        return -1;
+    }
 
-	if ( idx != 0 ) {
-		PyErr_SetString(PyExc_SystemError,
-				"accessing non-existent buffer segment");
-		return -1;
-	}
-	if (!get_buf(self, pp, &size, WRITE_BUFFER))
-		return -1;
-	return size;
+    if ( idx != 0 ) {
+        PyErr_SetString(PyExc_SystemError,
+                "accessing non-existent buffer segment");
+        return -1;
+    }
+    if (!get_buf(self, pp, &size, WRITE_BUFFER))
+        return -1;
+    return size;
 }
 
 static Py_ssize_t
 buffer_getsegcount(PyBufferObject *self, Py_ssize_t *lenp)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if (!get_buf(self, &ptr, &size, ANY_BUFFER))
-		return -1;
-	if (lenp)
-		*lenp = size;
-	return 1;
+    void *ptr;
+    Py_ssize_t size;
+    if (!get_buf(self, &ptr, &size, ANY_BUFFER))
+        return -1;
+    if (lenp)
+        *lenp = size;
+    return 1;
 }
 
 static Py_ssize_t
 buffer_getcharbuf(PyBufferObject *self, Py_ssize_t idx, const char **pp)
 {
-	void *ptr;
-	Py_ssize_t size;
-	if ( idx != 0 ) {
-		PyErr_SetString(PyExc_SystemError,
-				"accessing non-existent buffer segment");
-		return -1;
-	}
-	if (!get_buf(self, &ptr, &size, CHAR_BUFFER))
-		return -1;
-	*pp = (const char *)ptr;
-	return size;
+    void *ptr;
+    Py_ssize_t size;
+    if ( idx != 0 ) {
+        PyErr_SetString(PyExc_SystemError,
+                "accessing non-existent buffer segment");
+        return -1;
+    }
+    if (!get_buf(self, &ptr, &size, CHAR_BUFFER))
+        return -1;
+    *pp = (const char *)ptr;
+    return size;
 }
 
 void init_bufferobject(void)
@@ -789,67 +790,67 @@
 }
 
 static PySequenceMethods buffer_as_sequence = {
-	(lenfunc)buffer_length, /*sq_length*/
-	(binaryfunc)buffer_concat, /*sq_concat*/
-	(ssizeargfunc)buffer_repeat, /*sq_repeat*/
-	(ssizeargfunc)buffer_item, /*sq_item*/
-	(ssizessizeargfunc)buffer_slice, /*sq_slice*/
-	(ssizeobjargproc)buffer_ass_item, /*sq_ass_item*/
-	(ssizessizeobjargproc)buffer_ass_slice, /*sq_ass_slice*/
+    (lenfunc)buffer_length, /*sq_length*/
+    (binaryfunc)buffer_concat, /*sq_concat*/
+    (ssizeargfunc)buffer_repeat, /*sq_repeat*/
+    (ssizeargfunc)buffer_item, /*sq_item*/
+    (ssizessizeargfunc)buffer_slice, /*sq_slice*/
+    (ssizeobjargproc)buffer_ass_item, /*sq_ass_item*/
+    (ssizessizeobjargproc)buffer_ass_slice, /*sq_ass_slice*/
 };
 
 static PyMappingMethods buffer_as_mapping = {
-	(lenfunc)buffer_length,
-	(binaryfunc)buffer_subscript,
-	(objobjargproc)buffer_ass_subscript,
+    (lenfunc)buffer_length,
+    (binaryfunc)buffer_subscript,
+    (objobjargproc)buffer_ass_subscript,
 };
 
 static PyBufferProcs buffer_as_buffer = {
-	(readbufferproc)buffer_getreadbuf,
-	(writebufferproc)buffer_getwritebuf,
-	(segcountproc)buffer_getsegcount,
-	(charbufferproc)buffer_getcharbuf,
+    (readbufferproc)buffer_getreadbuf,
+    (writebufferproc)buffer_getwritebuf,
+    (segcountproc)buffer_getsegcount,
+    (charbufferproc)buffer_getcharbuf,
 };
 
 PyTypeObject PyBuffer_Type = {
     PyObject_HEAD_INIT(NULL)
     0,
-	"buffer",
-	sizeof(PyBufferObject),
-	0,
-	(destructor)buffer_dealloc, 		/* tp_dealloc */
-	0,					/* tp_print */
-	0,					/* tp_getattr */
-	0,					/* tp_setattr */
-	(cmpfunc)buffer_compare,		/* tp_compare */
-	(reprfunc)buffer_repr,			/* tp_repr */
-	0,					/* tp_as_number */
-	&buffer_as_sequence,			/* tp_as_sequence */
-	&buffer_as_mapping,			/* tp_as_mapping */
-	(hashfunc)buffer_hash,			/* tp_hash */
-	0,					/* tp_call */
-	(reprfunc)buffer_str,			/* tp_str */
-	PyObject_GenericGetAttr,		/* tp_getattro */
-	0,					/* tp_setattro */
-	&buffer_as_buffer,			/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
-	buffer_doc,				/* tp_doc */
-	0,					/* tp_traverse */
-	0,					/* tp_clear */
-	0,					/* tp_richcompare */
-	0,					/* tp_weaklistoffset */
-	0,					/* tp_iter */
-	0,					/* tp_iternext */
-	0,					/* tp_methods */	
-	0,					/* tp_members */
-	0,					/* tp_getset */
-	0,					/* tp_base */
-	0,					/* tp_dict */
-	0,					/* tp_descr_get */
-	0,					/* tp_descr_set */
-	0,					/* tp_dictoffset */
-	0,					/* tp_init */
-	0,					/* tp_alloc */
-	buffer_new,				/* tp_new */
+    "buffer",
+    sizeof(PyBufferObject),
+    0,
+    (destructor)buffer_dealloc,   /* tp_dealloc */
+    0,                            /* tp_print */
+    0,                            /* tp_getattr */
+    0,                            /* tp_setattr */
+    (cmpfunc)buffer_compare,      /* tp_compare */
+    (reprfunc)buffer_repr,        /* tp_repr */
+    0,                            /* tp_as_number */
+    &buffer_as_sequence,          /* tp_as_sequence */
+    &buffer_as_mapping,           /* tp_as_mapping */
+    (hashfunc)buffer_hash,        /* tp_hash */
+    0,                            /* tp_call */
+    (reprfunc)buffer_str,         /* tp_str */
+    PyObject_GenericGetAttr,      /* tp_getattro */
+    0,                            /* tp_setattro */
+    &buffer_as_buffer,            /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
+    buffer_doc,                   /* tp_doc */
+    0,                            /* tp_traverse */
+    0,                            /* tp_clear */
+    0,                            /* tp_richcompare */
+    0,                            /* tp_weaklistoffset */
+    0,                            /* tp_iter */
+    0,                            /* tp_iternext */
+    0,                            /* tp_methods */    
+    0,                            /* tp_members */
+    0,                            /* tp_getset */
+    0,                            /* tp_base */
+    0,                            /* tp_dict */
+    0,                            /* tp_descr_get */
+    0,                            /* tp_descr_set */
+    0,                            /* tp_dictoffset */
+    0,                            /* tp_init */
+    0,                            /* tp_alloc */
+    buffer_new,                   /* tp_new */
 };
 
diff --git a/pypy/module/cpyext/src/capsule.c b/pypy/module/cpyext/src/capsule.c
--- a/pypy/module/cpyext/src/capsule.c
+++ b/pypy/module/cpyext/src/capsule.c
@@ -279,7 +279,7 @@
         name = "NULL";
     }
 
-    return PyString_FromFormat("<capsule object %s%s%s at %p>",
+    return PyUnicode_FromFormat("<capsule object %s%s%s at %p>",
         quote, name, quote, capsule);
 }
 
@@ -298,27 +298,27 @@
 
 PyTypeObject PyCapsule_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "PyCapsule",		/*tp_name*/
-    sizeof(PyCapsule),		/*tp_basicsize*/
-    0,				/*tp_itemsize*/
+    "PyCapsule",                /*tp_name*/
+    sizeof(PyCapsule),          /*tp_basicsize*/
+    0,                          /*tp_itemsize*/
     /* methods */
     capsule_dealloc, /*tp_dealloc*/
-    0,				/*tp_print*/
-    0,				/*tp_getattr*/
-    0,				/*tp_setattr*/
-    0,				/*tp_reserved*/
+    0,                          /*tp_print*/
+    0,                          /*tp_getattr*/
+    0,                          /*tp_setattr*/
+    0,                          /*tp_reserved*/
     capsule_repr, /*tp_repr*/
-    0,				/*tp_as_number*/
-    0,				/*tp_as_sequence*/
-    0,				/*tp_as_mapping*/
-    0,				/*tp_hash*/
-    0,				/*tp_call*/
-    0,				/*tp_str*/
-    0,				/*tp_getattro*/
-    0,				/*tp_setattro*/
-    0,				/*tp_as_buffer*/
-    0,				/*tp_flags*/
-    PyCapsule_Type__doc__	/*tp_doc*/
+    0,                          /*tp_as_number*/
+    0,                          /*tp_as_sequence*/
+    0,                          /*tp_as_mapping*/
+    0,                          /*tp_hash*/
+    0,                          /*tp_call*/
+    0,                          /*tp_str*/
+    0,                          /*tp_getattro*/
+    0,                          /*tp_setattro*/
+    0,                          /*tp_as_buffer*/
+    0,                          /*tp_flags*/
+    PyCapsule_Type__doc__       /*tp_doc*/
 };
 
 void init_capsule()
diff --git a/pypy/module/cpyext/src/pyerrors.c b/pypy/module/cpyext/src/pyerrors.c
--- a/pypy/module/cpyext/src/pyerrors.c
+++ b/pypy/module/cpyext/src/pyerrors.c
@@ -4,75 +4,79 @@
 PyObject *
 PyErr_Format(PyObject *exception, const char *format, ...)
 {
-	va_list vargs;
-	PyObject* string;
+    va_list vargs;
+    PyObject* string;
 
 #ifdef HAVE_STDARG_PROTOTYPES
-	va_start(vargs, format);
+    va_start(vargs, format);
 #else
-  va_start(vargs);
+    va_start(vargs);
 #endif
 
-	string = PyString_FromFormatV(format, vargs);
-	PyErr_SetObject(exception, string);
-	Py_XDECREF(string);
-	va_end(vargs);
-	return NULL;
+    string = PyUnicode_FromFormatV(format, vargs);
+    PyErr_SetObject(exception, string);
+    Py_XDECREF(string);
+    va_end(vargs);
+    return NULL;
 }
 
+
+
 PyObject *
 PyErr_NewException(const char *name, PyObject *base, PyObject *dict)
 {
-	char *dot;
-	PyObject *modulename = NULL;
-	PyObject *classname = NULL;
-	PyObject *mydict = NULL;
-	PyObject *bases = NULL;
-	PyObject *result = NULL;
-	dot = strrchr(name, '.');
-	if (dot == NULL) {
-		PyErr_SetString(PyExc_SystemError,
-			"PyErr_NewException: name must be module.class");
-		return NULL;
-	}
-	if (base == NULL)
-		base = PyExc_Exception;
-	if (dict == NULL) {
-		dict = mydict = PyDict_New();
-		if (dict == NULL)
-			goto failure;
-	}
-	if (PyDict_GetItemString(dict, "__module__") == NULL) {
-		modulename = PyString_FromStringAndSize(name,
-						     (Py_ssize_t)(dot-name));
-		if (modulename == NULL)
-			goto failure;
-		if (PyDict_SetItemString(dict, "__module__", modulename) != 0)
-			goto failure;
-	}
-	if (PyTuple_Check(base)) {
-		bases = base;
-		/* INCREF as we create a new ref in the else branch */
-		Py_INCREF(bases);
-	} else {
-		bases = PyTuple_Pack(1, base);
-		if (bases == NULL)
-			goto failure;
-	}
-	/* Create a real new-style class. */
-	result = PyObject_CallFunction((PyObject *)&PyType_Type, "sOO",
-				       dot+1, bases, dict);
+    const char *dot;
+    PyObject *modulename = NULL;
+    PyObject *classname = NULL;
+    PyObject *mydict = NULL;
+    PyObject *bases = NULL;
+    PyObject *result = NULL;
+    dot = strrchr(name, '.');
+    if (dot == NULL) {
+        PyErr_SetString(PyExc_SystemError,
+            "PyErr_NewException: name must be module.class");
+        return NULL;
+    }
+    if (base == NULL)
+        base = PyExc_Exception;
+    if (dict == NULL) {
+        dict = mydict = PyDict_New();
+        if (dict == NULL)
+            goto failure;
+    }
+    if (PyDict_GetItemString(dict, "__module__") == NULL) {
+        modulename = PyUnicode_FromStringAndSize(name,
+                                             (Py_ssize_t)(dot-name));
+        if (modulename == NULL)
+            goto failure;
+        if (PyDict_SetItemString(dict, "__module__", modulename) != 0)
+            goto failure;
+    }
+    if (PyTuple_Check(base)) {
+        bases = base;
+        /* INCREF as we create a new ref in the else branch */
+        Py_INCREF(bases);
+    } else {
+        bases = PyTuple_Pack(1, base);
+        if (bases == NULL)
+            goto failure;
+    }
+    /* Create a real new-style class. */
+    result = PyObject_CallFunction((PyObject *)&PyType_Type, "sOO",
+                                   dot+1, bases, dict);
   failure:
-	Py_XDECREF(bases);
-	Py_XDECREF(mydict);
-	Py_XDECREF(classname);
-	Py_XDECREF(modulename);
-	return result;
+    Py_XDECREF(bases);
+    Py_XDECREF(mydict);
+    Py_XDECREF(classname);
+    Py_XDECREF(modulename);
+    return result;
 }
 
+
 /* Create an exception with docstring */
 PyObject *
-PyErr_NewExceptionWithDoc(const char *name, const char *doc, PyObject *base, PyObject *dict)
+PyErr_NewExceptionWithDoc(const char *name, const char *doc,
+                          PyObject *base, PyObject *dict)
 {
     int result;
     PyObject *ret = NULL;
@@ -87,7 +91,7 @@
     }
 
     if (doc != NULL) {
-        docobj = PyString_FromString(doc);
+        docobj = PyUnicode_FromString(doc);
         if (docobj == NULL)
             goto failure;
         result = PyDict_SetItemString(dict, "__doc__", docobj);
diff --git a/pypy/module/cpyext/src/stringobject.c b/pypy/module/cpyext/src/unicodeobject.c
rename from pypy/module/cpyext/src/stringobject.c
rename to pypy/module/cpyext/src/unicodeobject.c
--- a/pypy/module/cpyext/src/stringobject.c
+++ b/pypy/module/cpyext/src/unicodeobject.c
@@ -1,249 +1,522 @@
-
 #include "Python.h"
 
+#if defined(Py_ISDIGIT) || defined(Py_ISALPHA)
+#error remove these definitions
+#endif
+#define Py_ISDIGIT isdigit
+#define Py_ISALPHA isalpha
+
+#define PyObject_Malloc malloc
+#define PyObject_Free free
+
+static void
+makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
+        int zeropad, int width, int precision, char c)
+{
+    *fmt++ = '%';
+    if (width) {
+        if (zeropad)
+            *fmt++ = '0';
+        fmt += sprintf(fmt, "%d", width);
+    }
+    if (precision)
+        fmt += sprintf(fmt, ".%d", precision);
+    if (longflag)
+        *fmt++ = 'l';
+    else if (longlongflag) {
+        /* longlongflag should only ever be nonzero on machines with
+           HAVE_LONG_LONG defined */
+#ifdef HAVE_LONG_LONG
+        char *f = PY_FORMAT_LONG_LONG;
+        while (*f)
+            *fmt++ = *f++;
+#else
+        /* we shouldn't ever get here */
+        assert(0);
+        *fmt++ = 'l';
+#endif
+    }
+    else if (size_tflag) {
+        char *f = PY_FORMAT_SIZE_T;
+        while (*f)
+            *fmt++ = *f++;
+    }
+    *fmt++ = c;
+    *fmt = '\0';
+}
+
+#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
+
+/* size of fixed-size buffer for formatting single arguments */
+#define ITEM_BUFFER_LEN 21
+/* maximum number of characters required for output of %ld.  21 characters
+   allows for 64-bit integers (in decimal) and an optional sign. */
+#define MAX_LONG_CHARS 21
+/* maximum number of characters required for output of %lld.
+   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
+   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
+#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+
 PyObject *
-PyString_FromFormatV(const char *format, va_list vargs)
+PyUnicode_FromFormatV(const char *format, va_list vargs)
 {
-	va_list count;
-	Py_ssize_t n = 0;
-	const char* f;
-	char *s;
-	PyObject* string;
+    va_list count;
+    Py_ssize_t callcount = 0;
+    PyObject **callresults = NULL;
+    PyObject **callresult = NULL;
+    Py_ssize_t n = 0;
+    int width = 0;
+    int precision = 0;
+    int zeropad;
+    const char* f;
+    Py_UNICODE *s;
+    PyObject *string;
+    /* used by sprintf */
+    char buffer[ITEM_BUFFER_LEN+1];
+    /* use abuffer instead of buffer, if we need more space
+     * (which can happen if there's a format specifier with width). */
+    char *abuffer = NULL;
+    char *realbuffer;
+    Py_ssize_t abuffersize = 0;
+    char fmt[61]; /* should be enough for %0width.precisionlld */
+    const char *copy;
 
-#ifdef VA_LIST_IS_ARRAY
-	Py_MEMCPY(count, vargs, sizeof(va_list));
+    Py_VA_COPY(count, vargs);
+    /* step 1: count the number of %S/%R/%A/%V/%s format specifications
+     * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
+     * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
+     * result in an array) */
+    for (f = format; *f; f++) {
+         if (*f == '%') {
+             if (*(f+1)=='%')
+                 continue;
+             if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
+                 ++callcount;
+             while (Py_ISDIGIT((unsigned)*f))
+                 width = (width*10) + *f++ - '0';
+             while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
+                 ;
+             if (*f == 's')
+                 ++callcount;
+         }
+         else if (128 <= (unsigned char)*f) {
+             PyErr_Format(PyExc_ValueError,
+                "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+                "string, got a non-ASCII byte: 0x%02x",
+                (unsigned char)*f);
+             return NULL;
+         }
+    }
+    /* step 2: allocate memory for the results of
+     * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
+    if (callcount) {
+        callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
+        if (!callresults) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        callresult = callresults;
+    }
+    /* step 3: figure out how large a buffer we need */
+    for (f = format; *f; f++) {
+        if (*f == '%') {
+#ifdef HAVE_LONG_LONG
+            int longlongflag = 0;
+#endif
+            const char* p = f;
+            width = 0;
+            while (Py_ISDIGIT((unsigned)*f))
+                width = (width*10) + *f++ - '0';
+            while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
+                ;
+
+            /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
+             * they don't affect the amount of space we reserve.
+             */
+            if (*f == 'l') {
+                if (f[1] == 'd' || f[1] == 'u') {
+                    ++f;
+                }
+#ifdef HAVE_LONG_LONG
+                else if (f[1] == 'l' &&
+                         (f[2] == 'd' || f[2] == 'u')) {
+                    longlongflag = 1;
+                    f += 2;
+                }
+#endif
+            }
+            else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+                ++f;
+            }
+
+            switch (*f) {
+            case 'c':
+            {
+#ifndef Py_UNICODE_WIDE
+                int ordinal = va_arg(count, int);
+                if (ordinal > 0xffff)
+                    n += 2;
+                else
+                    n++;
 #else
-#ifdef  __va_copy
-	__va_copy(count, vargs);
-#else
-	count = vargs;
+                (void)va_arg(count, int);
+                n++;
 #endif
+                break;
+            }
+            case '%':
+                n++;
+                break;
+            case 'd': case 'u': case 'i': case 'x':
+                (void) va_arg(count, int);
+#ifdef HAVE_LONG_LONG
+                if (longlongflag) {
+                    if (width < MAX_LONG_LONG_CHARS)
+                        width = MAX_LONG_LONG_CHARS;
+                }
+                else
 #endif
-	/* step 1: figure out how large a buffer we need */
-	for (f = format; *f; f++) {
-		if (*f == '%') {
+                    /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
+                       including sign.  Decimal takes the most space.  This
+                       isn't enough for octal.  If a width is specified we
+                       need more (which we allocate later). */
+                    if (width < MAX_LONG_CHARS)
+                        width = MAX_LONG_CHARS;
+                n += width;
+                /* XXX should allow for large precision here too. */
+                if (abuffersize < width)
+                    abuffersize = width;
+                break;
+            case 's':
+            {
+                /* UTF-8 */
+                const char *s = va_arg(count, const char*);
+                PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
+                if (!str)
+                    goto fail;
+                n += PyUnicode_GET_SIZE(str);
+                /* Remember the str and switch to the next slot */
+                *callresult++ = str;
+                break;
+            }
+            case 'U':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                assert(obj && PyUnicode_Check(obj));
+                n += PyUnicode_GET_SIZE(obj);
+                break;
+            }
+            case 'V':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                const char *str = va_arg(count, const char *);
+                PyObject *str_obj;
+                assert(obj || str);
+                assert(!obj || PyUnicode_Check(obj));
+                if (obj) {
+                    n += PyUnicode_GET_SIZE(obj);
+                    *callresult++ = NULL;
+                }
+                else {
+                    str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace");
+                    if (!str_obj)
+                        goto fail;
+                    n += PyUnicode_GET_SIZE(str_obj);
+                    *callresult++ = str_obj;
+                }
+                break;
+            }
+            case 'S':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                PyObject *str;
+                assert(obj);
+                str = PyObject_Str(obj);
+                if (!str)
+                    goto fail;
+                n += PyUnicode_GET_SIZE(str);
+                /* Remember the str and switch to the next slot */
+                *callresult++ = str;
+                break;
+            }
+            case 'R':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                PyObject *repr;
+                assert(obj);
+                repr = PyObject_Repr(obj);
+                if (!repr)
+                    goto fail;
+                n += PyUnicode_GET_SIZE(repr);
+                /* Remember the repr and switch to the next slot */
+                *callresult++ = repr;
+                break;
+            }
+            case 'A':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                PyObject *ascii;
+                assert(obj);
+                ascii = PyObject_ASCII(obj);
+                if (!ascii)
+                    goto fail;
+                n += PyUnicode_GET_SIZE(ascii);
+                /* Remember the ascii result and switch to the next slot */
+                *callresult++ = ascii;
+                break;
+            }
+            case 'p':
+                (void) va_arg(count, int);
+                /* maximum 64-bit pointer representation:
+                 * 0xffffffffffffffff
+                 * so 19 characters is enough.
+                 * XXX I count 18 -- what's the extra for?
+                 */
+                n += 19;
+                break;
+            default:
+                /* if we stumble upon an unknown
+                   formatting code, copy the rest of
+                   the format string to the output
+                   string. (we cannot just skip the
+                   code, since there's no way to know
+                   what's in the argument list) */
+                n += strlen(p);
+                goto expand;
+            }
+        } else
+            n++;
+    }
+  expand:
+    if (abuffersize > ITEM_BUFFER_LEN) {
+        /* add 1 for sprintf's trailing null byte */
+        abuffer = PyObject_Malloc(abuffersize + 1);
+        if (!abuffer) {
+            PyErr_NoMemory();
+            goto fail;
+        }
+        realbuffer = abuffer;
+    }
+    else
+        realbuffer = buffer;
+    /* step 4: fill the buffer */
+    /* Since we've analyzed how much space we need for the worst case,
+       we don't have to resize the string.
+       There can be no errors beyond this point. */
+    string = PyUnicode_FromUnicode(NULL, n);
+    if (!string)
+        goto fail;
+
+    s = PyUnicode_AS_UNICODE(string);
+    callresult = callresults;
+
+    for (f = format; *f; f++) {
+        if (*f == '%') {
+            const char* p = f++;
+            int longflag = 0;
+            int longlongflag = 0;
+            int size_tflag = 0;
+            zeropad = (*f == '0');
+            /* parse the width.precision part */
+            width = 0;
+            while (Py_ISDIGIT((unsigned)*f))
+                width = (width*10) + *f++ - '0';
+            precision = 0;
+            if (*f == '.') {
+                f++;
+                while (Py_ISDIGIT((unsigned)*f))
+                    precision = (precision*10) + *f++ - '0';
+            }
+            /* Handle %ld, %lu, %lld and %llu. */
+            if (*f == 'l') {
+                if (f[1] == 'd' || f[1] == 'u') {
+                    longflag = 1;
+                    ++f;
+                }
 #ifdef HAVE_LONG_LONG
-			int longlongflag = 0;
+                else if (f[1] == 'l' &&
+                         (f[2] == 'd' || f[2] == 'u')) {
+                    longlongflag = 1;
+                    f += 2;
+                }
 #endif
-			const char* p = f;
-			while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
-				;
+            }
+            /* handle the size_t flag. */
+            if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+                size_tflag = 1;
+                ++f;
+            }
 
-			/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
-			 * they don't affect the amount of space we reserve.
-			 */
-			if (*f == 'l') {
-				if (f[1] == 'd' || f[1] == 'u') {
-					++f;
-				}
+            switch (*f) {
+            case 'c':
+            {
+                int ordinal = va_arg(vargs, int);
+#ifndef Py_UNICODE_WIDE
+                if (ordinal > 0xffff) {
+                    ordinal -= 0x10000;
+                    *s++ = 0xD800 | (ordinal >> 10);
+                    *s++ = 0xDC00 | (ordinal & 0x3FF);
+                } else
+#endif
+                *s++ = ordinal;
+                break;
+            }
+            case 'd':
+                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
+                        width, precision, 'd');
+                if (longflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, long));
 #ifdef HAVE_LONG_LONG
-				else if (f[1] == 'l' &&
-					 (f[2] == 'd' || f[2] == 'u')) {
-					longlongflag = 1;
-					f += 2;
-				}
+                else if (longlongflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, PY_LONG_LONG));
 #endif
-			}
-			else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
-				++f;
-			}
+                else if (size_tflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t));
+                else
+                    sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 'u':
+                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
+                        width, precision, 'u');
+                if (longflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, unsigned long));
+#ifdef HAVE_LONG_LONG
+                else if (longlongflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs,
+                                                    unsigned PY_LONG_LONG));
+#endif
+                else if (size_tflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, size_t));
+                else
+                    sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
+                appendstring(realbuffer);
+                break;
+            case 'i':
+                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'i');
+                sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 'x':
+                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
+                sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 's':
+            {
+                /* unused, since we already have the result */
+                (void) va_arg(vargs, char *);
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+                                PyUnicode_GET_SIZE(*callresult));
+                s += PyUnicode_GET_SIZE(*callresult);
+                /* We're done with the unicode()/repr() => forget it */
+                Py_DECREF(*callresult);
+                /* switch to next unicode()/repr() result */
+                ++callresult;
+                break;
+            }
+            case 'U':
+            {
+                PyObject *obj = va_arg(vargs, PyObject *);
+                Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+                s += size;
+                break;
+            }
+            case 'V':
+            {
+                PyObject *obj = va_arg(vargs, PyObject *);
+                va_arg(vargs, const char *);
+                if (obj) {
+                    Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+                    s += size;
+                } else {
+                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+                                    PyUnicode_GET_SIZE(*callresult));
+                    s += PyUnicode_GET_SIZE(*callresult);
+                    Py_DECREF(*callresult);
+                }
+                ++callresult;
+                break;
+            }
+            case 'S':
+            case 'R':
+            case 'A':
+            {
+                Py_UNICODE *ucopy;
+                Py_ssize_t usize;
+                Py_ssize_t upos;
+                /* unused, since we already have the result */
+                (void) va_arg(vargs, PyObject *);
+                ucopy = PyUnicode_AS_UNICODE(*callresult);
+                usize = PyUnicode_GET_SIZE(*callresult);
+                for (upos = 0; upos<usize;)
+                    *s++ = ucopy[upos++];
+                /* We're done with the unicode()/repr() => forget it */
+                Py_DECREF(*callresult);
+                /* switch to next unicode()/repr() result */
+                ++callresult;
+                break;
+            }
+            case 'p':
+                sprintf(buffer, "%p", va_arg(vargs, void*));
+                /* %p is ill-defined:  ensure leading 0x. */
+                if (buffer[1] == 'X')
+                    buffer[1] = 'x';
+                else if (buffer[1] != 'x') {
+                    memmove(buffer+2, buffer, strlen(buffer)+1);
+                    buffer[0] = '0';
+                    buffer[1] = 'x';
+                }
+                appendstring(buffer);
+                break;
+            case '%':
+                *s++ = '%';
+                break;
+            default:
+                appendstring(p);
+                goto end;
+            }
+        }
+        else
+            *s++ = *f;
+    }
 
-			switch (*f) {
-			case 'c':
-				(void)va_arg(count, int);
-				/* fall through... */
-			case '%':
-				n++;
-				break;
-			case 'd': case 'u': case 'i': case 'x':
-				(void) va_arg(count, int);
-#ifdef HAVE_LONG_LONG
-				/* Need at most
-				   ceil(log10(256)*SIZEOF_LONG_LONG) digits,
-				   plus 1 for the sign.  53/22 is an upper
-				   bound for log10(256). */
-				if (longlongflag)
-					n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
-				else
-#endif
-					/* 20 bytes is enough to hold a 64-bit
-					   integer.  Decimal takes the most
-					   space.  This isn't enough for
-					   octal. */
-					n += 20;
-
-				break;
-			case 's':
-				s = va_arg(count, char*);
-				n += strlen(s);
-				break;
-			case 'p':
-				(void) va_arg(count, int);
-				/* maximum 64-bit pointer representation:
-				 * 0xffffffffffffffff
-				 * so 19 characters is enough.
-				 * XXX I count 18 -- what's the extra for?
-				 */
-				n += 19;
-				break;
-			default:
-				/* if we stumble upon an unknown
-				   formatting code, copy the rest of
-				   the format string to the output
-				   string. (we cannot just skip the
-				   code, since there's no way to know
-				   what's in the argument list) */
-				n += strlen(p);
-				goto expand;
-			}
-		} else
-			n++;
-	}
- expand:
-	/* step 2: fill the buffer */
-	/* Since we've analyzed how much space we need for the worst case,
-	   use sprintf directly instead of the slower PyOS_snprintf. */
-	string = PyString_FromStringAndSize(NULL, n);
-	if (!string)
-		return NULL;
-
-	s = PyString_AsString(string);
-
-	for (f = format; *f; f++) {
-		if (*f == '%') {
-			const char* p = f++;
-			Py_ssize_t i;
-			int longflag = 0;
-#ifdef HAVE_LONG_LONG
-			int longlongflag = 0;
-#endif
-			int size_tflag = 0;
-			/* parse the width.precision part (we're only
-			   interested in the precision value, if any) */
-			n = 0;
-			while (isdigit(Py_CHARMASK(*f)))
-				n = (n*10) + *f++ - '0';
-			if (*f == '.') {
-				f++;
-				n = 0;
-				while (isdigit(Py_CHARMASK(*f)))
-					n = (n*10) + *f++ - '0';
-			}
-			while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
-				f++;
-			/* Handle %ld, %lu, %lld and %llu. */
-			if (*f == 'l') {
-				if (f[1] == 'd' || f[1] == 'u') {
-					longflag = 1;
-					++f;
-				}
-#ifdef HAVE_LONG_LONG
-				else if (f[1] == 'l' &&
-					 (f[2] == 'd' || f[2] == 'u')) {
-					longlongflag = 1;
-					f += 2;
-				}
-#endif
-			}
-			/* handle the size_t flag. */
-			else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
-				size_tflag = 1;
-				++f;
-			}
-
-			switch (*f) {
-			case 'c':
-				*s++ = va_arg(vargs, int);
-				break;
-			case 'd':
-				if (longflag)
-					sprintf(s, "%ld", va_arg(vargs, long));
-#ifdef HAVE_LONG_LONG
-				else if (longlongflag)
-					sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
-						va_arg(vargs, PY_LONG_LONG));
-#endif
-				else if (size_tflag)
-					sprintf(s, "%" PY_FORMAT_SIZE_T "d",
-					        va_arg(vargs, Py_ssize_t));
-				else
-					sprintf(s, "%d", va_arg(vargs, int));
-				s += strlen(s);
-				break;
-			case 'u':
-				if (longflag)
-					sprintf(s, "%lu",
-						va_arg(vargs, unsigned long));
-#ifdef HAVE_LONG_LONG
-				else if (longlongflag)
-					sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
-						va_arg(vargs, PY_LONG_LONG));
-#endif
-				else if (size_tflag)
-					sprintf(s, "%" PY_FORMAT_SIZE_T "u",
-					        va_arg(vargs, size_t));
-				else
-					sprintf(s, "%u",
-						va_arg(vargs, unsigned int));
-				s += strlen(s);
-				break;
-			case 'i':
-				sprintf(s, "%i", va_arg(vargs, int));
-				s += strlen(s);
-				break;
-			case 'x':
-				sprintf(s, "%x", va_arg(vargs, int));
-				s += strlen(s);
-				break;
-			case 's':
-				p = va_arg(vargs, char*);
-				i = strlen(p);
-				if (n > 0 && i > n)
-					i = n;
-				Py_MEMCPY(s, p, i);
-				s += i;
-				break;
-			case 'p':
-				sprintf(s, "%p", va_arg(vargs, void*));
-				/* %p is ill-defined:  ensure leading 0x. */
-				if (s[1] == 'X')
-					s[1] = 'x';
-				else if (s[1] != 'x') {
-					memmove(s+2, s, strlen(s)+1);
-					s[0] = '0';
-					s[1] = 'x';
-				}
-				s += strlen(s);
-				break;
-			case '%':
-				*s++ = '%';
-				break;
-			default:
-				strcpy(s, p);
-				s += strlen(s);
-				goto end;
-			}
-		} else
-			*s++ = *f;
-	}
-
- end:
-	_PyString_Resize(&string, s - PyString_AS_STRING(string));
-	return string;
+  end:
+    if (callresults)
+        PyObject_Free(callresults);
+    if (abuffer)
+        PyObject_Free(abuffer);
+    PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
+    return string;
+  fail:
+    if (callresults) {
+        PyObject **callresult2 = callresults;
+        while (callresult2 < callresult) {
+            Py_XDECREF(*callresult2);
+            ++callresult2;
+        }
+        PyObject_Free(callresults);
+    }
+    if (abuffer)
+        PyObject_Free(abuffer);
+    return NULL;
 }
 
 PyObject *
-PyString_FromFormat(const char *format, ...)
+PyUnicode_FromFormat(const char *format, ...)
 {
-	PyObject* ret;
-	va_list vargs;
+    PyObject* ret;
+    va_list vargs;
 
 #ifdef HAVE_STDARG_PROTOTYPES
-	va_start(vargs, format);
+    va_start(vargs, format);
 #else
-	va_start(vargs);
+    va_start(vargs);
 #endif
-	ret = PyString_FromFormatV(format, vargs);
-	va_end(vargs);
-	return ret;
+    ret = PyUnicode_FromFormatV(format, vargs);
+    va_end(vargs);
+    return ret;
 }
+
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -690,7 +690,7 @@
         mod = self.import_extension('foo', [
             ('newexc', 'METH_VARARGS',
              '''
-             char *name = PyString_AsString(PyTuple_GetItem(args, 0));
+             char *name = _PyUnicode_AsString(PyTuple_GetItem(args, 0));
              return PyErr_NewException(name, PyTuple_GetItem(args, 1),
                                        PyTuple_GetItem(args, 2));
              '''
diff --git a/pypy/module/cpyext/test/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py
--- a/pypy/module/cpyext/test/test_frameobject.py
+++ b/pypy/module/cpyext/test/test_frameobject.py
@@ -6,10 +6,10 @@
         module = self.import_extension('foo', [
             ("raise_exception", "METH_NOARGS",
              """
-                 PyObject *py_srcfile = PyString_FromString("filename");
-                 PyObject *py_funcname = PyString_FromString("funcname");
+                 PyObject *py_srcfile = PyUnicode_FromString("filename");
+                 PyObject *py_funcname = PyUnicode_FromString("funcname");
                  PyObject *py_globals = PyDict_New();
-                 PyObject *empty_string = PyString_FromString("");
+                 PyObject *empty_bytes = PyString_FromString("");
                  PyObject *empty_tuple = PyTuple_New(0);
                  PyCodeObject *py_code;
                  PyFrameObject *py_frame;
@@ -22,7 +22,7 @@
                      0,            /*int nlocals,*/
                      0,            /*int stacksize,*/
                      0,            /*int flags,*/
-                     empty_string, /*PyObject *code,*/
+                     empty_bytes,  /*PyObject *code,*/
                      empty_tuple,  /*PyObject *consts,*/
                      empty_tuple,  /*PyObject *names,*/
                      empty_tuple,  /*PyObject *varnames,*/
@@ -31,7 +31,7 @@
                      py_srcfile,   /*PyObject *filename,*/
                      py_funcname,  /*PyObject *name,*/
                      42,           /*int firstlineno,*/
-                     empty_string  /*PyObject *lnotab*/
+                     empty_bytes   /*PyObject *lnotab*/
                  );
 
                  if (!py_code) goto bad;
@@ -48,7 +48,7 @@
              bad:
                  Py_XDECREF(py_srcfile);
                  Py_XDECREF(py_funcname);
-                 Py_XDECREF(empty_string);
+                 Py_XDECREF(empty_bytes);
                  Py_XDECREF(empty_tuple);
                  Py_XDECREF(py_globals);
                  Py_XDECREF(py_code);
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -130,42 +130,6 @@
             ])
         module.getstring()
 
-    def test_format_v(self):
-        module = self.import_extension('foo', [
-            ("test_string_format_v", "METH_VARARGS",
-             '''
-                 return helper("bla %d ble %s\\n",
-                        PyInt_AsLong(PyTuple_GetItem(args, 0)),
-                        PyString_AsString(PyTuple_GetItem(args, 1)));
-             '''
-             )
-            ], prologue='''
-            PyObject* helper(char* fmt, ...)
-            {
-              va_list va;
-              PyObject* res;
-              va_start(va, fmt);
-              res = PyString_FromFormatV(fmt, va);
-              va_end(va);
-              return res;
-            }
-            ''')
-        res = module.test_string_format_v(1, b"xyz")
-        assert res == "bla 1 ble xyz\n"
-
-    def test_format(self):
-        module = self.import_extension('foo', [
-            ("test_string_format", "METH_VARARGS",
-             '''
-                 return PyString_FromFormat("bla %d ble %s\\n",
-                        PyInt_AsLong(PyTuple_GetItem(args, 0)),
-                        PyString_AsString(PyTuple_GetItem(args, 1)));
-             '''
-             )
-            ])
-        res = module.test_string_format(1, b"xyz")
-        assert res == "bla 1 ble xyz\n"
-
 class TestString(BaseApiTest):
     def test_string_resize(self, space, api):
         py_str = new_empty_str(space, 10)
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -74,6 +74,41 @@
         assert len(s) == 4
         assert s == u'aé\x00c'
 
+    def test_format_v(self):
+        module = self.import_extension('foo', [
+            ("test_unicode_format_v", "METH_VARARGS",
+             '''
+                 return helper("bla %d ble %s\\n",
+                        PyInt_AsLong(PyTuple_GetItem(args, 0)),
+                        _PyUnicode_AsString(PyTuple_GetItem(args, 1)));
+             '''
+             )
+            ], prologue='''
+            PyObject* helper(char* fmt, ...)
+            {
+              va_list va;
+              PyObject* res;
+              va_start(va, fmt);
+              res = PyUnicode_FromFormatV(fmt, va);
+              va_end(va);
+              return res;
+            }
+            ''')
+        res = module.test_unicode_format_v(1, "xyz")
+        assert res == "bla 1 ble xyz\n"
+
+    def test_format(self):
+        module = self.import_extension('foo', [
+            ("test_unicode_format", "METH_VARARGS",
+             '''
+                 return PyUnicode_FromFormat("bla %d ble %s\\n",
+                        PyInt_AsLong(PyTuple_GetItem(args, 0)),
+                        _PyUnicode_AsString(PyTuple_GetItem(args, 1)));
+             '''
+             )
+            ])
+        res = module.test_unicode_format(1, "xyz")
+        assert res == "bla 1 ble xyz\n"
 
 
 class TestUnicode(BaseApiTest):
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -383,7 +383,7 @@
 @cpython_api([CONST_STRING], PyObject)
 def PyUnicode_FromString(space, s):
     """Create a Unicode object from an UTF-8 encoded null-terminated char buffer"""
-    w_str = space.wrap(rffi.charp2str(s))
+    w_str = space.wrapbytes(rffi.charp2str(s))
     return space.call_method(w_str, 'decode', space.wrap("utf-8"))
 
 @cpython_api([CONST_STRING, Py_ssize_t], PyObject)


More information about the pypy-commit mailing list