[Python-3000-checkins] r58848 - in python/branches/py3k-pep3137: Lib/test/test_codecs.py Lib/test/test_multibytecodec_support.py Modules/_codecsmodule.c Modules/cjkcodecs/multibytecodec.c

christian.heimes python-3000-checkins at python.org
Mon Nov 5 00:19:08 CET 2007


Author: christian.heimes
Date: Mon Nov  5 00:19:08 2007
New Revision: 58848

Modified:
   python/branches/py3k-pep3137/Lib/test/test_codecs.py
   python/branches/py3k-pep3137/Lib/test/test_multibytecodec_support.py
   python/branches/py3k-pep3137/Modules/_codecsmodule.c
   python/branches/py3k-pep3137/Modules/cjkcodecs/multibytecodec.c
Log:
Patch #1386 from Amaury Forgeot d'Arc

Most codecs still return buffer objects, although the rule is now to return bytes.
This patch adds a test and corrects the failing codecs.
(more PyBytes_* -> PyString_* replacements)

The patch doesn't fix any outstanding bugs, but it removes the rest of the annoying codec warnings. I've modified the patch slightly (the new test checks use self.assert_() instead of a bare assert). Thanks Amaury! :)

Modified: python/branches/py3k-pep3137/Lib/test/test_codecs.py
==============================================================================
--- python/branches/py3k-pep3137/Lib/test/test_codecs.py	(original)
+++ python/branches/py3k-pep3137/Lib/test/test_codecs.py	Mon Nov  5 00:19:08 2007
@@ -1265,7 +1265,9 @@
                 encodedresult = b""
                 for c in s:
                     writer.write(c)
-                    encodedresult += q.read()
+                    chunk = q.read()
+                    self.assert_(type(chunk) is bytes, type(chunk))
+                    encodedresult += chunk
                 q = Queue(b"")
                 reader = codecs.getreader(encoding)(q)
                 decodedresult = ""

Modified: python/branches/py3k-pep3137/Lib/test/test_multibytecodec_support.py
==============================================================================
--- python/branches/py3k-pep3137/Lib/test/test_multibytecodec_support.py	(original)
+++ python/branches/py3k-pep3137/Lib/test/test_multibytecodec_support.py	Mon Nov  5 00:19:08 2007
@@ -52,6 +52,10 @@
                 func = self.encode
             if expected:
                 result = func(source, scheme)[0]
+                if func is self.decode:
+                    self.assert_(type(result) is str, type(result))
+                else:
+                    self.assert_(type(result) is bytes, type(result))
                 self.assertEqual(result, expected)
             else:
                 self.assertRaises(UnicodeError, func, source, scheme)

Modified: python/branches/py3k-pep3137/Modules/_codecsmodule.c
==============================================================================
--- python/branches/py3k-pep3137/Modules/_codecsmodule.c	(original)
+++ python/branches/py3k-pep3137/Modules/_codecsmodule.c	Mon Nov  5 00:19:08 2007
@@ -180,7 +180,7 @@
 			"string is too large to encode");
 			return NULL;
 	}
-	v = PyBytes_FromStringAndSize(NULL, newsize);
+	v = PyString_FromStringAndSize(NULL, newsize);
 
 	if (v == NULL) {
 		return NULL;
@@ -188,11 +188,11 @@
 	else {
 		register Py_ssize_t i;
 		register char c;
-		register char *p = PyBytes_AS_STRING(v);
+		register char *p = PyString_AS_STRING(v);
 
 		for (i = 0; i < size; i++) {
 			/* There's at least enough room for a hex escape */
-			assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
+			assert(newsize - (p - PyString_AS_STRING(v)) >= 4);
 			c = PyString_AS_STRING(str)[i];
 			if (c == '\'' || c == '\\')
 				*p++ = '\\', *p++ = c;
@@ -212,13 +212,13 @@
 				*p++ = c;
 		}
 		*p = '\0';
-		if (PyBytes_Resize(v, (p - PyBytes_AS_STRING(v)))) {
+		if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) {
 			Py_DECREF(v);
 			return NULL;
 		}
 	}
 	
-	return codec_tuple(v, PyBytes_Size(v));
+	return codec_tuple(v, PyString_Size(v));
 }
 
 /* --- Decoder ------------------------------------------------------------ */
@@ -654,7 +654,7 @@
 			  &data, &size, &errors))
 	return NULL;
 
-    return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
+    return codec_tuple(PyString_FromStringAndSize(data, size), size);
 }
 
 static PyObject *
@@ -669,7 +669,7 @@
 			  &data, &size, &errors))
 	return NULL;
 
-    return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
+    return codec_tuple(PyString_FromStringAndSize(data, size), size);
 }
 
 static PyObject *
@@ -688,12 +688,12 @@
     if (PyUnicode_Check(obj)) {
 	data = PyUnicode_AS_DATA(obj);
 	size = PyUnicode_GET_DATA_SIZE(obj);
-	return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
+	return codec_tuple(PyString_FromStringAndSize(data, size), size);
     }
     else {
 	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 	    return NULL;
-	return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
+	return codec_tuple(PyString_FromStringAndSize(data, size), size);
     }
 }
 

Modified: python/branches/py3k-pep3137/Modules/cjkcodecs/multibytecodec.c
==============================================================================
--- python/branches/py3k-pep3137/Modules/cjkcodecs/multibytecodec.c	(original)
+++ python/branches/py3k-pep3137/Modules/cjkcodecs/multibytecodec.c	Mon Nov  5 00:19:08 2007
@@ -175,15 +175,15 @@
 	Py_ssize_t orgpos, orgsize;
 
 	orgpos = (Py_ssize_t)((char *)buf->outbuf -
-				PyBytes_AS_STRING(buf->outobj));
-	orgsize = PyBytes_GET_SIZE(buf->outobj);
-	if (PyBytes_Resize(buf->outobj, orgsize + (
+				PyString_AS_STRING(buf->outobj));
+	orgsize = PyString_GET_SIZE(buf->outobj);
+	if (_PyString_Resize(&buf->outobj, orgsize + (
 	    esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
 		return -1;
 
-	buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
-	buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
-		+ PyBytes_GET_SIZE(buf->outobj);
+	buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
+	buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
+		+ PyString_GET_SIZE(buf->outobj);
 
 	return 0;
 }
@@ -330,11 +330,11 @@
 			goto errorexit;
 	}
 
-        assert(PyBytes_Check(retstr));
-	retstrsize = PyBytes_GET_SIZE(retstr);
+        assert(PyString_Check(retstr));
+	retstrsize = PyString_GET_SIZE(retstr);
 	REQUIRE_ENCODEBUFFER(buf, retstrsize);
 
-	memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
+	memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
 	buf->outbuf += retstrsize;
 
 	newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
@@ -476,16 +476,16 @@
 	Py_ssize_t finalsize, r = 0;
 
 	if (datalen == 0)
-		return PyBytes_FromStringAndSize(NULL, 0);
+		return PyString_FromStringAndSize(NULL, 0);
 
 	buf.excobj = NULL;
 	buf.inbuf = buf.inbuf_top = *data;
 	buf.inbuf_end = buf.inbuf_top + datalen;
-	buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
+	buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
 	if (buf.outobj == NULL)
 		goto errorexit;
-	buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
-	buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
+	buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
+	buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
 
 	while (buf.inbuf < buf.inbuf_end) {
 		Py_ssize_t inleft, outleft;
@@ -520,10 +520,10 @@
 		}
 
 	finalsize = (Py_ssize_t)((char *)buf.outbuf -
-				 PyBytes_AS_STRING(buf.outobj));
+				 PyString_AS_STRING(buf.outobj));
 
-	if (finalsize != PyBytes_GET_SIZE(buf.outobj))
-		if (PyBytes_Resize(buf.outobj, finalsize) == -1)
+	if (finalsize != PyString_GET_SIZE(buf.outobj))
+		if (_PyString_Resize(&buf.outobj, finalsize) == -1)
 			goto errorexit;
 
 	Py_XDECREF(buf.excobj);
@@ -1611,8 +1611,8 @@
 	if (pwrt == NULL)
 		return NULL;
 
-        assert(PyBytes_Check(pwrt));
-	if (PyBytes_Size(pwrt) > 0) {
+        assert(PyString_Check(pwrt));
+	if (PyString_Size(pwrt) > 0) {
 		PyObject *wr;
 		wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
 		if (wr == NULL) {


More information about the Python-3000-checkins mailing list