Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c	(Revision 58552)
+++ Objects/stringobject.c	(Arbeitskopie)
@@ -3020,16 +3020,169 @@
 static PyObject *
 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-	PyObject *x = NULL;
-	static char *kwlist[] = {"object", 0};
+	/* str8([object[, encoding[, errors]]]) constructor.
+	 *
+	 * - no argument: returns the empty string;
+	 * - unicode object: encoded through the codec registry; an
+	 *   encoding is mandatory;
+	 * - object supporting the buffer interface: its contents are
+	 *   copied as a contiguous ('C' order) byte sequence;
+	 * - any other object: iterated over, each item being converted
+	 *   with __index__ and required to be in range(0, 256).
+	 *
+	 * Subtypes are delegated to str_subtype_new().  Returns a new
+	 * reference, or NULL with an exception set.
+	 */
+	PyObject *x = NULL, *it;
+	PyObject *(*iternext)(PyObject *);
+	const char *encoding = NULL;
+	const char *errors = NULL;
+	PyObject *new = NULL;
+	Py_ssize_t i, size;
+	static char *kwlist[] = {"object", "encoding", "errors", 0};
 
 	if (type != &PyString_Type)
 		return str_subtype_new(type, args, kwds);
-	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
+					 &encoding, &errors))
 		return NULL;
-	if (x == NULL)
+	if (x == NULL) {
+		if (encoding != NULL || errors != NULL) {
+			PyErr_SetString(PyExc_TypeError,
+					"encoding or errors without sequence "
+					"argument");
+			return NULL;
+		}
 		return PyString_FromString("");
-	return PyObject_Str(x);
+	}
+
+	if (PyUnicode_Check(x)) {
+		/* Encode via the codec registry */
+		if (encoding == NULL) {
+			PyErr_SetString(PyExc_TypeError,
+					"string argument without an encoding");
+			return NULL;
+		}
+		new = PyCodec_Encode(x, encoding, errors);
+		if (new == NULL)
+			return NULL;
+		/* XXX(gb): must accept bytes here since codecs output bytes
+		   at the moment */
+		if (PyBytes_Check(new)) {
+			PyObject *str;
+			/* Copy by explicit length: the encoded data may
+			   contain embedded NUL bytes (e.g. UTF-16). */
+			str = PyString_FromStringAndSize(PyBytes_AsString(new),
+							 PyBytes_Size(new));
+			Py_DECREF(new);
+			if (!str)
+				return NULL;
+			return str;
+		}
+		if (!PyString_Check(new)) {
+			PyErr_Format(PyExc_TypeError,
+				     "encoder did not return a str8 "
+				     "object (type=%.400s)",
+				     Py_Type(new)->tp_name);
+			Py_DECREF(new);
+			return NULL;
+		}
+		return new;
+	}
+
+	/* If it's not unicode, there can't be encoding or errors */
+	if (encoding != NULL || errors != NULL) {
+		PyErr_SetString(PyExc_TypeError,
+				"encoding or errors without a string argument");
+		return NULL;
+	}
+
+	/* Use the modern buffer interface */
+	if (PyObject_CheckBuffer(x)) {
+		Py_buffer view;
+		if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
+			return NULL;
+		new = PyString_FromStringAndSize(NULL, view.len);
+		if (!new)
+			goto fail;
+		if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
+					  &view, view.len, 'C') < 0)
+			goto fail;
+		PyObject_ReleaseBuffer(x, &view);
+		return new;
+	  fail:
+		Py_XDECREF(new);
+		PyObject_ReleaseBuffer(x, &view);
+		return NULL;
+	}
+
+	/* For the iterator version, create a string object and resize as needed. */
+	/* XXX(gb): is 64 a good value? also, optimize this if length is known */
+	size = 64;
+	new = PyString_FromStringAndSize(NULL, size);
+	if (new == NULL)
+		return NULL;
+
+	/* XXX Optimize this if the arguments is a list, tuple */
+
+	/* Get the iterator */
+	it = PyObject_GetIter(x);
+	if (it == NULL)
+		goto error;
+	iternext = *Py_Type(it)->tp_iternext;
+
+	/* Run the iterator to exhaustion */
+	for (i = 0; ; i++) {
+		PyObject *item;
+		Py_ssize_t value;
+
+		/* Get the next item */
+		item = iternext(it);
+		if (item == NULL) {
+			if (PyErr_Occurred()) {
+				if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+					goto error;
+				PyErr_Clear();
+			}
+			break;
+		}
+
+		/* Interpret it as an int (__index__) */
+		value = PyNumber_AsSsize_t(item, PyExc_ValueError);
+		Py_DECREF(item);
+		if (value == -1 && PyErr_Occurred())
+			goto error;
+
+		/* Range check */
+		if (value < 0 || value >= 256) {
+			PyErr_SetString(PyExc_ValueError,
+					"bytes must be in range(0, 256)");
+			goto error;
+		}
+
+		/* Append the byte */
+		if (i >= size) {
+			size *= 2;
+			if (_PyString_Resize(&new, size) < 0)
+				goto error;
+		}
+		((PyStringObject *)new)->ob_sval[i] = value;
+	}
+	/* Shrink to the number of bytes actually stored */
+	if (_PyString_Resize(&new, i) < 0)
+		goto error;
+
+	/* Clean up and return success */
+	Py_DECREF(it);
+	return new;
+
+  error:
+	/* it is NULL if PyObject_GetIter() failed; new is NULL if
+	   _PyString_Resize() failed (it frees the string and clears
+	   the pointer), so both need the NULL-safe decref. */
+	Py_XDECREF(it);
+	Py_XDECREF(new);
+	return NULL;
 }
 
 static PyObject *
Index: Lib/struct.py
=================================================================== --- Lib/struct.py (Revision 58552) +++ Lib/struct.py (Arbeitskopie) @@ -36,7 +36,7 @@ class Struct(_Struct): def __init__(self, fmt): if isinstance(fmt, str): - fmt = str8(fmt) + fmt = str8(fmt, 'latin1') _Struct.__init__(self, fmt) _MAXCACHE = 100 Index: Lib/pickletools.py =================================================================== --- Lib/pickletools.py (Revision 58552) +++ Lib/pickletools.py (Arbeitskopie) @@ -1978,7 +1978,7 @@ _dis_test = r""" >>> import pickle ->>> x = [1, 2, (3, 4), {str8('abc'): "def"}] +>>> x = [1, 2, (3, 4), {str8(b'abc'): "def"}] >>> pkl = pickle.dumps(x, 0) >>> dis(pkl) 0: ( MARK Index: Lib/test/test_io.py =================================================================== --- Lib/test/test_io.py (Revision 58552) +++ Lib/test/test_io.py (Arbeitskopie) @@ -88,7 +88,7 @@ self.assertEqual(f.tell(), 6) self.assertEqual(f.seek(-1, 1), 5) self.assertEqual(f.tell(), 5) - self.assertEqual(f.write(str8(" world\n\n\n")), 9) + self.assertEqual(f.write(str8(b" world\n\n\n")), 9) self.assertEqual(f.seek(0), 0) self.assertEqual(f.write(b"h"), 1) self.assertEqual(f.seek(-1, 2), 13) Index: Lib/test/test_codeccallbacks.py =================================================================== --- Lib/test/test_codeccallbacks.py (Revision 58552) +++ Lib/test/test_codeccallbacks.py (Arbeitskopie) @@ -181,7 +181,7 @@ # mapped through the encoding again. This means, that # to be able to use e.g. the "replace" handler, the # charmap has to have a mapping for "?". 
- charmap = dict((ord(c), str8(2*c.upper())) for c in "abcdefgh") + charmap = dict((ord(c), str8(2*c.upper(), 'ascii')) for c in "abcdefgh") sin = "abc" sout = b"AABBCC" self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout) @@ -189,7 +189,7 @@ sin = "abcA" self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) - charmap[ord("?")] = str8("XYZ") + charmap[ord("?")] = str8(b"XYZ") sin = "abcDEF" sout = b"AABBCCXYZXYZXYZ" self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout) @@ -309,7 +309,7 @@ # check with one argument too much self.assertRaises(TypeError, exctype, *(args + ["too much"])) # check with one argument of the wrong type - wrongargs = [ "spam", str8("eggs"), b"spam", 42, 1.0, None ] + wrongargs = [ "spam", str8(b"eggs"), b"spam", 42, 1.0, None ] for i in range(len(args)): for wrongarg in wrongargs: if type(wrongarg) is type(args[i]): Index: Lib/test/test_locale.py =================================================================== --- Lib/test/test_locale.py (Revision 58552) +++ Lib/test/test_locale.py (Arbeitskopie) @@ -82,7 +82,7 @@ # Test BSD Rune locale's bug for isctype functions. def teststrop(s, method, output): - s = str8(s) + s = str8(s, 'latin1') # XXX if verbose: print("%s.%s() =? %s ..." 
% (repr(s), method, repr(output)), end=' ') result = getattr(s, method)() Index: Lib/test/testcodec.py =================================================================== --- Lib/test/testcodec.py (Revision 58552) +++ Lib/test/testcodec.py (Arbeitskopie) @@ -36,7 +36,7 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x78: "abc", # 1-n decoding mapping - str8("abc"): 0x0078,# 1-n encoding mapping + str8(b"abc"): 0x0078,# 1-n encoding mapping 0x01: None, # decoding mapping to 0x79: "", # decoding mapping to }) Index: Lib/test/test_builtin.py =================================================================== --- Lib/test/test_builtin.py (Revision 58552) +++ Lib/test/test_builtin.py (Arbeitskopie) @@ -580,7 +580,8 @@ self.assertEqual(hash(1), hash(1)) self.assertEqual(hash(1), hash(1.0)) hash('spam') - self.assertEqual(hash('spam'), hash(str8('spam'))) + self.assertEqual(hash('spam'), hash(str8(b'spam'))) # remove str8() + # when b"" is immutable hash((0,1,2,3)) def f(): pass self.assertRaises(TypeError, hash, []) Index: Lib/test/test_sys.py =================================================================== --- Lib/test/test_sys.py (Revision 58552) +++ Lib/test/test_sys.py (Arbeitskopie) @@ -300,7 +300,7 @@ def test_intern(self): self.assertRaises(TypeError, sys.intern) - s = str8("never interned before") + s = str8(b"never interned before") self.assert_(sys.intern(s) is s) s2 = s.swapcase().swapcase() self.assert_(sys.intern(s2) is s) @@ -314,7 +314,7 @@ def __hash__(self): return 123 - self.assertRaises(TypeError, sys.intern, S("abc")) + self.assertRaises(TypeError, sys.intern, S(b"abc")) s = "never interned as unicode before" self.assert_(sys.intern(s) is s) Index: Lib/test/test_format.py =================================================================== --- Lib/test/test_format.py (Revision 58552) +++ Lib/test/test_format.py (Arbeitskopie) @@ -40,7 +40,7 @@ print('yes') def testboth(formatstr, *args): - testformat(str8(formatstr), 
*args) + testformat(str8(formatstr, 'ascii'), *args) testformat(formatstr, *args)