[Python-3000-checkins] r45689 - in python/branches/p3yk: Include/bytesobject.h Lib/test/test_bytes.py Lib/test/test_file.py Objects/bytesobject.c Objects/fileobject.c

guido.van.rossum python-3000-checkins at python.org
Mon Apr 24 15:47:06 CEST 2006


Author: guido.van.rossum
Date: Mon Apr 24 15:47:05 2006
New Revision: 45689

Modified:
   python/branches/p3yk/Include/bytesobject.h
   python/branches/p3yk/Lib/test/test_bytes.py
   python/branches/p3yk/Lib/test/test_file.py
   python/branches/p3yk/Objects/bytesobject.c
   python/branches/p3yk/Objects/fileobject.c
Log:
Added much functionality to the bytes type.
Change file.readinto() to require binary mode.


Modified: python/branches/p3yk/Include/bytesobject.h
==============================================================================
--- python/branches/p3yk/Include/bytesobject.h	(original)
+++ python/branches/p3yk/Include/bytesobject.h	Mon Apr 24 15:47:05 2006
@@ -21,7 +21,7 @@
 /* Object layout */
 typedef struct {
     PyObject_VAR_HEAD
-    char *ob_sval;
+    char *ob_bytes;
 } PyBytesObject;
 
 /* Type object */
@@ -32,13 +32,14 @@
 #define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
 
 /* Direct API functions */
+PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
 PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
 PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
 PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
 PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
 
 /* Macros, trading safety for speed */
-#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
+#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
 #define PyBytes_GET_SIZE(self)  (((PyBytesObject *)(self))->ob_size)
 
 #ifdef __cplusplus

Modified: python/branches/p3yk/Lib/test/test_bytes.py
==============================================================================
--- python/branches/p3yk/Lib/test/test_bytes.py	(original)
+++ python/branches/p3yk/Lib/test/test_bytes.py	Mon Apr 24 15:47:05 2006
@@ -1,6 +1,9 @@
 """Unit tests for the bytes type."""
 
+import os
+import re
 import sys
+import tempfile
 import unittest
 import test.test_support
 
@@ -45,7 +48,7 @@
         self.assertRaises(ValueError, bytes, [C(256)])
 
     def test_constructor_type_errors(self):
-        self.assertRaises(TypeError, bytes, 0)
+        self.assertRaises(TypeError, bytes, 0.0)
         class C:
             pass
         self.assertRaises(TypeError, bytes, ["0"])
@@ -100,36 +103,233 @@
         self.failUnless(bytes.__doc__ != None)
         self.failUnless(bytes.__doc__.startswith("bytes("))
 
-    # XXX More stuff to test and build (TDD):
-    # constructor from str: bytes(<str>) == bytes(map(ord, <str>))?
-    # encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]])
-    # default encoding Latin-1? (Matching ord)
-    # slicing
-    # extended slicing?
-    # item assignment
-    # slice assignment
-    # extended slice assignment?
-    # __contains__ with simple int arg
-    # __contains__ with another bytes arg?
-    # find/index? (int or bytes arg?)
-    # count? (int arg)
-    # concatenation (+)
-    # repeat?
-    # extend?
-    # append?
-    # insert?
-    # pop?
-    # __reversed__?
-    # reverse? (inplace)
-    # NOT sort!
+    def test_buffer_api(self):
+        short_sample = "Hello world\n"
+        sample = short_sample + "x"*(20 - len(short_sample))
+        tfn = tempfile.mktemp()
+        try:
+            # Prepare
+            with open(tfn, "wb") as f:
+                f.write(short_sample)
+            # Test readinto
+            with open(tfn, "rb") as f:
+                b = bytes([ord('x')]*20)
+                n = f.readinto(b)
+            self.assertEqual(n, len(short_sample))
+            self.assertEqual(list(b), map(ord, sample))
+            # Test writing in binary mode
+            with open(tfn, "wb") as f:
+                f.write(b)
+            with open(tfn, "rb") as f:
+                self.assertEqual(f.read(), sample)
+            # Test writing in text mode
+            with open(tfn, "w") as f:
+                f.write(b)
+            with open(tfn, "r") as f:
+                self.assertEqual(f.read(), sample)
+            # Can't use readinto in text mode
+            with open(tfn, "r") as f:
+                self.assertRaises(TypeError, f.readinto, b)
+        finally:
+            try:
+                os.remove(tfn)
+            except os.error:
+                pass
+
+    def test_reversed(self):
+        input = map(ord, "Hello")
+        b = bytes(input)
+        output = list(reversed(b))
+        input.reverse()
+        self.assertEqual(output, input)
+
+    def test_getslice(self):
+        def by(s):
+            return bytes(map(ord, s))
+        b = by("Hello, world")
+
+        self.assertEqual(b[:5], by("Hello"))
+        self.assertEqual(b[1:5], by("ello"))
+        self.assertEqual(b[5:7], by(", "))
+        self.assertEqual(b[7:], by("world"))
+        self.assertEqual(b[7:12], by("world"))
+        self.assertEqual(b[7:100], by("world"))
+
+        self.assertEqual(b[:-7], by("Hello"))
+        self.assertEqual(b[-11:-7], by("ello"))
+        self.assertEqual(b[-7:-5], by(", "))
+        self.assertEqual(b[-5:], by("world"))
+        self.assertEqual(b[-5:12], by("world"))
+        self.assertEqual(b[-5:100], by("world"))
+        self.assertEqual(b[-100:5], by("Hello"))
+
+    def test_regexps(self):
+        def by(s):
+            return bytes(map(ord, s))
+        b = by("Hello, world")
+        self.assertEqual(re.findall(r"\w+", b), [by("Hello"), by("world")])
+
+    def test_setitem(self):
+        b = bytes([1, 2, 3])
+        b[1] = 100
+        self.assertEqual(b, bytes([1, 100, 3]))
+        b[-1] = 200
+        self.assertEqual(b, bytes([1, 100, 200]))
+        class C:
+            def __init__(self, i=0):
+                self.i = i
+            def __index__(self):
+                return self.i
+        b[0] = C(10)
+        self.assertEqual(b, bytes([10, 100, 200]))
+        try:
+            b[3] = 0
+            self.fail("Didn't raise IndexError")
+        except IndexError:
+            pass
+        try:
+            b[-10] = 0
+            self.fail("Didn't raise IndexError")
+        except IndexError:
+            pass
+        try:
+            b[0] = 256
+            self.fail("Didn't raise ValueError")
+        except ValueError:
+            pass
+        try:
+            b[0] = C(-1)
+            self.fail("Didn't raise ValueError")
+        except ValueError:
+            pass
+        try:
+            b[0] = None
+            self.fail("Didn't raise TypeError")
+        except TypeError:
+            pass
+
+    def test_delitem(self):
+        b = bytes(range(10))
+        del b[0]
+        self.assertEqual(b, bytes(range(1, 10)))
+        del b[-1]
+        self.assertEqual(b, bytes(range(1, 9)))
+        del b[4]
+        self.assertEqual(b, bytes([1, 2, 3, 4, 6, 7, 8]))
+
+    def test_setslice(self):
+        b = bytes(range(10))
+        self.assertEqual(list(b), list(range(10)))
+
+        b[0:5] = bytes([1, 1, 1, 1, 1])
+        self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))
+
+        del b[0:-5]
+        self.assertEqual(b, bytes([5, 6, 7, 8, 9]))
+
+        b[0:0] = bytes([0, 1, 2, 3, 4])
+        self.assertEqual(b, bytes(range(10)))
+
+        b[-7:-3] = bytes([100, 101])
+        self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9]))
+
+        b[3:5] = [3, 4, 5, 6]
+        self.assertEqual(b, bytes(range(10)))
+
+    def test_setslice_trap(self):
+        # This test verifies that we correctly handle assigning self
+        # to a slice of self (the old Lambert Meertens trap).
+        b = bytes(range(256))
+        b[8:] = b
+        self.assertEqual(b, bytes(list(range(8)) + list(range(256))))
+
+    def test_encoding(self):
+        sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
+        for enc in ("utf8", "utf16"):
+            b = bytes(sample, enc)
+            self.assertEqual(b, bytes(map(ord, sample.encode(enc))))
+        self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
+        b = bytes(sample, "latin1", "ignore")
+        self.assertEqual(b, bytes(sample[:-4]))
+
+    def test_decode(self):
+        sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
+        for enc in ("utf8", "utf16"):
+            b = bytes(sample, enc)
+            self.assertEqual(b.decode(enc), sample)
+        sample = u"Hello world\n\x80\x81\xfe\xff"
+        b = bytes(sample, "latin1")
+        self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
+        self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
+
+    def test_from_buffer(self):
+        sample = "Hello world\n\x80\x81\xfe\xff"
+        buf = buffer(sample)
+        b = bytes(buf)
+        self.assertEqual(b, bytes(map(ord, sample)))
+
+    def test_to_str(self):
+        sample = "Hello world\n\x80\x81\xfe\xff"
+        b = bytes(sample)
+        self.assertEqual(str(b), sample)
+
+    def test_from_int(self):
+        b = bytes(0)
+        self.assertEqual(b, bytes())
+        b = bytes(10)
+        self.assertEqual(b, bytes([0]*10))
+        b = bytes(10000)
+        self.assertEqual(b, bytes([0]*10000))
+
+    def test_concat(self):
+        b1 = bytes("abc")
+        b2 = bytes("def")
+        self.assertEqual(b1 + b2, bytes("abcdef"))
+        self.assertRaises(TypeError, lambda: b1 + "def")
+        self.assertRaises(TypeError, lambda: "abc" + b2)
+
+    def test_repeat(self):
+        b = bytes("abc")
+        self.assertEqual(b * 3, bytes("abcabcabc"))
+        self.assertEqual(b * 0, bytes())
+        self.assertEqual(b * -1, bytes())
+        self.assertRaises(TypeError, lambda: b * 3.14)
+        self.assertRaises(TypeError, lambda: 3.14 * b)
+        self.assertRaises(MemoryError, lambda: b * sys.maxint)
+        self.assertEqual(bytes('x')*100, bytes('x'*100))
+
+    # Optimizations:
     # __iter__? (optimization)
-    # __str__? (could return "".join(map(chr, self))
-    # decode
-    # buffer API
-    # check that regexp searches work
-    # (I suppose re.sub() returns a string)
-    # file.readinto
-    # file.write
+    # __reversed__? (optimization)
+
+    # XXX Some list methods?
+    # extended slicing
+    # extended slice assignment
+    # extend (same as b[len(b):] = src)
+    # reverse (in-place)
+    # remove
+    # pop
+    # NOT sort!
+    # With int arg:
+    # __contains__
+    # index
+    # count
+    # append
+    # insert
+
+    # XXX Some string methods?  (Those that don't use character properties)
+    # startswith
+    # endswith
+    # find, rfind
+    # __contains__ (bytes arg)
+    # index, rindex (bytes arg)
+    # join
+    # replace
+    # translate
+    # split, rsplit
+    # lstrip, rstrip, strip??
+
+    # XXX pickle and marshal support?
 
 
 def test_main():
@@ -137,5 +337,5 @@
 
 
 if __name__ == "__main__":
-    ##test_main()
-    unittest.main()
+    test_main()
+    ##unittest.main()

Modified: python/branches/p3yk/Lib/test/test_file.py
==============================================================================
--- python/branches/p3yk/Lib/test/test_file.py	(original)
+++ python/branches/p3yk/Lib/test/test_file.py	Mon Apr 24 15:47:05 2006
@@ -67,6 +67,17 @@
 f.close()
 verify(buf == a.tostring()[:n])
 
+# verify readinto refuses text files
+a = array('c', 'x'*10)
+f = open(TESTFN, 'r')
+try:
+    f.readinto(a)
+    raise TestFailed("readinto shouldn't work in text mode")
+except TypeError:
+    pass
+finally:
+    f.close()
+
 # verify writelines with integers
 f = open(TESTFN, 'wb')
 try:
@@ -261,13 +272,13 @@
 
 try:
     # Prepare the testfile
-    bag = open(TESTFN, "w")
+    bag = open(TESTFN, "wb")
     bag.write(filler * nchunks)
     bag.writelines(testlines)
     bag.close()
     # Test for appropriate errors mixing read* and iteration
     for methodname, args in methods:
-        f = open(TESTFN)
+        f = open(TESTFN, 'rb')
         if f.next() != filler:
             raise TestFailed, "Broken testfile"
         meth = getattr(f, methodname)
@@ -286,7 +297,7 @@
     # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
     # 4096 lines of that should get us exactly on the buffer boundary for
     # any power-of-2 buffersize between 4 and 16384 (inclusive).
-    f = open(TESTFN)
+    f = open(TESTFN, 'rb')
     for i in range(nchunks):
         f.next()
     testline = testlines.pop(0)
@@ -328,7 +339,7 @@
         raise TestFailed("readlines() after next() with empty buffer "
                          "failed. Got %r, expected %r" % (line, testline))
     # Reading after iteration hit EOF shouldn't hurt either
-    f = open(TESTFN)
+    f = open(TESTFN, 'rb')
     try:
         for line in f:
             pass

Modified: python/branches/p3yk/Objects/bytesobject.c
==============================================================================
--- python/branches/p3yk/Objects/bytesobject.c	(original)
+++ python/branches/p3yk/Objects/bytesobject.c	Mon Apr 24 15:47:05 2006
@@ -8,27 +8,34 @@
 /* Direct API functions */
 
 PyObject *
-PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size)
+PyBytes_FromObject(PyObject *input)
+{
+    return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
+                                        input, NULL);
+}
+
+PyObject *
+PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
 {
     PyBytesObject *new;
 
-    if (size != 0) {
-	assert(sval != NULL);
-	assert(size > 0);
-    }
+    assert(size >= 0);
 
     new = PyObject_New(PyBytesObject, &PyBytes_Type);
     if (new == NULL)
-	return NULL;
+        return NULL;
 
-    if (size > 0) {
-	new->ob_sval = PyMem_Malloc(size);
-	if (new->ob_sval == NULL) {
-	    Py_DECREF(new);
-	    return NULL;
-	}
-	memcpy(new->ob_sval, sval, size);
-	new->ob_size = size;
+    new->ob_size = size;
+    if (size == 0)
+        new->ob_bytes = NULL;
+    else {
+        new->ob_bytes = PyMem_Malloc(size);
+        if (new->ob_bytes == NULL) {
+            Py_DECREF(new);
+            return NULL;
+        }
+        if (bytes != NULL)
+            memcpy(new->ob_bytes, bytes, size);
     }
     
     return (PyObject *)new;
@@ -49,7 +56,7 @@
     assert(self != NULL);
     assert(PyBytes_Check(self));
 
-    return ((PyBytesObject *)self)->ob_sval;
+    return ((PyBytesObject *)self)->ob_bytes;
 }
 
 int
@@ -61,13 +68,13 @@
     assert(PyBytes_Check(self));
     assert(size >= 0);
 
-    sval = PyMem_Realloc(((PyBytesObject *)self)->ob_sval, size);
+    sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, size);
     if (sval == NULL) {
-	PyErr_NoMemory();
-	return -1;
+        PyErr_NoMemory();
+        return -1;
     }
 
-    ((PyBytesObject *)self)->ob_sval = sval;
+    ((PyBytesObject *)self)->ob_bytes = sval;
     ((PyBytesObject *)self)->ob_size = size;
 
     return 0;
@@ -82,15 +89,178 @@
 }
 
 static PyObject *
+bytes_concat(PyBytesObject *self, PyObject *other)
+{
+    PyBytesObject *result;
+    Py_ssize_t mysize;
+    Py_ssize_t size;
+
+    if (!PyBytes_Check(other)) {
+        PyErr_Format(PyExc_TypeError,
+                     "can't concat bytes to %.100s", other->ob_type->tp_name);
+        return NULL;
+    }
+    
+    mysize = self->ob_size;
+    size = mysize + ((PyBytesObject *)other)->ob_size;
+    if (size < 0)
+        return PyErr_NoMemory();
+    result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
+    if (result != NULL) {
+        memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
+        memcpy(result->ob_bytes + self->ob_size,
+               ((PyBytesObject *)other)->ob_bytes,
+               ((PyBytesObject *)other)->ob_size);
+    }
+    return (PyObject *)result;
+}
+
+static PyObject *
+bytes_repeat(PyBytesObject *self, Py_ssize_t count)
+{
+    PyBytesObject *result;
+    Py_ssize_t mysize;
+    Py_ssize_t size;
+
+    if (count < 0)
+        count = 0;
+    mysize = self->ob_size;
+    size = mysize * count;
+    if (count != 0 && size / count != mysize)
+        return PyErr_NoMemory();
+    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL,  size);
+    if (result != NULL && size != 0) {
+        if (mysize == 1)
+            memset(result->ob_bytes, self->ob_bytes[0], size);
+        else {
+            int i;
+            for (i = 0; i < count; i++)
+                memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
+        }
+    }
+    return (PyObject *)result;
+}
+
+static PyObject *
 bytes_getitem(PyBytesObject *self, Py_ssize_t i)
 {
     if (i < 0)
-	i += self->ob_size;
+        i += self->ob_size;
     if (i < 0 || i >= self->ob_size) {
-	PyErr_SetString(PyExc_IndexError, "bytes index out of range");
-	return NULL;
+        PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+        return NULL;
     }
-    return PyInt_FromLong((unsigned char)(self->ob_sval[i]));
+    return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
+}
+
+static PyObject *
+bytes_getslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi)
+{
+    if (lo < 0)
+        lo = 0;
+    if (hi > self->ob_size)
+        hi = self->ob_size;
+    if (lo >= hi)
+        lo = hi = 0;
+    return PyBytes_FromStringAndSize(self->ob_bytes + lo, hi - lo);
+}
+
+static int
+bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, 
+               PyObject *values)
+{
+    int avail;
+    int needed;
+    char *bytes;
+
+    if (values == NULL) {
+        bytes = NULL;
+        needed = 0;
+    }
+    else if (values == (PyObject *)self || !PyBytes_Check(values)) {
+        /* Make a copy and call this function recursively */
+        int err;
+        values = PyBytes_FromObject(values);
+        if (values == NULL)
+            return -1;
+        err = bytes_setslice(self, lo, hi, values);
+        Py_DECREF(values);
+        return err;
+    }
+    else {
+        assert(PyBytes_Check(values));
+        bytes = ((PyBytesObject *)values)->ob_bytes;
+        needed = ((PyBytesObject *)values)->ob_size;
+    }
+
+    if (lo < 0)
+        lo = 0;
+    if (hi > self->ob_size)
+        hi = self->ob_size;
+
+    avail = hi - lo;
+    if (avail < 0)
+        lo = hi = avail = 0;
+
+    if (avail != needed) {
+        if (avail > needed) {
+            /*
+              0   lo               hi               old_size
+              |   |<----avail----->|<-----tomove------>|
+              |   |<-needed->|<-----tomove------>|
+              0   lo      new_hi              new_size
+            */
+            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
+                    self->ob_size - hi);
+        }
+        if (PyBytes_Resize((PyObject *)self, 
+                           self->ob_size + needed - avail) < 0)
+            return -1;
+        if (avail < needed) {
+            /*
+              0   lo        hi               old_size
+              |   |<-avail->|<-----tomove------>|
+              |   |<----needed---->|<-----tomove------>|
+              0   lo            new_hi              new_size
+             */
+            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
+                    self->ob_size - lo - needed);
+        }
+    }
+
+    if (needed > 0)
+        memcpy(self->ob_bytes + lo, bytes, needed);
+
+    return 0;
+}
+
+static int
+bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
+{
+    Py_ssize_t ival;
+
+    if (i < 0)
+        i += self->ob_size;
+
+    if (i < 0 || i >= self->ob_size) {
+        PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+        return -1;
+    }
+
+    if (value == NULL)
+        return bytes_setslice(self, i, i+1, NULL);
+
+    ival = PyNumber_Index(value);
+    if (ival == -1 && PyErr_Occurred())
+        return -1;
+
+    if (ival < 0 || ival >= 256) {
+        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+        return -1;
+    }
+
+    self->ob_bytes[i] = ival;
+    return 0;
 }
 
 static long
@@ -103,69 +273,138 @@
 static int
 bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwlist[] = {"sequence", 0};
+    static char *kwlist[] = {"source", "encoding", "errors", 0};
     PyObject *arg = NULL;
-    PyObject *it; /* iter(arg) */
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    Py_ssize_t count;
+    PyObject *it;
     PyObject *(*iternext)(PyObject *);
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg))
-	return -1;
-
-    /* Verify list invariants established by PyType_GenericAlloc() */
-    if (self->ob_size != 0) {
-	assert(self->ob_sval != NULL);
-	assert(self->ob_size > 0);
-    }
-
-    /* Empty previous contents */
+    /* Empty previous contents (yes, do this first of all!) */
     if (PyBytes_Resize((PyObject *)self, 0) < 0)
-	return -1;
+        return -1;
 
-    /* Quick check if we're done */
-    if (arg == 0)
-	return 0;
+    /* Parse arguments */
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
+                                     &arg, &encoding, &errors))
+        return -1;
+
+    /* Make a quick exit if no first argument */
+    if (arg == NULL) {
+        if (encoding != NULL || errors != NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "encoding or errors without sequence argument");
+            return -1;
+        }
+        return 0;
+    }
+
+    if (PyUnicode_Check(arg)) {
+        /* Encode via the codec registry */
+        PyObject *encoded;
+        char *bytes;
+        Py_ssize_t size;
+        if (encoding == NULL)
+            encoding = PyUnicode_GetDefaultEncoding();
+        encoded = PyCodec_Encode(arg, encoding, errors);
+        if (encoded == NULL)
+            return -1;
+        if (!PyString_Check(encoded)) {
+            PyErr_Format(PyExc_TypeError,
+                "encoder did not return a string object (type=%.400s)",
+                encoded->ob_type->tp_name);
+            Py_DECREF(encoded);
+            return -1;
+        }
+        bytes = PyString_AS_STRING(encoded);
+        size = PyString_GET_SIZE(encoded);
+        if (PyBytes_Resize((PyObject *)self, size) < 0) {
+            Py_DECREF(encoded);
+            return -1;
+        }
+        memcpy(self->ob_bytes, bytes, size);
+        Py_DECREF(encoded);
+        return 0;
+    }
+
+    /* If it's not unicode, there can't be encoding or errors */
+    if (encoding != NULL || errors != NULL) {
+        PyErr_SetString(PyExc_TypeError,
+                        "encoding or errors without a string argument");
+        return -1;
+    }
+
+    /* Is it an int? */
+    count = PyNumber_Index(arg);
+    if (count == -1 && PyErr_Occurred())
+        PyErr_Clear();
+    else {
+        if (count < 0) {
+            PyErr_SetString(PyExc_ValueError, "negative count");
+            return -1;
+        }
+        if (count > 0) {
+            if (PyBytes_Resize((PyObject *)self, count))
+                return -1;
+            memset(self->ob_bytes, 0, count);
+        }
+        return 0;
+    }
+
+    if (PyObject_CheckReadBuffer(arg)) {
+        const void *bytes;
+        Py_ssize_t size;
+        if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
+            return -1;
+        if (PyBytes_Resize((PyObject *)self, size) < 0)
+            return -1;
+        memcpy(self->ob_bytes, bytes, size);
+        return 0;
+    }
 
-    /* XXX Optimize this if the arguments is a list, tuple, or bytes */
+    /* XXX Optimize this if the argument is a list or tuple */
 
     /* Get the iterator */
     it = PyObject_GetIter(arg);
     if (it == NULL)
-	return -1;
+        return -1;
     iternext = *it->ob_type->tp_iternext;
 
     /* Run the iterator to exhaustion */
     for (;;) {
-	PyObject *item;
-	Py_ssize_t value;
+        PyObject *item;
+        Py_ssize_t value;
 
-	/* Get the next item */
-	item = iternext(it);
-	if (item == NULL) {
-	    if (PyErr_Occurred()) {
-		if (!PyErr_ExceptionMatches(PyExc_StopIteration))
-		    goto error;
-		PyErr_Clear();
-	    }
-	    break;
-	}
-
-	/* Interpret it as an int (__index__) */
-	value = PyNumber_Index(item);
-	Py_DECREF(item);
-	if (value == -1 && PyErr_Occurred())
-	    goto error;
-
-	/* Range check */
-	if (value < 0 || value >= 256) {
-	    PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)");
-	    goto error;
-	}
-
-	/* Append the byte */
-	/* XXX Speed this up */
-	if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
-	    goto error;
-	self->ob_sval[self->ob_size-1] = value;
+        /* Get the next item */
+        item = iternext(it);
+        if (item == NULL) {
+            if (PyErr_Occurred()) {
+                if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+                    goto error;
+                PyErr_Clear();
+            }
+            break;
+        }
+
+        /* Interpret it as an int (__index__) */
+        value = PyNumber_Index(item);
+        Py_DECREF(item);
+        if (value == -1 && PyErr_Occurred())
+            goto error;
+
+        /* Range check */
+        if (value < 0 || value >= 256) {
+            PyErr_SetString(PyExc_ValueError,
+                            "bytes must be in range(0, 256)");
+            goto error;
+        }
+
+        /* Append the byte */
+        /* XXX Speed this up */
+        if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
+            goto error;
+        self->ob_bytes[self->ob_size-1] = value;
     }
 
     /* Clean up and return success */
@@ -188,45 +427,45 @@
     int i;
 
     if (self->ob_size == 0)
-	return PyString_FromString("bytes()");
+        return PyString_FromString("bytes()");
 
     list = PyList_New(0);
     if (list == NULL)
-	return NULL;
+        return NULL;
 
     str = PyString_FromString("bytes([");
     if (str == NULL)
-	goto error;
+        goto error;
 
     err = PyList_Append(list, str);
     Py_DECREF(str);
     if (err < 0)
-	goto error;
+        goto error;
 
     for (i = 0; i < self->ob_size; i++) {
-	char buffer[20];
-	sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i]));
-	str = PyString_FromString((i == 0) ? buffer+2 : buffer);
-	if (str == NULL)
-	    goto error;
-	err = PyList_Append(list, str);
-	Py_DECREF(str);
-	if (err < 0)
-	    goto error;
+        char buffer[20];
+        sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_bytes[i]));
+        str = PyString_FromString((i == 0) ? buffer+2 : buffer);
+        if (str == NULL)
+            goto error;
+        err = PyList_Append(list, str);
+        Py_DECREF(str);
+        if (err < 0)
+            goto error;
     }
 
     str = PyString_FromString("])");
     if (str == NULL)
-	goto error;
+        goto error;
 
     err = PyList_Append(list, str);
     Py_DECREF(str);
     if (err < 0)
-	goto error;
+        goto error;
     
     str = PyString_FromString("");
     if (str == NULL)
-	goto error;
+        goto error;
 
     result = _PyString_Join(str, list);
     Py_DECREF(str);
@@ -240,6 +479,12 @@
 }
 
 static PyObject *
+bytes_str(PyBytesObject *self)
+{
+    return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
+}
+
+static PyObject *
 bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
 {
     PyObject *res;
@@ -247,37 +492,37 @@
     int cmp;
 
     if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
-	Py_INCREF(Py_NotImplemented);
-	return Py_NotImplemented;
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
     }
 
     if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
-	/* Shortcut: if the lengths differ, the objects differ */
-	cmp = (op == Py_NE);
+        /* Shortcut: if the lengths differ, the objects differ */
+        cmp = (op == Py_NE);
     }
     else {
-	minsize = self->ob_size;
-	if (other->ob_size < minsize)
-	    minsize = other->ob_size;
-
-	cmp = memcmp(self->ob_sval, other->ob_sval, minsize);
-	/* In ISO C, memcmp() guarantees to use unsigned bytes! */
-
-	if (cmp == 0) {
-	    if (self->ob_size < other->ob_size)
-		cmp = -1;
-	    else if (self->ob_size > other->ob_size)
-		cmp = 1;
-	}
-
-	switch (op) {
-	case Py_LT: cmp = cmp <  0; break;
-	case Py_LE: cmp = cmp <= 0; break;
-	case Py_EQ: cmp = cmp == 0; break;
-	case Py_NE: cmp = cmp != 0; break;
-	case Py_GT: cmp = cmp >  0; break;
-	case Py_GE: cmp = cmp >= 0; break;
-	}
+        minsize = self->ob_size;
+        if (other->ob_size < minsize)
+            minsize = other->ob_size;
+
+        cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
+        /* In ISO C, memcmp() guarantees to use unsigned bytes! */
+
+        if (cmp == 0) {
+            if (self->ob_size < other->ob_size)
+                cmp = -1;
+            else if (self->ob_size > other->ob_size)
+                cmp = 1;
+        }
+
+        switch (op) {
+        case Py_LT: cmp = cmp <  0; break;
+        case Py_LE: cmp = cmp <= 0; break;
+        case Py_EQ: cmp = cmp == 0; break;
+        case Py_NE: cmp = cmp != 0; break;
+        case Py_GT: cmp = cmp >  0; break;
+        case Py_GE: cmp = cmp >= 0; break;
+        }
     }
 
     res = cmp ? Py_True : Py_False;
@@ -288,41 +533,89 @@
 static void
 bytes_dealloc(PyBytesObject *self)
 {
-    if (self->ob_sval != 0) {
-	PyMem_Free(self->ob_sval);
+    if (self->ob_bytes != 0) {
+        PyMem_Free(self->ob_bytes);
     }
     self->ob_type->tp_free((PyObject *)self);
 }
 
+static Py_ssize_t
+bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
+{
+    if (index != 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "accessing non-existent string segment");
+        return -1;
+    }
+    *ptr = (void *)self->ob_bytes;
+    return self->ob_size;
+}
+
+static Py_ssize_t
+bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
+{
+    if (lenp)
+        *lenp = self->ob_size;
+    return 1;
+}
+
+PyDoc_STRVAR(decode_doc,
+"B.decode([encoding[,errors]]) -> unicode obect.\n\
+\n\
+Decodes B using the codec registered for encoding. encoding defaults\n\
+to the default encoding. errors may be given to set a different error\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+as well as any other name registerd with codecs.register_error that is\n\
+able to handle UnicodeDecodeErrors.");
+
+static PyObject *
+bytes_decode(PyObject *self, PyObject *args)
+{ 
+    const char *encoding = NULL;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+        return NULL;
+    if (encoding == NULL)
+        encoding = PyUnicode_GetDefaultEncoding();
+    return PyCodec_Decode(self, encoding, errors);
+}
+
 static PySequenceMethods bytes_as_sequence = {
-    (lenfunc)bytes_length,		/*sq_length*/
-    (binaryfunc)0,			/*sq_concat*/
-    (ssizeargfunc)0,			/*sq_repeat*/
-    (ssizeargfunc)bytes_getitem,	/*sq_item*/
-    (ssizessizeargfunc)0,		/*sq_slice*/
-    0,					/*sq_ass_item*/
-    0,					/*sq_ass_slice*/
-    (objobjproc)0,			/*sq_contains*/
+    (lenfunc)bytes_length,              /*sq_length*/
+    (binaryfunc)bytes_concat,           /*sq_concat*/
+    (ssizeargfunc)bytes_repeat,         /*sq_repeat*/
+    (ssizeargfunc)bytes_getitem,        /*sq_item*/
+    (ssizessizeargfunc)bytes_getslice,  /*sq_slice*/
+    (ssizeobjargproc)bytes_setitem,     /*sq_ass_item*/
+    (ssizessizeobjargproc)bytes_setslice, /* sq_ass_slice */
+#if 0
+    (objobjproc)bytes_contains,         /* sq_contains */
+    (binaryfunc)bytes_inplace_concat,   /* sq_inplace_concat */
+    (ssizeargfunc)bytes_inplace_repeat, /* sq_inplace_repeat */
+#endif
 };
 
 static PyMappingMethods bytes_as_mapping = {
-	(lenfunc)bytes_length,
-	(binaryfunc)0,
-	0,
+    (lenfunc)bytes_length,
+    (binaryfunc)0,
+    0,
 };
 
 static PyBufferProcs bytes_as_buffer = {
-/*
-	(readbufferproc)bytes_buffer_getreadbuf,
-	(writebufferproc)bytes_buffer_getwritebuf,
-	(segcountproc)bytes_buffer_getsegcount,
-	(charbufferproc)bytes_buffer_getcharbuf,
-*/
+    (readbufferproc)bytes_getbuffer,
+    (writebufferproc)bytes_getbuffer,
+    (segcountproc)bytes_getsegcount,
+    /* XXX Bytes are not characters! But we need to implement
+       bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
+    (charbufferproc)bytes_getbuffer,
 };
 
 static PyMethodDef
 bytes_methods[] = {
-	{NULL,     NULL}
+    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
+    {NULL,     NULL}
 };
 
 PyDoc_STRVAR(bytes_doc,
@@ -336,39 +629,40 @@
     "bytes",
     sizeof(PyBytesObject),
     0,
-    (destructor)bytes_dealloc,		/* tp_dealloc */
-    0,			 		/* tp_print */
-    0,					/* tp_getattr */
-    0,					/* tp_setattr */
-    0,					/* tp_compare */
-    (reprfunc)bytes_repr,		/* tp_repr */
-    0,					/* tp_as_number */
-    &bytes_as_sequence,			/* tp_as_sequence */
-    &bytes_as_mapping,			/* tp_as_mapping */
-    bytes_nohash, 			/* tp_hash */
-    0,					/* tp_call */
-    0,					/* tp_str */
-    PyObject_GenericGetAttr,		/* tp_getattro */
-    0,					/* tp_setattro */
-    &bytes_as_buffer,			/* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */
-    bytes_doc,				/* tp_doc */
-    0,					/* tp_traverse */
-    0,					/* tp_clear */
-    (richcmpfunc)bytes_richcompare,	/* tp_richcompare */
-    0,					/* tp_weaklistoffset */
-    0,					/* tp_iter */
-    0,					/* tp_iternext */
-    bytes_methods,			/* tp_methods */
-    0,					/* tp_members */
-    0,					/* tp_getset */
-    0,					/* tp_base */
-    0,					/* tp_dict */
-    0,					/* tp_descr_get */
-    0,					/* tp_descr_set */
-    0,					/* tp_dictoffset */
-    (initproc)bytes_init,		/* tp_init */
-    PyType_GenericAlloc,		/* tp_alloc */
-    PyType_GenericNew,			/* tp_new */
-    PyObject_Del,	                /* tp_free */
+    (destructor)bytes_dealloc,          /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_compare */
+    (reprfunc)bytes_repr,               /* tp_repr */
+    0,                                  /* tp_as_number */
+    &bytes_as_sequence,                 /* tp_as_sequence */
+    &bytes_as_mapping,                  /* tp_as_mapping */
+    bytes_nohash,                       /* tp_hash */
+    0,                                  /* tp_call */
+    (reprfunc)bytes_str,                /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    &bytes_as_buffer,                   /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ 
+                                        /* bytes is 'final' or 'sealed' */
+    bytes_doc,                          /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    0,                                  /* tp_iter */
+    0,                                  /* tp_iternext */
+    bytes_methods,                      /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    0,                                  /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    (initproc)bytes_init,               /* tp_init */
+    PyType_GenericAlloc,                /* tp_alloc */
+    PyType_GenericNew,                  /* tp_new */
+    PyObject_Del,                       /* tp_free */
 };

Modified: python/branches/p3yk/Objects/fileobject.c
==============================================================================
--- python/branches/p3yk/Objects/fileobject.c	(original)
+++ python/branches/p3yk/Objects/fileobject.c	Mon Apr 24 15:47:05 2006
@@ -880,6 +880,11 @@
 
 	if (f->f_fp == NULL)
 		return err_closed();
+	if (!f->f_binary) {
+		PyErr_SetString(PyExc_TypeError,
+				"readinto() requires binary mode");
+		return NULL;
+	}
 	/* refuse to mix with f.next() */
 	if (f->f_buf != NULL &&
 	    (f->f_bufend - f->f_bufptr) > 0 &&


More information about the Python-3000-checkins mailing list