[Python-3000-checkins] r45652 - in python/branches/p3yk: Include/Python.h Include/bytesobject.h Lib/test/test_bytes.py Makefile.pre.in Objects/bytesobject.c Objects/object.c Python/bltinmodule.c

guido.van.rossum python-3000-checkins at python.org
Sun Apr 23 01:28:06 CEST 2006


Author: guido.van.rossum
Date: Sun Apr 23 01:28:04 2006
New Revision: 45652

Added:
   python/branches/p3yk/Include/bytesobject.h   (contents, props changed)
   python/branches/p3yk/Lib/test/test_bytes.py   (contents, props changed)
   python/branches/p3yk/Objects/bytesobject.c   (contents, props changed)
Modified:
   python/branches/p3yk/Include/Python.h
   python/branches/p3yk/Makefile.pre.in
   python/branches/p3yk/Objects/object.c
   python/branches/p3yk/Python/bltinmodule.c
Log:
Here is a bytes type.  It's very minimal but it's a start.


Modified: python/branches/p3yk/Include/Python.h
==============================================================================
--- python/branches/p3yk/Include/Python.h	(original)
+++ python/branches/p3yk/Include/Python.h	Sun Apr 23 01:28:04 2006
@@ -78,6 +78,7 @@
 
 #include "pydebug.h"
 
+#include "bytesobject.h"
 #include "unicodeobject.h"
 #include "intobject.h"
 #include "boolobject.h"

Added: python/branches/p3yk/Include/bytesobject.h
==============================================================================
--- (empty file)
+++ python/branches/p3yk/Include/bytesobject.h	Sun Apr 23 01:28:04 2006
@@ -0,0 +1,47 @@
+/* Bytes object interface */
+
+#ifndef Py_BYTESOBJECT_H
+#define Py_BYTESOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+
+/* Type PyBytesObject represents a mutable array of bytes.
+ * The Python API is that of a sequence;
+ * the bytes are mapped to ints in [0, 256).
+ * Bytes are not characters; they may be used to encode characters.
+ * The only way to go between bytes and str/unicode is via encoding
+ * and decoding.
+ * For the convenience of C programmers, the bytes type is considered
+ * to contain a char pointer, not an unsigned char pointer.
+ */
+
+/* Object layout.
+ * The byte data is not stored inline: ob_sval points at a separate buffer
+ * that bytesobject.c obtains from PyMem_Malloc()/PyMem_Realloc() and
+ * releases with PyMem_Free() when the object is deallocated.
+ */
+typedef struct {
+    PyObject_VAR_HEAD	/* ob_size is used as the number of bytes in ob_sval */
+    char *ob_sval;	/* the byte buffer; this code adds no terminating NUL */
+} PyBytesObject;
+
+/* Type object */
+PyAPI_DATA(PyTypeObject) PyBytes_Type;
+
+/* Type check macros */
+#define PyBytes_Check(self) PyObject_TypeCheck(self, &PyBytes_Type)
+#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
+
+/* Direct API functions */
+/* Return a new bytes object holding a copy of the given buffer, or NULL. */
+PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
+/* Return the number of bytes stored in the object. */
+PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
+/* Return the object's own internal buffer pointer (not a copy). */
+PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
+/* Reallocate the buffer to the given length; 0 on success, -1 on failure. */
+PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
+
+/* Macros, trading safety for speed */
+#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
+#define PyBytes_GET_SIZE(self)  (((PyBytesObject *)(self))->ob_size)
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BYTESOBJECT_H */

Added: python/branches/p3yk/Lib/test/test_bytes.py
==============================================================================
--- (empty file)
+++ python/branches/p3yk/Lib/test/test_bytes.py	Sun Apr 23 01:28:04 2006
@@ -0,0 +1,109 @@
+"""Unit tests for the bytes type."""
+
+import sys
+import unittest
+import test.test_support
+
+
+class BytesTest(unittest.TestCase):
+    # Unit tests for the minimal bytes type: construction, indexing,
+    # repr, rich comparison, hashing and the docstring.
+
+    def test_basics(self):
+        # A no-argument call yields an instance whose type is exactly bytes.
+        b = bytes()
+        self.assertEqual(type(b), bytes)
+        self.assertEqual(b.__class__, bytes)
+
+    def test_empty_sequence(self):
+        # An empty bytes has length 0 and rejects every index, positive or
+        # negative, including values beyond sys.maxint in either direction.
+        b = bytes()
+        self.assertEqual(len(b), 0)
+        self.assertRaises(IndexError, lambda: b[0])
+        self.assertRaises(IndexError, lambda: b[1])
+        self.assertRaises(IndexError, lambda: b[sys.maxint])
+        self.assertRaises(IndexError, lambda: b[sys.maxint+1])
+        self.assertRaises(IndexError, lambda: b[10**100])
+        self.assertRaises(IndexError, lambda: b[-1])
+        self.assertRaises(IndexError, lambda: b[-2])
+        self.assertRaises(IndexError, lambda: b[-sys.maxint])
+        self.assertRaises(IndexError, lambda: b[-sys.maxint-1])
+        self.assertRaises(IndexError, lambda: b[-sys.maxint-2])
+        self.assertRaises(IndexError, lambda: b[-10**100])
+
+    def test_from_list(self):
+        # Every value in range(256) round-trips through bytes(); the source
+        # is fed in through a generator expression, i.e. a plain iterable.
+        ints = list(range(256))
+        b = bytes(i for i in ints)
+        self.assertEqual(len(b), 256)
+        self.assertEqual(list(b), ints)
+
+    def test_from_index(self):
+        # Items that are not ints but implement __index__ are accepted, and
+        # the value they report is still range-checked.
+        class C:
+            def __init__(self, i=0):
+                self.i = i
+            def __index__(self):
+                return self.i
+        b = bytes([C(), C(1), C(254), C(255)])
+        self.assertEqual(list(b), [0, 1, 254, 255])
+        self.assertRaises(ValueError, lambda: bytes([C(-1)]))
+        self.assertRaises(ValueError, lambda: bytes([C(256)]))
+
+    def test_constructor_type_errors(self):
+        # Items that cannot be used as an index (str, float, None, a plain
+        # instance without __index__) raise TypeError.
+        class C:
+            pass
+        self.assertRaises(TypeError, lambda: bytes(["0"]))
+        self.assertRaises(TypeError, lambda: bytes([0.0]))
+        self.assertRaises(TypeError, lambda: bytes([None]))
+        self.assertRaises(TypeError, lambda: bytes([C()]))
+
+    def test_constructor_value_errors(self):
+        # Integers outside range(256) raise ValueError, including values
+        # beyond sys.maxint in either direction.
+        self.assertRaises(ValueError, lambda: bytes([-1]))
+        self.assertRaises(ValueError, lambda: bytes([-sys.maxint]))
+        self.assertRaises(ValueError, lambda: bytes([-sys.maxint-1]))
+        self.assertRaises(ValueError, lambda: bytes([-sys.maxint-2]))
+        self.assertRaises(ValueError, lambda: bytes([-10**100]))
+        self.assertRaises(ValueError, lambda: bytes([256]))
+        self.assertRaises(ValueError, lambda: bytes([257]))
+        self.assertRaises(ValueError, lambda: bytes([sys.maxint]))
+        self.assertRaises(ValueError, lambda: bytes([sys.maxint+1]))
+        self.assertRaises(ValueError, lambda: bytes([10**100]))
+
+    def test_repr(self):
+        # repr() is "bytes()" when empty, otherwise a bytes([...]) call with
+        # two-digit lowercase hex literals separated by ", ".
+        self.assertEqual(repr(bytes()), "bytes()")
+        self.assertEqual(repr(bytes([0])), "bytes([0x00])")
+        self.assertEqual(repr(bytes([0, 1, 254, 255])), "bytes([0x00, 0x01, 0xfe, 0xff])")
+
+    def test_compare(self):
+        # Comparison is lexicographic by byte value: b1 == b2, and
+        # [1, 2, 3] < [1, 3] because the second bytes differ (2 < 3).
+        b1 = bytes([1, 2, 3])
+        b2 = bytes([1, 2, 3])
+        b3 = bytes([1, 3])
+
+        # Relations that must hold ...
+        self.failUnless(b1 == b2)
+        self.failUnless(b2 != b3)
+        self.failUnless(b1 <= b2)
+        self.failUnless(b1 <= b3)
+        self.failUnless(b1 <  b3)
+        self.failUnless(b1 >= b2)
+        self.failUnless(b3 >= b2)
+        self.failUnless(b3 >  b2)
+
+        # ... and their negations, which must not.
+        self.failIf(b1 != b2)
+        self.failIf(b2 == b3)
+        self.failIf(b1 >  b2)
+        self.failIf(b1 >  b3)
+        self.failIf(b1 >= b3)
+        self.failIf(b1 <  b2)
+        self.failIf(b3 <  b2)
+        self.failIf(b3 <= b2)
+
+    def test_nohash(self):
+        # hash() of a bytes object raises TypeError.
+        self.assertRaises(TypeError, hash, bytes())
+
+    def test_doc(self):
+        # The type has a docstring and it starts with the call signature.
+        self.failUnless(bytes.__doc__ != None)
+        self.failUnless(bytes.__doc__.startswith("bytes("))
+
+
+def test_main():
+    test.test_support.run_unittest(XrangeTest)
+
+
+if __name__ == "__main__":
+    ##test_main()
+    unittest.main()

Modified: python/branches/p3yk/Makefile.pre.in
==============================================================================
--- python/branches/p3yk/Makefile.pre.in	(original)
+++ python/branches/p3yk/Makefile.pre.in	Sun Apr 23 01:28:04 2006
@@ -278,6 +278,7 @@
 		Objects/abstract.o \
 		Objects/boolobject.o \
 		Objects/bufferobject.o \
+		Objects/bytesobject.o \
 		Objects/cellobject.o \
 		Objects/classobject.o \
 		Objects/cobject.o \
@@ -494,6 +495,7 @@
 		Include/abstract.h \
 		Include/boolobject.h \
 		Include/bufferobject.h \
+		Include/bytesobject.h \
 		Include/ceval.h \
 		Include/classobject.h \
 		Include/cobject.h \

Added: python/branches/p3yk/Objects/bytesobject.c
==============================================================================
--- (empty file)
+++ python/branches/p3yk/Objects/bytesobject.c	Sun Apr 23 01:28:04 2006
@@ -0,0 +1,373 @@
+/* Bytes object implementation */
+
+/* XXX TO DO: optimizations */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+
+/* Direct API functions */
+
+/* Create a new bytes object containing a copy of the first `size` bytes
+ * at `sval`.
+ *
+ * `sval` may be NULL only when `size` is 0, in which case an empty object
+ * (ob_sval == NULL, ob_size == 0) is returned.
+ * Returns a new reference, or NULL with MemoryError set if either the
+ * object or its buffer cannot be allocated.
+ */
+PyObject *
+PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size)
+{
+    PyBytesObject *new;
+
+    if (size != 0) {
+	assert(sval != NULL);
+	assert(size > 0);
+    }
+
+    new = PyObject_New(PyBytesObject, &PyBytes_Type);
+    if (new == NULL)
+	return NULL;
+
+    /* PyObject_New() does not zero the object, so give both fields defined
+       values right away: the empty case returns them as-is, and
+       bytes_dealloc() inspects ob_sval if the allocation below fails. */
+    new->ob_sval = NULL;
+    new->ob_size = 0;
+
+    if (size > 0) {
+	new->ob_sval = PyMem_Malloc(size);
+	if (new->ob_sval == NULL) {
+	    Py_DECREF(new);
+	    /* PyMem_Malloc() does not set an exception itself */
+	    return PyErr_NoMemory();
+	}
+	memcpy(new->ob_sval, sval, size);
+	new->ob_size = size;
+    }
+
+    return (PyObject *)new;
+}
+
+/* Return the number of bytes held by `self`, which must be a bytes object
+ * (only checked by assert()).
+ */
+Py_ssize_t
+PyBytes_Size(PyObject *self)
+{
+    PyBytesObject *bytes;
+
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+
+    bytes = (PyBytesObject *)self;
+    return bytes->ob_size;
+}
+
+/* Return the internal buffer pointer of `self`, which must be a bytes
+ * object (only checked by assert()).  The pointer is the object's own
+ * storage, not a copy.
+ */
+char *
+PyBytes_AsString(PyObject *self)
+{
+    PyBytesObject *bytes;
+
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+
+    bytes = (PyBytesObject *)self;
+    return bytes->ob_sval;
+}
+
+/* Change the length of the bytes object `self` to `size` by reallocating
+ * its buffer.
+ *
+ * Returns 0 on success.  If the reallocation fails, MemoryError is set,
+ * the object's buffer and size are left as they were, and -1 is returned.
+ */
+int
+PyBytes_Resize(PyObject *self, Py_ssize_t size)
+{
+    PyBytesObject *bytes;
+    void *newbuf;
+
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+    assert(size >= 0);
+
+    bytes = (PyBytesObject *)self;
+    newbuf = PyMem_Realloc(bytes->ob_sval, size);
+    if (newbuf != NULL) {
+	/* Only commit the new pointer and size once realloc succeeded */
+	bytes->ob_sval = newbuf;
+	bytes->ob_size = size;
+	return 0;
+    }
+
+    PyErr_NoMemory();
+    return -1;
+}
+
+/* Functions stuffed into the type object */
+
+/* sq_length / mp_length slot: the number of bytes in the object. */
+static Py_ssize_t
+bytes_length(PyBytesObject *self)
+{
+    return PyBytes_GET_SIZE(self);
+}
+
+/* sq_item slot: return the byte at index `i` as an int in range(256).
+ *
+ * A negative index is offset by the length once before the bounds check.
+ * Sets IndexError and returns NULL when the index is out of range.
+ */
+static PyObject *
+bytes_getitem(PyBytesObject *self, Py_ssize_t i)
+{
+    Py_ssize_t size = self->ob_size;
+    Py_ssize_t index = (i < 0) ? i + size : i;
+
+    if (index >= 0 && index < size) {
+	/* Go through unsigned char so bytes >= 0x80 do not come out negative */
+	return PyInt_FromLong((unsigned char)(self->ob_sval[index]));
+    }
+
+    PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+    return NULL;
+}
+
+/* tp_hash slot: always fails.  Sets TypeError and returns -1. */
+static long
+bytes_nohash(PyObject *self)
+{
+    const long failure = -1;
+
+    PyErr_SetString(PyExc_TypeError, "bytes objects are unhashable");
+    return failure;
+}
+
+/* tp_init slot: (re)initialize a bytes object from an optional iterable.
+ *
+ * Any previous contents are discarded first.  With no argument the object
+ * is left empty; otherwise every item produced by iterating over the
+ * argument is converted with PyNumber_Index() and must lie in
+ * range(0, 256).
+ *
+ * Returns 0 on success and -1 with an exception set on failure.  On
+ * failure the bytes appended before the offending item remain in place.
+ */
+static int
+bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"sequence", 0};
+    PyObject *arg = NULL;
+    PyObject *it; /* iter(arg) */
+    PyObject *(*iternext)(PyObject *);
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg))
+	return -1;
+
+    /* Verify bytes invariants established by PyType_GenericAlloc() */
+    if (self->ob_size != 0) {
+	assert(self->ob_sval != NULL);
+	assert(self->ob_size > 0);
+    }
+
+    /* Empty previous contents */
+    if (PyBytes_Resize((PyObject *)self, 0) < 0)
+	return -1;
+
+    /* Quick check if we're done */
+    if (arg == NULL)
+	return 0;
+
+    /* XXX Optimize this if the argument is a list, tuple, or bytes */
+
+    /* Get the iterator.  A failure here (e.g. a non-iterable argument) has
+       already set an exception, so it must be reported as an error rather
+       than returned as success. */
+    it = PyObject_GetIter(arg);
+    if (it == NULL)
+	return -1;
+    iternext = *it->ob_type->tp_iternext;
+
+    /* Run the iterator to exhaustion */
+    for (;;) {
+	PyObject *item;
+	Py_ssize_t value;
+
+	/* Get the next item */
+	item = iternext(it);
+	if (item == NULL) {
+	    /* NULL without an exception, or with StopIteration, both mean
+	       the iterator is exhausted; anything else is a real error */
+	    if (PyErr_Occurred()) {
+		if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+		    goto error;
+		PyErr_Clear();
+	    }
+	    break;
+	}
+
+	/* Interpret it as an int (__index__).  The iterator handed us a new
+	   reference, which is no longer needed once the value has been
+	   extracted, so release it here to avoid leaking every item. */
+	value = PyNumber_Index(item);
+	Py_DECREF(item);
+	if (value == -1 && PyErr_Occurred())
+	    goto error;
+
+	/* Range check */
+	if (value < 0 || value >= 256) {
+	    PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)");
+	    goto error;
+	}
+
+	/* Append the byte */
+	/* XXX Speed this up */
+	if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
+	    goto error;
+	self->ob_sval[self->ob_size-1] = (char)value;
+    }
+
+    /* Clean up and return success */
+    Py_DECREF(it);
+    return 0;
+
+ error:
+    /* Error handling when it != NULL */
+    Py_DECREF(it);
+    return -1;
+}
+
+/* Helper for bytes_repr(): append a new str made from the C string `text`
+ * to `list`.  Returns 0 on success, -1 with an exception set on failure.
+ */
+static int
+bytes_repr_append(PyObject *list, const char *text)
+{
+    PyObject *str;
+    int err;
+
+    str = PyString_FromString(text);
+    if (str == NULL)
+	return -1;
+    err = PyList_Append(list, str);
+    Py_DECREF(str);	/* the list holds its own reference on success */
+    return err;
+}
+
+/* tp_repr slot: "bytes()" for an empty object, otherwise
+ * "bytes([0x.., 0x.., ...])" with one two-digit lowercase hex literal per
+ * byte.  The pieces are collected in a list and joined at the end.
+ * Returns a new str, or NULL with an exception set.
+ */
+static PyObject *
+bytes_repr(PyBytesObject *self)
+{
+    PyObject *list;
+    PyObject *str;
+    PyObject *result;
+    Py_ssize_t i;	/* same type as ob_size: an int index would truncate */
+
+    if (self->ob_size == 0)
+	return PyString_FromString("bytes()");
+
+    list = PyList_New(0);
+    if (list == NULL)
+	return NULL;
+
+    if (bytes_repr_append(list, "bytes([") < 0)
+	goto error;
+
+    for (i = 0; i < self->ob_size; i++) {
+	char buffer[20];
+	/* Always format with the ", " separator; the first element skips
+	   those two characters */
+	sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i]));
+	if (bytes_repr_append(list, (i == 0) ? buffer+2 : buffer) < 0)
+	    goto error;
+    }
+
+    if (bytes_repr_append(list, "])") < 0)
+	goto error;
+
+    /* Join the pieces on the empty string */
+    str = PyString_FromString("");
+    if (str == NULL)
+	goto error;
+
+    result = _PyString_Join(str, list);
+    Py_DECREF(str);
+    Py_DECREF(list);
+    return result;
+
+ error:
+    /* Error handling when list != NULL  */
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* tp_richcompare slot: compare two bytes objects lexicographically by
+ * unsigned byte value; if one is a prefix of the other, the shorter one
+ * orders first.
+ *
+ * Returns a new reference to Py_True or Py_False, or to Py_NotImplemented
+ * when either operand is not a bytes object.
+ */
+static PyObject *
+bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
+{
+    PyObject *res;
+    Py_ssize_t minsize;	/* must match ob_size: an int would truncate */
+    int cmp;
+
+    if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
+	Py_INCREF(Py_NotImplemented);
+	return Py_NotImplemented;
+    }
+
+    if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
+	/* Shortcut: if the lengths differ, the objects differ */
+	cmp = (op == Py_NE);
+    }
+    else {
+	minsize = self->ob_size;
+	if (other->ob_size < minsize)
+	    minsize = other->ob_size;
+
+	cmp = memcmp(self->ob_sval, other->ob_sval, minsize);
+	/* In ISO C, memcmp() guarantees to use unsigned bytes! */
+
+	if (cmp == 0) {
+	    /* Common prefix is equal: the lengths decide the ordering */
+	    if (self->ob_size < other->ob_size)
+		cmp = -1;
+	    else if (self->ob_size > other->ob_size)
+		cmp = 1;
+	}
+
+	/* Turn the three-way result into the truth value of `op` */
+	switch (op) {
+	case Py_LT: cmp = cmp <  0; break;
+	case Py_LE: cmp = cmp <= 0; break;
+	case Py_EQ: cmp = cmp == 0; break;
+	case Py_NE: cmp = cmp != 0; break;
+	case Py_GT: cmp = cmp >  0; break;
+	case Py_GE: cmp = cmp >= 0; break;
+	}
+    }
+
+    res = cmp ? Py_True : Py_False;
+    Py_INCREF(res);
+    return res;
+}
+
+/* tp_dealloc slot: release the byte buffer (if there is one), then hand
+ * the object itself to the type's tp_free.
+ */
+static void
+bytes_dealloc(PyBytesObject *self)
+{
+    char *buffer = self->ob_sval;
+
+    if (buffer != NULL)
+	PyMem_Free(buffer);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* Sequence protocol: only length and item access are provided so far;
+   every other slot is left empty. */
+static PySequenceMethods bytes_as_sequence = {
+    (lenfunc)bytes_length,		/*sq_length*/
+    (binaryfunc)0,			/*sq_concat*/
+    (ssizeargfunc)0,			/*sq_repeat*/
+    (ssizeargfunc)bytes_getitem,	/*sq_item*/
+    (ssizessizeargfunc)0,		/*sq_slice*/
+    0,					/*sq_ass_item*/
+    0,					/*sq_ass_slice*/
+    (objobjproc)0,			/*sq_contains*/
+};
+
+/* Mapping protocol: only the length slot is filled in; subscript and
+   subscript assignment are not provided. */
+static PyMappingMethods bytes_as_mapping = {
+	(lenfunc)bytes_length,		/*mp_length*/
+	(binaryfunc)0,			/*mp_subscript*/
+	0,				/*mp_ass_subscript*/
+};
+
+/* Buffer protocol: not implemented yet, so every slot is empty.
+ * The slots are spelled out because an initializer list containing nothing
+ * but a comment is an empty "{ }", which is not valid ISO C before C23 and
+ * only compiles as a compiler extension.  The XXX notes name the functions
+ * that are still to be written.
+ */
+static PyBufferProcs bytes_as_buffer = {
+	0,	/* bf_getreadbuffer  -- XXX bytes_buffer_getreadbuf */
+	0,	/* bf_getwritebuffer -- XXX bytes_buffer_getwritebuf */
+	0,	/* bf_getsegcount    -- XXX bytes_buffer_getsegcount */
+	0,	/* bf_getcharbuffer  -- XXX bytes_buffer_getcharbuf */
+};
+
+/* Method table: no methods are defined yet, only the terminating
+   sentinel entry. */
+static PyMethodDef
+bytes_methods[] = {
+	{NULL,     NULL}
+};
+
+/* Docstring for the bytes type; installed through the tp_doc slot of
+   PyBytes_Type. */
+PyDoc_STRVAR(bytes_doc,
+"bytes([iterable]) -> new array of bytes.\n\
+\n\
+If an argument is given it must be an iterable yielding ints in range(256).");
+
+/* The bytes type object.  Slots left as 0 are not provided by this type. */
+PyTypeObject PyBytes_Type = {
+    PyObject_HEAD_INIT(&PyType_Type)
+    0,					/* ob_size */
+    "bytes",				/* tp_name */
+    sizeof(PyBytesObject),		/* tp_basicsize */
+    0,					/* tp_itemsize */
+    (destructor)bytes_dealloc,		/* tp_dealloc */
+    0,			 		/* tp_print */
+    0,					/* tp_getattr */
+    0,					/* tp_setattr */
+    0,					/* tp_compare */
+    (reprfunc)bytes_repr,		/* tp_repr */
+    0,					/* tp_as_number */
+    &bytes_as_sequence,			/* tp_as_sequence */
+    &bytes_as_mapping,			/* tp_as_mapping */
+    bytes_nohash, 			/* tp_hash */
+    0,					/* tp_call */
+    0,					/* tp_str */
+    PyObject_GenericGetAttr,		/* tp_getattro */
+    0,					/* tp_setattro */
+    &bytes_as_buffer,			/* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* no Py_TPFLAGS_BASETYPE: bytes is 'final' or 'sealed' */
+    bytes_doc,				/* tp_doc */
+    0,					/* tp_traverse */
+    0,					/* tp_clear */
+    (richcmpfunc)bytes_richcompare,	/* tp_richcompare */
+    0,					/* tp_weaklistoffset */
+    0,					/* tp_iter */
+    0,					/* tp_iternext */
+    bytes_methods,			/* tp_methods */
+    0,					/* tp_members */
+    0,					/* tp_getset */
+    0,					/* tp_base */
+    0,					/* tp_dict */
+    0,					/* tp_descr_get */
+    0,					/* tp_descr_set */
+    0,					/* tp_dictoffset */
+    (initproc)bytes_init,		/* tp_init */
+    PyType_GenericAlloc,		/* tp_alloc */
+    PyType_GenericNew,			/* tp_new */
+    PyObject_Del,	                /* tp_free */
+};

Modified: python/branches/p3yk/Objects/object.c
==============================================================================
--- python/branches/p3yk/Objects/object.c	(original)
+++ python/branches/p3yk/Objects/object.c	Sun Apr 23 01:28:04 2006
@@ -1881,6 +1881,9 @@
 	if (PyType_Ready(&PyBool_Type) < 0)
 		Py_FatalError("Can't initialize 'bool'");
 
+	if (PyType_Ready(&PyBytes_Type) < 0)
+		Py_FatalError("Can't initialize 'bytes'");
+
 	if (PyType_Ready(&PyString_Type) < 0)
 		Py_FatalError("Can't initialize 'str'");
 

Modified: python/branches/p3yk/Python/bltinmodule.c
==============================================================================
--- python/branches/p3yk/Python/bltinmodule.c	(original)
+++ python/branches/p3yk/Python/bltinmodule.c	Sun Apr 23 01:28:04 2006
@@ -2139,6 +2139,7 @@
 	SETBUILTIN("basestring",	&PyBaseString_Type);
 	SETBUILTIN("bool",		&PyBool_Type);
 	SETBUILTIN("buffer",		&PyBuffer_Type);
+	SETBUILTIN("bytes",		&PyBytes_Type);
 	SETBUILTIN("classmethod",	&PyClassMethod_Type);
 #ifndef WITHOUT_COMPLEX
 	SETBUILTIN("complex",		&PyComplex_Type);


More information about the Python-3000-checkins mailing list