[Python-checkins] cpython: Misc improvements to collections.deque()

raymond.hettinger python-checkins at python.org
Mon Jun 24 00:47:41 CEST 2013


http://hg.python.org/cpython/rev/cb70776241bb
changeset:   84303:cb70776241bb
parent:      84298:44c8a9d80595
user:        Raymond Hettinger <python at rcn.com>
date:        Sun Jun 23 15:44:33 2013 -0700
summary:
  Misc improvements to collections.deque()

* Clarified comment on the impact of BLOCKLEN on deque_index
  (with a power-of-two, the division and modulo
   computations are done with a right-shift and bitwise-and).

* Clarified comment on the overflow check to note that
  it is general and not just applicable the 64-bit builds.

* In deque._rotate(), the "deque->" indirections are
  factored-out of the loop (loop invariant code motion),
  leaving the code cleaner looking and slightly faster.

* In deque._rotate(), replaced the memcpy() with an
  equivalent loop.  That saved the memcpy setup time
  and allowed the pointers to move in their natural
  leftward and rightward directions.

See comparative timings at:  http://pastebin.com/p0RJnT5N

files:
  Modules/_collectionsmodule.c |  157 +++++++++++++---------
  1 files changed, 89 insertions(+), 68 deletions(-)


diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c
--- a/Modules/_collectionsmodule.c
+++ b/Modules/_collectionsmodule.c
@@ -8,9 +8,10 @@
 */
 
 /* The block length may be set to any number over 1.  Larger numbers
- * reduce the number of calls to the memory allocator but take more
- * memory.  Ideally, (BLOCKLEN+2) should be set to a multiple of the
- * length of a cache line.
+ * reduce the number of calls to the memory allocator, give faster
+ * indexing and rotation, and reduce the link::data overhead ratio.
+ * If the block length is a power-of-two, we also get faster
+ * division/modulo computations during indexing.
  */
 
 #define BLOCKLEN 62
@@ -47,8 +48,8 @@
 
 typedef struct BLOCK {
     struct BLOCK *leftlink;
+    PyObject *data[BLOCKLEN];
     struct BLOCK *rightlink;
-    PyObject *data[BLOCKLEN];
 } block;
 
 #define MAXFREEBLOCKS 10
@@ -58,13 +59,8 @@
 static block *
 newblock(block *leftlink, block *rightlink, Py_ssize_t len) {
     block *b;
-    /* To prevent len from overflowing PY_SSIZE_T_MAX on 64-bit machines, we
-     * refuse to allocate new blocks if the current len is dangerously
-     * close.  There is some extra margin to prevent spurious arithmetic
-     * overflows at various places.  The following check ensures that
-     * the blocks allocated to the deque, in the worst case, can only
-     * have PY_SSIZE_T_MAX-2 entries in total.
-     */
+    /* To prevent len from overflowing PY_SSIZE_T_MAX, we refuse to
+     * allocate new blocks if the current len is nearing overflow. */
     if (len >= PY_SSIZE_T_MAX - 2*BLOCKLEN) {
         PyErr_SetString(PyExc_OverflowError,
                         "cannot add more blocks to the deque");
@@ -413,7 +409,12 @@
 static int
 _deque_rotate(dequeobject *deque, Py_ssize_t n)
 {
-    Py_ssize_t m, len=deque->len, halflen=len>>1;
+    block *leftblock = deque->leftblock;
+    block *rightblock = deque->rightblock;
+    Py_ssize_t leftindex = deque->leftindex;
+    Py_ssize_t rightindex = deque->rightindex;
+    Py_ssize_t len=deque->len, halflen=len>>1;
+    int rv = 0;
 
     if (len <= 1)
         return 0;
@@ -429,76 +430,96 @@
 
     deque->state++;
     while (n > 0) {
-        if (deque->leftindex == 0) {
-            block *b = newblock(NULL, deque->leftblock, len);
-            if (b == NULL)
-                return -1;
-            assert(deque->leftblock->leftlink == NULL);
-            deque->leftblock->leftlink = b;
-            deque->leftblock = b;
-            deque->leftindex = BLOCKLEN;
+        if (leftindex == 0) {
+            block *b = newblock(NULL, leftblock, len);
+            if (b == NULL) {
+                rv = -1;
+                goto done;
+            }
+            assert(leftblock->leftlink == NULL);
+            leftblock->leftlink = b;
+            leftblock = b;
+            leftindex = BLOCKLEN;
         }
-        assert(deque->leftindex > 0);
+        assert(leftindex > 0);
 
-        m = n;
-        if (m > deque->rightindex + 1)
-            m = deque->rightindex + 1;
-        if (m > deque->leftindex)
-            m = deque->leftindex;
-        assert (m > 0 && m <= len);
-        memcpy(&deque->leftblock->data[deque->leftindex - m],
-               &deque->rightblock->data[deque->rightindex + 1 - m],
-               m * sizeof(PyObject *));
-        deque->rightindex -= m;
-        deque->leftindex -= m;
-        n -= m;
+        {
+            PyObject **src, **dest;
+            Py_ssize_t m = n;
 
-        if (deque->rightindex == -1) {
-            block *prevblock = deque->rightblock->leftlink;
-            assert(deque->rightblock != NULL);
-            assert(deque->leftblock != deque->rightblock);
-            freeblock(deque->rightblock);
+            if (m > rightindex + 1)
+                m = rightindex + 1;
+            if (m > leftindex)
+                m = leftindex;
+            assert (m > 0 && m <= len);
+            src = &rightblock->data[rightindex];
+            dest = &leftblock->data[leftindex - 1];
+            rightindex -= m;
+            leftindex -= m;
+            n -= m;
+            while (m--)
+                *(dest--) = *(src--);
+        }
+
+        if (rightindex == -1) {
+            block *prevblock = rightblock->leftlink;
+            assert(rightblock != NULL);
+            assert(leftblock != rightblock);
+            freeblock(rightblock);
             prevblock->rightlink = NULL;
-            deque->rightblock = prevblock;
-            deque->rightindex = BLOCKLEN - 1;
+            rightblock = prevblock;
+            rightindex = BLOCKLEN - 1;
         }
     }
     while (n < 0) {
-        if (deque->rightindex == BLOCKLEN - 1) {
-            block *b = newblock(deque->rightblock, NULL, len);
-            if (b == NULL)
-                return -1;
-            assert(deque->rightblock->rightlink == NULL);
-            deque->rightblock->rightlink = b;
-            deque->rightblock = b;
-            deque->rightindex = -1;
+        if (rightindex == BLOCKLEN - 1) {
+            block *b = newblock(rightblock, NULL, len);
+            if (b == NULL) {
+                rv = -1;
+                goto done;
+            }
+            assert(rightblock->rightlink == NULL);
+            rightblock->rightlink = b;
+            rightblock = b;
+            rightindex = -1;
         }
-        assert (deque->rightindex < BLOCKLEN - 1);
+        assert (rightindex < BLOCKLEN - 1);
 
-        m = -n;
-        if (m > BLOCKLEN - deque->leftindex)
-            m = BLOCKLEN - deque->leftindex;
-        if (m > BLOCKLEN - 1 - deque->rightindex)
-            m = BLOCKLEN - 1 - deque->rightindex;
-        assert (m > 0 && m <= len);
-        memcpy(&deque->rightblock->data[deque->rightindex + 1],
-               &deque->leftblock->data[deque->leftindex],
-               m * sizeof(PyObject *));
-        deque->leftindex += m;
-        deque->rightindex += m;
-        n += m;
+        {
+            PyObject **src, **dest;
+            Py_ssize_t m = -n;
 
-        if (deque->leftindex == BLOCKLEN) {
-            block *nextblock = deque->leftblock->rightlink;
-            assert(deque->leftblock != deque->rightblock);
-            freeblock(deque->leftblock);
+            if (m > BLOCKLEN - leftindex)
+                m = BLOCKLEN - leftindex;
+            if (m > BLOCKLEN - 1 - rightindex)
+                m = BLOCKLEN - 1 - rightindex;
+            assert (m > 0 && m <= len);
+            src = &leftblock->data[leftindex];
+            dest = &rightblock->data[rightindex + 1];
+            leftindex += m;
+            rightindex += m;
+            n += m;
+            while (m--)
+                *(dest++) = *(src++);
+        }
+
+        if (leftindex == BLOCKLEN) {
+            block *nextblock = leftblock->rightlink;
+            assert(leftblock != rightblock);
+            freeblock(leftblock);
             assert(nextblock != NULL);
             nextblock->leftlink = NULL;
-            deque->leftblock = nextblock;
-            deque->leftindex = 0;
+            leftblock = nextblock;
+            leftindex = 0;
         }
     }
-    return 0;
+done:
+    deque->leftblock = leftblock;
+    deque->rightblock = rightblock;
+    deque->leftindex = leftindex;
+    deque->rightindex = rightindex;
+
+    return rv;
 }
 
 static PyObject *

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list