[Python-checkins] cpython (2.7): Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one.

nadeem.vawda python-checkins at python.org
Thu Oct 13 13:58:30 CEST 2011


http://hg.python.org/cpython/rev/c1c434e30e06
changeset:   72909:c1c434e30e06
branch:      2.7
parent:      72900:3313ce92cef7
user:        Nadeem Vawda <nadeem.vawda at gmail.com>
date:        Thu Oct 13 13:52:46 2011 +0200
summary:
  Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one.

Also fix the builtin file class and the bz2 module, which used the same algorithm.

files:
  Misc/NEWS            |   3 +++
  Modules/_io/fileio.c |  19 ++++---------------
  Modules/bz2module.c  |  19 ++++---------------
  Objects/fileobject.c |  19 ++++---------------
  4 files changed, 15 insertions(+), 45 deletions(-)


diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -229,6 +229,9 @@
 Extension Modules
 -----------------
 
+- Issue #13159: FileIO, BZ2File, and the built-in file class now use a
+  linear-time buffer growth strategy instead of a quadratic one.
+
 - Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle
   would be finalized after the reference to its underlying BufferedRWPair's
   writer got cleared by the GC.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -42,12 +42,6 @@
 #define SMALLCHUNK BUFSIZ
 #endif
 
-#if SIZEOF_INT < 4
-#define BIGCHUNK  (512 * 32)
-#else
-#define BIGCHUNK  (512 * 1024)
-#endif
-
 typedef struct {
     PyObject_HEAD
     int fd;
@@ -528,15 +522,10 @@
         }
     }
 #endif
-    if (currentsize > SMALLCHUNK) {
-        /* Keep doubling until we reach BIGCHUNK;
-           then keep adding BIGCHUNK. */
-        if (currentsize <= BIGCHUNK)
-            return currentsize + currentsize;
-        else
-            return currentsize + BIGCHUNK;
-    }
-    return currentsize + SMALLCHUNK;
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. Use a less-than-double
+       growth factor to avoid excessive allocation. */
+    return currentsize + (currentsize >> 3) + 6;
 }
 
 static PyObject *
diff --git a/Modules/bz2module.c b/Modules/bz2module.c
--- a/Modules/bz2module.c
+++ b/Modules/bz2module.c
@@ -224,25 +224,14 @@
 #define SMALLCHUNK BUFSIZ
 #endif
 
-#if SIZEOF_INT < 4
-#define BIGCHUNK  (512 * 32)
-#else
-#define BIGCHUNK  (512 * 1024)
-#endif
-
 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 static size_t
 Util_NewBufferSize(size_t currentsize)
 {
-    if (currentsize > SMALLCHUNK) {
-        /* Keep doubling until we reach BIGCHUNK;
-           then keep adding BIGCHUNK. */
-        if (currentsize <= BIGCHUNK)
-            return currentsize + currentsize;
-        else
-            return currentsize + BIGCHUNK;
-    }
-    return currentsize + SMALLCHUNK;
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. Use a less-than-double
+       growth factor to avoid excessive allocation. */
+    return currentsize + (currentsize >> 3) + 6;
 }
 
 /* This is a hacked version of Python's fileobject.c:get_line(). */
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -992,12 +992,6 @@
 #define SMALLCHUNK BUFSIZ
 #endif
 
-#if SIZEOF_INT < 4
-#define BIGCHUNK  (512 * 32)
-#else
-#define BIGCHUNK  (512 * 1024)
-#endif
-
 static size_t
 new_buffersize(PyFileObject *f, size_t currentsize)
 {
@@ -1026,15 +1020,10 @@
         /* Add 1 so if the file were to grow we'd notice. */
     }
 #endif
-    if (currentsize > SMALLCHUNK) {
-        /* Keep doubling until we reach BIGCHUNK;
-           then keep adding BIGCHUNK. */
-        if (currentsize <= BIGCHUNK)
-            return currentsize + currentsize;
-        else
-            return currentsize + BIGCHUNK;
-    }
-    return currentsize + SMALLCHUNK;
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. Use a less-than-double
+       growth factor to avoid excessive allocation. */
+    return currentsize + (currentsize >> 3) + 6;
 }
 
 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list