[Python-checkins] cpython (3.2): Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a

nadeem.vawda python-checkins at python.org
Thu Oct 13 13:45:42 CEST 2011


http://hg.python.org/cpython/rev/d18c80a8c119
changeset:   72907:d18c80a8c119
branch:      3.2
parent:      72855:61eb59516b55
user:        Nadeem Vawda <nadeem.vawda at gmail.com>
date:        Thu Oct 13 13:34:16 2011 +0200
summary:
  Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one.

Also fix the bz2 module, whose classes used the same algorithm.

files:
  Misc/NEWS            |   3 +++
  Modules/_io/fileio.c |  19 ++++---------------
  Modules/bz2module.c  |  19 ++++---------------
  3 files changed, 11 insertions(+), 30 deletions(-)


diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -121,6 +121,9 @@
 Extension Modules
 -----------------
 
+- Issue #13159: FileIO and BZ2File now use a linear-time buffer growth
+  strategy instead of a quadratic-time one.
+
 - Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle
   would be finalized after the reference to its underlying BufferedRWPair's
   writer got cleared by the GC.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -43,12 +43,6 @@
 #define SMALLCHUNK BUFSIZ
 #endif
 
-#if SIZEOF_INT < 4
-#define BIGCHUNK  (512 * 32)
-#else
-#define BIGCHUNK  (512 * 1024)
-#endif
-
 typedef struct {
     PyObject_HEAD
     int fd;
@@ -565,15 +559,10 @@
         }
     }
 #endif
-    if (currentsize > SMALLCHUNK) {
-        /* Keep doubling until we reach BIGCHUNK;
-           then keep adding BIGCHUNK. */
-        if (currentsize <= BIGCHUNK)
-            return currentsize + currentsize;
-        else
-            return currentsize + BIGCHUNK;
-    }
-    return currentsize + SMALLCHUNK;
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. Use a less-than-double
+       growth factor to avoid excessive allocation. */
+    return currentsize + (currentsize >> 3) + 6;
 }
 
 static PyObject *
diff --git a/Modules/bz2module.c b/Modules/bz2module.c
--- a/Modules/bz2module.c
+++ b/Modules/bz2module.c
@@ -218,25 +218,14 @@
 #define SMALLCHUNK BUFSIZ
 #endif
 
-#if SIZEOF_INT < 4
-#define BIGCHUNK  (512 * 32)
-#else
-#define BIGCHUNK  (512 * 1024)
-#endif
-
 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 static size_t
 Util_NewBufferSize(size_t currentsize)
 {
-    if (currentsize > SMALLCHUNK) {
-        /* Keep doubling until we reach BIGCHUNK;
-           then keep adding BIGCHUNK. */
-        if (currentsize <= BIGCHUNK)
-            return currentsize + currentsize;
-        else
-            return currentsize + BIGCHUNK;
-    }
-    return currentsize + SMALLCHUNK;
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. Use a less-than-double
+       growth factor to avoid excessive allocation. */
+    return currentsize + (currentsize >> 3) + 6;
 }
 
 /* This is a hacked version of Python's fileobject.c:get_line(). */

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list