[Python-checkins] cpython (2.7): Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a
nadeem.vawda
python-checkins at python.org
Thu Oct 13 13:58:30 CEST 2011
http://hg.python.org/cpython/rev/c1c434e30e06
changeset: 72909:c1c434e30e06
branch: 2.7
parent: 72900:3313ce92cef7
user: Nadeem Vawda <nadeem.vawda at gmail.com>
date: Thu Oct 13 13:52:46 2011 +0200
summary:
Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one.
Also fix the builtin file class and the bz2 module, which used the same algorithm.
files:
Misc/NEWS | 3 +++
Modules/_io/fileio.c | 19 ++++---------------
Modules/bz2module.c | 19 ++++---------------
Objects/fileobject.c | 19 ++++---------------
4 files changed, 15 insertions(+), 45 deletions(-)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -229,6 +229,9 @@
Extension Modules
-----------------
+- Issue #13159: FileIO, BZ2File, and the built-in file class now use a
+ linear-time buffer growth strategy instead of a quadratic one.
+
- Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle
would be finalized after the reference to its underlying BufferedRWPair's
writer got cleared by the GC.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -42,12 +42,6 @@
#define SMALLCHUNK BUFSIZ
#endif
-#if SIZEOF_INT < 4
-#define BIGCHUNK (512 * 32)
-#else
-#define BIGCHUNK (512 * 1024)
-#endif
-
typedef struct {
PyObject_HEAD
int fd;
@@ -528,15 +522,10 @@
}
}
#endif
- if (currentsize > SMALLCHUNK) {
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (currentsize <= BIGCHUNK)
- return currentsize + currentsize;
- else
- return currentsize + BIGCHUNK;
- }
- return currentsize + SMALLCHUNK;
+ /* Expand the buffer by an amount proportional to the current size,
+ giving us amortized linear-time behavior. Use a less-than-double
+ growth factor to avoid excessive allocation. */
+ return currentsize + (currentsize >> 3) + 6;
}
static PyObject *
diff --git a/Modules/bz2module.c b/Modules/bz2module.c
--- a/Modules/bz2module.c
+++ b/Modules/bz2module.c
@@ -224,25 +224,14 @@
#define SMALLCHUNK BUFSIZ
#endif
-#if SIZEOF_INT < 4
-#define BIGCHUNK (512 * 32)
-#else
-#define BIGCHUNK (512 * 1024)
-#endif
-
/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
static size_t
Util_NewBufferSize(size_t currentsize)
{
- if (currentsize > SMALLCHUNK) {
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (currentsize <= BIGCHUNK)
- return currentsize + currentsize;
- else
- return currentsize + BIGCHUNK;
- }
- return currentsize + SMALLCHUNK;
+ /* Expand the buffer by an amount proportional to the current size,
+ giving us amortized linear-time behavior. Use a less-than-double
+ growth factor to avoid excessive allocation. */
+ return currentsize + (currentsize >> 3) + 6;
}
/* This is a hacked version of Python's fileobject.c:get_line(). */
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -992,12 +992,6 @@
#define SMALLCHUNK BUFSIZ
#endif
-#if SIZEOF_INT < 4
-#define BIGCHUNK (512 * 32)
-#else
-#define BIGCHUNK (512 * 1024)
-#endif
-
static size_t
new_buffersize(PyFileObject *f, size_t currentsize)
{
@@ -1026,15 +1020,10 @@
/* Add 1 so if the file were to grow we'd notice. */
}
#endif
- if (currentsize > SMALLCHUNK) {
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (currentsize <= BIGCHUNK)
- return currentsize + currentsize;
- else
- return currentsize + BIGCHUNK;
- }
- return currentsize + SMALLCHUNK;
+ /* Expand the buffer by an amount proportional to the current size,
+ giving us amortized linear-time behavior. Use a less-than-double
+ growth factor to avoid excessive allocation. */
+ return currentsize + (currentsize >> 3) + 6;
}
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list