[Python-checkins] cpython (2.7): Issue #7358: cStringIO.StringIO now supports writing to and reading from a stream larger than 2 GiB on 64-bit systems.

serhiy.storchaka python-checkins at python.org
Sat Feb 9 12:48:38 CET 2013


http://hg.python.org/cpython/rev/a025b04332fe
changeset:   82089:a025b04332fe
branch:      2.7
parent:      82085:5c2ff6e64c47
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Feb 09 13:47:43 2013 +0200
summary:
  Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.

files:
  Lib/test/test_StringIO.py |  40 ++++++++++++
  Misc/NEWS                 |   3 +
  Modules/cStringIO.c       |  85 ++++++++++++++++----------
  3 files changed, 96 insertions(+), 32 deletions(-)


diff --git a/Lib/test/test_StringIO.py b/Lib/test/test_StringIO.py
--- a/Lib/test/test_StringIO.py
+++ b/Lib/test/test_StringIO.py
@@ -5,6 +5,7 @@
 import cStringIO
 import types
 import array
+import sys
 from test import test_support
 
 
@@ -105,6 +106,45 @@
         self._fp.close()
         self.assertRaises(ValueError, self._fp.getvalue)
 
+    @test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
+    def test_reads_from_large_stream(self, size):
+        linesize = 2**26 # 64 MiB
+        lines = ['x' * (linesize - 1) + '\n'] * (size // linesize) + \
+                ['y' * (size % linesize)]
+        f = self.MODULE.StringIO(''.join(lines))
+        for i, expected in enumerate(lines):
+            line = f.read(len(expected))
+            self.assertEqual(len(line), len(expected))
+            self.assertEqual(line, expected)
+        self.assertEqual(f.read(), '')
+        f.seek(0)
+        for i, expected in enumerate(lines):
+            line = f.readline()
+            self.assertEqual(len(line), len(expected))
+            self.assertEqual(line, expected)
+        self.assertEqual(f.readline(), '')
+        f.seek(0)
+        self.assertEqual(f.readlines(), lines)
+        self.assertEqual(f.readlines(), [])
+        f.seek(0)
+        self.assertEqual(f.readlines(size), lines)
+        self.assertEqual(f.readlines(), [])
+
+    # In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
+    # bytes per input character.
+    @test_support.bigmemtest(test_support._2G, memuse=4)
+    def test_writes_to_large_stream(self, size):
+        s = 'x' * 2**26 # 64 MiB
+        f = self.MODULE.StringIO()
+        n = size
+        while n > len(s):
+            f.write(s)
+            n -= len(s)
+        s = None
+        f.write('x' * n)
+        self.assertEqual(len(f.getvalue()), size)
+
+
 class TestStringIO(TestGenericStringIO):
     MODULE = StringIO
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -202,6 +202,9 @@
 Library
 -------
 
+- Issue #7358: cStringIO.StringIO now supports writing to and reading from
+  a stream larger than 2 GiB on 64-bit systems.
+
 - Issue #10355: In SpooledTemporaryFile class mode and name properties and
   xreadlines method now work for unrolled files.  encoding and newlines
   properties now removed as they have no sense and always produced
diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c
--- a/Modules/cStringIO.c
+++ b/Modules/cStringIO.c
@@ -170,10 +170,15 @@
         n = l;
         if (n < 0) n=0;
     }
+    if (n > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
 
     *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
     ((IOobject*)self)->pos += n;
-    return n;
+    return (int)n;
 }
 
 static PyObject *
@@ -192,26 +197,33 @@
 
 static int
 IO_creadline(PyObject *self, char **output) {
-    char *n, *s;
-    Py_ssize_t l;
+    char *n, *start, *end;
+    Py_ssize_t len;
 
     if (!IO__opencheck(IOOOBJECT(self))) return -1;
 
-    for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
-           s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
-         n < s && *n != '\n'; n++);
+    n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
+    end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
+    while (n < end && *n != '\n')
+        n++;
 
-    if (n < s) n++;
+    if (n < end) n++;
 
-    *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
-    l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
+    len = n - start;
+    if (len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
 
-    assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
+    *output=start;
+
+    assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
     assert(IOOOBJECT(self)->pos >= 0);
     assert(IOOOBJECT(self)->string_size >= 0);
 
-    ((IOobject*)self)->pos += l;
-    return (int)l;
+    ((IOobject*)self)->pos += len;
+    return (int)len;
 }
 
 static PyObject *
@@ -239,9 +251,9 @@
     int n;
     char *output;
     PyObject *result, *line;
-    int hint = 0, length = 0;
+    Py_ssize_t hint = 0, length = 0;
 
-    if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
+    if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
 
     result = PyList_New(0);
     if (!result)
@@ -377,31 +389,41 @@
 
 
 static int
-O_cwrite(PyObject *self, const char *c, Py_ssize_t  l) {
-    Py_ssize_t newl;
+O_cwrite(PyObject *self, const char *c, Py_ssize_t  len) {
+    Py_ssize_t newpos;
     Oobject *oself;
     char *newbuf;
 
     if (!IO__opencheck(IOOOBJECT(self))) return -1;
     oself = (Oobject *)self;
 
-    newl = oself->pos+l;
-    if (newl >= oself->buf_size) {
-        oself->buf_size *= 2;
-        if (oself->buf_size <= newl) {
-            assert(newl + 1 < INT_MAX);
-            oself->buf_size = (int)(newl+1);
+    if (len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
+    assert(len >= 0);
+    if (oself->pos >= PY_SSIZE_T_MAX - len) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "new position too large");
+        return -1;
+    }
+    newpos = oself->pos + len;
+    if (newpos >= oself->buf_size) {
+        size_t newsize = oself->buf_size;
+        newsize *= 2;
+        if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
+            assert(newpos < PY_SSIZE_T_MAX - 1);
+            newsize = newpos + 1;
         }
-        newbuf = (char*)realloc(oself->buf, oself->buf_size);
+        newbuf = (char*)realloc(oself->buf, newsize);
         if (!newbuf) {
             PyErr_SetString(PyExc_MemoryError,"out of memory");
-            free(oself->buf);
-            oself->buf = 0;
-            oself->buf_size = oself->pos = 0;
             return -1;
-          }
+        }
+        oself->buf_size = (Py_ssize_t)newsize;
         oself->buf = newbuf;
-      }
+    }
 
     if (oself->string_size < oself->pos) {
         /* In case of overseek, pad with null bytes the buffer region between
@@ -416,16 +438,15 @@
                (oself->pos - oself->string_size) * sizeof(char));
     }
 
-    memcpy(oself->buf+oself->pos,c,l);
+    memcpy(oself->buf + oself->pos, c, len);
 
-    assert(oself->pos + l < INT_MAX);
-    oself->pos += (int)l;
+    oself->pos = newpos;
 
     if (oself->string_size < oself->pos) {
         oself->string_size = oself->pos;
     }
 
-    return (int)l;
+    return (int)len;
 }
 
 static PyObject *

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list