[Python-checkins] cpython (2.7): Issue #7358: cStringIO.StringIO now supports writing to and reading from a stream larger than 2 GiB on 64-bit systems.
serhiy.storchaka
python-checkins at python.org
Sat Feb 9 12:48:38 CET 2013
http://hg.python.org/cpython/rev/a025b04332fe
changeset: 82089:a025b04332fe
branch: 2.7
parent: 82085:5c2ff6e64c47
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sat Feb 09 13:47:43 2013 +0200
summary:
Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
files:
Lib/test/test_StringIO.py | 40 ++++++++++++
Misc/NEWS | 3 +
Modules/cStringIO.c | 85 ++++++++++++++++----------
3 files changed, 96 insertions(+), 32 deletions(-)
diff --git a/Lib/test/test_StringIO.py b/Lib/test/test_StringIO.py
--- a/Lib/test/test_StringIO.py
+++ b/Lib/test/test_StringIO.py
@@ -5,6 +5,7 @@
import cStringIO
import types
import array
+import sys
from test import test_support
@@ -105,6 +106,45 @@
self._fp.close()
self.assertRaises(ValueError, self._fp.getvalue)
+ @test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
+ def test_reads_from_large_stream(self, size):
+ linesize = 2**26 # 64 MiB
+ lines = ['x' * (linesize - 1) + '\n'] * (size // linesize) + \
+ ['y' * (size % linesize)]
+ f = self.MODULE.StringIO(''.join(lines))
+ for i, expected in enumerate(lines):
+ line = f.read(len(expected))
+ self.assertEqual(len(line), len(expected))
+ self.assertEqual(line, expected)
+ self.assertEqual(f.read(), '')
+ f.seek(0)
+ for i, expected in enumerate(lines):
+ line = f.readline()
+ self.assertEqual(len(line), len(expected))
+ self.assertEqual(line, expected)
+ self.assertEqual(f.readline(), '')
+ f.seek(0)
+ self.assertEqual(f.readlines(), lines)
+ self.assertEqual(f.readlines(), [])
+ f.seek(0)
+ self.assertEqual(f.readlines(size), lines)
+ self.assertEqual(f.readlines(), [])
+
+ # In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
+ # bytes per input character.
+ @test_support.bigmemtest(test_support._2G, memuse=4)
+ def test_writes_to_large_stream(self, size):
+ s = 'x' * 2**26 # 64 MiB
+ f = self.MODULE.StringIO()
+ n = size
+ while n > len(s):
+ f.write(s)
+ n -= len(s)
+ s = None
+ f.write('x' * n)
+ self.assertEqual(len(f.getvalue()), size)
+
+
class TestStringIO(TestGenericStringIO):
MODULE = StringIO
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -202,6 +202,9 @@
Library
-------
+- Issue #7358: cStringIO.StringIO now supports writing to and reading from
+ a stream larger than 2 GiB on 64-bit systems.
+
- Issue #10355: In SpooledTemporaryFile class mode and name properties and
xreadlines method now work for unrolled files. encoding and newlines
properties now removed as they have no sense and always produced
diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c
--- a/Modules/cStringIO.c
+++ b/Modules/cStringIO.c
@@ -170,10 +170,15 @@
n = l;
if (n < 0) n=0;
}
+ if (n > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "length too large");
+ return -1;
+ }
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
((IOobject*)self)->pos += n;
- return n;
+ return (int)n;
}
static PyObject *
@@ -192,26 +197,33 @@
static int
IO_creadline(PyObject *self, char **output) {
- char *n, *s;
- Py_ssize_t l;
+ char *n, *start, *end;
+ Py_ssize_t len;
if (!IO__opencheck(IOOOBJECT(self))) return -1;
- for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
- s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
- n < s && *n != '\n'; n++);
+ n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
+ end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
+ while (n < end && *n != '\n')
+ n++;
- if (n < s) n++;
+ if (n < end) n++;
- *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
- l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
+ len = n - start;
+ if (len > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "length too large");
+ return -1;
+ }
- assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
+ *output=start;
+
+ assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
assert(IOOOBJECT(self)->pos >= 0);
assert(IOOOBJECT(self)->string_size >= 0);
- ((IOobject*)self)->pos += l;
- return (int)l;
+ ((IOobject*)self)->pos += len;
+ return (int)len;
}
static PyObject *
@@ -239,9 +251,9 @@
int n;
char *output;
PyObject *result, *line;
- int hint = 0, length = 0;
+ Py_ssize_t hint = 0, length = 0;
- if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
+ if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
result = PyList_New(0);
if (!result)
@@ -377,31 +389,41 @@
static int
-O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
- Py_ssize_t newl;
+O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
+ Py_ssize_t newpos;
Oobject *oself;
char *newbuf;
if (!IO__opencheck(IOOOBJECT(self))) return -1;
oself = (Oobject *)self;
- newl = oself->pos+l;
- if (newl >= oself->buf_size) {
- oself->buf_size *= 2;
- if (oself->buf_size <= newl) {
- assert(newl + 1 < INT_MAX);
- oself->buf_size = (int)(newl+1);
+ if (len > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "length too large");
+ return -1;
+ }
+ assert(len >= 0);
+ if (oself->pos >= PY_SSIZE_T_MAX - len) {
+ PyErr_SetString(PyExc_OverflowError,
+ "new position too large");
+ return -1;
+ }
+ newpos = oself->pos + len;
+ if (newpos >= oself->buf_size) {
+ size_t newsize = oself->buf_size;
+ newsize *= 2;
+ if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
+ assert(newpos < PY_SSIZE_T_MAX - 1);
+ newsize = newpos + 1;
}
- newbuf = (char*)realloc(oself->buf, oself->buf_size);
+ newbuf = (char*)realloc(oself->buf, newsize);
if (!newbuf) {
PyErr_SetString(PyExc_MemoryError,"out of memory");
- free(oself->buf);
- oself->buf = 0;
- oself->buf_size = oself->pos = 0;
return -1;
- }
+ }
+ oself->buf_size = (Py_ssize_t)newsize;
oself->buf = newbuf;
- }
+ }
if (oself->string_size < oself->pos) {
/* In case of overseek, pad with null bytes the buffer region between
@@ -416,16 +438,15 @@
(oself->pos - oself->string_size) * sizeof(char));
}
- memcpy(oself->buf+oself->pos,c,l);
+ memcpy(oself->buf + oself->pos, c, len);
- assert(oself->pos + l < INT_MAX);
- oself->pos += (int)l;
+ oself->pos = newpos;
if (oself->string_size < oself->pos) {
oself->string_size = oself->pos;
}
- return (int)l;
+ return (int)len;
}
static PyObject *
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list