[Python-checkins] r67833 - in sandbox/trunk/io-c: _fileio.c _iomodule.h _textio.c io.c io.py
amaury.forgeotdarc
python-checkins at python.org
Thu Dec 18 00:17:08 CET 2008
Author: amaury.forgeotdarc
Date: Thu Dec 18 00:17:07 2008
New Revision: 67833
Log:
io-c: the IncrementalNewlineDecoder starts working.
Modified:
sandbox/trunk/io-c/_fileio.c
sandbox/trunk/io-c/_iomodule.h
sandbox/trunk/io-c/_textio.c
sandbox/trunk/io-c/io.c
sandbox/trunk/io-c/io.py
Modified: sandbox/trunk/io-c/_fileio.c
==============================================================================
--- sandbox/trunk/io-c/_fileio.c (original)
+++ sandbox/trunk/io-c/_fileio.c Thu Dec 18 00:17:07 2008
@@ -311,7 +311,7 @@
goto error;
}
- if (PyObject_SetAttrString(self, "name", nameobj) < 0)
+ if (PyObject_SetAttrString((PyObject *)self, "name", nameobj) < 0)
goto error;
goto done;
@@ -426,6 +426,31 @@
return PyLong_FromSsize_t(n);
}
+static size_t
+new_buffersize(PyFileIOObject *self, size_t currentsize)
+{
+#ifdef HAVE_FSTAT
+ off_t pos, end;
+ struct stat st;
+ if (fstat(self->fd, &st) == 0) {
+ end = st.st_size;
+ pos = lseek(self->fd, 0L, SEEK_CUR);
+ if (end >= pos && pos >= 0)
+ return currentsize + end - pos + 1;
+ /* Add 1 so if the file were to grow we'd notice. */
+ }
+#endif
+ if (currentsize > SMALLCHUNK) {
+ /* Keep doubling until we reach BIGCHUNK;
+ then keep adding BIGCHUNK. */
+ if (currentsize <= BIGCHUNK)
+ return currentsize + currentsize;
+ else
+ return currentsize + BIGCHUNK;
+ }
+ return currentsize + SMALLCHUNK;
+}
+
static PyObject *
fileio_readall(PyFileIOObject *self)
{
@@ -438,17 +463,7 @@
return NULL;
while (1) {
- Py_ssize_t newsize = (total < SMALLCHUNK) ? SMALLCHUNK : total;
-
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (newsize <= BIGCHUNK) {
- newsize += newsize;
- }
- else {
- /* NOTE: overflow impossible due to limits on BUFSIZ */
- newsize += BIGCHUNK;
- }
+ Py_ssize_t newsize = new_buffersize(self, total);
if (PyBytes_GET_SIZE(result) < newsize) {
if (_PyBytes_Resize(&result, newsize) < 0) {
Modified: sandbox/trunk/io-c/_iomodule.h
==============================================================================
--- sandbox/trunk/io-c/_iomodule.h (original)
+++ sandbox/trunk/io-c/_iomodule.h Thu Dec 18 00:17:07 2008
@@ -12,6 +12,7 @@
extern PyTypeObject PyBufferedRWPair_Type;
extern PyTypeObject PyBufferedRandom_Type;
extern PyTypeObject PyTextIOWrapper_Type;
+extern PyTypeObject PyIncrementalNewlineDecoder_Type;
extern PyObject* _PyIOBase_checkReadable(PyObject *self, PyObject *unused);
extern PyObject* _PyIOBase_checkWritable(PyObject *self, PyObject *unused);
Modified: sandbox/trunk/io-c/_textio.c
==============================================================================
--- sandbox/trunk/io-c/_textio.c (original)
+++ sandbox/trunk/io-c/_textio.c Thu Dec 18 00:17:07 2008
@@ -43,6 +43,245 @@
/* XXX properties: encoding, newlines */
+/* IncrementalNewlineDecoder */
+
+PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
+ "Codec used when reading a file in universal newlines mode. It wraps\n"
+ "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
+ "records the types of newlines encountered. When used with\n"
+ "translate=False, it ensures that the newline sequence is returned in\n"
+ "one piece.\n"
+ );
+
+typedef struct {
+ PyObject_HEAD
+ PyObject *decoder;
+ PyObject *errors;
+ int pendingcr:1;
+ int translate:1;
+ int seennl:3;
+} PyNewLineDecoderObject;
+
+static int
+IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
+ PyObject *args, PyObject *kwds)
+{
+ PyObject *decoder;
+ int translate;
+ PyObject *errors = NULL;
+ char *kwlist[] = {"decoder", "translate", "errors", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
+ kwlist, &decoder, &translate, &errors))
+ return -1;
+
+ self->decoder = decoder;
+ Py_INCREF(decoder);
+
+ if (errors == NULL) {
+ self->errors = PyUnicode_FromString("strict");
+ if (self->errors == NULL)
+ return -1;
+ }
+ else {
+ Py_INCREF(errors);
+ self->errors = errors;
+ }
+
+ self->translate = translate;
+ self->seennl = 0;
+ self->pendingcr = 0;
+
+ return 0;
+}
+
+#define SEEN_CR 1
+#define SEEN_LF 2
+#define SEEN_CRLF 4
+
+static PyObject *
+IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
+ PyObject *args, PyObject *kwds)
+{
+ char *kwlist[] = {"input", "final", NULL};
+ PyObject *input, *output;
+ int final = 0;
+ Py_ssize_t cr=0, lf=0, crlf=0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
+ kwlist, &input, &final))
+ return NULL;
+
+ if (self->decoder == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "IncrementalNewlineDecoder.__init__ not called");
+ return NULL;
+ }
+
+ /* decode input (a \r held back from a previous pass is prepended below) */
+ output = PyObject_CallMethod(self->decoder, "decode", "Oi", input, final);
+ if (output == NULL)
+ return NULL;
+
+ if (!PyUnicode_Check(output)) {
+ PyErr_SetString(PyExc_TypeError,
+ "decoder should return a string result");
+ goto error;
+ }
+
+ if (self->pendingcr && (final || PyUnicode_GET_SIZE(output) > 0)) {
+ PyObject *modified;
+
+ modified = PyUnicode_FromOrdinal('\r');
+ if (modified == NULL)
+ goto error;
+
+ PyUnicode_Append(&modified, output);
+ if (modified == NULL)
+ goto error;
+
+ Py_DECREF(output);
+ output = modified;
+ self->pendingcr = 0;
+ }
+
+ /* retain last \r even when not translating data:
+ * then readline() is sure to get \r\n in one pass
+ */
+ if (!final) {
+ Py_ssize_t output_len = PyUnicode_GET_SIZE(output);
+
+ if (output_len > 0
+ && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
+
+ PyObject *modified = PyUnicode_FromUnicode(
+ PyUnicode_AS_UNICODE(output),
+ output_len - 1);
+ if (modified == NULL)
+ goto error;
+
+ Py_DECREF(output);
+ output = modified;
+ self->pendingcr = 1;
+ }
+
+ }
+
+ /* Record which newlines are read */
+ {
+ Py_UNICODE c;
+ Py_ssize_t index, previous;
+ int pendingcr = 0;
+
+ previous = '\0';
+
+ for(index = 0; index < PyUnicode_GET_SIZE(output); index++) {
+ c = PyUnicode_AS_UNICODE(output)[index];
+
+ switch (c) {
+ case '\r':
+ cr++;
+ pendingcr = 1;
+ break;
+
+ case '\n':
+ if (pendingcr) {
+ cr--;
+ crlf++;
+ pendingcr = 0;
+ }
+ else
+ lf++;
+ break;
+
+ default:
+ pendingcr = 0;
+ break;
+ }
+ }
+
+ if (cr)
+ self->seennl |= SEEN_CR;
+ if (lf)
+ self->seennl |= SEEN_LF;
+ if (crlf)
+ self->seennl |= SEEN_CRLF;
+ }
+
+ if (self->translate) {
+ PyObject *modified;
+
+ if (crlf) {
+ modified = PyObject_CallMethod(output, "replace", "ss", "\r\n", "\n");
+ if (modified == NULL)
+ goto error;
+ Py_DECREF(output);
+ output = modified;
+ }
+ if (cr) {
+ modified = PyObject_CallMethod(output, "replace", "ss", "\r", "\n");
+ if (modified == NULL)
+ goto error;
+ Py_DECREF(output);
+ output = modified;
+ }
+ }
+
+ return output;
+
+ error:
+ Py_DECREF(output);
+ return NULL;
+}
+
+static PyMethodDef IncrementalNewlineDecoder_methods[] = {
+ {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
+ {0}
+};
+
+PyTypeObject PyIncrementalNewlineDecoder_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "IncrementalNewlineDecoder", /*tp_name*/
+ sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ 0, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare */
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ IncrementalNewlineDecoder_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /*tp_weaklistoffset*/
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ IncrementalNewlineDecoder_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /*tp_dictoffset*/
+ (initproc)IncrementalNewlineDecoder_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+
+
/* TextIOWrapper */
PyDoc_STRVAR(TextIOWrapper_doc,
@@ -165,7 +404,7 @@
if (errors == NULL)
errors = "strict";
- self->chunk_size = 8192;
+ self->chunk_size = 2048;
self->readuniversal = (newline == NULL || newline[0] == '\0');
self->line_buffering = line_buffering;
self->readtranslate = (newline == NULL);
@@ -196,9 +435,13 @@
goto error;
if (self->readuniversal) {
- /* XXX
- * decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
- */
+ PyObject *incrementalDecoder = PyObject_CallFunction(
+ (PyObject *)&PyIncrementalNewlineDecoder_Type,
+ "Oi", self->decoder, (int)self->readtranslate);
+ if (incrementalDecoder == NULL)
+ goto error;
+ Py_CLEAR(self->decoder);
+ self->decoder = incrementalDecoder;
}
}
@@ -649,8 +892,17 @@
/* Rewind decoded_chars to just after the line ending we found. */
TextIOWrapper_rewind_decoded_chars(
self, PyUnicode_GET_SIZE(line) - endpos);
- if (PyUnicode_Resize(&line, endpos) < 0)
- goto error;
+
+ if (PyUnicode_GET_SIZE(line) != endpos) {
+ PyObject *resized = PyUnicode_FromUnicode(
+ PyUnicode_AS_UNICODE(line), endpos);
+
+ if (resized == NULL)
+ goto error;
+
+ Py_DECREF(line);
+ line = resized;
+ }
return line;
error:
Modified: sandbox/trunk/io-c/io.c
==============================================================================
--- sandbox/trunk/io-c/io.c (original)
+++ sandbox/trunk/io-c/io.c Thu Dec 18 00:17:07 2008
@@ -593,6 +593,12 @@
Py_INCREF(&PyTextIOWrapper_Type);
PyModule_AddObject(m, "TextIOWrapper", (PyObject *) &PyTextIOWrapper_Type);
+ /* IncrementalNewlineDecoder */
+ if (PyType_Ready(&PyIncrementalNewlineDecoder_Type) < 0)
+ goto fail;
+ Py_INCREF(&PyIncrementalNewlineDecoder_Type);
+ PyModule_AddObject(m, "IncrementalNewlineDecoder", (PyObject *) &PyIncrementalNewlineDecoder_Type);
+
return m;
fail:
Modified: sandbox/trunk/io-c/io.py
==============================================================================
--- sandbox/trunk/io-c/io.py (original)
+++ sandbox/trunk/io-c/io.py Thu Dec 18 00:17:07 2008
@@ -1296,25 +1296,23 @@
"""
def __init__(self, decoder, translate, errors='strict'):
codecs.IncrementalDecoder.__init__(self, errors=errors)
- self.buffer = b''
self.translate = translate
self.decoder = decoder
self.seennl = 0
+ self.pendingcr = False
def decode(self, input, final=False):
# decode input (with the eventual \r from a previous pass)
- if self.buffer:
- input = self.buffer + input
-
output = self.decoder.decode(input, final=final)
+ if self.pendingcr and (output or final):
+ output = "\r" + output
+ self.pendingcr = False
# retain last \r even when not translating data:
# then readline() is sure to get \r\n in one pass
if output.endswith("\r") and not final:
output = output[:-1]
- self.buffer = b'\r'
- else:
- self.buffer = b''
+ self.pendingcr = True
# Record which newlines are read
crlf = output.count('\r\n')
@@ -1333,20 +1331,19 @@
def getstate(self):
buf, flag = self.decoder.getstate()
- return buf + self.buffer, flag
+ flag <<= 1
+ if self.pendingcr:
+ flag |= 1
+ return buf, flag
def setstate(self, state):
buf, flag = state
- if buf.endswith(b'\r'):
- self.buffer = b'\r'
- buf = buf[:-1]
- else:
- self.buffer = b''
- self.decoder.setstate((buf, flag))
+ self.pendingcr = bool(flag & 1)
+ self.decoder.setstate((buf, flag >> 1))
def reset(self):
self.seennl = 0
- self.buffer = b''
+ self.pendingcr = False
self.decoder.reset()
_LF = 1
More information about the Python-checkins
mailing list