[Python-checkins] r70208 - in python/branches/py3k: Lib/test/test_io.py Modules/_textio.c
antoine.pitrou
python-checkins at python.org
Sat Mar 7 00:40:56 CET 2009
Author: antoine.pitrou
Date: Sat Mar 7 00:40:56 2009
New Revision: 70208
Log:
Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder
Modified:
python/branches/py3k/Lib/test/test_io.py
python/branches/py3k/Modules/_textio.c
Modified: python/branches/py3k/Lib/test/test_io.py
==============================================================================
--- python/branches/py3k/Lib/test/test_io.py (original)
+++ python/branches/py3k/Lib/test/test_io.py Sat Mar 7 00:40:56 2009
@@ -1915,6 +1915,19 @@
decoder = self.IncrementalNewlineDecoder(decoder, translate=True)
self.check_newline_decoding_utf8(decoder)
+ def test_newline_bytes(self):
+ # Issue 5433: Excessive optimization in IncrementalNewlineDecoder
+ def _check(dec):
+ self.assertEquals(dec.newlines, None)
+ self.assertEquals(dec.decode("\u0D00"), "\u0D00")
+ self.assertEquals(dec.newlines, None)
+ self.assertEquals(dec.decode("\u0A00"), "\u0A00")
+ self.assertEquals(dec.newlines, None)
+ dec = self.IncrementalNewlineDecoder(None, translate=False)
+ _check(dec)
+ dec = self.IncrementalNewlineDecoder(None, translate=True)
+ _check(dec)
+
class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest):
pass
Modified: python/branches/py3k/Modules/_textio.c
==============================================================================
--- python/branches/py3k/Modules/_textio.c (original)
+++ python/branches/py3k/Modules/_textio.c Sat Mar 7 00:40:56 2009
@@ -305,22 +305,40 @@
for the \r *byte* with the libc's optimized memchr.
*/
if (seennl == SEEN_LF || seennl == 0) {
- int has_cr, has_lf;
- has_lf = (seennl == SEEN_LF) ||
- (memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
- has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
- if (has_lf && !has_cr) {
- only_lf = 1;
- seennl = SEEN_LF;
- }
+ only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
}
- if (!self->translate) {
+ if (only_lf) {
+ /* If not already seen, quick scan for a possible "\n" character.
+ (there's nothing else to be done, even when in translation mode)
+ */
+ if (seennl == 0 &&
+ memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
+ Py_UNICODE *s, *end;
+ s = in_str;
+ end = in_str + len;
+ for (;;) {
+ Py_UNICODE c;
+ /* Fast loop for non-control characters */
+ while (*s > '\n')
+ s++;
+ c = *s++;
+ if (c == '\n') {
+ seennl |= SEEN_LF;
+ break;
+ }
+ if (s > end)
+ break;
+ }
+ }
+ /* Finished: we have scanned for newlines, and none of them
+ need translating */
+ }
+ else if (!self->translate) {
Py_UNICODE *s, *end;
+ /* We have already seen all newline types, no need to scan again */
if (seennl == SEEN_ALL)
goto endscan;
- if (only_lf)
- goto endscan;
s = in_str;
end = in_str + len;
for (;;) {
@@ -347,7 +365,7 @@
endscan:
;
}
- else if (!only_lf) {
+ else {
PyObject *translated = NULL;
Py_UNICODE *out_str;
Py_UNICODE *in, *out, *end;
More information about the Python-checkins mailing list