[Python-checkins] python/dist/src/Lib codecs.py,1.35.2.8,1.35.2.9
loewis@users.sourceforge.net
loewis at users.sourceforge.net
Wed Aug 24 09:38:46 CEST 2005
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24772/Lib
Modified Files:
Tag: release24-maint
codecs.py
Log Message:
Return complete lines from codec stream readers
even if there is an exception in later lines, resulting in
correct line numbers for decoding errors in source code. Fixes #1178484.
Index: codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.35.2.8
retrieving revision 1.35.2.9
diff -u -d -r1.35.2.8 -r1.35.2.9
--- codecs.py 20 Jul 2005 22:52:08 -0000 1.35.2.8
+++ codecs.py 24 Aug 2005 07:38:36 -0000 1.35.2.9
@@ -236,7 +236,7 @@
def decode(self, input, errors='strict'):
raise NotImplementedError
- def read(self, size=-1, chars=-1):
+ def read(self, size=-1, chars=-1, firstline=False):
""" Decodes data from the stream self.stream and returns the
resulting object.
@@ -253,6 +253,11 @@
is intended to prevent having to decode huge files in one
step.
+ If firstline is true and a UnicodeDecodeError happens
+ after the first line terminator in the input, only the first line
+ will be returned; the rest of the input will be kept until the
+ next call to read().
+
The method should use a greedy read strategy meaning that
it should read as much data as is allowed within the
definition of the encoding and the given size, e.g. if
@@ -275,7 +280,16 @@
newdata = self.stream.read(size)
# decode bytes (those remaining from the last call included)
data = self.bytebuffer + newdata
- newchars, decodedbytes = self.decode(data, self.errors)
+ try:
+ newchars, decodedbytes = self.decode(data, self.errors)
+ except UnicodeDecodeError, exc:
+ if firstline:
+ newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
+ lines = newchars.splitlines(True)
+ if len(lines)<=1:
+ raise
+ else:
+ raise
# keep undecoded bytes until the next call
self.bytebuffer = data[decodedbytes:]
# put new characters in the character buffer
@@ -306,7 +320,7 @@
line = ""
# If size is given, we call read() only once
while True:
- data = self.read(readsize)
+ data = self.read(readsize, firstline=True)
if data:
# If we're at a "\r" read one extra character (which might
# be a "\n") to get a proper line ending. If the stream is
More information about the Python-checkins mailing list