[pypy-svn] pypy fast-forward: Fix newline detection with f.readline() in universal mode

amauryfa commits-noreply at bitbucket.org
Thu Jan 6 09:09:46 CET 2011


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: fast-forward
Changeset: r40412:ecc8f983acae
Date: 2011-01-06 09:11 +0100
http://bitbucket.org/pypy/pypy/changeset/ecc8f983acae/

Log:	Fix newline detection with f.readline() in universal mode: we may
	have to call do_read() several times to be sure that the trailing \r
	is not followed by a \n.

diff --git a/pypy/module/_file/test/test_file.py b/pypy/module/_file/test/test_file.py
--- a/pypy/module/_file/test/test_file.py
+++ b/pypy/module/_file/test/test_file.py
@@ -116,6 +116,16 @@
         f.close()
         assert f.newlines == "\r\n"
 
+        # use readline()
+        f = self.file(self.temppath, "rU")
+        res = f.readline()
+        assert res == "\n"
+        assert f.newlines == "\r\n"
+        res = f.readline()
+        assert res == ""
+        assert f.newlines == "\r\n"
+        f.close()
+
     def test_unicode(self):
         import os
         f = self.file(self.temppath, "w")

diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -934,41 +934,59 @@
         else:
             data = self.do_read(n)
 
-        # The following whole ugly mess is because we need to keep track of
-        # exactly which line separators we have seen for self.newlines,
-        # grumble, grumble.  This has an interesting corner-case.
-        #
-        # Consider a file consisting of exactly one line ending with '\r'.
-        # The first time you read(), you will not know whether it is a
-        # CR separator or half of a CRLF separator.  Neither will be marked
-        # as seen, since you are waiting for your next read to determine
-        # what you have seen.  But there's no more to read ...
-                        
-        if self.atcr:
-            if data.startswith("\n"):
-                data = data[1:]
-                self.CRLF = True
-                if not data:
-                    data = self.do_read(n)
-            else:
-                self.CR = True
-            self.atcr = False
-            
-        for i in range(len(data)):
-            if data[i] == '\n':
-                if i > 0 and data[i-1] == '\r':
+        result = ""
+
+        while True:
+            if not data:
+                break
+
+            # The following whole ugly mess is because we need to keep
+            # track of exactly which line separators we have seen for
+            # self.newlines, grumble, grumble.  This has an
+            # interesting corner-case.
+            #
+            # Consider a file consisting of exactly one line ending
+            # with '\r'.  The first time you read(), you will not know
+            # whether it is a CR separator or half of a CRLF
+            # separator.  Neither will be marked as seen, since you
+            # are waiting for your next read to determine what you
+            # have seen.  But there's no more to read ...
+
+            previous_atcr = self.atcr
+
+            if self.atcr:
+                if data.startswith("\n"):
                     self.CRLF = True
                 else:
-                    self.NL = True
-            elif data[i] == '\r':
-                if i < len(data)-1 and data[i+1] != '\n':
                     self.CR = True
-                    
-        if "\r" in data:
-            self.atcr = data.endswith("\r")
-            data = replace_crlf_with_lf(data)
-            
-        return data
+                self.atcr = False
+
+            if data.endswith("\r"):
+                data = data[:len(data) - 1]
+                n += 1
+                self.atcr = True
+
+            for i in range(len(data)):
+                if data[i] == '\n':
+                    if i > 0 and data[i-1] == '\r':
+                        self.CRLF = True
+                    elif not previous_atcr:
+                        self.NL = True
+                elif data[i] == '\r':
+                    if i < len(data)-1 and data[i+1] != '\n':
+                        self.CR = True
+
+            result += data
+            n -= len(data)
+            if n <= 0:
+                break
+
+            data = self.do_read(n)
+
+        if "\r" in result:
+            result = replace_crlf_with_lf(result)
+
+        return result
 
     def readline(self):
         result = []


More information about the Pypy-commit mailing list