[pypy-svn] r8714 - pypy/dist/pypy/lib

arigo at codespeak.net arigo at codespeak.net
Sat Jan 29 14:40:24 CET 2005


Author: arigo
Date: Sat Jan 29 14:40:24 2005
New Revision: 8714

Modified:
   pypy/dist/pypy/lib/_file.py
   pypy/dist/pypy/lib/_sio.py
Log:
Swapped the order of the Buffering*Stream and universal-newlines Text*Filter
instances in the stack of streams (the Text*Filter now wraps the buffering
stream), so that tell()/seek() are meaningful.

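Added the peek() method, which returns the data already in the readahead
buffer without consuming it.  This lets TextInputFilter.readline() read up to
the next end-of-line in one call instead of one character at a time.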



Modified: pypy/dist/pypy/lib/_file.py
==============================================================================
--- pypy/dist/pypy/lib/_file.py	(original)
+++ pypy/dist/pypy/lib/_file.py	Sat Jan 29 14:40:24 2005
@@ -123,13 +123,6 @@
         reading = basemode == 'r' or plus
         writing = basemode != 'r' or plus
 
-        if universal:     # Wants universal newlines
-            if writing and os.linesep != '\n':
-                self.stream = _sio.TextOutputFilter(self.stream)
-            if reading:
-                self.stream = _sio.TextInputFilter(self.stream)
-                self.getnewlines = self.stream.getnewlines
-
         if bufsize == 0:   # no buffering
             pass
         elif bufsize == 1:   # line-buffering
@@ -146,6 +139,13 @@
             if reading:
                 self.stream = _sio.BufferingInputStream(self.stream, bufsize)
 
+        if universal:     # Wants universal newlines
+            if writing and os.linesep != '\n':
+                self.stream = _sio.TextOutputFilter(self.stream)
+            if reading:
+                self.stream = _sio.TextInputFilter(self.stream)
+                self.getnewlines = self.stream.getnewlines
+
     def getnewlines(self):
         return None    # can be overridden in the instance
 

Modified: pypy/dist/pypy/lib/_sio.py
==============================================================================
--- pypy/dist/pypy/lib/_sio.py	(original)
+++ pypy/dist/pypy/lib/_sio.py	Sat Jan 29 14:40:24 2005
@@ -4,7 +4,7 @@
 
 - This module contains various stream classes which provide a subset of the
   classic Python I/O API: read(n), write(s), tell(), seek(offset, whence=0),
-  readall(), readline(), truncate(size), flush(), close().
+  readall(), readline(), truncate(size), flush(), close(), peek().
 
 - This is not for general usage:
   * read(n) may return less than n bytes, just like os.read().
@@ -12,6 +12,7 @@
   * close() should be called exactly once and no further operations performed;
     there is no __del__() closing the stream for you.
   * some methods may raise NotImplementedError.
+  * peek() returns some (or no) characters that have already been read ahead.
 
 - A 'basis stream' provides I/O using a low-level API, like the os, mmap or
   socket modules.
@@ -61,14 +62,20 @@
         return ''.join(result)
 
     def readline(self):
-        # very inefficient
+        # very inefficient unless there is a peek()
         result = []
-        c = self.read(1)
-        while c:
+        while True:
+            # "peeks" on the underlying stream to see how many characters
+            # we can safely read without reading past an end-of-line
+            peeked = self.peek()
+            pn = peeked.find("\n")
+            if pn < 0: pn = len(peeked)
+            c = self.read(pn + 1)
+            if not c:
+                break
             result.append(c)
-            if c == '\n':
+            if c.endswith('\n'):
                 break
-            c = self.read(1)
         return ''.join(result)
 
     def truncate(self, size):
@@ -80,6 +87,9 @@
     def close(self):
         pass
 
+    def peek(self):
+        return ''
+
 
 class DiskFile(Stream):
 
@@ -455,6 +465,12 @@
 
         return "".join(buf)
 
+    def peek(self):
+        if self.lines:
+            return self.lines[0] + "\n"
+        else:
+            return self.buf
+
     write      = PassThrough("write",     flush_buffers=True)
     truncate   = PassThrough("truncate",  flush_buffers=True)
     flush      = PassThrough("flush",     flush_buffers=True)
@@ -636,6 +652,24 @@
             
         return data
 
+    def readline(self):
+        result = []
+        while True:
+            # "peeks" on the underlying stream to see how many characters
+            # we can safely read without reading past an end-of-line
+            peeked = self.base.peek()
+            pn = peeked.find("\n")
+            pr = peeked.find("\r")
+            if pn < 0: pn = len(peeked)
+            if pr < 0: pr = len(peeked)
+            c = self.read(min(pn, pr) + 1)
+            if not c:
+                break
+            result.append(c)
+            if c.endswith('\n'):
+                break
+        return ''.join(result)
+
     def seek(self, offset, whence=0):
         """Seeks based on knowledge that does not come from a tell()
            may go to the wrong place, since the number of
@@ -677,6 +711,9 @@
             else:
                 self.buf = ""
 
+    def peek(self):
+        return self.buf
+
     write      = PassThrough("write",     flush_buffers=True)
     truncate   = PassThrough("truncate",  flush_buffers=True)
     flush      = PassThrough("flush",     flush_buffers=True)



More information about the Pypy-commit mailing list