[Python-checkins] CVS: python/dist/src/Lib fileinput.py,1.5,1.6

Guido van Rossum python-dev@python.org
Fri, 05 Jan 2001 06:44:41 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv3615

Modified Files:
	fileinput.py 
Log Message:
Speed it up by using readlines(sizehint).  It's still slower than
other ways of reading input. :-(

In the process, I added an optional bufsize argument to the input()
function and the FileInput class.


Index: fileinput.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/fileinput.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** fileinput.py	2000/04/10 17:16:12	1.5
--- fileinput.py	2001/01/05 14:44:39	1.6
***************
*** 63,71 ****
  does not work for MS-DOS 8+3 filesystems.
  
  XXX Possible additions:
  
  - optional getopt argument processing
  - specify open mode ('r' or 'rb')
- - specify buffer size
  - fileno()
  - isatty()
--- 63,77 ----
  does not work for MS-DOS 8+3 filesystems.
  
+ Performance: this module is unfortunately one of the slower ways of
+ processing large numbers of input lines.  Nevertheless, a significant
+ speed-up has been obtained by using readlines(bufsize) instead of
+ readline().  A new keyword argument, bufsize=N, is present on the
+ input() function and the FileInput() class to override the default
+ buffer size.
+ 
  XXX Possible additions:
  
  - optional getopt argument processing
  - specify open mode ('r' or 'rb')
  - fileno()
  - isatty()
***************
*** 77,86 ****
  
  _state = None
  
! def input(files=None, inplace=0, backup=""):
      global _state
      if _state and _state._file:
          raise RuntimeError, "input() already active"
!     _state = FileInput(files, inplace, backup)
      return _state
  
--- 83,94 ----
  
  _state = None
+ 
+ DEFAULT_BUFSIZE = 8*1024
  
! def input(files=None, inplace=0, backup="", bufsize=0):
      global _state
      if _state and _state._file:
          raise RuntimeError, "input() already active"
!     _state = FileInput(files, inplace, backup, bufsize)
      return _state
  
***************
*** 124,128 ****
  class FileInput:
  
!     def __init__(self, files=None, inplace=0, backup=""):
          if type(files) == type(''):
              files = (files,)
--- 132,136 ----
  class FileInput:
  
!     def __init__(self, files=None, inplace=0, backup="", bufsize=0):
          if type(files) == type(''):
              files = (files,)
***************
*** 137,140 ****
--- 145,149 ----
          self._inplace = inplace
          self._backup = backup
+         self._bufsize = bufsize or DEFAULT_BUFSIZE
          self._savestdout = None
          self._output = None
***************
*** 145,148 ****
--- 154,159 ----
          self._isstdin = 0
          self._backupfilename = None
+         self._buffer = []
+         self._bufindex = 0
  
      def __del__(self):
***************
*** 154,157 ****
--- 165,177 ----
  
      def __getitem__(self, i):
+         try:
+             line = self._buffer[self._bufindex]
+         except IndexError:
+             pass
+         else:
+             self._bufindex += 1
+             self._lineno += 1
+             self._filelineno += 1
+             return line
          if i != self._lineno:
              raise RuntimeError, "accessing lines out of order"
***************
*** 184,189 ****
--- 204,220 ----
  
          self._isstdin = 0
+         self._buffer = []
+         self._bufindex = 0
  
      def readline(self):
+         try:
+             line = self._buffer[self._bufindex]
+         except IndexError:
+             pass
+         else:
+             self._bufindex += 1
+             self._lineno += 1
+             self._filelineno += 1
+             return line
          if not self._file:
              if not self._files:
***************
*** 226,235 ****
                      # This may raise IOError
                      self._file = open(self._filename, "r")
!         line = self._file.readline()
!         if line:
!             self._lineno = self._lineno + 1
!             self._filelineno = self._filelineno + 1
!             return line
!         self.nextfile()
          # Recursive call
          return self.readline()
--- 257,264 ----
                      # This may raise IOError
                      self._file = open(self._filename, "r")
!         self._buffer = self._file.readlines(self._bufsize)
!         self._bufindex = 0
!         if not self._buffer:
!             self.nextfile()
          # Recursive call
          return self.readline()