[Python-checkins] python/dist/src/Lib csv.py,1.2,1.3

montanaro@users.sourceforge.net montanaro@users.sourceforge.net
Fri, 25 Apr 2003 07:47:19 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv14712

Modified Files:
	csv.py 
Log Message:
rework Sniffer api significantly


Index: csv.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/csv.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** csv.py	25 Apr 2003 14:27:00 -0000	1.2
--- csv.py	25 Apr 2003 14:47:16 -0000	1.3
***************
*** 10,13 ****
--- 10,18 ----
                   __doc__
  
+ try:
+     from cStringIO import StringIO
+ except ImportError:
+     from StringIO import StringIO
+ 
  __all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
              "Error", "Dialect", "excel", "excel_tab", "reader", "writer",
***************
*** 148,175 ****
      '''
      "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
!     Returns a csv.Dialect object.
      '''
!     def __init__(self, sample = 16 * 1024):
          # in case there is more than one possible delimiter
          self.preferred = [',', '\t', ';', ' ', ':']
  
-         # amount of data (in bytes) to sample
-         self.sample = sample
  
! 
!     def sniff(self, fileobj):
          """
!         Takes a file-like object and returns a dialect (or None)
          """
-         self.fileobj = fileobj
- 
-         data = fileobj.read(self.sample)
  
          quotechar, delimiter, skipinitialspace = \
!                    self._guessQuoteAndDelimiter(data)
          if delimiter is None:
!             delimiter, skipinitialspace = self._guessDelimiter(data)
  
!         class SniffedDialect(Dialect):
              _name = "sniffed"
              lineterminator = '\r\n'
--- 153,174 ----
      '''
      "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
!     Returns a Dialect object.
      '''
!     def __init__(self):
          # in case there is more than one possible delimiter
          self.preferred = [',', '\t', ';', ' ', ':']
  
  
!     def sniff(self, sample):
          """
!         Returns a dialect (or None) corresponding to the sample
          """
  
          quotechar, delimiter, skipinitialspace = \
!                    self._guess_quote_and_delimiter(sample)
          if delimiter is None:
!             delimiter, skipinitialspace = self._guess_delimiter(sample)
  
!         class dialect(Dialect):
              _name = "sniffed"
              lineterminator = '\r\n'
***************
*** 177,197 ****
              # escapechar = ''
              doublequote = False
-         SniffedDialect.delimiter = delimiter
-         SniffedDialect.quotechar = quotechar
-         SniffedDialect.skipinitialspace = skipinitialspace
- 
-         self.dialect = SniffedDialect
-         return self.dialect
- 
- 
-     def hasHeaders(self):
-         return self._hasHeaders(self.fileobj, self.dialect)
  
  
!     def register_dialect(self, name='sniffed'):
!         register_dialect(name, self.dialect)
  
  
!     def _guessQuoteAndDelimiter(self, data):
          """
          Looks for text enclosed between two identical quotes
--- 176,189 ----
              # escapechar = ''
              doublequote = False
  
+         dialect.delimiter = delimiter
+         # _csv.reader won't accept a quotechar of ''
+         dialect.quotechar = quotechar or '"'
+         dialect.skipinitialspace = skipinitialspace
  
!         return dialect
  
  
!     def _guess_quote_and_delimiter(self, data):
          """
          Looks for text enclosed between two identical quotes
***************
*** 257,261 ****
  
  
!     def _guessDelimiter(self, data):
          """
          The delimiter /should/ occur the same number of times on
--- 249,253 ----
  
  
!     def _guess_delimiter(self, data):
          """
          The delimiter /should/ occur the same number of times on
***************
*** 291,300 ****
              for line in data[start:end]:
                  for char in ascii:
!                     metafrequency = charFrequency.get(char, {})
                      # must count even if frequency is 0
                      freq = line.strip().count(char)
                      # value is the mode
!                     metafrequency[freq] = metafrequency.get(freq, 0) + 1
!                     charFrequency[char] = metafrequency
  
              for char in charFrequency.keys():
--- 283,292 ----
              for line in data[start:end]:
                  for char in ascii:
!                     metaFrequency = charFrequency.get(char, {})
                      # must count even if frequency is 0
                      freq = line.strip().count(char)
                      # value is the mode
!                     metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
!                     charFrequency[char] = metaFrequency
  
              for char in charFrequency.keys():
***************
*** 357,361 ****
  
  
!     def _hasHeaders(self, fileobj, dialect):
          # Creates a dictionary of types of data in each column. If any
          # column is of a single type (say, integers), *except* for the first
--- 349,353 ----
  
  
!     def has_header(self, sample):
          # Creates a dictionary of types of data in each column. If any
          # column is of a single type (say, integers), *except* for the first
***************
*** 374,387 ****
              return eval(item.replace('(', '').replace(')', ''))
  
!         # rewind the fileobj - this might not work for some file-like
!         # objects...
!         fileobj.seek(0)
! 
!         r = csv.reader(fileobj,
!                        delimiter=dialect.delimiter,
!                        quotechar=dialect.quotechar,
!                        skipinitialspace=dialect.skipinitialspace)
  
!         header = r.next() # assume first row is header
  
          columns = len(header)
--- 366,372 ----
              return eval(item.replace('(', '').replace(')', ''))
  
!         rdr = reader(StringIO(sample), self.sniff(sample))
  
!         header = rdr.next() # assume first row is header
  
          columns = len(header)
***************
*** 390,394 ****
  
          checked = 0
!         for row in r:
              # arbitrary number of rows to check, to keep it sane
              if checked > 20:
--- 375,379 ----
  
          checked = 0
!         for row in rdr:
              # arbitrary number of rows to check, to keep it sane
              if checked > 20: