[Python-checkins] python/dist/src/Lib csv.py,1.2,1.3
montanaro@users.sourceforge.net
montanaro@users.sourceforge.net
Fri, 25 Apr 2003 07:47:19 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv14712
Modified Files:
csv.py
Log Message:
rework Sniffer API significantly
Index: csv.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/csv.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** csv.py 25 Apr 2003 14:27:00 -0000 1.2
--- csv.py 25 Apr 2003 14:47:16 -0000 1.3
***************
*** 10,13 ****
--- 10,18 ----
__doc__
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
+
__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
"Error", "Dialect", "excel", "excel_tab", "reader", "writer",
***************
*** 148,175 ****
'''
"Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
! Returns a csv.Dialect object.
'''
! def __init__(self, sample = 16 * 1024):
# in case there is more than one possible delimiter
self.preferred = [',', '\t', ';', ' ', ':']
- # amount of data (in bytes) to sample
- self.sample = sample
!
! def sniff(self, fileobj):
"""
! Takes a file-like object and returns a dialect (or None)
"""
- self.fileobj = fileobj
-
- data = fileobj.read(self.sample)
quotechar, delimiter, skipinitialspace = \
! self._guessQuoteAndDelimiter(data)
if delimiter is None:
! delimiter, skipinitialspace = self._guessDelimiter(data)
! class SniffedDialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
--- 153,174 ----
'''
"Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
! Returns a Dialect object.
'''
! def __init__(self):
# in case there is more than one possible delimiter
self.preferred = [',', '\t', ';', ' ', ':']
! def sniff(self, sample):
"""
! Returns a dialect (or None) corresponding to the sample
"""
quotechar, delimiter, skipinitialspace = \
! self._guess_quote_and_delimiter(sample)
if delimiter is None:
! delimiter, skipinitialspace = self._guess_delimiter(sample)
! class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
***************
*** 177,197 ****
# escapechar = ''
doublequote = False
- SniffedDialect.delimiter = delimiter
- SniffedDialect.quotechar = quotechar
- SniffedDialect.skipinitialspace = skipinitialspace
-
- self.dialect = SniffedDialect
- return self.dialect
-
-
- def hasHeaders(self):
- return self._hasHeaders(self.fileobj, self.dialect)
! def register_dialect(self, name='sniffed'):
! register_dialect(name, self.dialect)
! def _guessQuoteAndDelimiter(self, data):
"""
Looks for text enclosed between two identical quotes
--- 176,189 ----
# escapechar = ''
doublequote = False
+ dialect.delimiter = delimiter
+ # _csv.reader won't accept a quotechar of ''
+ dialect.quotechar = quotechar or '"'
+ dialect.skipinitialspace = skipinitialspace
! return dialect
! def _guess_quote_and_delimiter(self, data):
"""
Looks for text enclosed between two identical quotes
***************
*** 257,261 ****
! def _guessDelimiter(self, data):
"""
The delimiter /should/ occur the same number of times on
--- 249,253 ----
! def _guess_delimiter(self, data):
"""
The delimiter /should/ occur the same number of times on
***************
*** 291,300 ****
for line in data[start:end]:
for char in ascii:
! metafrequency = charFrequency.get(char, {})
# must count even if frequency is 0
freq = line.strip().count(char)
# value is the mode
! metafrequency[freq] = metafrequency.get(freq, 0) + 1
! charFrequency[char] = metafrequency
for char in charFrequency.keys():
--- 283,292 ----
for line in data[start:end]:
for char in ascii:
! metaFrequency = charFrequency.get(char, {})
# must count even if frequency is 0
freq = line.strip().count(char)
# value is the mode
! metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
! charFrequency[char] = metaFrequency
for char in charFrequency.keys():
***************
*** 357,361 ****
! def _hasHeaders(self, fileobj, dialect):
# Creates a dictionary of types of data in each column. If any
# column is of a single type (say, integers), *except* for the first
--- 349,353 ----
! def has_header(self, sample):
# Creates a dictionary of types of data in each column. If any
# column is of a single type (say, integers), *except* for the first
***************
*** 374,387 ****
return eval(item.replace('(', '').replace(')', ''))
! # rewind the fileobj - this might not work for some file-like
! # objects...
! fileobj.seek(0)
!
! r = csv.reader(fileobj,
! delimiter=dialect.delimiter,
! quotechar=dialect.quotechar,
! skipinitialspace=dialect.skipinitialspace)
! header = r.next() # assume first row is header
columns = len(header)
--- 366,372 ----
return eval(item.replace('(', '').replace(')', ''))
! rdr = reader(StringIO(sample), self.sniff(sample))
! header = rdr.next() # assume first row is header
columns = len(header)
***************
*** 390,394 ****
checked = 0
! for row in r:
# arbitrary number of rows to check, to keep it sane
if checked > 20:
--- 375,379 ----
checked = 0
! for row in rdr:
# arbitrary number of rows to check, to keep it sane
if checked > 20: