[Python-checkins] python/nondist/sandbox/csv/util sniffer.py,1.4,1.5
cliffwells18@users.sourceforge.net
cliffwells18@users.sourceforge.net
Fri, 14 Mar 2003 17:08:24 -0800
Update of /cvsroot/python/python/nondist/sandbox/csv/util
In directory sc8-pr-cvs1:/tmp/cvs-serv25919
Modified Files:
sniffer.py
Log Message:
Working but inefficient hasHeaders() function.
Index: sniffer.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/csv/util/sniffer.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** sniffer.py 15 Mar 2003 00:42:46 -0000 1.4
--- sniffer.py 15 Mar 2003 01:08:21 -0000 1.5
***************
*** 201,217 ****
# ------------------------------------------------------------------------------
! def hasHeaders(data, columns = 0):
! """
! PROTOTYPE:
! hasHeaders(data, columns = 0)
! DESCRIPTION:
! Decides whether row 0 is a header row
! ARGUMENTS:
! - data is a list of lists of data (as returned by importDSV)
! - columns is either the expected number of columns in each row or 0
! RETURNS:
! - true if data has header row
! """
!
# Algorithm: creates a dictionary of types of data in each column. If any column
# is of a single type (say, integers), *except* for the first row, then the first
--- 201,205 ----
# ------------------------------------------------------------------------------
! def hasHeaders(fileObj, dialect):
# Algorithm: creates a dictionary of types of data in each column. If any column
# is of a single type (say, integers), *except* for the first row, then the first
***************
*** 222,236 ****
# the likelihood of the first row being a header.
! if type(data) != type([]):
! raise InvalidData, "list expected."
! if len(data) < 2: return 0
! if not columns:
! columns = modeOfLengths(data)
columnTypes = {}
for i in range(columns): columnTypes[i] = None
! for row in data[1:]:
if len(row) != columns:
continue # skip rows that have irregular number of columns
--- 210,232 ----
# the likelihood of the first row being a header.
! def seval(item):
! """
! Strips parens from item prior to calling eval in an attempt to make it safer
! """
! item = item.replace('(', '').replace(')', '')
! return eval(item)
! reader = csv.reader(fileObj,
! delimiter = dialect.delimiter,
! quotechar = dialect.quotechar,
! skipinitialspace = dialect.skipinitialspace)
+ header = reader.next() # assume first row is header
+
+ columns = len(header)
columnTypes = {}
for i in range(columns): columnTypes[i] = None
! for row in reader:
if len(row) != columns:
continue # skip rows that have irregular number of columns
***************
*** 239,246 ****
try:
# is it a built-in type (besides string)?
! thisType = type(eval(row[col]))
except OverflowError:
# a long int?
! thisType = type(eval(row[col] + 'L'))
thisType = type(0) # treat long ints as int
except:
--- 235,242 ----
try:
# is it a built-in type (besides string)?
! thisType = type(seval(row[col]))
except OverflowError:
# a long int?
! thisType = type(seval(row[col] + 'L'))
thisType = type(0) # treat long ints as int
except:
***************
*** 254,262 ****
del columnTypes[col]
! # finally, compare results against first row and vote on whether it's a header
hasHeader = 0
for col, colType in columnTypes.items():
if type(colType) == type(0): # it's a length
! if len(data[0][col]) != colType:
hasHeader += 1
else:
--- 250,258 ----
del columnTypes[col]
! # finally, compare results against first row and "vote" on whether it's a header
hasHeader = 0
for col, colType in columnTypes.items():
if type(colType) == type(0): # it's a length
! if len(header[col]) != colType:
hasHeader += 1
else:
***************
*** 264,268 ****
else: # attempt typecast
try:
! eval("%s(%s)" % (colType.__name__, data[0][col]))
except:
hasHeader += 1
--- 260,264 ----
else: # attempt typecast
try:
! eval("%s(%s)" % (colType.__name__, header[col]))
except:
hasHeader += 1