[Python-checkins] python/nondist/sandbox/csv/util sniffer.py,1.4,1.5

cliffwells18@users.sourceforge.net cliffwells18@users.sourceforge.net
Fri, 14 Mar 2003 17:08:24 -0800


Update of /cvsroot/python/python/nondist/sandbox/csv/util
In directory sc8-pr-cvs1:/tmp/cvs-serv25919

Modified Files:
	sniffer.py 
Log Message:
hasHeaders() now works, but its implementation is still inefficient.



Index: sniffer.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/csv/util/sniffer.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** sniffer.py	15 Mar 2003 00:42:46 -0000	1.4
--- sniffer.py	15 Mar 2003 01:08:21 -0000	1.5
***************
*** 201,217 ****
  
  # ------------------------------------------------------------------------------
! def hasHeaders(data, columns = 0):
!     """
!     PROTOTYPE:
!       hasHeaders(data, columns = 0)
!     DESCRIPTION:
!       Decides whether row 0 is a header row
!     ARGUMENTS:
!       - data is a list of lists of data (as returned by importDSV)
!       - columns is either the expected number of columns in each row or 0
!     RETURNS:
!       - true if data has header row
!     """
!     
      # Algorithm: creates a dictionary of types of data in each column. If any column
      # is of a single type (say, integers), *except* for the first row, then the first
--- 201,205 ----
  
  # ------------------------------------------------------------------------------
! def hasHeaders(fileObj, dialect):
      # Algorithm: creates a dictionary of types of data in each column. If any column
      # is of a single type (say, integers), *except* for the first row, then the first
***************
*** 222,236 ****
      # the likelihood of the first row being a header. 
  
!     if type(data) != type([]):
!         raise InvalidData, "list expected."
!     if len(data) < 2: return 0
  
!     if not columns:
!         columns = modeOfLengths(data)
          
      columnTypes = {}
      for i in range(columns): columnTypes[i] = None
      
!     for row in data[1:]:
          if len(row) != columns:
              continue # skip rows that have irregular number of columns
--- 210,232 ----
      # the likelihood of the first row being a header. 
  
!     def seval(item):
!         """
!         Strips parens from item prior to calling eval in an attempt to make it safer
!         """
!         item = item.replace('(', '').replace(')', '')
!         return eval(item)
  
!     reader = csv.reader(fileObj,
!                         delimiter = dialect.delimiter,
!                         quotechar = dialect.quotechar,
!                         skipinitialspace = dialect.skipinitialspace)
          
+     header = reader.next() # assume first row is header
+ 
+     columns = len(header)
      columnTypes = {}
      for i in range(columns): columnTypes[i] = None
      
!     for row in reader:
          if len(row) != columns:
              continue # skip rows that have irregular number of columns
***************
*** 239,246 ****
                  try:
                      # is it a built-in type (besides string)?
!                     thisType = type(eval(row[col]))
                  except OverflowError:
                      # a long int?
!                     thisType = type(eval(row[col] + 'L'))
                      thisType = type(0) # treat long ints as int
              except:
--- 235,242 ----
                  try:
                      # is it a built-in type (besides string)?
!                     thisType = type(seval(row[col]))
                  except OverflowError:
                      # a long int?
!                     thisType = type(seval(row[col] + 'L'))
                      thisType = type(0) # treat long ints as int
              except:
***************
*** 254,262 ****
                      del columnTypes[col]
                      
!     # finally, compare results against first row and vote on whether it's a header
      hasHeader = 0
      for col, colType in columnTypes.items():
          if type(colType) == type(0): # it's a length
!             if len(data[0][col]) != colType:
                  hasHeader += 1
              else:
--- 250,258 ----
                      del columnTypes[col]
                      
!     # finally, compare results against first row and "vote" on whether it's a header
      hasHeader = 0
      for col, colType in columnTypes.items():
          if type(colType) == type(0): # it's a length
!             if len(header[col]) != colType:
                  hasHeader += 1
              else:
***************
*** 264,268 ****
          else: # attempt typecast
              try:
!                 eval("%s(%s)" % (colType.__name__, data[0][col]))
              except:
                  hasHeader += 1
--- 260,264 ----
          else: # attempt typecast
              try:
!                 eval("%s(%s)" % (colType.__name__, header[col]))
              except:
                  hasHeader += 1