a couple of newbie questions

Mon Mar 24 16:57:28 EST 2003

On 24 Mar 2003 09:07:10 -0500, Nick Vargish <nav at adams.patriot.net>
wrote:

>sjmachin at lexicon.net (John Machin) writes:
>
>> Unless you give a damn about data integrity, in which case you might
>> like to check things like (1) do lines have the minimum 2 fields and
>> the presumably desirable 5 fields [based on the OP's example] (2) are
>> there any duplicate keys.
>
>Geez, excuse me for trying to supply an illustrative example. The guy
>was _lost_. I wanted to show the way without obscuring the solution to
>the problem with a lot of type checking. 

*type* checking ???
>
>And where's your solution, smart guy?
>

=== vargish.py ===
def load_dict_1(myfile):
   """Return a dict mapping the second field of each line to the whole line.

   Each line of *myfile* is split on single spaces and the field at
   index 1 becomes the key; the value is the unmodified line, including
   any trailing newline.

   No validation is done: a line with fewer than two fields (a blank
   line, for instance) raises IndexError, and a repeated key silently
   replaces the line stored earlier.
   """
   datadict = {}
   datafile = open(myfile, 'r')
   try:
      # A readline() loop reads one line at a time, like xreadlines(),
      # but is not deprecated and works on every Python version.
      while 1:
         dataline = datafile.readline()
         if not dataline:
            break
         datadict[dataline.split(' ')[1]] = dataline
   finally:
      # Release the handle even when a malformed line raises IndexError.
      datafile.close()
   return datadict

# V1 upgraded to Python 2.2
def load_dict_2(myfile):
   """Return a dict mapping the second field of each line to the whole line.

   Same contract as the first version, but iterating over the file
   object directly (available from Python 2.2).  Each line of *myfile*
   is split on single spaces and the field at index 1 becomes the key;
   the value is the unmodified line, including any trailing newline.

   No validation is done: a line with fewer than two fields raises
   IndexError, and a repeated key silently replaces the earlier line.
   """
   datadict = {}
   # open() rather than file(): equivalent here, and file() no longer
   # exists in Python 3.
   datafile = open(myfile, 'r')
   try:
      for dataline in datafile:
         datadict[dataline.split(' ')[1]] = dataline
   finally:
      # Release the handle even when a malformed line raises IndexError.
      datafile.close()
   return datadict

# V2 in byte-size chunks with validations
def load_dict_3(myfile,
   split_field_delimiter=' ', # use None to split on any whitespace
   key_field_num=1,
   max_num_fields=5
   ):
   """Load *myfile* into a dict keyed on one field, validating each line.

   Parameters:
      myfile -- path of the text file to read.
      split_field_delimiter -- separator handed to str.split(); None
         splits on runs of any whitespace.
      key_field_num -- zero-based index of the field used as the key.
      max_num_fields -- largest number of fields a line may have.

   A line is rejected, reported on standard output and counted as bad
   when either
      * its field count n fails key_field_num < n <= max_num_fields, or
      * its key was already seen on an earlier accepted line (the first
        occurrence is kept).

   Returns a tuple (num_bad, datadict): the number of rejected lines,
   and a dict mapping each key to the full text of its line, including
   any trailing newline.
   """
   datadict = {}
   previous_line_num = {}   # key -> number of the line that supplied it
   num_recs = 0
   num_bad = 0
   datafile = open(myfile, 'r')
   try:
      for dataline in datafile:
         num_recs += 1
         field = dataline.split(split_field_delimiter)
         num_fields = len(field)
         if not (key_field_num < num_fields <= max_num_fields):
            # "print" for illustrative purposes; check out the new
            # logging module in 2.3.  A single parenthesised argument
            # prints the same text whether print is a statement or a
            # function.
            print("Load_dict_3: file %s, line %d: "
                  "incorrect number of fields (%d)"
                  % (myfile, num_recs, num_fields))
            num_bad += 1
            continue
         key = field[key_field_num]
         if key in datadict:
            print("Load_dict_3: file %s, line %d: "
                  "key <%s> already seen at line %d"
                  % (myfile, num_recs, key, previous_line_num[key]))
            num_bad += 1
            continue
         datadict[key] = dataline # or field -- space/time trade-off
         previous_line_num[key] = num_recs
   finally:
      # Always release the file handle, even if reading fails part-way.
      datafile.close()
   return num_bad, datadict

=== vargish.txt ===
01/04/2003 abc 3 4 5
11/11/1918 xyz 9 8 7
02/04/2003 abc 1 2 3

=== output ===
>>> import vargish
>>> vargish.load_dict_1("vargish.txt")
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
  File "vargish.py", line 5, in load_dict_1
    datadict[dataline.split(' ')[1]] = dataline
IndexError: list index out of range
>>> vargish.load_dict_3("vargish.txt")
Load_dict_3: file vargish.txt, line 3: key <abc> already seen at line 1
Load_dict_3: file vargish.txt, line 4: incorrect number of fields (1)
(2, {'xyz': '11/11/1918 xyz 9 8 7\n', 'abc': '01/04/2003 abc 3 4 5\n'})
>>>