a couple of newbie questions
John Machin
sjmachin at lexicon.net
Mon Mar 24 16:57:28 EST 2003
On 24 Mar 2003 09:07:10 -0500, Nick Vargish <nav at adams.patriot.net>
wrote:
>sjmachin at lexicon.net (John Machin) writes:
>
>> Unless you give a damn about data integrity, in which case you might
>> like to check things like (1) do lines have the minimum 2 fields and
>> the presumably desirable 5 fields [based on the OP's example] (2) are
>> there any duplicate keys.
>
>Geez, excuse me for trying to supply an illustrative example. The guy
>was _lost_. I wanted to show the way without obscuring the solution to
>the problem with a lot of type checking.
*type* checking ???
>
>And where's your solution, smart guy?
>
=== vargish.py ===
def load_dict_1(myfile):  # V1: map the 2nd space-separated field of each line to the whole line
datadict = {}  # key field -> complete line text as read from the file
datafile = open(myfile, 'r')  # NOTE(review): never closed explicitly in this function
for dataline in datafile.xreadlines():  # presumably the pre-2.2 line-iteration idiom; V2 below iterates the file directly
datadict[dataline.split(' ')[1]] = dataline
return datadict  # no validation: repeated keys are overwritten; a line with no space raises IndexError (see "=== output ===")
# V1 upgraded to Python 2.2
def load_dict_2(myfile):
    """Return a dict mapping each line's second space-separated field to the line.

    Each value is the complete line as read, trailing newline included.

    No validation is performed: a later line with a repeated key silently
    replaces the earlier one, and a line with fewer than two space-separated
    fields raises IndexError.

    The file is closed before returning, whether or not an error occurs.
    """
    datadict = {}
    # open() rather than file(): the two are interchangeable for this call,
    # and open() is the spelling available in every Python version.
    datafile = open(myfile, 'r')
    try:
        # Iterating the file object directly yields one line at a time.
        for dataline in datafile:
            datadict[dataline.split(' ')[1]] = dataline
    finally:
        # try/finally (not "with") keeps this valid on the 2.2-era
        # interpreters the surrounding code targets.
        datafile.close()
    return datadict
# V2 in byte-size chunks with validations
def load_dict_3(myfile,
                split_field_delimiter=' ',  # use None if want any whitespace as delimiter
                key_field_num=1,
                max_num_fields=5
                ):
    """Load a text file into a dict keyed on one field, validating each line.

    Parameters:
        myfile: path of the file to read.
        split_field_delimiter: passed to str.split(); None splits on any run
            of whitespace.
        key_field_num: zero-based index of the field used as the dict key.
        max_num_fields: largest number of fields a line may have.

    Returns:
        A tuple (num_bad, datadict).  num_bad counts the rejected lines;
        datadict maps each accepted key to its complete line, trailing
        newline included.

    A line is rejected, with a message printed to stdout, when its field
    count does not satisfy key_field_num < count <= max_num_fields, or when
    its key was already supplied by an earlier accepted line (the first
    occurrence is kept).  The file is closed before returning.
    """
    datadict = {}
    previous_line_num = {}  # key -> line number at which it was first accepted
    num_recs = 0
    num_bad = 0
    datafile = open(myfile, 'r')
    try:
        for dataline in datafile:
            num_recs += 1
            field = dataline.split(split_field_delimiter)
            num_fields = len(field)
            if not (key_field_num < num_fields <= max_num_fields):
                # "print" for illustrative purposes; check out new logging module in 2.3
                # A single parenthesised argument prints the same text
                # whether print is a statement or a function.
                print("Load_dict_3: file %s, line %d: incorrect number of fields (%d)"
                      % (myfile, num_recs, num_fields))
                num_bad += 1
                continue
            key = field[key_field_num]
            if key in datadict:
                print("Load_dict_3: file %s, line %d: key <%s> already seen at line %d"
                      % (myfile, num_recs, key, previous_line_num[key]))
                num_bad += 1
                continue
            datadict[key] = dataline  # or field -- space/time trade-off
            previous_line_num[key] = num_recs
    finally:
        datafile.close()
    return num_bad, datadict
=== vargish.txt ===
01/04/2003 abc 3 4 5
11/11/1918 xyz 9 8 7
02/04/2003 abc 1 2 3
=== output ===
>>> import vargish
>>> vargish.load_dict_1("vargish.txt")
Traceback (most recent call last):
File "<stdin>", line 1, in ?
File "vargish.py", line 5, in load_dict_1
datadict[dataline.split(' ')[1]] = dataline
IndexError: list index out of range
>>> vargish.load_dict_3("vargish.txt")
Load_dict_3: file vargish.txt, line 3: key <abc> already seen at line 1
Load_dict_3: file vargish.txt, line 4: incorrect number of fields (1)
(2, {'xyz': '11/11/1918 xyz 9 8 7\n', 'abc': '01/04/2003 abc 3 4 5\n'})
>>>
More information about the Python-list
mailing list