[Tutor] dictionary of lists
Peter Otten
__peter__ at web.de
Thu Jun 4 09:30:56 CEST 2015
Chris Stinemetz wrote:
> Although I am certain it is not very efficient I was able to
> accomplish what I wanted with the following code I wrote:
>
> import os
> import pprint
> import csv
> from collections import defaultdict
>
> print_map = {'MOU':0, 'Call_Att':1, 'Device':2}
> header = ['IMEI','MOUs','Call_Att','Device']
>
> path = 'C:/Users/cs062x/Desktop/Panhandle'
>
> os.chdir(path)
> running_MOU = {}
> call_attempts = {}
> d = defaultdict(list)
> for fname in os.listdir('.'):
> with open (fname) as csvfile:
> spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
> next(spamreader)
> for row in spamreader:
>
> if row[8]:
> device = row[36]
> Elapsed_Mins = float(row[7])
> IMEI = row[8].replace("'", "")
>
> if IMEI in running_MOU.keys():
For big dicts in Python 2 the test
key in some_dict.keys()
is indeed very inefficient as it builds a list of keys first and then
performs a linear scan for the key. Much better:
key in some_dict
This test avoids building the list and can also use an efficient lookup
algorithm that is independent of the size of the dict.
> running_MOU[IMEI] += Elapsed_Mins
> else:
> running_MOU[IMEI] = Elapsed_Mins
>
> if IMEI in call_attempts.keys():
> call_attempts[IMEI] += 1
> else:
> call_attempts[IMEI] = 1
>
> # if key matches append mou else append 0.
> d[IMEI] = [running_MOU[IMEI]]
> d[IMEI].append([call_attempts[IMEI]])
> d[IMEI].append([device])
>
>
> print ",".join(header)
> for k,v in sorted(d.items()):
> print k, ",", d[k][print_map['MOU']],",",
> d[k][print_map['Call_Att']][0],",", d[k][print_map['Device']][0]
>
> print "complete"
Here's an alternative that uses only one dict:
import csv
import os
import sys
# Column titles for the summary written to stdout.
header = ['IMEI', 'MOUs', 'Call_Att', 'Device']
path = 'C:/Users/cs062x/Desktop/Panhandle'

# One entry per IMEI, already in output-row shape:
# [IMEI, summed minutes, number of rows seen, device from the first row seen].
d = {}

for filename in os.listdir(path):
    with open(os.path.join(path, filename)) as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader)  # discard the first row of each file
        for row in reader:
            # Rows with an empty column 8 (the IMEI) are ignored.
            if not row[8]:
                continue

            device = row[36]
            minutes = float(row[7])
            imei = row[8].replace("'", "")

            entry = d.get(imei)
            if entry is None:
                # First occurrence: start the totals for this IMEI.
                d[imei] = [imei, minutes, 1, device]
            else:
                entry[1] += minutes
                entry[2] += 1

# Each value is a complete output row whose first field is the unique IMEI,
# so sorting the values orders the report by IMEI.
writer = csv.writer(sys.stdout)
writer.writerow(header)
writer.writerows(sorted(d.values()))

# Parenthesising the single string prints the same text as the bare
# print statement.
print("complete")
More information about the Tutor
mailing list