Creating dictionary of items from Excel with mutliple keys
Peter Otten
__peter__ at web.de
Sun Aug 14 07:02:56 EDT 2016
Atri Mahapatra wrote:
> I am trying to create a following dictionary. I am reading data from excel
> which has data in the following format:
>
> Sl no: Name Thickness Length Material Width Quantity Side
>
> It has 20 rows of data.
>
>
> The dictionary for the 20 rows, I would like to make is
>
> Data_Dict = [
> { 'Name': 'X', 'Length': '10' , 'Width': '5', 'Quantity': 2 'Area': 50};
> { 'Name': 'Y', 'Length': '20' , 'Width': 10', 'Quantity': 1 'Area': 200};
> .
> .
> .
> .
> till 20 rows
> ];
>
> I would like to add another key 'Area' as shown above. I used the
> following code(s):
>
> using openpyxl:
> d={}
> for i in range(3,sheet.max_row+1):
> #for j in range(3,9):
> #for k in range(0,5):
> Name = sheet.cell(row= i,column=3).value
> Length =sheet.cell(row =i,column=6).value
> Breadth= sheet.cell(row=i,column=7).value
> Quantity = sheet.cell (row=i,column=8).value
> Area = sheet.cell(row
> =i,column=6).value*sheet.cell(row=i,column=7).value d[Name]=
> Length,Breadth,Quantity,Area
>
> which gave an output like:
> ['X': (10, 5, 2, 50), 'Y': (20, 10, 1, 2232600), 'Z': (5, 2, 1, 10),
> [.............]
>
> Another code using xlrd:
>
> keys = [sheet.cell(2, col_index).value for col_index in range(0,8)]
> print (keys)
> dict_list = []
> d = {}
> for row_index in range(1, xl_sheet.nrows):
> for col_index in range(0,8):
> d = {keys[col_index]: xl_sheet.cell(row_index, col_index).value
> for col_index in range(0,8)}
> dict_list.append(d)
>
> print (dict_list)
> which did not have the area and neither the output was little messy.
>
>
>
> The main purpose is to sort the dictionary based on different criteria
> like Length or Area. I think the first one may be easier to sort. However
> if there are any better way to represent the dictionary and the code so
> that it can be sorted later based on different attributes please feel free
> to suggest.
Things become clearer when you abstract out reading the data. For example:
import openpyxl
def open_sheet(filename, sheetname):
wb = openpyxl.load_workbook(filename=filename)
return wb.get_sheet_by_name(sheetname)
def read_table(sheet, columnnames, header_row=0):
name_to_index = {
n: i for i, n
in enumerate(c.value for c in sheet.rows[header_row])
if n is not None}
column_indices = [name_to_index[n] for n in columnnames]
for row in sheet.rows[header_row + 1:]:
yield dict(zip(columnnames, (row[x].value for x in column_indices)))
if __name__ == "__main__":
from operator import itemgetter
import pprint
# read data from Excel
rows = read_table(
open_sheet("sample.xlsx", "SampleSheet"),
"Name Length Width Quantity".split(),
2 # replace with actual header row
)
# add Area
dict_list = []
for row_dict in rows:
row_dict["Area"] = row_dict["Width"] * row_dict["Length"]
dict_list.append(row_dict)
# sort and print data
print("Unsorted:")
pprint.pprint(dict_list)
for sort_column in "Width", "Quantity":
print("\nby {}:".format(sort_column))
dict_list.sort(key=itemgetter(sort_column))
pprint.pprint(dict_list)
To use xlrd instead of openpyxl you have to replace the open_sheet() and
read_table() functions with
def open_sheet(filename, sheetname):
wb = xlrd.open_workbook(filename)
return wb.sheet_by_name(sheetname)
def read_table(sheet, columnnames, header_row=0):
name_to_index = {
c.value: i for i, c
in enumerate(sheet.row(header_row))
if c.ctype == xlrd.XL_CELL_TEXT}
column_indices = [name_to_index[n] for n in columnnames]
for rowindex in range(header_row + 1, sheet.nrows):
row = sheet.row(rowindex)
yield dict(zip(columnnames, (row[x].value for x in column_indices)))
More information about the Python-list
mailing list