[Tutor] xlwt & xlrd: can I refactor this code better?
Albert-Jan Roskam
fomcl at yahoo.com
Wed Jul 22 21:42:36 CEST 2009
Hi,
A while ago I wrote a program to merge xls files. Now I refactored it because before, it was one big chunk of spaghetti code and I wanted to add some functionality. The code below works, but I have the feeling that it could still be simplified. Most functions have many arguments - isn't that called 'tight coupling'? Could somebody give me a few general pointers as to how to improve this program without loosing functionality? The first function is the main() function. Shouldn't a programmer strive for information hiding in such a function? Ideally, it should almost read like regular english, right? Or is that too textbook-ish? ;-)
Thanks in advance!
Albert-Jan
"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 29 characters
"""
import glob, os.path, time
import xlrd, xlwt
def merge_xls(in_dir="d:/temp/", out_file="d:/temp/merged_output.xls"):
""" Main function: merge xls sheets """
xls_files = glob.glob(in_dir + "*.xls")
xls_files.sort()
merged_book = xlwt.Workbook()
osheet_names = [os.path.basename(xls_file)[:-4] for xls_file in xls_files]
for xls_no, xls_file in enumerate(xls_files):
print "---> Processing file %s" % (xls_file)
book = xlrd.open_workbook(xls_file)
isheet_names = xlrd.Book.sheet_names(book)
check_xls(book, merged_book, isheet_names, osheet_names, xls_no, xls_files)
stamped_outfile = out_file[:-4] + "_" + time.strftime("%Y-%m-%d") + ".xls"
merged_book.save(stamped_outfile)
print_msg(xls_files, osheet_names, stamped_outfile)
def check_xls(book, merged_book, isheet_names, osheet_names, xls_no, xls_files):
""" Check existence and file names of input xls files """
if xls_files and len(osheet_names[xls_no]) <= 29:
write_sheets(book, merged_book, isheet_names, osheet_names, xls_no, xls_files)
elif xls_files:
print "WARNING *** File name too long: <%s.xls> (maximum is 31 chars) " % (osheet_names[xls_no])
print "WARNING *** File <%s.xls> was skipped." % (osheet_names[xls_no])
else:
print "NOTE *** No xls files in %s. Nothing to do" % (in_dir)
def write_sheets(book, merged_book, isheet_names, osheet_names, xls_no, xls_files):
""" Write sheets, and add sheet numbering in case of multisheet xls input """
osheet_name = osheet_names[xls_no]
xls_file = xls_files[xls_no]
if book.nsheets == 1:
ws = merged_book.add_sheet(osheet_name)
isheet_name = isheet_names[0]
sheet = book.sheet_by_index(0)
write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file)
elif book.nsheets in range(1, 100):
for sheetx in range(book.nsheets):
isheet_name = isheet_names[sheetx]
ws = merged_book.add_sheet(osheet_name+str(sheetx+1).zfill(2))
sheet = book.sheet_by_index(sheetx)
write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file)
else:
raise Exception ("ERROR *** File %s has %s sheets (maximum is 99)" % (xls_file, book.nsheets))
def write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file, format_cell=True):
""" Write cells, and apply formatting if needed """
MISSINGVALUES = ("#LEEG!", "#NULL!")
rx = 0 # initialize to zero in case of empty input xls file.
style = format_cells(ws)
for rx in range(sheet.nrows):
for cx in range(sheet.ncols):
cell_value = sheet.cell_value(rx, cx)
if format_cell and rx == 0:
format_cells(ws)
ws.write(rx, cx, cell_value, style)
elif cell_value in MISSINGVALUES or xlrd.XL_CELL_EMPTY:
ws.write(rx, cx, " ")
else:
ws.write(rx, cx, cell_value)
footer = "source tab: " + isheet_name + " || source file: " + os.path.basename(xls_file)
if format_cell:
ws.write(rx+2, 0, footer.upper(), style) # print bold source tab & file name below the table
else:
ws.write(rx+2, 0, footer.upper())
def format_cells(ws, font='Arial', boldrow=True, panes_frozen=True):
""" Add horizontal split pane and bold font at first row """
ws.panes_frozen = panes_frozen
ws.horz_split_pos = 1
font0 = xlwt.Font()
font0.name = font
font0.struck_out = False
font0.bold = boldrow
style0 = xlwt.XFStyle()
style0.font = font0
return style0
def print_msg(xls_files, osheet_names, stamped_outfile):
""" Print status messages """
print "\n---> Merged xls file written to %s using the following source files: " % (stamped_outfile)
MAXSHEETNAMELEN = 29
for n_sheet, osheet_name in enumerate(osheet_names):
if len(osheet_name) <= MAXSHEETNAMELEN:
print "\t", str(n_sheet+1).zfill(3), "%s.xls" % (osheet_name)
excl_sheets = [os.path.basename(xls_file)[:-4] for xls_file in xls_files if \
len(os.path.basename(xls_file)[:-4]) > MAXSHEETNAMELEN]
if excl_sheets:
print "\n--> The following files were skipped because the file name exceeds 29 characters: "
for n_sheet, excl_sheet in enumerate(excl_sheets):
print "\t", str(n_sheet+1).zfill(3), excl_sheet
if __name__ == "__main__":
merge_xls()
More information about the Tutor
mailing list