Write tables from Word (.docx) to Excel (.xlsx) using xlsxwriter
Peter Otten
__peter__ at web.de
Wed May 27 13:36:02 EDT 2020
BBT wrote:
> I am trying to parse a word (.docx) for tables, then copy these tables
> over to excel using xlsxwriter. This is my code:
>
> from docx.api import Document
> import xlsxwriter
>
> document = Document('/Users/xxx/Documents/xxx/Clauses Sample - Copy v1 - for merge.docx')
> tables = document.tables
>
> wb = xlsxwriter.Workbook('C:/Users/xxx/Documents/xxx/test clause retrieval.xlsx')
> Sheet1 = wb.add_worksheet("Compliance")
> index_row = 0
>
> print(len(tables))
>
> for table in document.tables:
> data = []
> keys = None
> for i, row in enumerate(table.rows):
> text = (cell.text for cell in row.cells)
>
> if i == 0:
> keys = tuple(text)
> continue
> row_data = dict(zip(keys, text))
> data.append(row_data)
> #print (data)
> #big_data.append(data)
> Sheet1.write(index_row,0, str(row_data))
> index_row = index_row + 1
>
> print(row_data)
>
> wb.close()
>
>
> This is my desired output: https://i.stack.imgur.com/9qnbw.png
>
> However, here is my actual output: https://i.stack.imgur.com/vpXej.png
>
> I am aware that my current output produces a list of string instead.
>
> Is there anyway that I can get my desired output using xlsxwriter?
I had to simulate docx.api. With that caveat the following seems to work:
import xlsxwriter
# begin simulation of
# from docx.api import Document
class Cell:
    """Minimal stand-in for a docx table cell.

    Only the ``text`` attribute is provided, which is all the
    export loop below reads from a cell.
    """

    def __init__(self, text):
        self.text = text
class Row:
    """Minimal stand-in for a docx table row.

    ``cells`` is a list of ``Cell`` objects, one per item of the
    iterable passed to the constructor, in the same order.
    """

    def __init__(self, cells):
        self.cells = [Cell(c) for c in cells]
class Table:
    """Minimal stand-in for a docx table.

    ``rows`` is a list of ``Row`` objects, one per item of ``data``;
    each item of ``data`` is itself an iterable of cell values.
    """

    def __init__(self, data):
        self.rows = [Row(row) for row in data]
class Document:
    """Stand-in for ``docx.api.Document`` with two hard-coded sample tables.

    The tables deliberately differ in shape (3x2 and 2x3) so that the
    export code is exercised with more than one layout.
    """

    def __init__(self):
        self.tables = [
            Table([
                ["Hello", "Test"],
                ["est", "ing"],
                ["gg", "ff"],
            ]),
            Table([
                ["Foo", "Bar", "Baz"],
                ["ham", "spam", "jam"],
            ]),
        ]
document = Document()
# end simulation

# Copy every table into a single worksheet, stacked vertically.
# The context manager closes (and thereby saves) the workbook even if
# one of the writes raises.
with xlsxwriter.Workbook("tmp.xlsx") as wb:
    sheet = wb.add_worksheet("Compliance")
    offset = 0  # worksheet row at which the current table starts
    for table in document.tables:
        for y, row in enumerate(table.rows):
            for x, cell in enumerate(row.cells):
                # one worksheet cell per table cell, same relative position
                sheet.write(y + offset, x, cell.text)
        offset += len(table.rows) + 1  # one empty row between tables
More information about the Python-list
mailing list