Editing/Writing Word-Files from Python

Alan Kennedy alanmk at hotmail.com
Tue Apr 20 12:01:19 EDT 2004


[Daniel Cloutier]
 > is it possible to edit or write Word-files out of a Python-Program?

If you have access to Office 2003, are feeling brave, and have a lot 
of time on your hands, you could create and manipulate the XML 
structures that Word 2003 uses.

I thought the group members might find it interesting to see such a
file, so I have exported a "Hello World!" document as XML, and posted
the result below. I had to tidy it up a little, as the original came out
all on one line. And I had to add an encoding declaration :-)

In terms of generating such structures, well, everybody has their own 
favourite *ML templating language. I'd use TAL or XSLT in "Literal 
Result Element as Stylesheet" mode ...

http://www.w3.org/TR/xslt#result-element-stylesheet

#--------- helloworld.xml --- cut here ------------------------
<?xml version="1.0" encoding='utf-8'?>
<?mso-application progid="Word.Document"?>
<!--
  A "Hello World!" document exported from Word 2003 as XML and then tidied
  onto multiple lines. The processing instruction above names Word.Document
  as the application progid. Every namespace prefix used below is declared
  once on the root element.

  NOTE(review): the root sets xml:space="preserve", so the indentation in
  this file is formally preserved whitespace. Presumably Word ignores it
  between structural elements; confirm before re-indenting or minifying.
-->
<w:wordDocument
   w:embeddedObjPresent="no"
   w:macrosPresent="no"
   w:ocxPresent="no"
   xml:space="preserve"
   xmlns:aml="http://schemas.microsoft.com/aml/2001/core"
   xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
   xmlns:o="urn:schemas-microsoft-com:office:office"
   xmlns:sl="http://schemas.microsoft.com/schemaLibrary/2003/core"
   xmlns:v="urn:schemas-microsoft-com:vml"
   xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
   xmlns:w10="urn:schemas-microsoft-com:office:word"
   xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"
 >
   <!-- Document metadata: title, authors, revision, timestamps and size counts. -->
   <o:DocumentProperties>
     <o:Title>Hello World</o:Title>
     <o:Author>Alan</o:Author>
     <o:LastAuthor>Alan</o:LastAuthor>
     <o:Revision>1</o:Revision>
     <o:TotalTime>1</o:TotalTime>
     <o:Created>2004-04-20T15:38:00Z</o:Created>
     <o:LastSaved>2004-04-20T15:39:00Z</o:LastSaved>
     <o:Pages>1</o:Pages>
     <o:Words>1</o:Words>
     <o:Characters>12</o:Characters>
     <o:Company>Alan</o:Company>
     <o:Lines>1</o:Lines>
     <o:Paragraphs>1</o:Paragraphs>
     <o:CharactersWithSpaces>12</o:CharactersWithSpaces>
     <o:Version>11.6113</o:Version>
   </o:DocumentProperties>
   <!-- Default font: Times New Roman for all four slots (ascii, cs, fareast, h-ansi). -->
   <w:fonts>
     <w:defaultFonts
       w:ascii="Times New Roman"
       w:cs="Times New Roman"
       w:fareast="Times New Roman"
       w:h-ansi="Times New Roman"
     />
   </w:fonts>
   <!-- Style table: one default style for each type (paragraph, character, table, list). -->
   <w:styles>
     <w:versionOfBuiltInStylenames w:val="4"/>
     <w:latentStyles
       w:defLockedState="off"
       w:latentStyleCount="156"
     />
     <!-- Default paragraph style "Normal": Times New Roman, language EN-GB. -->
     <w:style
       w:default="on"
       w:styleId="Normal"
       w:type="paragraph"
     >
       <w:name w:val="Normal"/>
       <w:rPr>
         <wx:font wx:val="Times New Roman"/>
         <w:lang
           w:bidi="AR-SA"
           w:fareast="EN-GB"
           w:val="EN-GB"
         />
       </w:rPr>
     </w:style>
     <!-- Default character style; marked semiHidden. -->
     <w:style
       w:default="on"
       w:styleId="DefaultParagraphFont"
       w:type="character"
     >
       <w:name w:val="Default Paragraph Font"/>
       <w:semiHidden/>
     </w:style>
     <!-- Default table style: zero indent, cell margins 0 top/bottom and 108 left/right (type dxa). -->
     <w:style
       w:default="on"
       w:styleId="TableNormal"
       w:type="table"
     >
       <w:name w:val="Normal Table"/>
       <wx:uiName wx:val="Table Normal"/>
       <w:semiHidden/>
       <w:rPr>
         <wx:font wx:val="Times New Roman"/>
       </w:rPr>
       <w:tblPr>
         <w:tblInd w:type="dxa" w:w="0"/>
         <w:tblCellMar>
           <w:top w:type="dxa" w:w="0"/>
           <w:left w:type="dxa" w:w="108"/>
           <w:bottom w:type="dxa" w:w="0"/>
           <w:right w:type="dxa" w:w="108"/>
         </w:tblCellMar>
       </w:tblPr>
     </w:style>
     <!-- Default list style; marked semiHidden. -->
     <w:style w:default="on" w:styleId="NoList" w:type="list">
       <w:name w:val="No List"/>
       <w:semiHidden/>
     </w:style>
   </w:styles>
   <!-- Document-level settings: view, zoom, proofing state, tab stop, schema handling and compat flags. -->
   <w:docPr>
     <w:view w:val="print"/>
     <w:zoom w:percent="100"/>
     <w:doNotEmbedSystemFonts/>
     <w:proofState w:grammar="clean" w:spelling="clean"/>
     <w:attachedTemplate w:val=""/>
     <w:defaultTabStop w:val="720"/>
     <w:displayHorizontalDrawingGridEvery w:val="0"/>
     <w:displayVerticalDrawingGridEvery w:val="0"/>
     <w:useMarginsForDrawingGridOrigin/>
     <w:characterSpacingControl w:val="DontCompress"/>
     <w:optimizeForBrowser/>
     <w:validateAgainstSchema/>
     <w:saveInvalidXML w:val="off"/>
     <w:ignoreMixedContent w:val="off"/>
     <w:alwaysShowPlaceholderText w:val="off"/>
     <w:compat>
       <w:footnoteLayoutLikeWW8/>
       <w:shapeLayoutLikeWW8/>
       <w:alignTablesRowByRow/>
       <w:forgetLastTabAlignment/>
       <w:doNotUseHTMLParagraphAutoSpacing/>
       <w:layoutRawTableWidth/>
       <w:layoutTableRowsApart/>
       <w:useWord97LineBreakingRules/>
       <w:dontAllowFieldEndSelect/>
       <w:useWord2002TableStyleRules/>
     </w:compat>
   </w:docPr>
   <!-- Content: one section holding a single centred paragraph followed by the section properties. -->
   <w:body>
     <wx:sect>
       <w:p>
         <!--
           Paragraph properties: centre-justified. The size value 40 appears
           both here and in the run properties below; presumably half-points
           (i.e. 20 pt), TODO confirm against the WordprocessingML 2003 schema.
         -->
         <w:pPr>
           <w:jc w:val="center"/>
           <w:rPr>
             <w:sz w:val="40"/>
             <w:sz-cs w:val="40"/>
           </w:rPr>
         </w:pPr>
         <!-- The only run in the document; w:t carries the visible text. -->
         <w:r>
           <w:rPr>
             <w:sz w:val="40"/>
             <w:sz-cs w:val="40"/>
           </w:rPr>
           <w:t>Hello World!</w:t>
         </w:r>
       </w:p>
       <!--
         Section properties: page size, margins and column spacing. Units are
         presumably twentieths of a point, which would make 11906 x 16838 an
         A4 portrait page; TODO confirm against the schema documentation.
       -->
       <w:sectPr>
         <w:pgSz w:h="16838" w:w="11906"/>
         <w:pgMar
           w:bottom="1440"
           w:footer="720"
           w:gutter="0"
           w:header="720"
           w:left="1800"
           w:right="1800"
           w:top="1440"
         />
         <w:cols w:space="720"/>
       </w:sectPr>
     </wx:sect>
   </w:body>
 </w:wordDocument>
#--------- helloworld.xml --- cut here ------------------------

-- 
alan kennedy
------------------------------------------------------
check http headers here: http://xhaus.com/headers
email alan:              http://xhaus.com/contact/alan



More information about the Python-list mailing list