<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML><HEAD><TITLE>Message</TITLE>
<META http-equiv=Content-Type content="text/html; charset=iso-8859-1">
<META content="MSHTML 6.00.2800.1479" name=GENERATOR>
<STYLE>@font-face {
        font-family: Wingdings;
}
@font-face {
        font-family: Verdana;
}
@page Section1 {size: 595.3pt 841.9pt; margin: 72.0pt 90.0pt 72.0pt 90.0pt; }
P.MsoNormal {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
LI.MsoNormal {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
DIV.MsoNormal {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
H1 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 21.6pt; TEXT-INDENT: -21.6pt; FONT-FAMILY: Verdana
}
H2 {
        FONT-SIZE: 10pt; MARGIN: 12pt 0cm 3pt 28.8pt; TEXT-INDENT: -28.8pt; FONT-FAMILY: Verdana
}
H3 {
        FONT-WEIGHT: normal; FONT-SIZE: 9pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -36pt; FONT-FAMILY: Verdana
}
H4 {
        FONT-SIZE: 14pt; MARGIN: 12pt 0cm 3pt 43.2pt; TEXT-INDENT: -43.2pt; FONT-FAMILY: "Times New Roman"
}
H5 {
        FONT-SIZE: 13pt; MARGIN: 12pt 0cm 3pt 50.4pt; TEXT-INDENT: -50.4pt; FONT-STYLE: italic; FONT-FAMILY: Verdana
}
H6 {
        FONT-SIZE: 11pt; MARGIN: 12pt 0cm 3pt 57.6pt; TEXT-INDENT: -57.6pt; FONT-FAMILY: "Times New Roman"
}
P.MsoHeading7 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 64.8pt; TEXT-INDENT: -64.8pt; FONT-FAMILY: "Times New Roman"
}
LI.MsoHeading7 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 64.8pt; TEXT-INDENT: -64.8pt; FONT-FAMILY: "Times New Roman"
}
DIV.MsoHeading7 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 64.8pt; TEXT-INDENT: -64.8pt; FONT-FAMILY: "Times New Roman"
}
P.MsoHeading8 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 72pt; TEXT-INDENT: -72pt; FONT-STYLE: italic; FONT-FAMILY: "Times New Roman"
}
LI.MsoHeading8 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 72pt; TEXT-INDENT: -72pt; FONT-STYLE: italic; FONT-FAMILY: "Times New Roman"
}
DIV.MsoHeading8 {
        FONT-SIZE: 12pt; MARGIN: 12pt 0cm 3pt 72pt; TEXT-INDENT: -72pt; FONT-STYLE: italic; FONT-FAMILY: "Times New Roman"
}
P.MsoHeading9 {
        FONT-SIZE: 11pt; MARGIN: 12pt 0cm 3pt 79.2pt; TEXT-INDENT: -79.2pt; FONT-FAMILY: Arial
}
LI.MsoHeading9 {
        FONT-SIZE: 11pt; MARGIN: 12pt 0cm 3pt 79.2pt; TEXT-INDENT: -79.2pt; FONT-FAMILY: Arial
}
DIV.MsoHeading9 {
        FONT-SIZE: 11pt; MARGIN: 12pt 0cm 3pt 79.2pt; TEXT-INDENT: -79.2pt; FONT-FAMILY: Arial
}
P.MsoNormalIndent {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 36pt; FONT-FAMILY: Verdana
}
LI.MsoNormalIndent {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 36pt; FONT-FAMILY: Verdana
}
DIV.MsoNormalIndent {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 36pt; FONT-FAMILY: Verdana
}
P.MsoHeader {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
LI.MsoHeader {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
DIV.MsoHeader {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
P.MsoFooter {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
LI.MsoFooter {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
DIV.MsoFooter {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt; FONT-FAMILY: Verdana
}
P.MsoTitle {
        FONT-WEIGHT: bold; FONT-SIZE: 14pt; MARGIN: 12pt 0cm 3pt; FONT-FAMILY: Verdana; TEXT-ALIGN: center
}
LI.MsoTitle {
        FONT-WEIGHT: bold; FONT-SIZE: 14pt; MARGIN: 12pt 0cm 3pt; FONT-FAMILY: Verdana; TEXT-ALIGN: center
}
DIV.MsoTitle {
        FONT-WEIGHT: bold; FONT-SIZE: 14pt; MARGIN: 12pt 0cm 3pt; FONT-FAMILY: Verdana; TEXT-ALIGN: center
}
A:link {
        COLOR: blue; TEXT-DECORATION: underline
}
SPAN.MsoHyperlink {
        COLOR: blue; TEXT-DECORATION: underline
}
A:visited {
        COLOR: purple; TEXT-DECORATION: underline
}
SPAN.MsoHyperlinkFollowed {
        COLOR: purple; TEXT-DECORATION: underline
}
P.Heading1bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
LI.Heading1bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
DIV.Heading1bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
P.Heading1text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
LI.Heading1text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
DIV.Heading1text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
P.Heading2text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
LI.Heading2text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
DIV.Heading2text {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 27pt; FONT-FAMILY: Verdana
}
P.Heading2bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
LI.Heading2bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
DIV.Heading2bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
P.Heading3text {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 26.95pt; FONT-FAMILY: Verdana
}
LI.Heading3text {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 26.95pt; FONT-FAMILY: Verdana
}
DIV.Heading3text {
        FONT-SIZE: 8pt; MARGIN: 0cm 0cm 6pt 26.95pt; FONT-FAMILY: Verdana
}
P.Heading3bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
LI.Heading3bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
DIV.Heading3bullet {
        FONT-SIZE: 8pt; MARGIN: 12pt 0cm 3pt 63pt; TEXT-INDENT: -18pt; FONT-FAMILY: Verdana
}
P.Heading3Text0 {
        FONT-SIZE: 8pt; MARGIN: 6pt 0cm 12pt 70.9pt; FONT-FAMILY: Verdana; TEXT-ALIGN: justify
}
LI.Heading3Text0 {
        FONT-SIZE: 8pt; MARGIN: 6pt 0cm 12pt 70.9pt; FONT-FAMILY: Verdana; TEXT-ALIGN: justify
}
DIV.Heading3Text0 {
        FONT-SIZE: 8pt; MARGIN: 6pt 0cm 12pt 70.9pt; FONT-FAMILY: Verdana; TEXT-ALIGN: justify
}
SPAN.EmailStyle29 {
        FONT-WEIGHT: normal; COLOR: windowtext; FONT-STYLE: normal; FONT-FAMILY: Verdana; TEXT-DECORATION: none
}
DIV.Section1 {
        page: Section1
}
OL {
        MARGIN-BOTTOM: 0cm
}
UL {
        MARGIN-BOTTOM: 0cm
}
</STYLE>
</HEAD>
<BODY lang=EN-US vLink=purple link=blue>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff size=2>When
in doubt, turn the problem around 90 degrees.</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<DIV>
<TABLE cellSpacing=0 cellPadding=0>
<TBODY>
<TR vAlign=baseline>
<TD><NOBR><B><TT class=function id=l2h-25>file</TT></B>(</NOBR></TD>
<TD><VAR>filename</VAR><BIG>[</BIG><VAR>, mode</VAR><BIG>[</BIG><VAR>,
bufsize</VAR><BIG>]</BIG><BIG>]</BIG>)</TD></TR></TBODY></TABLE>
<DD>Return a new file object (described in section <A
href="mk:@MSITStore:C:\develope\Python23\Doc\Python23.chm::/lib/bltin-file-objects.html#bltin-file-objects">2.3.8</A>,
``<A class=ulink
href="mk:@MSITStore:C:\develope\Python23\Doc\Python23.chm::/lib/bltin-file-objects.html">File
Objects</A>''). The first two arguments are the same as for <CODE>stdio</CODE>'s
<TT class=cfunction>fopen()</TT>: <VAR>filename</VAR> is the file name to be
opened, <VAR>mode</VAR> indicates how the file is to be opened: <CODE>'r'</CODE>
for reading, <CODE>'w'</CODE> for writing (truncating an existing file), and
<CODE>'a'</CODE> opens it for appending (which on <I>some</I> <FONT
style="FONT-VARIANT: small-caps">Unix</FONT> systems means that <I>all</I>
writes append to the end of the file, regardless of the current seek position).
</DD></DIV>
<DIV> </DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff size=2>The
problem is that your file contains BINARY data....</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff size=2>So,
let's remove the binary data:</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV><SPAN class=031133515-12012005>
<DIV><BR><FONT face=Arial color=#0000ff size=2>import sys<BR>import
string</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial color=#0000ff size=2>def strip_binary ( filename, newname
):<BR> test = open ( filename, 'rb')<BR>
stripped = open (newname, 'wb')</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial color=#0000ff size=2> data =
None<BR> while data <>
"":<BR> data = test.read
(1)<BR>
<BR> if data <>
"":<BR> if
data in
string.printable:<BR>
stripped.write (data)</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial color=#0000ff size=2> stripped.close
()<BR> test.close ()</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial color=#0000ff size=2>strip_binary ( sys.argv[1],
sys.argv[2])<BR></FONT></DIV>
<DIV><FONT face=Arial color=#0000ff size=2></FONT> </DIV>
<DIV></SPAN><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2>This will remove all characters that are not contained in the string
module's PRINTABLE variable.</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff size=2>Then
you should be able to open the NEW file as an ASCII file, without any
issues.</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff size=2>Instead
of creating a temporary file, you could write the data to a list, and then
use a SPLIT("\n") on the temporary list, and process that.  That would be
the rough equivalent of READLINES....</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<DIV><SPAN class=031133515-12012005>
<FONT face=Arial color=#0000ff size=2>- Ben</FONT></SPAN></DIV>
<DIV><SPAN class=031133515-12012005><FONT face=Arial color=#0000ff
size=2></FONT></SPAN> </DIV>
<BLOCKQUOTE
style="PADDING-LEFT: 5px; MARGIN-LEFT: 5px; BORDER-LEFT: #0000ff 2px solid; MARGIN-RIGHT: 0px">
<DIV></DIV>
<DIV class=OutlookMessageHeader lang=en-us dir=ltr align=left><FONT
face=Tahoma size=2>-----Original Message-----<BR><B>From:</B>
python-win32-bounces@python.org [mailto:python-win32-bounces@python.org] <B>On
Behalf Of </B>AddisonN@iti-ab.com<BR><B>Sent:</B> Wednesday, January 12, 2005
8:44 AM<BR><B>To:</B> python-win32@python.org<BR><B>Subject:</B>
[python-win32] File I/O problem<BR><BR></FONT></DIV>
<DIV class=Section1>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">I am trying to process a file that was originally created
on an AS/400 as a spooled report. The file has been converted to ASCII before
sending to me by e-mail. The original report is in Arabic script and so any
Arabic script has been mapped to</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">I can’t read the whole file in unless I chop out all
the (formerly) Arabic characters as read(), readline() or readlines() seems to
think it’s done too early. The problem appears to be that the conversion has
produced a byte with hex value 1a and Python is treating this as an
end-of-file marker. I’ve worked this out by using a Hex Editor and
looking at the character after where the read operation stops. The
offending character is the square (unprintable) character in the file snippet
below.</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">Start file snippet >></SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">MK 2005/01/10 البنك العربي(ش
.م.ع) الميزانية الموحدة - تقريـر
الميزانية
الشهــرية
كما هي في</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">
01 : فروع دولة
امارات
=========================================
الصـفحة </SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">&lt;&lt; End file snippet</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">Is there a way I can pre-process this file with Python
and chop out the characters ( the 1a) I don’t want?</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt"></SPAN></FONT> </P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">If I do this:</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">import string</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">report =
open('d:\\Software\\PythonScripts\\ear11050110.txt').readlines()
</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">report is:</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">>>> report</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">['MK 2005/01/10 \xc7\xe1\xc8\xe4\xdf
\xc7\xe1\xda\xd1\xc8\xed(\xd4
.\xe3.\xda)
\xc7\xe1\xe3\xed\xd2\xc7\xe4\xed\xc9 \xc7\xe1\xe3\xe6\xcd\xcf\xc9 -
\xca\xde\xd1\xed\xdc\xd1 \xc7\xe1\xe3\xed\xd2\xc7\xe4\xed\xc9
\xc7\xe1\xd4\xe5\xdc\xdc\xd1\xed\xc9
\xdf\xe3\xc7
\xe5\xed \xdd\xed\n',
'
01 : \xdd\xd1\xe6\xda \xcf\xe6\xe1\xc9 \xc7']</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt"></SPAN></FONT> </P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">Which is everything up to the hex
1a.</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt"></SPAN></FONT> </P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">Thanks for any prompting whatsoever.</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt"></SPAN></FONT> </P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt">Nick.</SPAN></FONT></P>
<P class=MsoNormal><FONT face=Verdana size=2><SPAN lang=EN-GB
style="FONT-SIZE: 10pt"></SPAN></FONT> </P></DIV><FONT
size=3><BR><BR>**********************************************************************<BR>This
email and any files transmitted with it are confidential and<BR>intended
solely for the use of the individual or entity to whom they<BR>are addressed.
If you have received this email in error please notify<BR>the system
manager.<BR>This footnote also confirms that this email message has been swept
by<BR>MIMEsweeper for the presence of computer viruses.<BR>Information
Technology International (ITI) +44 (0)20 7315
8500<BR>**********************************************************************<BR></FONT></BLOCKQUOTE></BODY></HTML>