Spanish Accents
Stan Iverson
iversonstan at gmail.com
Thu Dec 22 11:22:52 EST 2011
On Thu, Dec 22, 2011 at 11:30 AM, Rami Chowdhury
<rami.chowdhury at gmail.com> wrote:
> Could you try using the 'open' function from the 'codecs' module?
>
I believe this is what you meant:
file = codecs.open(p + "2.txt", "r", "utf-8")
for line in file:
print line
but got this error:
141 file = codecs.open(p + "2.txt", "r", "utf-8")
142 for line in file:
143 print line
144
*line* = '\r\n', *file* = <open file 'index2.txt', mode 'rb'>
/usr/lib64/python2.4/codecs.py in *next*(self=<open file 'index2.txt', mode
'rb'>) 492
493 """ Return the next decoded line from the input stream."""
494 return self.reader.next()
495
496 def __iter__(self):
*self* = <open file 'index2.txt', mode 'rb'>, self.*reader* = <open file
'index2.txt', mode 'rb'>, self.reader.*next* = <bound method
StreamReader.next of <open file 'index2.txt', mode 'rb'>>
/usr/lib64/python2.4/codecs.py in *next*(self=<open file 'index2.txt', mode
'rb'>) 429
430 """ Return the next decoded line from the input stream."""
431 line = self.readline()
432 if line:
433 return line
line *undefined*, *self* = <open file 'index2.txt', mode 'rb'>, self.*
readline* = <bound method StreamReader.readline of <open file 'index2.txt',
mode 'rb'>> /usr/lib64/python2.4/codecs.py in *readline*(self=<open file
'index2.txt', mode 'rb'>, size=None, keepends=True) 344
# If size is given, we call read() only once
345 while True:
346 data = self.read(readsize, firstline=True)
347 if data:
348
# If we're at a "\r" read one extra character (which might
data *undefined*, *self* = <open file 'index2.txt', mode 'rb'>, self.*read* =
<bound method StreamReader.read of <open file 'index2.txt', mode 'rb'>>, *
readsize* = 72, firstline *undefined*, *builtin* *True* = True
/usr/lib64/python2.4/codecs.py in *read*(self=<open file 'index2.txt', mode
'rb'>, size=72, chars=-1, firstline=True) 291
data = self.bytebuffer + newdata
292 try:
293
newchars, decodedbytes = self.decode(data, self.errors)
294 except UnicodeDecodeError, exc:
295 if firstline:
*newchars* = u'', *decodedbytes* = 0, *self* = <open file 'index2.txt',
mode 'rb'>, self.*decode* = <built-in function utf_8_decode>, *data* =
'\xe1intentado para ellos bastante sabios para discernir lo obvio.
Tales perso',
self.*errors* = 'strict'
*UnicodeDecodeError*: 'utf8' codec can't decode bytes in position 0-2:
invalid data
args = ('utf8', '\xe1 intentado para ellos bastante sabios para
discernir lo obvio. Tales perso', 0, 3, 'invalid data')
encoding = 'utf8'
end = 3
object = '\xe1 intentado para ellos bastante sabios para discernir lo
obvio. Tales perso'
reason = 'invalid data'
start = 0
which is the letter á (a with accent).
So I tried with utf-16 and got this error:
141 file = codecs.open(p + "2.txt", "r", "utf-16")
142 for line in file:
143 print line
144
*line* = '\r\n', *file* = <open file 'index2.txt', mode 'rb'>
/usr/lib64/python2.4/codecs.py in *next*(self=<open file 'index2.txt', mode
'rb'>) 492
493 """ Return the next decoded line from the input stream."""
494 return self.reader.next()
495
496 def __iter__(self):
*self* = <open file 'index2.txt', mode 'rb'>, self.*reader* = <open file
'index2.txt', mode 'rb'>, self.reader.*next* = <bound method
StreamReader.next of <open file 'index2.txt', mode 'rb'>>
/usr/lib64/python2.4/codecs.py in *next*(self=<open file 'index2.txt', mode
'rb'>) 429
430 """ Return the next decoded line from the input stream."""
431 line = self.readline()
432 if line:
433 return line
line *undefined*, *self* = <open file 'index2.txt', mode 'rb'>, self.*
readline* = <bound method StreamReader.readline of <open file 'index2.txt',
mode 'rb'>> /usr/lib64/python2.4/codecs.py in *readline*(self=<open file
'index2.txt', mode 'rb'>, size=None, keepends=True) 344
# If size is given, we call read() only once
345 while True:
346 data = self.read(readsize, firstline=True)
347 if data:
348
# If we're at a "\r" read one extra character (which might
data *undefined*, *self* = <open file 'index2.txt', mode 'rb'>, self.*read* =
<bound method StreamReader.read of <open file 'index2.txt', mode 'rb'>>, *
readsize* = 72, firstline *undefined*, *builtin* *True* = True
/usr/lib64/python2.4/codecs.py in *read*(self=<open file 'index2.txt', mode
'rb'>, size=72, chars=-1, firstline=True) 291
data = self.bytebuffer + newdata
292 try:
293
newchars, decodedbytes = self.decode(data, self.errors)
294 except UnicodeDecodeError, exc:
295 if firstline:
newchars *undefined*, decodedbytes *undefined*, *self* = <open file
'index2.txt', mode 'rb'>, self.*decode* = <bound method StreamReader.decode
of <open file 'index2.txt', mode 'rb'>>, *data* = '<span
class="text">\r\n<i>Noticia:
Este sitio web entre este portal est\xe1 i', self.*errors* = 'strict'
/usr/lib64/python2.4/encodings/utf_16.py in *decode*(self=<open file
'index2.txt', mode 'rb'>, input='<span class="text">\r\n<i>Noticia: Este
sitio web entre este portal est\xe1 i', errors='strict') 47
self.decode = codecs.utf_16_be_decode
48 elif consumed>=2:
49
raise UnicodeError,"UTF-16 stream does not start with BOM"
50 return (object, consumed)
51
*builtin* *UnicodeError* = <class exceptions.UnicodeError>
*UnicodeError*: UTF-16 stream does not start with BOM
args = ('UTF-16 stream does not start with BOM',)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20111222/a60b05e5/attachment-0001.html>
More information about the Python-list
mailing list