csv and mixed lists of unicode and numbers
Sibylle Koczian
nulla.epistola at web.de
Tue Nov 24 11:42:57 EST 2009
Hello,
I want to put data from a database into a tab separated text file. This
looks like a typical application for the csv module, but there is a
snag: the rows I get from the database module (kinterbasdb in this case)
contain unicode objects and numbers. And of course the unicode objects
contain lots of non-ascii characters.
If I try to use csv.writer as is, I get UnicodeEncodeErrors. If I use
the UnicodeWriter from the module documentation, I get TypeErrors with
the numbers. (I'm using Python 2.6 - upgrading to 3.1 on this machine
would cause other complications.)
So do I have to process the rows myself and treat numbers and text
fields differently? Or what's the best way?
Here is a small example:
########################################################################
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv, codecs, cStringIO
import tempfile
# Sample row: unicode strings with non-ASCII characters mixed with an int and a float.
cData = [u'Ärger', u'Ödland', 5, u'Süßigkeit', u'élève', 6.9, u'forêt']
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Each row is first written as UTF-8 through csv.writer into an
    in-memory queue, then decoded and re-encoded into the target
    encoding before being passed to f.write().
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """
        Set up the intermediate queue and the encoder for the target stream.

        f        -- object whose write() method receives the encoded rows
        dialect  -- csv dialect handed on to csv.writer
        encoding -- name of the target encoding
        kwds     -- further keyword arguments handed on to csv.writer
        """
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """
        Write one row to the target stream.

        encode("utf-8") is called on every cell of the row, so each cell
        has to provide an encode() method; a cell without one (a number,
        for instance) makes this call fail before anything is written.
        """
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of "rows" by calling writerow() on each in turn."""
        for row in rows:
            self.writerow(row)
def writewithcsv(outfile, datalist):
    """Write datalist as a single row to outfile with a plain excel-dialect csv.writer."""
    csv.writer(outfile, dialect=csv.excel).writerow(datalist)
def writeunicode(outfile, datalist):
    """Write datalist as a single row to outfile through a UnicodeWriter with default settings."""
    UnicodeWriter(outfile).writerow(datalist)
def main():
with tempfile.NamedTemporaryFile() as csvfile:
print "CSV file:", csvfile.name
print "Try with csv.writer"
try:
writewithcsv(csvfile, cData)
except UnicodeEncodeError as e:
print e
print "Try with UnicodeWriter"
writeunicode(csvfile, cData)
print "Ready."
if __name__ == "__main__":
main()
##############################################################################
Hoping for advice,
Sibylle
More information about the Python-list
mailing list