csv and mixed lists of unicode and numbers

Sibylle Koczian nulla.epistola at web.de
Tue Nov 24 11:42:57 EST 2009


Hello,

I want to put data from a database into a tab separated text file. This
looks like a typical application for the csv module, but there is a
snag: the rows I get from the database module (kinterbasdb in this case)
contain unicode objects and numbers. And of course the unicode objects
contain lots of non-ascii characters.

If I try to use csv.writer as is, I get UnicodeEncodeErrors. If I use
the UnicodeWriter from the module documentation, I get TypeErrors with
the numbers. (I'm using Python 2.6 - upgrading to 3.1 on this machine
would cause other complications.)

So do I have to process the rows myself and treat numbers and text
fields differently? Or what's the best way?

Here is a small example:

########################################################################
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv, codecs, cStringIO
import tempfile

# Sample row for the demonstration: unicode text containing non-ASCII
# characters, interleaved with an int and a float.
cData = [
     u'Ärger',
     u'Ödland',
     5,
     u'Süßigkeit',
     u'élève',
     6.9,
     u'forêt',
]

class UnicodeWriter:
     """
     A CSV writer which will write rows to CSV file "f",
     which is encoded in the given encoding.
     """

     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
         # Redirect output to a queue
         self.queue = cStringIO.StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
         self.encoder = codecs.getincrementalencoder(encoding)()

     def writerow(self, row):
         self.writer.writerow([s.encode("utf-8") for s in row])
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
         data = data.decode("utf-8")
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
         self.stream.write(data)
         # empty queue
         self.queue.truncate(0)

     def writerows(self, rows):
         for row in rows:
             self.writerow(row)

def writewithcsv(outfile, datalist):
     """Write "datalist" as a single excel-dialect row using plain csv.writer.

     outfile  -- file-like object the row is written to.
     datalist -- iterable holding the cells of the row.
     """
     csv.writer(outfile, dialect=csv.excel).writerow(datalist)

def writeunicode(outfile, datalist):
     """Write "datalist" as a single row through a default UnicodeWriter.

     outfile  -- file-like object the encoded row is written to.
     datalist -- iterable holding the cells of the row.
     """
     UnicodeWriter(outfile).writerow(datalist)

def main():
     with tempfile.NamedTemporaryFile() as csvfile:
         print "CSV file:", csvfile.name
         print "Try with csv.writer"
         try:
             writewithcsv(csvfile, cData)
         except UnicodeEncodeError as e:
             print e
         print "Try with UnicodeWriter"
         writeunicode(csvfile, cData)
     print "Ready."

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
     main()


##############################################################################

Hoping for advice,

Sibylle



More information about the Python-list mailing list