[Spambayes-checkins] spambayes/spambayes compatcsv.py,NONE,1.1
Skip Montanaro
montanaro at users.sourceforge.net
Tue Mar 16 16:36:24 EST 2004
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619/spambayes
Added Files:
compatcsv.py
Log Message:
Modify sb_dbexpimp.py to use csv as the interchange format. Add
compatcsv.py to create the minimum amount of csv knowledge needed by
sb_dbexpimp.py on Python 2.2 which doesn't have a csv module.
--- NEW FILE: compatcsv.py ---
#!/usr/bin/env python
"""Implement just enough of a csv parser to support sb_dbexpimp.py's needs."""
import sys
import re
if sys.platform == "windows":
EOL = "\r\n"
elif sys.platform == "mac":
EOL = "\r"
else:
EOL = "\n"
class reader:
def __init__(self, fp):
self.fp = fp
def __iter__(self):
return self
def next(self):
return self.parse_line(self.fp.next())
def parse_line(self, line):
"""parse the line.
very simple assumptions:
* separator is a comma
* fields are only quoted with quotation marks and only
quoted if the field contains a comma or a quotation mark
* embedded quotation marks are doubled
"""
result = []
while line:
if line[0] == '"':
# search for ending quotation mark
match = re.match('"(.*?)"[^"]', line)
if match is None:
# embedded newline
line = line + self.fp.next()
continue
else:
field = match.group(1)
field = field.replace('""', '"')
try:
dummy = unicode(field, "ascii")
except UnicodeError:
field = unicode(field, "utf-8")
result.append(field)
line = line[len(field)+3:]
else:
# field is terminated by a comma or EOL
match = re.match("(.*?)(,|%s)"%EOL, line)
if match is None:
print "parse error:", line
raise
field = match.group(1)
try:
dummy = unicode(field, "ascii")
except UnicodeError:
field = unicode(field, "utf-8")
result.append(field)
line = line[len(field)+len(match.group(2))]
return result
class writer:
def __init__(self, fp):
self.fp = fp
def writerow(self, row):
result = []
for item in row:
if isinstance(item, unicode):
item = item.encode("utf-8")
else:
item = str(item)
if re.search('["\n,]', item) is not None:
item = '"%s"' % item.replace('"', '""')
result.append(item)
result = ",".join(result)
self.fp.write(result+EOL)
More information about the Spambayes-checkins
mailing list