[Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.7,1.8
Skip Montanaro
montanaro at users.sourceforge.net
Tue Mar 16 16:36:24 EST 2004
Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619/scripts
Modified Files:
sb_dbexpimp.py
Log Message:
Modify sb_dbexpimp.py to use csv as the interchange format. Add
compatcsv.py to provide the minimal csv functionality needed by
sb_dbexpimp.py on Python 2.2, which doesn't have a csv module.
Index: sb_dbexpimp.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** sb_dbexpimp.py 10 Feb 2004 18:54:38 -0000 1.7
--- sb_dbexpimp.py 16 Mar 2004 21:36:22 -0000 1.8
***************
*** 101,104 ****
--- 101,109 ----
True, False = 1, 0
+ try:
+ import csv
+ except ImportError:
+ import spambayes.compatcsv as csv
+
import spambayes.storage
from spambayes.Options import options
***************
*** 110,117 ****
if isinstance(s, UnicodeType):
s = s.encode('utf-8')
! return urllib.quote(s)
def uunquote(s):
! return unicode(urllib.unquote(s), 'utf-8')
def runExport(dbFN, useDBM, outFN):
--- 115,125 ----
if isinstance(s, UnicodeType):
s = s.encode('utf-8')
! return s
def uunquote(s):
! try:
! return unicode(s, 'utf-8')
! except UnicodeDecodeError:
! return s
def runExport(dbFN, useDBM, outFN):
***************
*** 129,132 ****
--- 137,142 ----
raise
+ writer = csv.writer(fp)
+
nham = bayes.nham;
nspam = bayes.nspam;
***************
*** 136,140 ****
% (nham, nspam, len(words))
! fp.write("%s,%s,\n" % (nham, nspam))
for word in words:
--- 146,150 ----
% (nham, nspam, len(words))
! writer.writerow([nham, nspam])
for word in words:
***************
*** 143,149 ****
spamcount = wi.spamcount
word = uquote(word)
! fp.write("%s`%s`%s`\n" % (word, hamcount, spamcount))
!
! fp.close()
def runImport(dbFN, useDBM, newDBM, inFN):
--- 153,157 ----
spamcount = wi.spamcount
word = uquote(word)
! writer.writerow([word, hamcount, spamcount])
def runImport(dbFN, useDBM, newDBM, inFN):
***************
*** 152,181 ****
try:
os.unlink(dbFN)
! except OSError, e:
! if e.errno != 2: # errno.<WHAT>
! raise
try:
os.unlink(dbFN+".dat")
! except OSError, e:
! if e.errno != 2: # errno.<WHAT>
! raise
try:
os.unlink(dbFN+".dir")
! except OSError, e:
! if e.errno != 2: # errno.<WHAT>
! raise
bayes = spambayes.storage.open_storage(dbFN, useDBM)
try:
! fp = open(inFN, 'r')
except IOError, e:
if e.errno != errno.ENOENT:
raise
! nline = fp.readline()
! (nham, nspam, junk) = re.split(',', nline)
if newDBM:
--- 160,186 ----
try:
os.unlink(dbFN)
! except OSError:
! pass
try:
os.unlink(dbFN+".dat")
! except OSError:
! pass
try:
os.unlink(dbFN+".dir")
! except OSError:
! pass
bayes = spambayes.storage.open_storage(dbFN, useDBM)
try:
! fp = open(inFN, 'rb')
except IOError, e:
if e.errno != errno.ENOENT:
raise
! rdr = csv.reader(fp)
! (nham, nspam) = rdr.next()
if newDBM:
***************
*** 193,200 ****
print "%s database %s using file %s" % (impType, dbFN, inFN)
! lines = fp.readlines()
!
! for line in lines:
! (word, hamcount, spamcount, junk) = re.split('`', line)
word = uunquote(word)
--- 198,202 ----
print "%s database %s using file %s" % (impType, dbFN, inFN)
! for (word, hamcount, spamcount) in rdr:
word = uunquote(word)
***************
*** 209,214 ****
bayes._wordinfoset(word, wi)
- fp.close()
-
print "Storing database, please be patient. Even moderately sized"
print "databases may take a very long time to store."
--- 211,214 ----
More information about the Spambayes-checkins
mailing list