[Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.7,1.8

Skip Montanaro montanaro at users.sourceforge.net
Tue Mar 16 16:36:24 EST 2004


Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619/scripts

Modified Files:
	sb_dbexpimp.py 
Log Message:
Modify sb_dbexpimp.py to use csv as the interchange format.  Add
compatcsv.py to provide the minimum csv functionality needed by
sb_dbexpimp.py on Python 2.2, which doesn't have a csv module.


Index: sb_dbexpimp.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** sb_dbexpimp.py	10 Feb 2004 18:54:38 -0000	1.7
--- sb_dbexpimp.py	16 Mar 2004 21:36:22 -0000	1.8
***************
*** 101,104 ****
--- 101,109 ----
      True, False = 1, 0
  
+ try:
+     import csv
+ except ImportError:
+     import spambayes.compatcsv as csv
+ 
  import spambayes.storage
  from spambayes.Options import options
***************
*** 110,117 ****
      if isinstance(s, UnicodeType):
          s = s.encode('utf-8')
!     return urllib.quote(s)
  
  def uunquote(s):
!     return unicode(urllib.unquote(s), 'utf-8')
  
  def runExport(dbFN, useDBM, outFN):
--- 115,125 ----
      if isinstance(s, UnicodeType):
          s = s.encode('utf-8')
!     return s
  
  def uunquote(s):
!     try:
!         return unicode(s, 'utf-8')
!     except UnicodeDecodeError:
!         return s
  
  def runExport(dbFN, useDBM, outFN):
***************
*** 129,132 ****
--- 137,142 ----
              raise
  
+     writer = csv.writer(fp)
+ 
      nham = bayes.nham;
      nspam = bayes.nspam;
***************
*** 136,140 ****
              % (nham, nspam, len(words))
  
!     fp.write("%s,%s,\n" % (nham, nspam))
  
      for word in words:
--- 146,150 ----
              % (nham, nspam, len(words))
  
!     writer.writerow([nham, nspam])
  
      for word in words:
***************
*** 143,149 ****
          spamcount = wi.spamcount
          word = uquote(word)
!         fp.write("%s`%s`%s`\n" % (word, hamcount, spamcount))
! 
!     fp.close()
  
  def runImport(dbFN, useDBM, newDBM, inFN):
--- 153,157 ----
          spamcount = wi.spamcount
          word = uquote(word)
!         writer.writerow([word, hamcount, spamcount])
  
  def runImport(dbFN, useDBM, newDBM, inFN):
***************
*** 152,181 ****
          try:
              os.unlink(dbFN)
!         except OSError, e:
!             if e.errno != 2:     # errno.<WHAT>
!                 raise
  
          try:
              os.unlink(dbFN+".dat")
!         except OSError, e:
!             if e.errno != 2:     # errno.<WHAT>
!                 raise
  
          try:
              os.unlink(dbFN+".dir")
!         except OSError, e:
!             if e.errno != 2:     # errno.<WHAT>
!                 raise
  
      bayes = spambayes.storage.open_storage(dbFN, useDBM)
  
      try:
!         fp = open(inFN, 'r')
      except IOError, e:
          if e.errno != errno.ENOENT:
              raise
  
!     nline = fp.readline()
!     (nham, nspam, junk) = re.split(',', nline)
  
      if newDBM:
--- 160,186 ----
          try:
              os.unlink(dbFN)
!         except OSError:
!             pass
  
          try:
              os.unlink(dbFN+".dat")
!         except OSError:
!             pass
  
          try:
              os.unlink(dbFN+".dir")
!         except OSError:
!             pass
  
      bayes = spambayes.storage.open_storage(dbFN, useDBM)
  
      try:
!         fp = open(inFN, 'rb')
      except IOError, e:
          if e.errno != errno.ENOENT:
              raise
  
!     rdr = csv.reader(fp)
!     (nham, nspam) = rdr.next()
  
      if newDBM:
***************
*** 193,200 ****
      print "%s database %s using file %s" % (impType, dbFN, inFN)
  
!     lines = fp.readlines()
! 
!     for line in lines:
!         (word, hamcount, spamcount, junk) = re.split('`', line)
          word = uunquote(word)
  
--- 198,202 ----
      print "%s database %s using file %s" % (impType, dbFN, inFN)
  
!     for (word, hamcount, spamcount) in rdr:
          word = uunquote(word)
  
***************
*** 209,214 ****
          bayes._wordinfoset(word, wi)
  
-     fp.close()
- 
      print "Storing database, please be patient.  Even moderately sized"
      print "databases may take a very long time to store."
--- 211,214 ----




More information about the Spambayes-checkins mailing list