[Spambayes-checkins] spambayes HistToGNU.py,1.6,1.7 Histogram.py,1.2,1.3

Tim Peters tim_one@users.sourceforge.net
Thu, 03 Oct 2002 20:01:32 -0700


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv29721

Modified Files:
	HistToGNU.py Histogram.py 
Log Message:
Ack, I'm sure my histogram refactoring broke HistToGNU.py, but I can't
test it conveniently.  I did lots of fiddling so that it's at least no
longer obviously broken.


Index: HistToGNU.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/HistToGNU.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** HistToGNU.py	27 Sep 2002 21:04:05 -0000	1.6
--- HistToGNU.py	4 Oct 2002 03:01:29 -0000	1.7
***************
*** 41,46 ****
      return pickle.load(file(path))
  
! def outputHist(hist,f=sys.stdout):
      """Output the Hist object to file f"""
      for i in range(len(hist.buckets)):
          n = hist.buckets[i]
--- 41,47 ----
      return pickle.load(file(path))
  
! def outputHist(hist, f=sys.stdout):
      """Output the Hist object to file f"""
+     hist.fill_buckets()
      for i in range(len(hist.buckets)):
          n = hist.buckets[i]
***************
*** 67,72 ****
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], '',
!                                    [])
      except getopt.error, msg:
          usage(1, msg)
--- 68,72 ----
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], '', [])
      except getopt.error, msg:
          usage(1, msg)

Index: Histogram.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Histogram.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** Histogram.py	4 Oct 2002 02:44:45 -0000	1.2
--- Histogram.py	4 Oct 2002 03:01:30 -0000	1.3
***************
*** 72,75 ****
--- 72,110 ----
          return self
  
+     def get_lo_hi(self):
+         self.compute_stats()
+         lo, hi = self.lo, self.hi
+         if lo is None:
+             lo = self.min
+         if hi is None:
+             hi = self.max
+         return lo, hi
+ 
+     def get_bucketwidth(self):
+         lo, hi = self.get_lo_hi()
+         span = float(hi - lo)
+         return span / self.nbuckets
+ 
+     # Set instance var nbuckets to the # of buckets, and buckets to a list
+     # of nbuckets counts.
+     def fill_buckets(self, nbuckets=None):
+         if nbuckets is None:
+             nbuckets = self.nbuckets
+         if nbuckets <= 0:
+             raise ValueError("nbuckets %g > 0 required" % nbuckets)
+         self.nbuckets = nbuckets
+         self.buckets = buckets = [0] * nbuckets
+ 
+         # Compute bucket counts.
+         lo, hi = self.get_lo_hi()
+         bucketwidth = self.get_bucketwidth()
+         for x in self.data:
+             i = int((x - lo) / bucketwidth)
+             if i >= nbuckets:
+                 i = nbuckets - 1
+             elif i < 0:
+                 i = 0
+             buckets[i] += 1
+ 
      # Print a histogram to stdout.
      # Also sets instance var nbuckets to the # of buckets, and
***************
*** 87,116 ****
                                                         self.median,
                                                         self.max)
!         if nbuckets is None:
!             nbuckets = self.nbuckets
!         self.nbuckets = nbuckets
!         self.buckets = buckets = [0] * nbuckets
! 
!         lo, hi = self.lo, self.hi
!         if lo is None:
!             lo = self.min
!         if hi is None:
!             hi = self.max
          if lo > hi:
              return
  
-         # Compute bucket counts.
-         span = float(hi - lo)
-         bucketwidth = span / nbuckets
-         for x in self.data:
-             i = int((x - lo) / bucketwidth)
-             if i >= nbuckets:
-                 i = nbuckets - 1
-             elif i < 0:
-                 i = 0
-             buckets[i] += 1
- 
          # hunit is how many items a * represents.  A * is printed for
          # each hunit items, plus any non-zero fraction thereof.
          biggest = max(self.buckets)
          hunit, r = divmod(biggest, WIDTH)
--- 122,132 ----
                                                         self.median,
                                                         self.max)
!         lo, hi = self.get_lo_hi()
          if lo > hi:
              return
  
          # hunit is how many items a * represents.  A * is printed for
          # each hunit items, plus any non-zero fraction thereof.
+         self.fill_buckets(nbuckets)
          biggest = max(self.buckets)
          hunit, r = divmod(biggest, WIDTH)
***************
*** 128,131 ****
--- 144,148 ----
          format = "%" + str(boundary_digits + 2) + '.1f %' + str(ndigits) + "d"
  
+         bucketwidth = self.get_bucketwidth()
          for i in range(nbuckets):
              n = self.buckets[i]