[Spambayes-checkins] spambayes HistToGNU.py,1.6,1.7
Histogram.py,1.2,1.3
Tim Peters
tim_one@users.sourceforge.net
Thu, 03 Oct 2002 20:01:32 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv29721
Modified Files:
HistToGNU.py Histogram.py
Log Message:
Ack, I'm sure my histogram refactoring broke HistToGNU.py, but can't
test it conveniently. Lots of fiddling so that it's no longer obviously
broken.
Index: HistToGNU.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/HistToGNU.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** HistToGNU.py 27 Sep 2002 21:04:05 -0000 1.6
--- HistToGNU.py 4 Oct 2002 03:01:29 -0000 1.7
***************
*** 41,46 ****
return pickle.load(file(path))
! def outputHist(hist,f=sys.stdout):
"""Output the Hist object to file f"""
for i in range(len(hist.buckets)):
n = hist.buckets[i]
--- 41,47 ----
return pickle.load(file(path))
! def outputHist(hist, f=sys.stdout):
"""Output the Hist object to file f"""
+ hist.fill_buckets()
for i in range(len(hist.buckets)):
n = hist.buckets[i]
***************
*** 67,72 ****
try:
! opts, args = getopt.getopt(sys.argv[1:], '',
! [])
except getopt.error, msg:
usage(1, msg)
--- 68,72 ----
try:
! opts, args = getopt.getopt(sys.argv[1:], '', [])
except getopt.error, msg:
usage(1, msg)
Index: Histogram.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Histogram.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** Histogram.py 4 Oct 2002 02:44:45 -0000 1.2
--- Histogram.py 4 Oct 2002 03:01:30 -0000 1.3
***************
*** 72,75 ****
--- 72,110 ----
return self
+ def get_lo_hi(self):
+ self.compute_stats()
+ lo, hi = self.lo, self.hi
+ if lo is None:
+ lo = self.min
+ if hi is None:
+ hi = self.max
+ return lo, hi
+
+ def get_bucketwidth(self):
+ lo, hi = self.get_lo_hi()
+ span = float(hi - lo)
+ return span / self.nbuckets
+
+ # Set instance var nbuckets to the # of buckets, and buckets to a list
+ # of nbuckets counts.
+ def fill_buckets(self, nbuckets=None):
+ if nbuckets is None:
+ nbuckets = self.nbuckets
+ if nbuckets <= 0:
+ raise ValueError("nbuckets %g > 0 required" % nbuckets)
+ self.nbuckets = nbuckets
+ self.buckets = buckets = [0] * nbuckets
+
+ # Compute bucket counts.
+ lo, hi = self.get_lo_hi()
+ bucketwidth = self.get_bucketwidth()
+ for x in self.data:
+ i = int((x - lo) / bucketwidth)
+ if i >= nbuckets:
+ i = nbuckets - 1
+ elif i < 0:
+ i = 0
+ buckets[i] += 1
+
# Print a histogram to stdout.
# Also sets instance var nbuckets to the # of buckets, and
***************
*** 87,116 ****
self.median,
self.max)
! if nbuckets is None:
! nbuckets = self.nbuckets
! self.nbuckets = nbuckets
! self.buckets = buckets = [0] * nbuckets
!
! lo, hi = self.lo, self.hi
! if lo is None:
! lo = self.min
! if hi is None:
! hi = self.max
if lo > hi:
return
- # Compute bucket counts.
- span = float(hi - lo)
- bucketwidth = span / nbuckets
- for x in self.data:
- i = int((x - lo) / bucketwidth)
- if i >= nbuckets:
- i = nbuckets - 1
- elif i < 0:
- i = 0
- buckets[i] += 1
-
# hunit is how many items a * represents. A * is printed for
# each hunit items, plus any non-zero fraction thereof.
biggest = max(self.buckets)
hunit, r = divmod(biggest, WIDTH)
--- 122,132 ----
self.median,
self.max)
! lo, hi = self.get_lo_hi()
if lo > hi:
return
# hunit is how many items a * represents. A * is printed for
# each hunit items, plus any non-zero fraction thereof.
+ self.fill_buckets(nbuckets)
biggest = max(self.buckets)
hunit, r = divmod(biggest, WIDTH)
***************
*** 128,131 ****
--- 144,148 ----
format = "%" + str(boundary_digits + 2) + '.1f %' + str(ndigits) + "d"
+ bucketwidth = self.get_bucketwidth()
for i in range(nbuckets):
n = self.buckets[i]