[Scipy-svn] r2320 - in trunk/Lib/sandbox/pyem: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu Nov 16 07:27:09 EST 2006
Author: cdavid
Date: 2006-11-16 06:26:52 -0600 (Thu, 16 Nov 2006)
New Revision: 2320
Added:
trunk/Lib/sandbox/pyem/basic_example1.py
trunk/Lib/sandbox/pyem/basic_example2.py
trunk/Lib/sandbox/pyem/basic_example3.py
trunk/Lib/sandbox/pyem/demo1.py
trunk/Lib/sandbox/pyem/demo2.py
trunk/Lib/sandbox/pyem/examples.py
trunk/Lib/sandbox/pyem/tests/test_examples.py
Removed:
trunk/Lib/sandbox/pyem/example.py
trunk/Lib/sandbox/pyem/example2.py
Modified:
trunk/Lib/sandbox/pyem/Changelog
trunk/Lib/sandbox/pyem/__init__.py
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_online_em.py
Log:
Various bug fixes (see Changelog)
Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/Changelog 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,3 +1,11 @@
+pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900
+
+ * correct examples
+ * correct exceptions msg strings in gauss_mix, which
+ were buggy
+ * add examples from website to the package, so that above errors
+ do not appear again
+
pyem (0.5.6) Thu, 16 Nov 2006 14:18:19 +0900
* bump to 0.5.6
Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/__init__.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,11 +1,12 @@
#! /usr/bin/env python
-# Last Change: Fri Oct 20 11:00 AM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
from info import __doc__
from gauss_mix import GmParamError, GM
from gmm_em import GmmParamError, GMM, EM
from online_em import OnGMM as _OnGMM
+import examples as _examples
__all__ = filter(lambda s:not s.startswith('_'),dir())
Added: trunk/Lib/sandbox/pyem/basic_example1.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example1.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example1.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,48 @@
+import numpy as N
+import pylab as P
+from scipy.sandbox.pyem import GM
+
+#------------------------------
+# Hyper parameters:
+# - k: number of clusters
+# - d: dimension
+k = 3
+d = 2
+
+#-------------------------------------------------------
+# Values for weights, mean and (diagonal) variances
+# - the weights are an array of rank 1
+# - mean is expected to be rank 2 with one row for one component
+# - variances are also expected to be rank 2. For diagonal, one row
+# is one diagonal; for full, the first d rows are the first variance,
+# etc... In this case, the variance matrix should have k*d rows and d
+# columns
+w = N.array([0.2, 0.45, 0.35])
+mu = N.array([[4.1, 3], [1, 5], [-2, -3]])
+va = N.array([[1, 1.5], [3, 4], [2, 3.5]])
+
+#-----------------------------------------
+# First method: directly from parameters.
+# Both methods are equivalent.
+gm = GM.fromvalues(w, mu, va)
+
+#-------------------------------------
+# Second method to build a GM instance:
+gm = GM(d, k, mode = 'diag')
+# set_param checks that w, mu, and va are consistent with k, d and the mode
+gm.set_param(w, mu, va)
+
+# Once set_param is called, both methods are equivalent. The second
+# method is useful when using a GM object for learning (where
+# the learner class will set the params), whereas the first one
+# is useful when there is a need to quickly sample a model
+# from existing values, without a need to give the hyper parameters
+
+# Create a Gaussian Mixture from the parameters, and sample
+# 1000 items from it (one row = one 2 dimension sample)
+data = gm.sample(1000)
+
+# Plot the samples
+P.plot(data[:, 0], data[:, 1], '.')
+# Plot the confidence ellipsoids at a level of 75 %
+gm.plot(level = 0.75)
Added: trunk/Lib/sandbox/pyem/basic_example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example2.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example2.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,45 @@
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+# To reproduce results, fix the random seed
+seed(1)
+
+#+++++++++++++++++++++++++++++
+# Meta parameters of the model
+# - k: Number of components
+# - d: dimension of each Gaussian
+# - mode: Mode of covariance matrix: full or diag (string)
+# - nframes: number of frames (frame = one data point = one
+# row of d elements)
+k = 2
+d = 2
+mode = 'diag'
+nframes = 1e3
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GM model, samples it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
+gm = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames from the model
+data = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# Create a Model from a Gaussian mixture with kmean initialization
+lgm = GM(d, k, mode)
+gmm = GMM(lgm, 'kmean')
+
+# The actual EM, with likelihood computation. The threshold
+# is compared to the (linearly approximated) derivative of the likelihood
+em = EM()
+like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
+
+# The computed parameters are in gmm.gm, which is the same object as lgm
+# (remember, python does not copy most objects by default). You can for example
+# plot lgm against gm to compare
Added: trunk/Lib/sandbox/pyem/basic_example3.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example3.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example3.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,64 @@
+import numpy as N
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+seed(2)
+
+k = 4
+d = 2
+mode = 'diag'
+nframes = 1e3
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GMM model, samples it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va = GM.gen_param(d, k, mode, spread = 1.0)
+gm = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames from the model
+data = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# List of learned mixtures: lgm[i] is a mixture with i+1 components
+lgm = []
+kmax = 6
+bics = N.zeros(kmax)
+em = EM()
+for i in range(kmax):
+ lgm.append(GM(d, i+1, mode))
+
+ gmm = GMM(lgm[i], 'kmean')
+ em.train(data, gmm, maxiter = 30, thresh = 1e-10)
+ bics[i] = gmm.bic(data)
+
+print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
+
+#+++++++++++++++
+# Draw the model
+#+++++++++++++++
+import pylab as P
+P.subplot(3, 2, 1)
+
+for k in range(kmax):
+ P.subplot(3, 2, k+1)
+ level = 0.9
+ P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+
+ # h keeps the handles of the plot, so that you can modify
+ # its parameters like label or color
+ h = lgm[k].plot(level = level)
+ [i.set_color('r') for i in h]
+ h[0].set_label('EM confidence ellipsoides')
+
+ h = gm.plot(level = level)
+ [i.set_color('g') for i in h]
+ h[0].set_label('Real confidence ellipsoides')
+
+P.legend(loc = 0)
+# depending on your configuration, you may have to call P.show()
+# to actually display the figure
Copied: trunk/Lib/sandbox/pyem/demo1.py (from rev 2316, trunk/Lib/sandbox/pyem/example.py)
Copied: trunk/Lib/sandbox/pyem/demo2.py (from rev 2316, trunk/Lib/sandbox/pyem/example2.py)
Deleted: trunk/Lib/sandbox/pyem/example.py
===================================================================
--- trunk/Lib/sandbox/pyem/example.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/example.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,109 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(1)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-# - k: Number of components
-# - d: dimension of each Gaussian
-# - mode: Mode of covariance matrix: full or diag (string)
-# - nframes: number of frames (frame = one data point = one
-# row of d elements)
-k = 2
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Init the model
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-gmm.init(data)
-
-# Keep a copy for drawing later
-gm0 = copy.copy(lgm)
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly appromixated) derivative of the likelihood
-em = EM()
-like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(2, 1, 1)
-
-# Level is the confidence level for confidence ellipsoids: 1.0 means that
-# all points will be (almost surely) inside the ellipsoid
-level = 0.8
-if not d == 1:
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
- # h keeps the handles of the plot, so that you can modify
- # its parameters like label or color
- h = gm.plot(level = level)
- [i.set_color('g') for i in h]
- h[0].set_label('true confidence ellipsoides')
-
- # Initial confidence ellipses as found by kmean
- h = gm0.plot(level = level)
- [i.set_color('k') for i in h]
- h[0].set_label('kmean confidence ellipsoides')
-
- # Values found by EM
- h = lgm.plot(level = level)
- [i.set_color('r') for i in h]
- h[0].set_label('EM confidence ellipsoides')
-
- P.legend(loc = 0)
-else:
- # The 1d plotting function is quite elaborate: the confidence
- # interval are represented by filled areas, the pdf of the mixture and
- # the pdf of each component is drawn (optional)
- h = gm.plot1d(level = level)
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
-
- h0 = gm0.plot1d(level = level)
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
-
- hl = lgm.plot1d(fill = 1, level = level)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
-
- P.legend(loc = 0)
-
-P.subplot(2, 1, 2)
-P.plot(like)
-P.title('log likelihood')
-
-P.show()
-# P.save('2d diag.png')
Deleted: trunk/Lib/sandbox/pyem/example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/example2.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/example2.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,104 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-# - k: Number of components
-# - d: dimension of each Gaussian
-# - mode: Mode of covariance matrix: full or diag (string)
-# - nframes: number of frames (frame = one data point = one
-# row of d elements)
-k = 4
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.0)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-lgm = []
-kmax = 6
-bics = N.zeros(kmax)
-for i in range(kmax):
- # Init the model with an empty Gaussian Mixture, and create a Gaussian
- # Mixture Model from it
- lgm.append(GM(d, i+1, mode))
- gmm = GMM(lgm[i], 'kmean')
-
- # The actual EM, with likelihood computation. The threshold
- # is compared to the (linearly appromixated) derivative of the likelihood
- em = EM()
- em.train(data, gmm, maxiter = 30, thresh = 1e-10)
- bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
- P.subplot(3, 2, k+1)
- # Level is the confidence level for confidence ellipsoids: 1.0 means that
- # all points will be (almost surely) inside the ellipsoid
- level = 0.8
- if not d == 1:
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
- # h keeps the handles of the plot, so that you can modify
- # its parameters like label or color
- h = lgm[k].plot(level = level)
- [i.set_color('r') for i in h]
- h[0].set_label('EM confidence ellipsoides')
-
- h = gm.plot(level = level)
- [i.set_color('g') for i in h]
- h[0].set_label('Real confidence ellipsoides')
- else:
- # The 1d plotting function is quite elaborate: the confidence
- # interval are represented by filled areas, the pdf of the mixture and
- # the pdf of each component is drawn (optional)
- h = gm.plot1d(level = level)
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
-
- h0 = gm0.plot1d(level = level)
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
-
- hl = lgm.plot1d(fill = 1, level = level)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
-
- P.legend(loc = 0)
-
-P.legend(loc = 0)
-P.show()
-# P.save('2d diag.png')
Added: trunk/Lib/sandbox/pyem/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/examples.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,14 @@
+def ex1():
+ import basic_example1
+
+def ex2():
+ import basic_example2
+
+def ex3():
+ import basic_example3
+
+if __name__ == '__main__':
+ ex1()
+ ex2()
+ ex3()
+
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Thu Nov 16 08:00 PM 2006 J
# Module to implement GaussianMixture class.
@@ -82,10 +82,10 @@
k, d, mode = check_gmm_param(weights, mu, sigma)
if not k == self.k:
raise GmParamError("Number of given components is %d, expected %d"
- % (shape(k), shape(self.k)))
+ % (k, self.k))
if not d == self.d:
raise GmParamError("Dimension of the given model is %d, expected %d"
- % (shape(d), shape(self.d)))
+ % (d, self.d))
if not mode == self.mode and not d == 1:
raise GmParamError("Given covariance mode is %s, expected %s"
% (mode, self.mode))
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -374,7 +374,7 @@
K = mu.shape[0]
n = data.shape[0]
- d = data.shape[1]
+ d = mu.shape[1]
y = N.zeros((K, n))
if mu.size == va.size:
Added: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+# Last Change: Thu Nov 16 09:00 PM 2006 J
+
+from numpy.testing import *
+
+set_package_path()
+from pyem.examples import ex1, ex2, ex3
+restore_path()
+
+# #Optional:
+# set_local_path()
+# # import modules that are located in the same directory as this file.
+# restore_path()
+
+class test_examples(NumpyTestCase):
+ def check_ex1(self, level = 5):
+ ex1()
+
+ def check_ex2(self, level = 5):
+ ex2()
+
+ def check_ex3(self, level = 5):
+ ex3()
+
+if __name__ == "__main__":
+ NumpyTest().run()
Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Mon Oct 23 07:00 PM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
import copy
@@ -61,7 +61,7 @@
self._create_model(d, k, mode, nframes, emiter)
self._check(d, k, mode, nframes, emiter)
- def check_2d(self, level = 2):
+ def check_2d(self, level = 1):
d = 2
k = 2
mode = 'diag'
@@ -72,7 +72,7 @@
self._create_model(d, k, mode, nframes, emiter)
self._check(d, k, mode, nframes, emiter)
- def check_5d(self, level = 2):
+ def check_5d(self, level = 5):
d = 5
k = 2
mode = 'diag'
More information about the Scipy-svn
mailing list