[Scipy-svn] r3053 - in trunk/Lib/sandbox/pyem: . doc/examples examples src tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Mon May 28 03:15:19 EDT 2007
Author: cdavid
Date: 2007-05-28 02:14:50 -0500 (Mon, 28 May 2007)
New Revision: 3053
Added:
trunk/Lib/sandbox/pyem/doc/examples/demo1.py
trunk/Lib/sandbox/pyem/doc/examples/demo2.py
trunk/Lib/sandbox/pyem/examples/
trunk/Lib/sandbox/pyem/examples/__init__.py
trunk/Lib/sandbox/pyem/examples/basic_example1.py
trunk/Lib/sandbox/pyem/examples/basic_example2.py
trunk/Lib/sandbox/pyem/examples/basic_example3.py
trunk/Lib/sandbox/pyem/examples/examples.py
Removed:
trunk/Lib/sandbox/pyem/demo1.py
trunk/Lib/sandbox/pyem/demo2.py
trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py
trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py
trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py
trunk/Lib/sandbox/pyem/doc/examples/examples.py
Modified:
trunk/Lib/sandbox/pyem/Changelog
trunk/Lib/sandbox/pyem/TODO
trunk/Lib/sandbox/pyem/__init__.py
trunk/Lib/sandbox/pyem/info.py
trunk/Lib/sandbox/pyem/src/pure_den.c
trunk/Lib/sandbox/pyem/tests/test_densities.py
trunk/Lib/sandbox/pyem/tests/test_examples.py
trunk/Lib/sandbox/pyem/tests/test_online_em.py
Log:
Put the examples directory at the top level, so that the examples can be easily imported for tests.
Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/Changelog 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,3 +1,7 @@
+pyem (0.5.7dev) Mon, 28 May 2007 11:31:08 +0900
+
+ * Put doc into its own directory
+
pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900
* correct examples
Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/TODO 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,8 +1,11 @@
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Mon May 28 11:00 AM 2007 J
+
Things which must be implemented for a 1.0 version (in importante order)
- A classifier
- basic regularization
+ - Use scipy.cluster kmeans instead of our own, as it now provides all
+ necessary functionalities.
Things which would be nice (after 1.0 version):
- Bayes prior (hard, suppose MCMC)
Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/__init__.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,12 +1,12 @@
#! /usr/bin/env python
-# Last Change: Thu Nov 16 09:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
from info import __doc__
from gauss_mix import GmParamError, GM
from gmm_em import GmmParamError, GMM, EM
-from online_em import OnGMM as _OnGMM
-import examples as _examples
+#from online_em import OnGMM as _OnGMM
+#import examples as _examples
__all__ = filter(lambda s:not s.startswith('_'),dir())
Deleted: trunk/Lib/sandbox/pyem/demo1.py
===================================================================
--- trunk/Lib/sandbox/pyem/demo1.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/demo1.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,109 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(1)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-# - k: Number of components
-# - d: dimension of each Gaussian
-# - mode: Mode of covariance matrix: full or diag (string)
-# - nframes: number of frames (frame = one data point = one
-# row of d elements)
-k = 2
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Init the model
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-gmm.init(data)
-
-# Keep a copy for drawing later
-gm0 = copy.copy(lgm)
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly appromixated) derivative of the likelihood
-em = EM()
-like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(2, 1, 1)
-
-# Level is the confidence level for confidence ellipsoids: 1.0 means that
-# all points will be (almost surely) inside the ellipsoid
-level = 0.8
-if not d == 1:
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
- # h keeps the handles of the plot, so that you can modify
- # its parameters like label or color
- h = gm.plot(level = level)
- [i.set_color('g') for i in h]
- h[0].set_label('true confidence ellipsoides')
-
- # Initial confidence ellipses as found by kmean
- h = gm0.plot(level = level)
- [i.set_color('k') for i in h]
- h[0].set_label('kmean confidence ellipsoides')
-
- # Values found by EM
- h = lgm.plot(level = level)
- [i.set_color('r') for i in h]
- h[0].set_label('EM confidence ellipsoides')
-
- P.legend(loc = 0)
-else:
- # The 1d plotting function is quite elaborate: the confidence
- # interval are represented by filled areas, the pdf of the mixture and
- # the pdf of each component is drawn (optional)
- h = gm.plot1d(level = level)
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
-
- h0 = gm0.plot1d(level = level)
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
-
- hl = lgm.plot1d(fill = 1, level = level)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
-
- P.legend(loc = 0)
-
-P.subplot(2, 1, 2)
-P.plot(like)
-P.title('log likelihood')
-
-P.show()
-# P.save('2d diag.png')
Deleted: trunk/Lib/sandbox/pyem/demo2.py
===================================================================
--- trunk/Lib/sandbox/pyem/demo2.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/demo2.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,104 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-# - k: Number of components
-# - d: dimension of each Gaussian
-# - mode: Mode of covariance matrix: full or diag (string)
-# - nframes: number of frames (frame = one data point = one
-# row of d elements)
-k = 4
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.0)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-lgm = []
-kmax = 6
-bics = N.zeros(kmax)
-for i in range(kmax):
- # Init the model with an empty Gaussian Mixture, and create a Gaussian
- # Mixture Model from it
- lgm.append(GM(d, i+1, mode))
- gmm = GMM(lgm[i], 'kmean')
-
- # The actual EM, with likelihood computation. The threshold
- # is compared to the (linearly appromixated) derivative of the likelihood
- em = EM()
- em.train(data, gmm, maxiter = 30, thresh = 1e-10)
- bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
- P.subplot(3, 2, k+1)
- # Level is the confidence level for confidence ellipsoids: 1.0 means that
- # all points will be (almost surely) inside the ellipsoid
- level = 0.8
- if not d == 1:
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
- # h keeps the handles of the plot, so that you can modify
- # its parameters like label or color
- h = lgm[k].plot(level = level)
- [i.set_color('r') for i in h]
- h[0].set_label('EM confidence ellipsoides')
-
- h = gm.plot(level = level)
- [i.set_color('g') for i in h]
- h[0].set_label('Real confidence ellipsoides')
- else:
- # The 1d plotting function is quite elaborate: the confidence
- # interval are represented by filled areas, the pdf of the mixture and
- # the pdf of each component is drawn (optional)
- h = gm.plot1d(level = level)
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
-
- h0 = gm0.plot1d(level = level)
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
-
- hl = lgm.plot1d(fill = 1, level = level)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
-
- P.legend(loc = 0)
-
-P.legend(loc = 0)
-P.show()
-# P.save('2d diag.png')
Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,48 +0,0 @@
-import numpy as N
-import pylab as P
-from scipy.sandbox.pyem import GM
-
-#------------------------------
-# Hyper parameters:
-# - K: number of clusters
-# - d: dimension
-k = 3
-d = 2
-
-#-------------------------------------------------------
-# Values for weights, mean and (diagonal) variances
-# - the weights are an array of rank 1
-# - mean is expected to be rank 2 with one row for one component
-# - variances are also expteced to be rank 2. For diagonal, one row
-# is one diagonal, for full, the first d rows are the first variance,
-# etc... In this case, the variance matrix should be k*d rows and d
-# colums
-w = N.array([0.2, 0.45, 0.35])
-mu = N.array([[4.1, 3], [1, 5], [-2, -3]])
-va = N.array([[1, 1.5], [3, 4], [2, 3.5]])
-
-#-----------------------------------------
-# First method: directly from parameters:
-# Both methods are equivalents.
-gm = GM.fromvalues(w, mu, va)
-
-#-------------------------------------
-# Second method to build a GM instance:
-gm = GM(d, k, mode = 'diag')
-# The set_params checks that w, mu, and va corresponds to k, d and m
-gm.set_param(w, mu, va)
-
-# Once set_params is called, both methods are equivalent. The 2d
-# method is useful when using a GM object for learning (where
-# the learner class will set the params), whereas the first one
-# is useful when there is a need to quickly sample a model
-# from existing values, without a need to give the hyper parameters
-
-# Create a Gaussian Mixture from the parameters, and sample
-# 1000 items from it (one row = one 2 dimension sample)
-data = gm.sample(1000)
-
-# Plot the samples
-P.plot(data[:, 0], data[:, 1], '.')
-# Plot the ellipsoids of confidence with a level a 75 %
-gm.plot(level = 0.75)
Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,45 +0,0 @@
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-# To reproduce results, fix the random seed
-seed(1)
-
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-# - k: Number of components
-# - d: dimension of each Gaussian
-# - mode: Mode of covariance matrix: full or diag (string)
-# - nframes: number of frames (frame = one data point = one
-# row of d elements)
-k = 2
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Create a Model from a Gaussian mixture with kmean initialization
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly appromixated) derivative of the likelihood
-em = EM()
-like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-# The computed parameters are in gmm.gm, which is the same than lgm
-# (remember, python does not copy most objects by default). You can for example
-# plot lgm against gm to compare
Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,64 +0,0 @@
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-
-k = 4
-d = 2
-mode = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va = GM.gen_param(d, k, mode, spread = 1.0)
-gm = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# List of learned mixtures lgm[i] is a mixture with i+1 components
-lgm = []
-kmax = 6
-bics = N.zeros(kmax)
-em = EM()
-for i in range(kmax):
- lgm.append(GM(d, i+1, mode))
-
- gmm = GMM(lgm[i], 'kmean')
- em.train(data, gmm, maxiter = 30, thresh = 1e-10)
- bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
- P.subplot(3, 2, k+1)
- level = 0.9
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
- # h keeps the handles of the plot, so that you can modify
- # its parameters like label or color
- h = lgm[k].plot(level = level)
- [i.set_color('r') for i in h]
- h[0].set_label('EM confidence ellipsoides')
-
- h = gm.plot(level = level)
- [i.set_color('g') for i in h]
- h[0].set_label('Real confidence ellipsoides')
-
-P.legend(loc = 0)
-# depending on your configuration, you may have to call P.show()
-# to actually display the figure
Copied: trunk/Lib/sandbox/pyem/doc/examples/demo1.py (from rev 3021, trunk/Lib/sandbox/pyem/demo1.py)
Copied: trunk/Lib/sandbox/pyem/doc/examples/demo2.py (from rev 3021, trunk/Lib/sandbox/pyem/demo2.py)
Deleted: trunk/Lib/sandbox/pyem/doc/examples/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/examples.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/examples.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,14 +0,0 @@
-def ex1():
- import basic_example1
-
-def ex2():
- import basic_example2
-
-def ex3():
- import basic_example3
-
-if __name__ == '__main__':
- ex1()
- ex2()
- ex3()
-
Added: trunk/Lib/sandbox/pyem/examples/__init__.py
===================================================================
Copied: trunk/Lib/sandbox/pyem/examples/basic_example1.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py)
Copied: trunk/Lib/sandbox/pyem/examples/basic_example2.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py)
Copied: trunk/Lib/sandbox/pyem/examples/basic_example3.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py)
Copied: trunk/Lib/sandbox/pyem/examples/examples.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/examples.py)
Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/info.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -60,7 +60,7 @@
Copyright: David Cournapeau 2006
License: BSD-style (see LICENSE.txt in main source directory)
"""
-version = '0.5.6'
+version = '0.5.7dev'
depends = ['linalg', 'stats']
ignore = False
Modified: trunk/Lib/sandbox/pyem/src/pure_den.c
===================================================================
--- trunk/Lib/sandbox/pyem/src/pure_den.c 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/src/pure_den.c 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,8 @@
/*
- * Last Change: Wed Dec 06 08:00 PM 2006 J
+ * Last Change: Mon May 28 01:00 PM 2007 J
+ *
+ * Pure C module because ctypes cannot be used here for performance reasons
+ * (function calls are the primary bottleneck)
*/
#include <Python.h>
#include <numpy/arrayobject.h>
Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Thu Nov 09 05:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
# TODO:
# - having "fake tests" to check that all mode (scalar, diag and full) are
Modified: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,10 +1,10 @@
#! /usr/bin/env python
-# Last Change: Thu Nov 16 09:00 PM 2006 J
+# Last Change: Mon May 28 04:00 PM 2007 J
from numpy.testing import *
set_package_path()
-from pyem.examples import ex1, ex2, ex3
+from examples.examples import ex1, ex2, ex3
restore_path()
# #Optional:
Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Wed Dec 06 09:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
import copy
More information about the Scipy-svn
mailing list