[Scipy-svn] r3053 - in trunk/Lib/sandbox/pyem: . doc/examples examples src tests

scipy-svn at scipy.org
Mon May 28 03:15:19 EDT 2007


Author: cdavid
Date: 2007-05-28 02:14:50 -0500 (Mon, 28 May 2007)
New Revision: 3053

Added:
   trunk/Lib/sandbox/pyem/doc/examples/demo1.py
   trunk/Lib/sandbox/pyem/doc/examples/demo2.py
   trunk/Lib/sandbox/pyem/examples/
   trunk/Lib/sandbox/pyem/examples/__init__.py
   trunk/Lib/sandbox/pyem/examples/basic_example1.py
   trunk/Lib/sandbox/pyem/examples/basic_example2.py
   trunk/Lib/sandbox/pyem/examples/basic_example3.py
   trunk/Lib/sandbox/pyem/examples/examples.py
Removed:
   trunk/Lib/sandbox/pyem/demo1.py
   trunk/Lib/sandbox/pyem/demo2.py
   trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py
   trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py
   trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py
   trunk/Lib/sandbox/pyem/doc/examples/examples.py
Modified:
   trunk/Lib/sandbox/pyem/Changelog
   trunk/Lib/sandbox/pyem/TODO
   trunk/Lib/sandbox/pyem/__init__.py
   trunk/Lib/sandbox/pyem/info.py
   trunk/Lib/sandbox/pyem/src/pure_den.c
   trunk/Lib/sandbox/pyem/tests/test_densities.py
   trunk/Lib/sandbox/pyem/tests/test_examples.py
   trunk/Lib/sandbox/pyem/tests/test_online_em.py
Log:
Put the examples directory at the top level, so that the examples can be easily imported for tests.

Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/Changelog	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,3 +1,7 @@
+pyem (0.5.7dev) Mon, 28 May 2007 11:31:08 +0900
+
+	* Put doc into its own directory
+
 pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900
 
 	* correct examples

Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/TODO	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,8 +1,11 @@
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Mon May 28 11:00 AM 2007 J
 
+
 Things which must be implemented for a 1.0 version (in order of importance)
     - A classifier
     - basic regularization
+    - Use scipy.cluster kmeans instead of our own, as it now provides all
+    the necessary functionality (see the sketch after this diff).
 
 Things which would be nice (after 1.0 version):
     - Bayes prior (hard, suppose MCMC)
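
As an aside on the new TODO item above: scipy.cluster.vq already exposes
the needed k-means pieces. Below is a minimal sketch of what replacing
pyem's own k-means initialization could look like; the function name
kmean_init and the integration point are assumptions, not pyem API.

import numpy as N
from scipy.cluster.vq import kmeans, vq

def kmean_init(data, k, niter=20):
    # Run scipy's k-means: returns the k x d codebook (centroids) and
    # the mean distortion, which is not needed here.
    code, dist = kmeans(data, k, iter=niter)
    # Assign each frame to its nearest centroid.
    labels, dists = vq(data, code)
    return code, labels

Note that kmeans can return fewer than k centroids when a cluster loses
all its points, which any replacement would have to handle.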

Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/__init__.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,12 +1,12 @@
 #! /usr/bin/env python
-# Last Change: Thu Nov 16 09:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
 
 from info import __doc__
 
 from gauss_mix import GmParamError, GM
 from gmm_em import GmmParamError, GMM, EM
-from online_em import OnGMM as _OnGMM
-import examples as _examples
+#from online_em import OnGMM as _OnGMM
+#import examples as _examples
 
 __all__ = filter(lambda s:not s.startswith('_'),dir())
 

Deleted: trunk/Lib/sandbox/pyem/demo1.py
===================================================================
--- trunk/Lib/sandbox/pyem/demo1.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/demo1.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,109 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of the pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, or mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, or sampling
-# a mixture with full covariance and estimating it with a mixture of diagonal
-# Gaussians (replace the mode of the learned model lgm).
-#
-# Later, I hope to add functions for estimating the number of components, e.g. using BIC.
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(1)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-#   - k: Number of components
-#   - d: dimension of each Gaussian
-#   - mode: Mode of covariance matrix: full or diag (string)
-#   - nframes: number of frames (frame = one data point = one
-#   row of d elements)
-k       = 2 
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model and sample it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Init the model
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-gmm.init(data)
-
-# Keep a copy for drawing later
-gm0 = copy.copy(lgm)
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly approximated) derivative of the likelihood
-em      = EM()
-like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(2, 1, 1)
-
-# Level is the confidence level for confidence ellipsoids: 1.0 means that
-# all points will be (almost surely) inside the ellipsoid
-level   = 0.8
-if not d == 1:
-    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-    # h keeps the handles of the plot, so that you can modify 
-    # its parameters like label or color
-    h   = gm.plot(level = level)
-    [i.set_color('g') for i in h]
-    h[0].set_label('true confidence ellipsoids')
-
-    # Initial confidence ellipses as found by kmean
-    h   = gm0.plot(level = level)
-    [i.set_color('k') for i in h]
-    h[0].set_label('kmean confidence ellipsoids')
-
-    # Values found by EM
-    h   = lgm.plot(level = level)
-    [i.set_color('r') for i in h]
-    h[0].set_label('EM confidence ellipsoids')
-
-    P.legend(loc = 0)
-else:
-    # The 1d plotting function is quite elaborate: the confidence
-    # intervals are represented by filled areas, and the pdf of the mixture
-    # and the pdf of each component are drawn (optional)
-    h   = gm.plot1d(level = level)
-    [i.set_color('g') for i in h['pdf']]
-    h['pdf'][0].set_label('true pdf')
-
-    h0  = gm0.plot1d(level = level)
-    [i.set_color('k') for i in h0['pdf']]
-    h0['pdf'][0].set_label('initial pdf')
-
-    hl  = lgm.plot1d(fill = 1, level = level)
-    [i.set_color('r') for i in hl['pdf']]
-    hl['pdf'][0].set_label('pdf found by EM')
-
-    P.legend(loc = 0)
-
-P.subplot(2, 1, 2)
-P.plot(like)
-P.title('log likelihood')
-
-P.show()
-# P.save('2d diag.png')

Deleted: trunk/Lib/sandbox/pyem/demo2.py
===================================================================
--- trunk/Lib/sandbox/pyem/demo2.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/demo2.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,104 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of the pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, or mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, or sampling
-# a mixture with full covariance and estimating it with a mixture of diagonal
-# Gaussians (replace the mode of the learned model lgm).
-#
-# Later, I hope to add functions for estimating the number of components, e.g. using BIC.
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-#   - k: Number of components
-#   - d: dimension of each Gaussian
-#   - mode: Mode of covariance matrix: full or diag (string)
-#   - nframes: number of frames (frame = one data point = one
-#   row of d elements)
-k       = 4 
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model and sample it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.0)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-lgm     = []
-kmax    = 6
-bics    = N.zeros(kmax)
-for i in range(kmax):
-    # Init the model with an empty Gaussian Mixture, and create a Gaussian 
-    # Mixture Model from it
-    lgm.append(GM(d, i+1, mode))
-    gmm = GMM(lgm[i], 'kmean')
-
-    # The actual EM, with likelihood computation. The threshold
-    # is compared to the (linearly approximated) derivative of the likelihood
-    em      = EM()
-    em.train(data, gmm, maxiter = 30, thresh = 1e-10)
-    bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) 
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
-    P.subplot(3, 2, k+1)
-    # Level is the confidence level for confidence ellipsoids: 1.0 means that
-    # all points will be (almost surely) inside the ellipsoid
-    level   = 0.8
-    if not d == 1:
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-        # h keeps the handles of the plot, so that you can modify 
-        # its parameters like label or color
-        h   = lgm[k].plot(level = level)
-        [i.set_color('r') for i in h]
-        h[0].set_label('EM confidence ellipsoids')
-
-        h   = gm.plot(level = level)
-        [i.set_color('g') for i in h]
-        h[0].set_label('Real confidence ellipsoids')
-    else:
-        # The 1d plotting function is quite elaborate: the confidence
-        # intervals are represented by filled areas, and the pdf of the
-        # mixture and the pdf of each component are drawn (optional)
-        h   = gm.plot1d(level = level)
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
-
-        # Note: unlike demo1, demo2 keeps no copy of the kmean-initialized
-        # model, so there is no "initial pdf" curve to draw here.
-        # lgm is a list of mixtures, hence the [k] indexing below.
-
-        hl  = lgm[k].plot1d(fill = 1, level = level)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
-
-        P.legend(loc = 0)
-
-P.legend(loc = 0)
-P.show()
-# P.save('2d diag.png')
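
For context on the model selection above: demo2 picks the number of
components by maximizing gmm.bic over k. Assuming the usual penalized
log-likelihood convention (the exact formula inside gmm.bic is not shown
in this commit), the criterion is equivalent to:

import numpy as N

def bic(log_likelihood, n_free_params, n_frames):
    # Penalized log-likelihood: logL - p/2 * log(N).
    # Higher is better, which matches the N.argmax(bics) selection above.
    return log_likelihood - 0.5 * n_free_params * N.log(n_frames)

# For a diagonal mixture with k components in dimension d, the free
# parameters are (k-1) weights + k*d means + k*d variances.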

Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,48 +0,0 @@
-import numpy as N
-import pylab as P
-from scipy.sandbox.pyem import GM
-
-#------------------------------
-# Hyper parameters:
-#   - K:    number of clusters
-#   - d:    dimension
-k   = 3
-d   = 2
-
-#-------------------------------------------------------
-# Values for weights, mean and (diagonal) variances
-#   - the weights are an array of rank 1
-#   - mean is expected to be rank 2 with one row for one component
-#   - variances are also expected to be rank 2. For diagonal, one row
-#   is one diagonal; for full, the first d rows are the first covariance
-#   matrix, etc. In that case, the variance matrix should have k*d rows
-#   and d columns
-w   = N.array([0.2, 0.45, 0.35])
-mu  = N.array([[4.1, 3], [1, 5], [-2, -3]])
-va  = N.array([[1, 1.5], [3, 4], [2, 3.5]])
-
-#-----------------------------------------
-# First method: build the GM directly from the parameters
-# (both methods shown here are equivalent):
-gm      = GM.fromvalues(w, mu, va)
-
-#-------------------------------------
-# Second method to build a GM instance:
-gm      = GM(d, k, mode = 'diag')
-# set_param checks that w, mu and va correspond to k, d and mode
-gm.set_param(w, mu, va)
-
-# Once set_param is called, both methods are equivalent. The second
-# method is useful when using a GM object for learning (where
-# the learner class will set the params), whereas the first one
-# is useful when you need to quickly sample a model
-# from existing values, without having to give the hyper parameters
-
-# Create a Gaussian Mixture from the parameters, and sample
-# 1000 items from it (one row = one 2-dimensional sample)
-data    = gm.sample(1000)
-
-# Plot the samples
-P.plot(data[:, 0], data[:, 1], '.')
-# Plot the confidence ellipsoids at a 75% level
-gm.plot(level = 0.75)
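
The comments in basic_example1.py describe the 'full' covariance layout
(k*d rows, d columns) but only exercise 'diag'. Here is a short sketch of
the full-mode equivalent, assuming set_param accepts exactly the layout
the comments describe (the numerical values are illustrative):

import numpy as N
from scipy.sandbox.pyem import GM

k, d = 2, 2
w   = N.array([0.4, 0.6])
mu  = N.array([[0., 0.], [3., 3.]])
# 'full' mode: va has k*d rows and d columns; rows 0:d hold the first
# component's covariance matrix, rows d:2*d the second one's, etc.
va  = N.array([[ 1.0,  0.2],
               [ 0.2,  1.0],
               [ 2.0, -0.4],
               [-0.4,  1.5]])
gm  = GM(d, k, mode = 'full')
gm.set_param(w, mu, va)
data = gm.sample(1000)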

Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,45 +0,0 @@
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-# To reproduce results, fix the random seed
-seed(1)
-
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-#   - k: Number of components
-#   - d: dimension of each Gaussian
-#   - mode: Mode of covariance matrix: full or diag (string)
-#   - nframes: number of frames (frame = one data point = one
-#   row of d elements)
-k       = 2
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model and sample it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Create a model from a Gaussian mixture, with kmean initialization
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly approximated) derivative of the likelihood
-em      = EM()
-like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-# The computed parameters are in gmm.gm, which is the same object as lgm
-# (remember, Python does not copy most objects by default). You can, for
-# example, plot lgm against gm to compare; a sketch follows below.
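
A minimal sketch of the comparison suggested at the end of
basic_example2.py, reusing the plotting idiom from demo1 (green for the
true model, red for the learned one):

import pylab as P

level = 0.8
P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')

# True model, in green
h = gm.plot(level = level)
[i.set_color('g') for i in h]
h[0].set_label('true confidence ellipsoids')

# Model found by EM, in red
h = lgm.plot(level = level)
[i.set_color('r') for i in h]
h[0].set_label('EM confidence ellipsoids')

P.legend(loc = 0)
P.show()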

Deleted: trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,64 +0,0 @@
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-
-k       = 4
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model and sample it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.0)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# List of learned mixtures; lgm[i] is a mixture with i+1 components
-lgm     = []
-kmax    = 6
-bics    = N.zeros(kmax)
-em      = EM()
-for i in range(kmax):
-    lgm.append(GM(d, i+1, mode))
-
-    gmm = GMM(lgm[i], 'kmean')
-    em.train(data, gmm, maxiter = 30, thresh = 1e-10)
-    bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
-    P.subplot(3, 2, k+1)
-    level   = 0.9
-    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-    # h keeps the handles of the plot, so that you can modify 
-    # its parameters like label or color
-    h   = lgm[k].plot(level = level)
-    [i.set_color('r') for i in h]
-    h[0].set_label('EM confidence ellipsoids')
-
-    h   = gm.plot(level = level)
-    [i.set_color('g') for i in h]
-    h[0].set_label('Real confidence ellipsoids')
-
-P.legend(loc = 0)
-# depending on your configuration, you may have to call P.show() 
-# to actually display the figure

Copied: trunk/Lib/sandbox/pyem/doc/examples/demo1.py (from rev 3021, trunk/Lib/sandbox/pyem/demo1.py)

Copied: trunk/Lib/sandbox/pyem/doc/examples/demo2.py (from rev 3021, trunk/Lib/sandbox/pyem/demo2.py)

Deleted: trunk/Lib/sandbox/pyem/doc/examples/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/doc/examples/examples.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/doc/examples/examples.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,14 +0,0 @@
-def ex1():
-    import basic_example1
-
-def ex2():
-    import basic_example2
-
-def ex3():
-    import basic_example3
-
-if __name__ == '__main__':
-    ex1()
-    ex2()
-    ex3()
-

Added: trunk/Lib/sandbox/pyem/examples/__init__.py
===================================================================

Copied: trunk/Lib/sandbox/pyem/examples/basic_example1.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example1.py)

Copied: trunk/Lib/sandbox/pyem/examples/basic_example2.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example2.py)

Copied: trunk/Lib/sandbox/pyem/examples/basic_example3.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/basic_example3.py)

Copied: trunk/Lib/sandbox/pyem/examples/examples.py (from rev 3051, trunk/Lib/sandbox/pyem/doc/examples/examples.py)

Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/info.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -60,7 +60,7 @@
 Copyright: David Cournapeau 2006
 License: BSD-style (see LICENSE.txt in main source directory)
 """
-version = '0.5.6'
+version = '0.5.7dev'
 
 depends = ['linalg', 'stats']
 ignore  = False

Modified: trunk/Lib/sandbox/pyem/src/pure_den.c
===================================================================
--- trunk/Lib/sandbox/pyem/src/pure_den.c	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/src/pure_den.c	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,8 @@
 /*
- * Last Change: Wed Dec 06 08:00 PM 2006 J
+ * Last Change: Mon May 28 01:00 PM 2007 J
+ *
+ * Pure C module because ctypes cannot be used here for performance reasons
+ * (function calls are the primary bottleneck)
  */
 #include <Python.h>
 #include <numpy/arrayobject.h>
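
The new comment above can be illustrated from the Python side: crossing
the Python/C boundary once per frame through ctypes is what hurts, not
the C code itself. A rough, platform-dependent sketch (the libm lookup
and the element-by-element loop are illustrative assumptions, not pyem
code):

import ctypes, ctypes.util
import time
import numpy as N

# exp() from the C math library, called once per value through ctypes
libm = ctypes.CDLL(ctypes.util.find_library('m'))
libm.exp.restype  = ctypes.c_double
libm.exp.argtypes = [ctypes.c_double]

x = N.linspace(0., 1., 100000)

t0 = time.time()
y1 = [libm.exp(float(v)) for v in x]   # one Python->C transition per element
t1 = time.time()
y2 = N.exp(x)                          # a single call; the loop stays in C
t2 = time.time()

print "per-call ctypes: %.3f s, vectorized: %.3f s" % (t1 - t0, t2 - t1)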

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Thu Nov 09 05:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are

Modified: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,10 +1,10 @@
 #! /usr/bin/env python
-# Last Change: Thu Nov 16 09:00 PM 2006 J
+# Last Change: Mon May 28 04:00 PM 2007 J
 
 from numpy.testing import *
 
 set_package_path()
-from pyem.examples import ex1, ex2, ex3
+from examples.examples import ex1, ex2, ex3
 restore_path()
 
 # #Optional:

Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py	2007-05-28 01:44:11 UTC (rev 3052)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py	2007-05-28 07:14:50 UTC (rev 3053)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Wed Dec 06 09:00 PM 2006 J
+# Last Change: Mon May 28 01:00 PM 2007 J
 
 import copy
 



