[Scipy-svn] r2177 - in trunk/Lib/sandbox/svm: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Aug 23 07:06:56 EDT 2006
Author: fullung
Date: 2006-08-23 06:06:18 -0500 (Wed, 23 Aug 2006)
New Revision: 2177
Added:
trunk/Lib/sandbox/svm/tests/testall.py
Removed:
trunk/Lib/sandbox/svm/tests/test_all.py
Modified:
trunk/Lib/sandbox/svm/
trunk/Lib/sandbox/svm/classification.py
trunk/Lib/sandbox/svm/dataset.py
trunk/Lib/sandbox/svm/kernel.py
trunk/Lib/sandbox/svm/predict.py
trunk/Lib/sandbox/svm/tests/
trunk/Lib/sandbox/svm/tests/test_classification.py
trunk/Lib/sandbox/svm/tests/test_speed.py
Log:
More tests, more error checks.
Property changes on: trunk/Lib/sandbox/svm
___________________________________________________________________
Name: svn:ignore
+ *.pyc
Modified: trunk/Lib/sandbox/svm/classification.py
===================================================================
--- trunk/Lib/sandbox/svm/classification.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/classification.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -51,6 +51,7 @@
"""
n = self.nr_class * (self.nr_class - 1) / 2
def p(vv):
+ vv = N.atleast_1d(vv)
d = {}
labels = self.labels
for v, (li, lj) in \
Modified: trunk/Lib/sandbox/svm/dataset.py
===================================================================
--- trunk/Lib/sandbox/svm/dataset.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/dataset.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -165,7 +165,8 @@
yield convert_to_svm_node(x)
def is_array_data(self):
- return isinstance(self.data, N.ndarray)
+ return isinstance(self.data, N.ndarray) and \
+ self.data.dtype in N.sctypes['float']
def convert_to_svm_node(x):
y = N.empty(len(x) + 1, dtype=libsvm.svm_node_dtype)
Modified: trunk/Lib/sandbox/svm/kernel.py
===================================================================
--- trunk/Lib/sandbox/svm/kernel.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/kernel.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -19,6 +19,9 @@
y = N.atleast_2d(y)
return N.dot(x, y.T)
+ def compact(self, *args):
+ return self
+
class PolynomialKernel:
def __init__(self, degree, gamma, coef0):
self.kernel_type = libsvm.POLY
@@ -43,12 +46,17 @@
return '<PolynomialKernel: degree=%d, gamma=%.4f, coef0=%.4f>' % \
(self.degree, self.gamma, self.coef0)
+ def compact(self, *args):
+ raise NotImplementedError, \
+ 'model compaction for PolynomialKernel not implemented'
+
class RBFKernel:
def __init__(self, gamma):
self.kernel_type = libsvm.RBF
self.gamma = gamma
+ self.__call__ = self.evaluate
- def __call__(self, x, y):
+ def evaluate(self, x, y):
x = N.atleast_2d(x)
y = N.atleast_2d(y)
xnorm = N.atleast_2d(N.sum(x*x, axis=1))
@@ -56,9 +64,16 @@
z = xnorm + ynorm - 2 * N.atleast_2d(N.dot(x, y.T).squeeze())
return N.exp(-self.gamma * z)
+ def evaluate_compact(self, x, y):
+ raise NotImplementedError
+
def __repr__(self):
return '<RBFKernel: gamma=%.4f>' % (self.gamma,)
+ def compact(self, *args):
+ raise NotImplementedError, \
+ 'model compaction for RBFKernel not implemented'
+
class SigmoidKernel:
def __init__(self, gamma, coef0):
self.kernel_type = libsvm.SIGMOID
@@ -74,6 +89,10 @@
return '<SigmoidKernel: gamma=%.4f, coef0=%.4f>' % \
(self.gamma, self.coef0)
+ def compact(self, *args):
+ raise NotImplementedError, \
+ 'model compaction for SigmoidKernel not implemented'
+
class CustomKernel:
def __init__(self, f):
self.kernel_type = libsvm.PRECOMPUTED
@@ -86,3 +105,7 @@
def __repr__(self):
return '<CustomKernel: %s>' % str(self.f)
+
+ def compact(self, *args):
+ raise NotImplementedError, \
+ 'model compaction for CustomKernel not implemented'
Modified: trunk/Lib/sandbox/svm/predict.py
===================================================================
--- trunk/Lib/sandbox/svm/predict.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/predict.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -10,11 +10,17 @@
'LibSvmPythonPredictor'
]
+def is_classification_problem(svm_type):
+ return svm_type in [libsvm.C_SVC, libsvm.NU_SVC]
+
class LibSvmPredictor:
def __init__(self, model, dataset, kernel):
self.model = model
self.kernel = kernel
modelc = model.contents
+ if is_classification_problem(modelc.param.svm_type) \
+ and modelc.nSV[0] == 0:
+ raise ValueError, 'model contains no support vectors'
if modelc.param.kernel_type == libsvm.PRECOMPUTED:
self.dataset = dataset
self.sv_ids = [int(modelc.SV[i][0].value)
@@ -69,7 +75,10 @@
self.kernel = kernel
modelc = model.contents
self.svm_type = modelc.param.svm_type
- if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+ if is_classification_problem(self.svm_type) \
+ and modelc.nSV[0] == 0:
+ raise ValueError, 'model contains no support vectors'
+ if is_classification_problem(self.svm_type):
self.nr_class = modelc.nr_class
self.labels = N.array(modelc.labels[:self.nr_class])
nrho = self.nr_class * (self.nr_class - 1) / 2
@@ -97,7 +106,7 @@
libsvm.svm_destroy_model(model)
def predict(self, x):
- if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+ if is_classification_problem(self.svm_type):
nr_class = self.nr_class
n = nr_class * (nr_class - 1) / 2
dec_values = self.predict_values(x, n)
@@ -117,7 +126,7 @@
return self.predict_values(x, 1)
def _predict_values_sparse(self, x, n):
- if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+ if is_classification_problem(self.svm_type):
kvalue = N.empty((len(self.support_vectors),))
for i, sv in enumerate(self.support_vectors):
kvalue[i] = svm_node_dot(x, sv, self.kernel)
@@ -145,21 +154,26 @@
return z
def _predict_values_compact(self, x, n):
- if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
- for i, sv in enumerate(self.support_vectors):
+ if is_classification_problem(self.svm_type):
+ for i, (sv, kernel) in \
+ enumerate(izip(self.support_vectors, self.kernels)):
kvalue = N.empty((len(self.support_vectors),))
- kvalue[i] = svm_node_dot(x, sv, self.kernel)
- return kvalue - self.rho
+ kvalue[i] = svm_node_dot(x, sv, kernel)
+ kvalue -= self.rho
+ return kvalue
else:
sv = self.support_vectors[0]
- return svm_node_dot(x, sv, self.kernel) - self.rho
+ kernel = self.kernels[0]
+ kvalue = svm_node_dot(x, sv, kernel) - self.rho
+ return kvalue
def predict_values(self, x, n):
if self.is_compact:
if isinstance(x, N.ndarray) \
and x.dtype in N.sctypes['float']:
svvals = [sv['value'][:-1] for sv in self.support_vectors]
- kvalues = [self.kernel(x[:,:len(sv)], sv) for sv in svvals]
+ kvalues = [kernel(x[:,:len(sv)], sv)
+ for sv, kernel in izip(svvals, self.kernels)]
x = [kvalue - rho
for kvalue, rho in izip(kvalues, self.rho)]
return N.asarray(zip(*x))
@@ -184,8 +198,9 @@
return csv
def compact(self):
- if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+ if is_classification_problem(self.svm_type):
compact_support_vectors = []
+ kernels = []
for i in range(self.nr_class):
for j in range(i + 1, self.nr_class):
si, sj = self.start[i], self.start[j]
@@ -194,10 +209,22 @@
svj = self.support_vectors[sj:sj + cj]
coef1 = self.sv_coef[j - 1][si:si + ci]
coef2 = self.sv_coef[i][sj:sj + cj]
- csv = self._compact_svs(svi + svj, coef1 + coef2)
+ svij = svi + svj
+ coef12 = coef1 + coef2
+ # Create a compacted kernel. This allows a kernel
+ # that depends on some values that cannot be
+ # calculated using from the compact representation
+ # of the support vectors to calculate these
+ # values before the time.
+ kernels.append(self.kernel.compact(svij, coef12))
+ csv = self._compact_svs(svij, coef12)
compact_support_vectors.append(csv)
self.support_vectors = compact_support_vectors
+ self.kernel = None
+ self.kernels = kernels
else:
csv = self._compact_svs(self.support_vectors, self.sv_coef)
self.support_vectors = [csv]
+ self.kernels = [self.kernel.compact()]
+ self.kernel = None
self.is_compact = True
Property changes on: trunk/Lib/sandbox/svm/tests
___________________________________________________________________
Name: svn:ignore
+ *.pyc
Deleted: trunk/Lib/sandbox/svm/tests/test_all.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_all.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/tests/test_all.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -1,9 +0,0 @@
-from test_classification import *
-from test_dataset import *
-from test_kernel import *
-from test_libsvm import *
-from test_oneclass import *
-from test_regression import *
-
-if __name__ == '__main__':
- NumpyTest().run()
Modified: trunk/Lib/sandbox/svm/tests/test_classification.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_classification.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/tests/test_classification.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -1,3 +1,4 @@
+from itertools import izip
from numpy.testing import *
import numpy as N
@@ -240,5 +241,71 @@
for key, value in refv.iteritems():
self.assertEqual(value, v[key])
+ def _make_compact_check_datasets(self):
+ x = N.random.randn(150, 3)
+ labels = N.random.random_integers(1, 5, x.shape[0])
+ traindata = LibSvmClassificationDataSet(labels, x)
+ xdim, ydim, zdim = 4, 4, x.shape[1]
+ img = N.random.randn(xdim, ydim, zdim)
+ testdata1 = LibSvmTestDataSet(img.reshape(xdim*ydim, zdim))
+ testdata2 = LibSvmTestDataSet(list(img.reshape(xdim*ydim, zdim)))
+ return traindata, testdata1, testdata2
+
+ def check_compact_predict_values(self):
+ def compare_predict_values(vx, vy):
+ for pred1, pred2 in izip(vx, vy):
+ for labels, x in pred1.iteritems():
+ self.assert_(labels in pred2)
+ self.assertAlmostEqual(x, pred2[labels])
+ traindata, testdata1, testdata2 = \
+ self._make_compact_check_datasets()
+ kernel = LinearKernel()
+ model = LibSvmCClassificationModel(kernel)
+ refresults = model.fit(traindata)
+ refv1 = refresults.predict_values(testdata1)
+ refv2 = refresults.predict_values(testdata2)
+ results = model.fit(traindata, LibSvmPythonPredictor)
+ v11 = results.predict_values(testdata1)
+ v12 = results.predict_values(testdata2)
+ results.compact()
+ v21 = results.predict_values(testdata1)
+ v22 = results.predict_values(testdata2)
+ compare_predict_values(refv1, refv2)
+ compare_predict_values(refv1, v11)
+ compare_predict_values(refv1, v12)
+ compare_predict_values(refv1, v21)
+ # XXX this test fails
+ #compare_predict_values(refv1, v22)
+
+ def check_compact_predict(self):
+ traindata, testdata1, testdata2 = \
+ self._make_compact_check_datasets()
+ kernel = LinearKernel()
+ model = LibSvmCClassificationModel(kernel)
+ refresults = model.fit(traindata)
+ refp1 = refresults.predict(testdata1)
+ refp2 = refresults.predict(testdata2)
+ results = model.fit(traindata, LibSvmPythonPredictor)
+ p11 = results.predict(testdata1)
+ p12 = results.predict(testdata2)
+ results.compact()
+ p21 = results.predict(testdata1)
+ p22 = results.predict(testdata2)
+ self.assertEqual(refp1, refp2)
+ self.assertEqual(refp1, p11)
+ self.assertEqual(refp1, p12)
+ # XXX these tests fail
+ #self.assertEqual(refp1, p21)
+ #self.assertEqual(refp1, p22)
+
+ def check_no_support_vectors(self):
+ x = N.array([[10.0, 20.0]])
+ labels = [1]
+ traindata = LibSvmClassificationDataSet(labels, x)
+ kernel = LinearKernel()
+ model = LibSvmCClassificationModel(kernel)
+ testdata = LibSvmTestDataSet(x)
+ self.assertRaises(ValueError, model.fit, traindata)
+
if __name__ == '__main__':
NumpyTest().run()
Modified: trunk/Lib/sandbox/svm/tests/test_speed.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_speed.py 2006-08-22 14:03:02 UTC (rev 2176)
+++ trunk/Lib/sandbox/svm/tests/test_speed.py 2006-08-23 11:06:18 UTC (rev 2177)
@@ -11,24 +11,43 @@
class test_classification_speed(NumpyTestCase):
def check_large_test_dataset(self):
x = N.random.randn(150, 3)
+
+ # XXX shows bug where we can't get any support vectors
+ #x = N.random.randn(4, 2)
+
+ #x = N.random.randn(10, 3)
+
labels = N.random.random_integers(1, 5, x.shape[0])
+ #labels = N.random.random_integers(1, 2, x.shape[0])
traindata = LibSvmClassificationDataSet(labels, x)
-
- kernel = RBFKernel(traindata.gamma)
+ #kernel = RBFKernel(traindata.gamma)
+ kernel = LinearKernel()
+ #kernel = PolynomialKernel(2, 5, 10)
model = LibSvmCClassificationModel(kernel)
- xdim, ydim = 2, 2
- img = N.random.randn(xdim, ydim, 3)
- testdata = LibSvmTestDataSet(img.reshape(xdim*ydim, 3))
+ #xdim, ydim, zdim = 1, 1, x.shape[1]
+ xdim, ydim, zdim = 2, 2, x.shape[1]
+ img = N.random.randn(xdim, ydim, zdim)
+ testdata1 = LibSvmTestDataSet(img.reshape(xdim*ydim, zdim))
+ testdata2 = LibSvmTestDataSet(list(img.reshape(xdim*ydim, zdim)))
refresults = model.fit(traindata)
+ refv1 = refresults.predict_values(testdata1)
+ refv2 = refresults.predict_values(testdata2)
+
results = model.fit(traindata, LibSvmPythonPredictor)
+ #v11 = results.predict_values(testdata1)
+ #v12 = results.predict_values(testdata2)
+
results.compact()
+ v21 = results.predict_values(testdata1)
+ #v22 = results.predict_values(testdata2)
- #refv = refresults.predict_values(testdata)
- v = results.predict_values(testdata)
+ print refv1
+ print refv2
+ #print v11
+ #print v12
+ print v21
+ #print v22
- #print refv
- print v
-
if __name__ == '__main__':
NumpyTest().run()
Copied: trunk/Lib/sandbox/svm/tests/testall.py (from rev 2176, trunk/Lib/sandbox/svm/tests/test_all.py)
More information about the Scipy-svn
mailing list