[Scipy-svn] r2103 - in trunk/Lib/sandbox/svm: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Fri Jul 14 17:22:10 EDT 2006
Author: fullung
Date: 2006-07-14 16:21:55 -0500 (Fri, 14 Jul 2006)
New Revision: 2103
Modified:
trunk/Lib/sandbox/svm/dataset.py
trunk/Lib/sandbox/svm/tests/test_classification.py
trunk/Lib/sandbox/svm/tests/test_dataset.py
trunk/Lib/sandbox/svm/tests/test_kernel.py
trunk/Lib/sandbox/svm/tests/test_libsvm.py
trunk/Lib/sandbox/svm/tests/test_oneclass.py
trunk/Lib/sandbox/svm/tests/test_regression.py
Log:
Dataset for dealing with precomputed kernels.
Modified: trunk/Lib/sandbox/svm/dataset.py
===================================================================
--- trunk/Lib/sandbox/svm/dataset.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/dataset.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -44,34 +44,31 @@
self.iddatamap = {}
- # Create Gram matrix as a list of vectors that have extra
- # entries for id and end of record marker.
+ # Create Gram matrix as a list of vectors which have an extra
+ # entry for the id field.
n = len(origdata)
- grammat = [N.empty((n+2,), dtype=libsvm.svm_node_dtype)
+ grammat = [N.empty((n+1,), dtype=libsvm.svm_node_dtype)
for i in range(n)]
self.grammat = grammat
# Calculate Gram matrix. Refer to Kernel::kernel_precomputed
# in svm.cpp to see how this precomputed setup works.
- for i, (y1, x1) in enumerate(origdata):
+ for i, (yi, xi) in enumerate(origdata):
id = i + 1
- # XXX possible numpy bug
- #grammat[i][[0,-1]] = (0, id), (-1, 0.0)
grammat[i][0] = 0, id
- grammat[i][-1] = -1, 0.0
- for j, (y2, x2) in enumerate(origdata[i:]):
+ # Map id to original vector so that we can find it again
+ # after the model has been trained. libsvm essentially
+ # provides the ids of the support vectors.
+ self.iddatamap[id] = xi
+ for j, (yj, xj) in enumerate(origdata[i:]):
# Gram matrix is symmetric, so calculate dot product
# once and store it in both required locations
- z = kernel(x1, x2, svm_node_dot)
+ z = self.kernel(xi, xj, svm_node_dot)
# fix index so we assign to the right place
j += i
- grammat[i][j+1] = 0, z
- grammat[j][i+1] = 0, z
- # Map id to original vector so that we can find it again
- # after the model has been trained. libsvm essentially
- # provides the ids of the support vectors.
- self.iddatamap[id] = x1
-
+ grammat[i][j + 1] = 0, z
+ grammat[j][i + 1] = 0, z
+
def getdata(self):
return zip(map(lambda x: x[0], self.origdata), self.grammat)
data = property(getdata)
@@ -89,30 +86,45 @@
Combine this dataset with another dataset by extending the
Gram matrix with the new inner products into a new matrix.
"""
- n = len(self.origdata) + len(dataset.data)
+ n = len(self.origdata) + len(dataset.data) + 1
newgrammat = []
# copy original Gram matrix
for i in range(len(self.origdata)):
- row = N.empty((n,), dtype=libsvm.svm_node_dtype)
- row[:-1] = self.grammat[i]
- newgrammat.append(row)
+ newrow = N.zeros((n,), dtype=libsvm.svm_node_dtype)
+ oldrow = self.grammat[i]
+ newrow[:len(oldrow)] = oldrow
+ newgrammat.append(newrow)
- # copy id->vector map
- newiddatamap = dict(self.iddatamap.items())
-
# prepare Gram matrix for new data
for i in range(len(dataset.data)):
- id = i + len(self.origdata) + 1
- row = N.empty((n,), dtype=libsvm.svm_node_dtype)
- row[[0,-1]] = (0, id), (-1, 0.0)
+ row = N.zeros((n,), dtype=libsvm.svm_node_dtype)
newgrammat.append(row)
- newiddatamap[id] = dataset.data[i][1]
+ newiddatamap = dict(self.iddatamap.items())
+ m = len(self.origdata)
+ for i, (yi, xi) in enumerate(dataset.data):
+ i += m
+ for j, (yj, xj) in enumerate(self.origdata):
+ z = self.kernel(xi, xj, svm_node_dot)
+ newgrammat[i][j + 1] = 0, z
+ newgrammat[j][i + 1] = 0, z
+ for i, (yi, xi) in enumerate(dataset.data):
+ k = m + i
+ id = k + 1
+ newgrammat[k][0] = 0, id
+ newiddatamap[id] = xi
+ for j, (yj, xj) in enumerate(dataset.data[i:]):
+ z = self.kernel(xi, xj, svm_node_dot)
+ j += k
+ newgrammat[k][j + 1] = 0, z
+ newgrammat[j][k + 1] = 0, z
+
newdataset = self.__class__(self.kernel)
newdataset.origdata = self.origdata + dataset.data
newdataset.iddatamap = newiddatamap
newdataset.grammat = newgrammat
+ return newdataset
class LibSvmRegressionDataSet(LibSvmDataSet):
def __init__(self, origdata):
Modified: trunk/Lib/sandbox/svm/tests/test_classification.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_classification.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_classification.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,13 +1,11 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
from svm.classification import *
from svm.dataset import LibSvmClassificationDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *
-
restore_path()
class test_classification(NumpyTestCase):
Modified: trunk/Lib/sandbox/svm/tests/test_dataset.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_dataset.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_dataset.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,13 +1,11 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
from svm.dataset import *
from svm.kernel import *
from svm.dataset import convert_to_svm_node, svm_node_dot
from svm.libsvm import svm_node_dtype
-
restore_path()
class test_dataset(NumpyTestCase):
@@ -95,23 +93,30 @@
# get a new dataset containing the precomputed data
pcdata = origdata.precompute(kernel)
for i, row in enumerate(pcdata.grammat):
- valuerow = row[1:-1]['value']
+ valuerow = row[1:]['value']
assert_array_almost_equal(valuerow, expt_grammat[i])
def check_combine(self):
kernel = LinearKernel()
- y1 = N.random.randn(2)
- x1 = N.random.randn(len(y1), 2)
+ y1 = N.random.randn(10)
+ x1 = N.random.randn(len(y1), 10)
origdata = LibSvmRegressionDataSet(zip(y1, x1))
pcdata = origdata.precompute(kernel)
- y2 = N.random.randn(1)
+ y2 = N.random.randn(5)
x2 = N.random.randn(len(y2), x1.shape[1])
moredata = LibSvmRegressionDataSet(zip(y2, x2))
+ morepcdata = pcdata.combine(moredata)
- #pcdata.combine(moredata)
- #pcdata.copy_and_extend(moredata)
+ expt_grammat = N.empty((len(y1) + len(y2),)*2)
+ x = N.vstack([x1,x2])
+ for i, xi in enumerate(x):
+ for j, xj in enumerate(x):
+ expt_grammat[i, j] = kernel(xi, xj, N.dot)
+ for i, row in enumerate(morepcdata.grammat):
+ valuerow = row[1:]['value']
+ assert_array_almost_equal(valuerow, expt_grammat[i])
if __name__ == '__main__':
NumpyTest().run()
Modified: trunk/Lib/sandbox/svm/tests/test_kernel.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_kernel.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_kernel.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,10 +1,8 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
from svm.kernel import *
-
restore_path()
class test_kernel(NumpyTestCase):
Modified: trunk/Lib/sandbox/svm/tests/test_libsvm.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_libsvm.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_libsvm.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,10 +1,8 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
import svm.libsvm as libsvm
-
restore_path()
class test_libsvm(NumpyTestCase):
Modified: trunk/Lib/sandbox/svm/tests/test_oneclass.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_oneclass.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_oneclass.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,13 +1,11 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
from svm.oneclass import *
from svm.dataset import LibSvmOneClassDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *
-
restore_path()
class test_oneclass(NumpyTestCase):
Modified: trunk/Lib/sandbox/svm/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_regression.py 2006-07-14 12:59:13 UTC (rev 2102)
+++ trunk/Lib/sandbox/svm/tests/test_regression.py 2006-07-14 21:21:55 UTC (rev 2103)
@@ -1,13 +1,11 @@
+from numpy.testing import *
import numpy as N
-from numpy.testing import *
set_local_path('../..')
-
from svm.regression import *
from svm.dataset import LibSvmRegressionDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *
-
restore_path()
class test_regression(NumpyTestCase):
More information about the Scipy-svn mailing list