-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathller.py
132 lines (106 loc) · 4.31 KB
/
ller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
'''Locally Linear Embedding for Regression'''
import numpy as np
from scipy.sparse import eye as speye
from scipy.sparse.csgraph import laplacian
from sklearn.manifold.locally_linear import (
barycenter_kneighbors_graph, null_space, LocallyLinearEmbedding)
from sklearn.metrics.pairwise import pairwise_distances, rbf_kernel
from sklearn.neighbors import NearestNeighbors
def ller(X, Y, n_neighbors, n_components, mu=0.5, gamma=None, reg=1e-3,
         eigen_solver='auto', tol=1e-6, max_iter=100, random_state=None):
    """
    Locally Linear Embedding for Regression (LLER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_points, num_dims).
        It is handed directly to ``NearestNeighbors.fit``; ``LLER.fit``
        passes an already-fitted ``NearestNeighbors`` instance here.
    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_points, num_responses).
        A 1-dimensional Y is treated as a single response column.
    n_neighbors : int
        Number of neighbors for kNN graph construction.
    n_components : int
        Number of dimensions for embedding.
    mu : float, optional
        Influence of the Y-similarity penalty.
    gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.
        If that median is zero, the median of the strictly positive
        distances is used instead (or 1.0 when all rows of Y coincide).
    reg : float, optional
        Regularization passed to ``barycenter_kneighbors_graph``.
    eigen_solver : {'auto', 'arpack', 'dense'}, optional
        Solver passed to ``null_space``. 'dense' also selects the dense
        construction of the reconstruction matrix M.
    tol : float, optional
        Tolerance passed to ``null_space``.
    max_iter : int, optional
        Iteration limit passed to ``null_space``.
    random_state : optional
        Random state passed to ``null_space``.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)
    lle_error : float
        The error value returned by ``null_space`` for the combined
        matrix ``M + mu * L``.
        NOTE(review): since it is computed on the combined matrix it
        presumably includes the Y penalty, not only the LLE
        reconstruction term -- confirm against ``null_space``.
    ller_error : float
        ``trace(embedding.T . L . embedding)``, where L is the (scaled)
        graph Laplacian of the RBF kernel on Y.

    Raises
    ------
    ValueError
        If ``eigen_solver`` is not recognized, ``n_neighbors`` is not
        positive or not smaller than the number of points,
        ``n_components`` exceeds the input dimension, or X and Y have a
        different number of rows.
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
    # Validate before building the neighbors object, so that a bad value is
    # reported with this message rather than by an error from the kNN code.
    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")

    # Accept array-likes (e.g. lists) as well as ndarrays.
    Y = np.asarray(Y)
    if Y.ndim == 1:
        Y = Y[:, None]

    if gamma is None:
        dists = pairwise_distances(Y)
        median_dist = np.median(dists)
        if median_dist <= 0:
            # At least half of the pairwise distances are zero (the diagonal
            # always is), so 1 / median would divide by zero. Fall back to
            # the median over the non-zero distances.
            positive = dists[dists > 0]
            median_dist = np.median(positive) if positive.size else 1.0
        gamma = 1.0 / median_dist

    # One extra neighbor is requested for the search object.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= Nx:
        raise ValueError("n_neighbors must be less than number of points")
    if Nx != Ny:
        raise ValueError("X and Y must have same number of points")

    M_sparse = (eigen_solver != 'dense')

    # Reconstruction weights W, then M = (I - W)^T (I - W).
    W = barycenter_kneighbors_graph(
        nbrs, n_neighbors=n_neighbors, reg=reg)

    if M_sparse:
        M = speye(*W.shape, format=W.format) - W
        M = (M.T * M).tocsr()
    else:
        # Same matrix expanded: W^T W - W^T - W, plus the identity added
        # on the diagonal through the flat view.
        M = (W.T * W - W.T - W).toarray()
        M.flat[::M.shape[0] + 1] += 1

    # Similarity of the responses and its unnormalized graph Laplacian.
    P = rbf_kernel(Y, gamma=gamma)
    L = laplacian(P, normed=False)

    # Optional scaling step: bring both terms to a maximum magnitude of 1
    # so that mu weighs comparable quantities.
    M /= np.abs(M).max()
    L_scale = np.abs(L).max()
    if L_scale > 0:
        # L is all zeros when every row of Y is identical; dividing then
        # would fill it with NaN.
        L /= L_scale

    omega = M + mu * L
    embedding, lle_error = null_space(omega, n_components, k_skip=1,
                                      eigen_solver=eigen_solver, tol=tol,
                                      max_iter=max_iter,
                                      random_state=random_state)
    ller_error = np.trace(embedding.T.dot(L).dot(embedding))
    return embedding, lle_error, ller_error
class LLER(LocallyLinearEmbedding):
    """Scikit-learn compatible class for LLER.

    Thin estimator wrapper around the ``ller`` function: the constructor
    only stores its arguments, and ``fit`` forwards them to ``ller``.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbors for kNN graph construction.
    n_components : int
        Number of dimensions for embedding.
    reg : float
        Regularization forwarded to ``ller``.
    eigen_solver : {'auto', 'arpack', 'dense'}
        Eigen solver forwarded to ``ller``.
    tol : float
        Solver tolerance forwarded to ``ller``.
    max_iter : int
        Solver iteration limit forwarded to ``ller``.
    neighbors_algorithm : str
        ``algorithm`` argument for the ``NearestNeighbors`` object.
    random_state : optional
        Random state forwarded to ``ller``.
    mu : float
        Influence of the Y-similarity penalty.
    gamma : float, optional
        Scaling factor for the RBF kernel on Y; ``None`` lets ``ller``
        choose its default.

    Attributes
    ----------
    nbrs_ : NearestNeighbors
        Neighbors object fitted on the training data.
    embedding_ : ndarray
        Embedding returned by ``ller``.
    lle_error_ : float
        First error value returned by ``ller``.
    ller_error_ : float
        Second error value returned by ``ller``.
    """

    def __init__(self, n_neighbors=5, n_components=2, reg=1E-3,
                 eigen_solver='auto', tol=1E-6, max_iter=100,
                 neighbors_algorithm='auto', random_state=None,
                 mu=0.5, gamma=None):
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.reg = reg
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.neighbors_algorithm = neighbors_algorithm
        self.mu = mu
        self.gamma = gamma

    def fit_transform(self, X, Y):
        """Fit the model on (X, Y) and return the resulting embedding."""
        self.fit(X, Y)
        return self.embedding_

    def fit(self, X, Y):
        """Compute the LLER embedding of X guided by the responses Y.

        Stores ``nbrs_``, ``embedding_``, ``lle_error_`` and
        ``ller_error_`` on the instance and returns ``self``.
        """
        # n_neighbors is passed by keyword: newer scikit-learn releases no
        # longer accept it positionally.
        self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
                                      algorithm=self.neighbors_algorithm)
        self.nbrs_.fit(X)
        # reg must be forwarded explicitly; otherwise ller silently falls
        # back to its own default and the constructor argument is ignored.
        self.embedding_, self.lle_error_, self.ller_error_ = ller(
            self.nbrs_, Y, self.n_neighbors, self.n_components,
            mu=self.mu, gamma=self.gamma, reg=self.reg,
            eigen_solver=self.eigen_solver, tol=self.tol,
            max_iter=self.max_iter, random_state=self.random_state)
        return self