# LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
import warnings
import numpy as np
from scipy import sparse
from scipy.linalg import eigh, eig
from scipy.sparse.linalg import lobpcg, eigs, eigsh
from sklearn.utils.validation import check_random_state
from .validation import check_array
EIGEN_SOLVERS = ['auto', 'dense', 'arpack', 'lobpcg']
BAD_EIGEN_SOLVERS = {}
AMG_KWDS = ['strength', 'aggregate', 'smooth', 'max_levels', 'max_coarse']
try:
from pyamg import smoothed_aggregation_solver
PYAMG_LOADED = True
EIGEN_SOLVERS.append('amg')
except ImportError:
PYAMG_LOADED = False
BAD_EIGEN_SOLVERS['amg'] = """The eigen_solver was set to 'amg',
but pyamg is not available. Please either
install pyamg or use another method."""
[docs]def check_eigen_solver(eigen_solver, solver_kwds, size=None, nvec=None):
"""Check that the selected eigensolver is valid
Parameters
----------
eigen_solver : string
string value to validate
size, nvec : int (optional)
if both provided, use the specified problem size and number of vectors
to determine the optimal method to use with eigen_solver='auto'
Returns
-------
eigen_solver : string
The eigen solver. This only differs from the input if
eigen_solver == 'auto' and `size` is specified.
"""
if eigen_solver in BAD_EIGEN_SOLVERS:
raise ValueError(BAD_EIGEN_SOLVERS[eigen_solver])
elif eigen_solver not in EIGEN_SOLVERS:
raise ValueError("Unrecognized eigen_solver: '{0}'."
"Should be one of: {1}".format(eigen_solver,
EIGEN_SOLVERS))
if size is not None and nvec is not None:
# do some checks of the eigensolver
if eigen_solver == 'lobpcg' and size < 5 * nvec + 1:
warnings.warn("lobpcg does not perform well with small matrices or "
"with large numbers of vectors. Switching to 'dense'")
eigen_solver = 'dense'
solver_kwds = None
elif eigen_solver == 'auto':
if size > 200 and nvec < 10:
if PYAMG_LOADED:
eigen_solver = 'amg'
solver_kwds = None
else:
eigen_solver = 'arpack'
solver_kwds = None
else:
eigen_solver = 'dense'
solver_kwds = None
return eigen_solver, solver_kwds
def _is_symmetric(M, tol = 1e-8):
if sparse.isspmatrix(M):
conditions = np.abs((M - M.T).data) < tol
else:
conditions = np.abs((M - M.T)) < tol
return(np.all(conditions))
[docs]def eigen_decomposition(G, n_components=8, eigen_solver='auto',
random_state=None,
drop_first=True, largest=True, solver_kwds=None):
"""
Function to compute the eigendecomposition of a square matrix.
Parameters
----------
G : array_like or sparse matrix
The square matrix for which to compute the eigen-decomposition.
n_components : integer, optional
The number of eigenvectors to return
eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
'auto' :
attempt to choose the best method for input data (default)
'dense' :
use standard dense matrix operations for the eigenvalue decomposition.
For this method, M must be an array or matrix type.
This method should be avoided for large problems.
'arpack' :
use arnoldi iteration in shift-invert mode. For this method,
M may be a dense matrix, sparse matrix, or general linear operator.
Warning: ARPACK can be unstable for some problems. It is best to
try several random seeds in order to check results.
'lobpcg' :
Locally Optimal Block Preconditioned Conjugate Gradient Method.
A preconditioned eigensolver for large symmetric positive definite
(SPD) generalized eigenproblems.
'amg' :
Algebraic Multigrid solver (requires ``pyamg`` to be installed)
It can be faster on very large, sparse problems, but may also lead
to instabilities.
random_state : int seed, RandomState instance, or None (default)
A pseudo random number generator used for the initialization of the
lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
By default, arpack is used.
solver_kwds : any additional keyword arguments to pass to the selected eigen_solver
Returns
-------
lambdas, diffusion_map : eigenvalues, eigenvectors
"""
n_nodes = G.shape[0]
if drop_first:
n_components = n_components + 1
eigen_solver, solver_kwds = check_eigen_solver(eigen_solver, solver_kwds,
size=n_nodes,
nvec=n_components)
random_state = check_random_state(random_state)
# Convert G to best type for eigendecomposition
if sparse.issparse(G):
if G.getformat() is not 'csr':
G.tocsr()
G = G.astype(np.float)
# Check for symmetry
is_symmetric = _is_symmetric(G)
# Try Eigen Methods:
if eigen_solver == 'arpack':
# This matches the internal initial state used by ARPACK
v0 = random_state.uniform(-1, 1, G.shape[0])
if is_symmetric:
if largest:
which = 'LM'
else:
which = 'SM'
lambdas, diffusion_map = eigsh(G, k=n_components, which=which,
v0=v0,**(solver_kwds or {}))
else:
if largest:
which = 'LR'
else:
which = 'SR'
lambdas, diffusion_map = eigs(G, k=n_components, which=which,
**(solver_kwds or {}))
lambdas = np.real(lambdas)
diffusion_map = np.real(diffusion_map)
elif eigen_solver == 'amg':
# separate amg & lobpcg keywords:
if solver_kwds is not None:
amg_kwds = {}
lobpcg_kwds = solver_kwds.copy()
for kwd in AMG_KWDS:
if kwd in solver_kwds.keys():
amg_kwds[kwd] = solver_kwds[kwd]
del lobpcg_kwds[kwd]
else:
amg_kwds = None
lobpcg_kwds = None
if not is_symmetric:
raise ValueError("lobpcg requires symmetric matrices.")
if not sparse.issparse(G):
warnings.warn("AMG works better for sparse matrices")
# Use AMG to get a preconditioner and speed up the eigenvalue problem.
ml = smoothed_aggregation_solver(check_array(G, accept_sparse = ['csr']),**(amg_kwds or {}))
M = ml.aspreconditioner()
n_find = min(n_nodes, 5 + 2*n_components)
X = random_state.rand(n_nodes, n_find)
X[:, 0] = (G.diagonal()).ravel()
lambdas, diffusion_map = lobpcg(G, X, M=M, largest=largest,**(lobpcg_kwds or {}))
sort_order = np.argsort(lambdas)
if largest:
lambdas = lambdas[sort_order[::-1]]
diffusion_map = diffusion_map[:, sort_order[::-1]]
else:
lambdas = lambdas[sort_order]
diffusion_map = diffusion_map[:, sort_order]
lambdas = lambdas[:n_components]
diffusion_map = diffusion_map[:, :n_components]
elif eigen_solver == "lobpcg":
if not is_symmetric:
raise ValueError("lobpcg requires symmetric matrices.")
n_find = min(n_nodes, 5 + 2*n_components)
X = random_state.rand(n_nodes, n_find)
lambdas, diffusion_map = lobpcg(G, X, largest=largest,**(solver_kwds or {}))
sort_order = np.argsort(lambdas)
if largest:
lambdas = lambdas[sort_order[::-1]]
diffusion_map = diffusion_map[:, sort_order[::-1]]
else:
lambdas = lambdas[sort_order]
diffusion_map = diffusion_map[:, sort_order]
lambdas = lambdas[:n_components]
diffusion_map = diffusion_map[:, :n_components]
elif eigen_solver == 'dense':
if sparse.isspmatrix(G):
G = G.todense()
if is_symmetric:
lambdas, diffusion_map = eigh(G,**(solver_kwds or {}))
else:
lambdas, diffusion_map = eig(G,**(solver_kwds or {}))
if largest:# eigh always returns eigenvalues in ascending order
lambdas = lambdas[::-1] # reverse order the e-values
diffusion_map = diffusion_map[:, ::-1] # reverse order the vectors
lambdas = lambdas[:n_components]
diffusion_map = diffusion_map[:, :n_components]
return (lambdas, diffusion_map)
[docs]def null_space(M, k, k_skip=1, eigen_solver='arpack',
random_state=None, solver_kwds=None):
"""
Find the null space of a matrix M: eigenvectors associated with 0 eigenvalues
Parameters
----------
M : {array, matrix, sparse matrix, LinearOperator}
Input covariance matrix: should be symmetric positive semi-definite
k : integer
Number of eigenvalues/vectors to return
k_skip : integer, optional
Number of low eigenvalues to skip.
eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
'auto' :
algorithm will attempt to choose the best method for input data
'dense' :
use standard dense matrix operations for the eigenvalue decomposition.
For this method, M must be an array or matrix type. This method should be avoided for large problems.
'arpack' :
use arnoldi iteration in shift-invert mode. For this method,
M may be a dense matrix, sparse matrix, or general linear operator.
Warning: ARPACK can be unstable for some problems. It is best to
try several random seeds in order to check results.
'lobpcg' :
Locally Optimal Block Preconditioned Conjugate Gradient Method.
A preconditioned eigensolver for large symmetric positive definite
(SPD) generalized eigenproblems.
'amg' :
AMG requires pyamg to be installed. It can be faster on very large,
sparse problems, but may also lead to instabilities.
random_state: numpy.RandomState or int, optional
The generator or seed used to determine the starting vector for arpack
iterations. Defaults to numpy.random.
solver_kwds : any additional keyword arguments to pass to the selected eigen_solver
Returns
-------
null_space : estimated k vectors of the null space
error : estimated error (sum of eigenvalues)
Notes
-----
dense solver key words: see
http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.linalg.eigh.html
for symmetric problems and
http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.linalg.eig.html#scipy.linalg.eig
for non symmetric problems.
arpack sovler key words: see
http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.linalg.eigsh.html
for symmetric problems and http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.linalg.eigs.html#scipy.sparse.linalg.eigs
for non symmetric problems.
lobpcg solver keywords: see
http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lobpcg.html
amg solver keywords: see
http://pyamg.googlecode.com/svn/branches/1.0.x/Docs/html/pyamg.aggregation.html#module-pyamg.aggregation.aggregation
(Note amg solver uses lobpcg and also accepts lobpcg keywords)
"""
eigen_solver, solver_kwds = check_eigen_solver(eigen_solver, solver_kwds,
size=M.shape[0],
nvec=k + k_skip)
random_state = check_random_state(random_state)
if eigen_solver == 'arpack':
# This matches the internal initial state used by ARPACK
v0 = random_state.uniform(-1, 1, M.shape[0])
try:
eigen_values, eigen_vectors = eigsh(M, k + k_skip, sigma=0.0,
v0=v0,**(solver_kwds or {}))
except RuntimeError as msg:
raise ValueError("Error in determining null-space with ARPACK. "
"Error message: '%s'. "
"Note that method='arpack' can fail when the "
"weight matrix is singular or otherwise "
"ill-behaved. method='dense' is recommended. "
"See online documentation for more information."
% msg)
return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])
elif eigen_solver == 'dense':
if hasattr(M, 'toarray'):
M = M.toarray()
eigen_values, eigen_vectors = eigh(M, eigvals=(0, k+k_skip),overwrite_a=True,
**(solver_kwds or {}))
index = np.argsort(np.abs(eigen_values))
eigen_vectors = eigen_vectors[:, index]
eigen_values = eigen_values[index]
return eigen_vectors[:, k_skip:k+1], np.sum(eigen_values[k_skip:k+1])
# eigen_values, eigen_vectors = eigh(
# M, eigvals=(k_skip, k + k_skip - 1), overwrite_a=True)
# index = np.argsort(np.abs(eigen_values))
# return eigen_vectors[:, index], np.sum(eigen_values)
elif (eigen_solver == 'amg' or eigen_solver == 'lobpcg'):
# M should be positive semi-definite. Add 1 to make it pos. def.
try:
M = sparse.identity(M.shape[0]) + M
n_components = min(k + k_skip + 10, M.shape[0])
eigen_values, eigen_vectors = eigen_decomposition(M, n_components,
eigen_solver = eigen_solver,
drop_first = False,
largest = False,
random_state=random_state,
solver_kwds=solver_kwds)
eigen_values = eigen_values -1
index = np.argsort(np.abs(eigen_values))
eigen_values = eigen_values[index]
eigen_vectors = eigen_vectors[:, index]
return eigen_vectors[:, k_skip:k+1], np.sum(eigen_values[k_skip:k+1])
except np.linalg.LinAlgError: # try again with bigger increase
warnings.warn("LOBPCG failed the first time. Increasing Pos Def adjustment.")
M = 2.0*sparse.identity(M.shape[0]) + M
n_components = min(k + k_skip + 10, M.shape[0])
eigen_values, eigen_vectors = eigen_decomposition(M, n_components,
eigen_solver = eigen_solver,
drop_first = False,
largest = False,
random_state=random_state,
solver_kwds=solver_kwds)
eigen_values = eigen_values - 2
index = np.argsort(np.abs(eigen_values))
eigen_values = eigen_values[index]
eigen_vectors = eigen_vectors[:, index]
return eigen_vectors[:, k_skip:k+1], np.sum(eigen_values[k_skip:k+1])
else:
raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)