"""
Scalable Manifold learning utilities and algorithms.
Graphs are represented with their weighted adjacency matrices, preferably using
sparse matrices.
A note on symmetrization and internal sparse representations
------------------------------------------------------------
For performance, this code uses the FLANN library to compute
approximate neighborhoods efficiently. This means that (1) the
adjacency matrix produced is NOT GUARANTEED to be symmetric and
(2) compute_adjacency_matrix returns a sparse matrix called
adjacency_matrix. adjacency_matrix has 0.0 on the diagonal,
as it should. Implicitly, the missing entries are infinity not
0 for this matrix. But (1) and (2) mean that if one tries to
symmetrize adjacency_matrix, the scipy.sparse code eliminates
the 0.0 entries from adjacency_matrix hence in the affinity
matrix we explicitly set the diagonal to 1.0 for sparse matrices.
We adopted the following convention:
* adjacency_matrix will NOT BE GUARANTEED symmetric
* affinity_matrix will perform a symmetrization by default
* laplacian performs symmetrization
only if symmetrize_input=True (the default setting), and DOES NOT check symmetry
* these conventions are the same for dense matrices, for consistency
"""
# Authors: Marina Meila <mmp@stat.washington.edu>
# James McQueen <jmcq@u.washington.edu>
# LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
from __future__ import division ## removes integer division
import numpy as np
from scipy import sparse
from scipy.special import gammaln
from .adjacency import compute_adjacency_matrix
from .affinity import compute_affinity_matrix
from .laplacian import compute_laplacian_matrix
from ..utils.validation import check_array
sparse_formats = ['csr', 'coo', 'lil', 'bsr', 'dok', 'dia']
distance_error_msg = ("No data matrix exists. "
"Adjacency matrix cannot be computed.")
[docs]class Geometry(object):
"""
The Geometry class stores the data, distance, affinity and laplacian
matrices used by the various embedding methods and is the primary
object passed to embedding functions.
The Geometry class contains functions to compute the aforementioned
matrices and allows for re-computation whenever necessary.
Parameters
----------
adjacency_method : string {'auto', 'brute', 'pyflann', 'cyflann'}
method for computing pairwise radius neighbors graph.
adjacency_kwds : dict
dictionary containing keyword arguments for adjacency matrix.
see distance.py docmuentation for arguments for each method.
If new kwargs are passed to compute_adjacency_matrix then this
dictionary will be updated.
affinity_method : string {'auto', 'gaussian'}
method of computing affinity matrix
affinity_kwds : dict
dictionary containing keyword arguments for affinity matrix.
see affinity.py docmuentation for arguments for each method.
If new kwargs are passed to compute_affinity_matrix then this
dictionary will be updated.
laplacian_method : string,
type of laplacian to be computed. Possibilities are
{'symmetricnormalized', 'geometric', 'renormalized',
'unnormalized', 'randomwalk'} see laplacian.py for more information.
laplacian_kwds : dict
dictionary containing keyword arguments for Laplacian matrix.
see laplacian.py docmuentation for arguments for each method.
If new kwargs are passed to compute_laplacian_matrix then this
dictionary will be updated.
**kwargs :
additional arguments will be parsed and used to override values in
the above dictionaries. For example:
- `affinity_radius` will override `affinity_kwds['radius']`
- `adjacency_n_neighbors` will override `adjacency_kwds['n_neighbors']`
etc.
"""
def __init__(self, adjacency_method='auto', adjacency_kwds=None,
affinity_method='auto', affinity_kwds=None,
laplacian_method='auto',laplacian_kwds=None, **kwargs):
self.adjacency_method = adjacency_method
self.adjacency_kwds = dict(**(adjacency_kwds or {}))
self.affinity_method = affinity_method
self.affinity_kwds = dict(**(affinity_kwds or {}))
self.laplacian_method = laplacian_method
self.laplacian_kwds = dict(**(laplacian_kwds or {}))
# map extra keywords: e.g. affinity_radius -> affinity_kwds['radius']
dicts = dict(adjaceny=self.adjacency_kwds,
affinity=self.affinity_kwds,
laplacian=self.laplacian_kwds)
for key, val in kwargs.items():
keysplit = key.split('_')
if keysplit[0] not in dicts:
raise ValueError('key `{0}` not valid'.format(key))
dicts[keysplit[0]]['_'.join(keysplit[1:])] = val
self.X = None
self.adjacency_matrix = None
self.affinity_matrix = None
self.laplacian_matrix = None
self.laplacian_symmetric = None
self.laplacian_weights = None
[docs] def set_radius(self, radius, override=True, X=None, n_components=2):
"""Set the radius for the adjacency and affinity computation
By default, this will override keyword arguments provided on
initialization.
Parameters
----------
radius : float
radius to set for adjacency and affinity.
override : bool (default: True)
if False, then only set radius if not already defined in
`adjacency_args` and `affinity_args`.
X : ndarray or sparse (optional)
if provided, estimate a suitable radius from this data.
n_components : int (default=2)
the number of components to use when estimating the radius
"""
if radius < 0:
raise ValueError("radius must be non-negative")
if override or ('radius' not in self.adjacency_kwds and
'n_neighbors' not in self.adjacency_kwds):
self.adjacency_kwds['radius'] = radius
if override or ('radius' not in self.affinity_kwds):
self.affinity_kwds['radius'] = radius
[docs] def set_matrix(self, X, input_type):
"""Set the data matrix given the (string) input_type"""
if input_type == 'data':
self.set_data_matrix(X)
elif input_type == 'adjacency':
self.set_adjacency_matrix(X)
elif input_type == 'affinity':
self.set_affinity_matrix(X)
else:
raise ValueError("Unrecognized input_type: {0}".format(input_type))
[docs] def compute_adjacency_matrix(self, copy=False, **kwargs):
"""
This function will compute the adjacency matrix.
In order to acquire the existing adjacency matrix use
self.adjacency_matrix as comptute_adjacency_matrix() will re-compute
the adjacency matrix.
Parameters
----------
copy : boolean, whether to return a copied version of the adjacency matrix
**kwargs : see distance.py docmuentation for arguments for each method.
Returns
-------
self.adjacency_matrix : sparse matrix (N_obs, N_obs)
Non explicit 0.0 values should be considered not connected.
"""
if self.X is None:
raise ValueError(distance_error_msg)
kwds = self.adjacency_kwds.copy()
kwds.update(kwargs)
self.adjacency_matrix = compute_adjacency_matrix(self.X,
self.adjacency_method,
**kwds)
if copy:
return self.adjacency_matrix.copy()
else:
return self.adjacency_matrix
[docs] def compute_affinity_matrix(self, copy=False, **kwargs):
"""
This function will compute the affinity matrix. In order to
acquire the existing affinity matrix use self.affinity_matrix as
comptute_affinity_matrix() will re-compute the affinity matrix.
Parameters
----------
copy : boolean
whether to return a copied version of the affinity matrix
**kwargs :
see affinity.py docmuentation for arguments for each method.
Returns
-------
self.affinity_matrix : sparse matrix (N_obs, N_obs)
contains the pairwise affinity values using the Guassian kernel
and bandwidth equal to the affinity_radius
"""
if self.adjacency_matrix is None:
self.compute_adjacency_matrix()
kwds = self.affinity_kwds.copy()
kwds.update(kwargs)
self.affinity_matrix = compute_affinity_matrix(self.adjacency_matrix,
self.affinity_method,
**kwds)
if copy:
return self.affinity_matrix.copy()
else:
return self.affinity_matrix
[docs] def compute_laplacian_matrix(self, copy=True, return_lapsym=False, **kwargs):
"""
Note: this function will compute the laplacian matrix. In order to acquire
the existing laplacian matrix use self.laplacian_matrix as
comptute_laplacian_matrix() will re-compute the laplacian matrix.
Parameters
----------
copy : boolean, whether to return copied version of the self.laplacian_matrix
return_lapsym : boolean, if True returns additionally the symmetrized version of
the requested laplacian and the re-normalization weights.
**kwargs : see laplacian.py docmuentation for arguments for each method.
Returns
-------
self.laplacian_matrix : sparse matrix (N_obs, N_obs).
The requested laplacian.
self.laplacian_symmetric : sparse matrix (N_obs, N_obs)
The symmetric laplacian.
self.laplacian_weights : ndarray (N_obs,)
The renormalization weights used to make
laplacian_matrix from laplacian_symmetric
"""
if self.affinity_matrix is None:
self.compute_affinity_matrix()
kwds = self.laplacian_kwds.copy()
kwds.update(kwargs)
kwds['full_output'] = return_lapsym
result = compute_laplacian_matrix(self.affinity_matrix,
self.laplacian_method,
**kwds)
if return_lapsym:
(self.laplacian_matrix,
self.laplacian_symmetric,
self.laplacian_weights) = result
else:
self.laplacian_matrix = result
if copy:
return self.laplacian_matrix.copy()
else:
return self.laplacian_matrix
[docs] def set_data_matrix(self, X):
"""
Parameters
----------
X : ndarray (N_obs, N_features)
The original data set to input.
"""
X = check_array(X, accept_sparse=sparse_formats)
self.X = X
[docs] def set_adjacency_matrix(self, adjacency_mat):
"""
Parameters
----------
adjacency_mat : sparse matrix (N_obs, N_obs)
The adjacency matrix to input.
"""
adjacency_mat = check_array(adjacency_mat, accept_sparse=sparse_formats)
if adjacency_mat.shape[0] != adjacency_mat.shape[1]:
raise ValueError("adjacency matrix is not square")
self.adjacency_matrix = adjacency_mat
[docs] def set_affinity_matrix(self, affinity_mat):
"""
Parameters
----------
affinity_mat : sparse matrix (N_obs, N_obs).
The adjacency matrix to input.
"""
affinity_mat = check_array(affinity_mat, accept_sparse=sparse_formats)
if affinity_mat.shape[0] != affinity_mat.shape[1]:
raise ValueError("affinity matrix is not square")
self.affinity_matrix = affinity_mat
[docs] def set_laplacian_matrix(self, laplacian_mat):
"""
Parameters
----------
laplacian_mat : sparse matrix (N_obs, N_obs).
The Laplacian matrix to input.
"""
laplacian_mat = check_array(laplacian_mat, accept_sparse = sparse_formats)
if laplacian_mat.shape[0] != laplacian_mat.shape[1]:
raise ValueError("Laplacian matrix is not square")
self.laplacian_matrix = laplacian_mat
def delete_data_matrix(self):
self.X = None
def delete_adjacency_matrix(self):
self.adjacency_matrix = None
def delete_affinity_matrix(self):
self.affinity_matrix = None
def delete_laplacian_matrix(self):
self.laplacian_matrix = None