Source code for megaman.geometry.geometry

"""
Scalable Manifold learning utilities and algorithms.

Graphs are represented with their weighted adjacency matrices, preferably using
sparse matrices.

A note on symmetrization and internal sparse representations
------------------------------------------------------------

For performance, this code uses the FLANN library to compute
approximate neighborhoods efficiently. This means that (1) the
adjacency matrix produced is NOT GUARANTEED to be symmetric and
(2) compute_adjacency_matrix returns a sparse matrix called
adjacency_matrix. adjacency_matrix has 0.0 on the diagonal,
as it should. Implicitly, the missing entries are infinity not
0 for this matrix. But (1) and (2) mean that if one tries to
symmetrize adjacency_matrix, the scipy.sparse code eliminates
the 0.0 entries from adjacency_matrix hence in the affinity
matrix we explicitly set the diagonal to 1.0 for sparse matrices.

We adopted the following convention:
   * adjacency_matrix will NOT BE GUARANTEED symmetric
   * affinity_matrix will perform a symmetrization by default
   * laplacian performs symmetrization 
     only if symmetrize_input=True (the default setting), and DOES NOT check symmetry
   * these conventions are the same for dense matrices, for consistency
"""

# Authors: Marina Meila <mmp@stat.washington.edu>
#         James McQueen <jmcq@u.washington.edu>
# LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
from __future__ import division ## removes integer division
import numpy as np
from scipy import sparse
from scipy.special import gammaln
from .adjacency import compute_adjacency_matrix
from .affinity import compute_affinity_matrix
from .laplacian import compute_laplacian_matrix
from ..utils.validation import check_array

sparse_formats = ['csr', 'coo', 'lil', 'bsr', 'dok', 'dia']
distance_error_msg = ("No data matrix exists. "
                      "Adjacency matrix cannot be computed.")


[docs]class Geometry(object): """ The Geometry class stores the data, distance, affinity and laplacian matrices used by the various embedding methods and is the primary object passed to embedding functions. The Geometry class contains functions to compute the aforementioned matrices and allows for re-computation whenever necessary. Parameters ---------- adjacency_method : string {'auto', 'brute', 'pyflann', 'cyflann'} method for computing pairwise radius neighbors graph. adjacency_kwds : dict dictionary containing keyword arguments for adjacency matrix. see distance.py docmuentation for arguments for each method. If new kwargs are passed to compute_adjacency_matrix then this dictionary will be updated. affinity_method : string {'auto', 'gaussian'} method of computing affinity matrix affinity_kwds : dict dictionary containing keyword arguments for affinity matrix. see affinity.py docmuentation for arguments for each method. If new kwargs are passed to compute_affinity_matrix then this dictionary will be updated. laplacian_method : string, type of laplacian to be computed. Possibilities are {'symmetricnormalized', 'geometric', 'renormalized', 'unnormalized', 'randomwalk'} see laplacian.py for more information. laplacian_kwds : dict dictionary containing keyword arguments for Laplacian matrix. see laplacian.py docmuentation for arguments for each method. If new kwargs are passed to compute_laplacian_matrix then this dictionary will be updated. **kwargs : additional arguments will be parsed and used to override values in the above dictionaries. For example: - `affinity_radius` will override `affinity_kwds['radius']` - `adjacency_n_neighbors` will override `adjacency_kwds['n_neighbors']` etc. """ def __init__(self, adjacency_method='auto', adjacency_kwds=None, affinity_method='auto', affinity_kwds=None, laplacian_method='auto',laplacian_kwds=None, **kwargs): self.adjacency_method = adjacency_method self.adjacency_kwds = dict(**(adjacency_kwds or {})) self.affinity_method = affinity_method self.affinity_kwds = dict(**(affinity_kwds or {})) self.laplacian_method = laplacian_method self.laplacian_kwds = dict(**(laplacian_kwds or {})) # map extra keywords: e.g. affinity_radius -> affinity_kwds['radius'] dicts = dict(adjaceny=self.adjacency_kwds, affinity=self.affinity_kwds, laplacian=self.laplacian_kwds) for key, val in kwargs.items(): keysplit = key.split('_') if keysplit[0] not in dicts: raise ValueError('key `{0}` not valid'.format(key)) dicts[keysplit[0]]['_'.join(keysplit[1:])] = val self.X = None self.adjacency_matrix = None self.affinity_matrix = None self.laplacian_matrix = None self.laplacian_symmetric = None self.laplacian_weights = None
[docs] def set_radius(self, radius, override=True, X=None, n_components=2): """Set the radius for the adjacency and affinity computation By default, this will override keyword arguments provided on initialization. Parameters ---------- radius : float radius to set for adjacency and affinity. override : bool (default: True) if False, then only set radius if not already defined in `adjacency_args` and `affinity_args`. X : ndarray or sparse (optional) if provided, estimate a suitable radius from this data. n_components : int (default=2) the number of components to use when estimating the radius """ if radius < 0: raise ValueError("radius must be non-negative") if override or ('radius' not in self.adjacency_kwds and 'n_neighbors' not in self.adjacency_kwds): self.adjacency_kwds['radius'] = radius if override or ('radius' not in self.affinity_kwds): self.affinity_kwds['radius'] = radius
[docs] def set_matrix(self, X, input_type): """Set the data matrix given the (string) input_type""" if input_type == 'data': self.set_data_matrix(X) elif input_type == 'adjacency': self.set_adjacency_matrix(X) elif input_type == 'affinity': self.set_affinity_matrix(X) else: raise ValueError("Unrecognized input_type: {0}".format(input_type))
[docs] def compute_adjacency_matrix(self, copy=False, **kwargs): """ This function will compute the adjacency matrix. In order to acquire the existing adjacency matrix use self.adjacency_matrix as comptute_adjacency_matrix() will re-compute the adjacency matrix. Parameters ---------- copy : boolean, whether to return a copied version of the adjacency matrix **kwargs : see distance.py docmuentation for arguments for each method. Returns ------- self.adjacency_matrix : sparse matrix (N_obs, N_obs) Non explicit 0.0 values should be considered not connected. """ if self.X is None: raise ValueError(distance_error_msg) kwds = self.adjacency_kwds.copy() kwds.update(kwargs) self.adjacency_matrix = compute_adjacency_matrix(self.X, self.adjacency_method, **kwds) if copy: return self.adjacency_matrix.copy() else: return self.adjacency_matrix
[docs] def compute_affinity_matrix(self, copy=False, **kwargs): """ This function will compute the affinity matrix. In order to acquire the existing affinity matrix use self.affinity_matrix as comptute_affinity_matrix() will re-compute the affinity matrix. Parameters ---------- copy : boolean whether to return a copied version of the affinity matrix **kwargs : see affinity.py docmuentation for arguments for each method. Returns ------- self.affinity_matrix : sparse matrix (N_obs, N_obs) contains the pairwise affinity values using the Guassian kernel and bandwidth equal to the affinity_radius """ if self.adjacency_matrix is None: self.compute_adjacency_matrix() kwds = self.affinity_kwds.copy() kwds.update(kwargs) self.affinity_matrix = compute_affinity_matrix(self.adjacency_matrix, self.affinity_method, **kwds) if copy: return self.affinity_matrix.copy() else: return self.affinity_matrix
[docs] def compute_laplacian_matrix(self, copy=True, return_lapsym=False, **kwargs): """ Note: this function will compute the laplacian matrix. In order to acquire the existing laplacian matrix use self.laplacian_matrix as comptute_laplacian_matrix() will re-compute the laplacian matrix. Parameters ---------- copy : boolean, whether to return copied version of the self.laplacian_matrix return_lapsym : boolean, if True returns additionally the symmetrized version of the requested laplacian and the re-normalization weights. **kwargs : see laplacian.py docmuentation for arguments for each method. Returns ------- self.laplacian_matrix : sparse matrix (N_obs, N_obs). The requested laplacian. self.laplacian_symmetric : sparse matrix (N_obs, N_obs) The symmetric laplacian. self.laplacian_weights : ndarray (N_obs,) The renormalization weights used to make laplacian_matrix from laplacian_symmetric """ if self.affinity_matrix is None: self.compute_affinity_matrix() kwds = self.laplacian_kwds.copy() kwds.update(kwargs) kwds['full_output'] = return_lapsym result = compute_laplacian_matrix(self.affinity_matrix, self.laplacian_method, **kwds) if return_lapsym: (self.laplacian_matrix, self.laplacian_symmetric, self.laplacian_weights) = result else: self.laplacian_matrix = result if copy: return self.laplacian_matrix.copy() else: return self.laplacian_matrix
[docs] def set_data_matrix(self, X): """ Parameters ---------- X : ndarray (N_obs, N_features) The original data set to input. """ X = check_array(X, accept_sparse=sparse_formats) self.X = X
[docs] def set_adjacency_matrix(self, adjacency_mat): """ Parameters ---------- adjacency_mat : sparse matrix (N_obs, N_obs) The adjacency matrix to input. """ adjacency_mat = check_array(adjacency_mat, accept_sparse=sparse_formats) if adjacency_mat.shape[0] != adjacency_mat.shape[1]: raise ValueError("adjacency matrix is not square") self.adjacency_matrix = adjacency_mat
[docs] def set_affinity_matrix(self, affinity_mat): """ Parameters ---------- affinity_mat : sparse matrix (N_obs, N_obs). The adjacency matrix to input. """ affinity_mat = check_array(affinity_mat, accept_sparse=sparse_formats) if affinity_mat.shape[0] != affinity_mat.shape[1]: raise ValueError("affinity matrix is not square") self.affinity_matrix = affinity_mat
[docs] def set_laplacian_matrix(self, laplacian_mat): """ Parameters ---------- laplacian_mat : sparse matrix (N_obs, N_obs). The Laplacian matrix to input. """ laplacian_mat = check_array(laplacian_mat, accept_sparse = sparse_formats) if laplacian_mat.shape[0] != laplacian_mat.shape[1]: raise ValueError("Laplacian matrix is not square") self.laplacian_matrix = laplacian_mat
def delete_data_matrix(self): self.X = None def delete_adjacency_matrix(self): self.adjacency_matrix = None def delete_affinity_matrix(self): self.affinity_matrix = None def delete_laplacian_matrix(self): self.laplacian_matrix = None