"""Riemannian Metric learning utilities and algorithms.
To use the "geometric" Laplacian from geometry.py for statistically
consistent results.
"""
# Author: Marina Meila <mmp@stat.washington.edu>
# after the Matlab function rmetric.m by Dominique Perrault-Joncas
# LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
import warnings
import numpy as np
from scipy import sparse
from numpy.linalg import eigh, svd, inv
[docs]def riemann_metric( Y, laplacian=None, n_dim=None, invert_h=False, mode_inv = 'svd'):
"""
Parameters
----------
Y: array-like, shape = (n_samples, mdimY )
The embedding coordinates of the points
laplacian: array-like, shape = (n_samples, n_samples)
The Laplacian of the data. It is recommended to use the "geometric"
Laplacian (default) option from geometry.graph_laplacian()
n_dim : integer, optional
Use only the first n_dim <= mdimY dimensions.All dimensions
n_dim:mdimY are ignored.
invert_h: boolean, optional
if False, only the "dual Riemannian metric" is computed
if True, the dual metric matrices are inverted to obtain the
Riemannian metric G.
mode_inv: string, optional
How to compute the inverses of h_dual_metric, if invert_h
"inv", use numpy.inv()
"svd" (default), use numpy.linalg.svd(), then invert the eigenvalues
(possibly a more numerically stable method with H is symmetric and
ill conditioned)
Returns
-------
h_dual_metric : array, shape=(n_samples, n_dim, n_dim)
Optionally :
g_riemann_metric : array, shape=(n_samples, n_dim, n_dim )
Hvv : singular vectors of H, transposed, shape = ( n_samples, n_dim, n_dim )
Hsvals : singular values of H, shape = ( n_samples, n_dim )
Gsvals : singular values of G, shape = ( n_samples, n_dim )
Notes
-----
References
----------
"Non-linear dimensionality reduction: Riemannian metric estimation and
the problem of geometric discovery",
Dominique Perraul-Joncas, Marina Meila, arXiv:1305.7255
"""
n_samples = laplacian.shape[0]
h_dual_metric = np.zeros((n_samples, n_dim, n_dim ))
for i in np.arange(n_dim ):
for j in np.arange(i,n_dim ):
yij = Y[:,i]*Y[:,j]
h_dual_metric[ :, i, j] = 0.5*(laplacian.dot(yij)-Y[:,j]*laplacian.dot(Y[:,i])-Y[:,i]*laplacian.dot(Y[:,j]))
for j in np.arange(n_dim-1):
for i in np.arange(j+1,n_dim):
h_dual_metric[ :,i,j] = h_dual_metric[:,j,i]
# compute rmetric if requested
if( invert_h ):
riemann_metric, Hvv, Hsvals, Gsvals = compute_G_from_H( h_dual_metric )
else:
riemann_metric = Hvv = Hsvals = Gsvals = None
return h_dual_metric, riemann_metric, Hvv, Hsvals, Gsvals
[docs]def riemann_metric_lazy( Y, sample,laplacian, n_dim, invert_h=False, mode_inv = 'svd'):
"""
Parameters
----------
Y: array-like, shape = (n_samples, mdimY )
The embedding coordinates of the points
sample : array-like, shape (n_samples)
array of indices over which to calculate the riemannian metric.
laplacian: array-like, shape = (n_samples, n_samples)
The Laplacian of the data. It is recommended to use the "geometric"
Laplacian (default) option from geometry.graph_laplacian()
n_dim : integer, optional
Use only the first n_dim <= mdimY dimensions.All dimensions
n_dim:mdimY are ignored.
invert_h: boolean, optional
if False, only the "dual Riemannian metric" is computed
if True, the dual metric matrices are inverted to obtain the
Riemannian metric G.
mode_inv: string, optional
How to compute the inverses of h_dual_metric, if invert_h
"inv", use numpy.inv()
"svd" (default), use numpy.linalg.svd(), then invert the eigenvalues
(possibly a more numerically stable method with H is symmetric and
ill conditioned)
Returns
-------
h_dual_metric : array, shape=(n_samples, n_dim, n_dim)
Optionally :
g_riemann_metric : array, shape=(n_samples, n_dim, n_dim )
Hvv : singular vectors of H, transposed, shape = ( n_samples, n_dim, n_dim )
Hsvals : singular values of H, shape = ( n_samples, n_dim )
Gsvals : singular values of G, shape = ( n_samples, n_dim )
Notes
-----
References
----------
"Non-linear dimensionality reduction: Riemannian metric estimation and
the problem of geometric discovery",
Dominique Perraul-Joncas, Marina Meila, arXiv:1305.7255
"""
n_samples = laplacian.shape[0]
laplacian = laplacian[sample,:]
h_dual_metric = np.zeros((len(sample), n_dim, n_dim))
for i in np.arange(n_dim ):
for j in np.arange(i,n_dim ):
yij = Y[:,i]*Y[:,j]
h_dual_metric[ :, i, j] = 0.5*(laplacian.dot(yij)-Y[sample,j]*laplacian.dot(Y[:,i])-Y[sample,i]*laplacian.dot(Y[:,j]))
for j in np.arange(n_dim-1):
for i in np.arange(j+1,n_dim):
h_dual_metric[ :,i,j] = h_dual_metric[:,j,i]
if( invert_h ):
riemann_metric, Hvv, Hsvals, Gsvals = compute_G_from_H( h_dual_metric )
else:
riemann_metric = Hvv = Hsvals = Gsvals = None
return h_dual_metric,riemann_metric, Hvv, Hsvals, Gsvals
[docs]def compute_G_from_H( H, mdimG = None, mode_inv = "svd" ):
"""
Parameters
----------
H : the inverse R. Metric
if mode_inv == 'svd':
also returns Hvv, Hsvals, Gsvals the (transposed) eigenvectors of
H and the singular values of H and G
if mdimG < H.shape[2]:
G.shape = [ n_samples, mdimG, mdimG ] with n_samples = H.shape[0]
Notes
------
currently Hvv, Hsvals are n_dim = H.shape[2], and Gsvals, G are mdimG
(This contradicts the documentation of riemann_metric which states
that riemann_metric and h_dual_metric have the same dimensions)
See the notes in RiemannMetric
"""
n_samples = H.shape[0]
n_dim = H.shape[2]
if mode_inv is 'svd':
Huu, Hsvals, Hvv = np.linalg.svd( H )
if mdimG is None:
Gsvals = 1./Hsvals
G = np.zeros((n_samples, n_dim, n_dim))
for i in np.arange(n_samples):
G[i,:,:] = np.dot(Huu[i,:,:], np.dot( np.diag(Gsvals[i,:]), Hvv[i,:,:]))
elif mdimG < n_dim:
Gsvals[:,:mdimG] = 1./Hsvals[:,:mdimG]
Gsvals[:,mdimG:] = 0.
# this can be redone with np.einsum() but it's barbaric
G = np.zeros((n_samples, mdimG, mdimG))
for i in np.arange(n_samples):
G[i,:,:mdimG] = np.dot(Huu[i,:,mdimG], np.dot( np.diag(Gsvals[i,:mdimG]), Hvv[i,:,:mdimG]))
else:
raise ValueError('mdimG must be <= H.shape[1]')
return G, Hvv, Hsvals, Gsvals
else:
riemann_metric = np.linalg.inv(h_dual_metric)
return riemann_metric, None, None, None
[docs]class RiemannMetric(object):
"""
RiemannMetric computes and stores the Riemannian metric and its dual
associated with an embedding Y. The Riemannian metric is currently denoted
by G, its dual by H, and the Laplacian by L. G at each point is the
matrix inverse of H.
For performance, the following choices have been made:
* the class makes no defensive copies of L, Y
* no defensive copies of the array attributes H, G, Hvv, ....
* G is computed on request only
In the future, this class will be extended to compute H only once,
for mdimY dimensions, but to store multiple G's, with different dimensions.
In the near future plans is also a "lazy" implementation, which will
compute G (and maybe even H) only at the requested points.
Parameters
-----------
Y : embedding coordinates, shape = (n, mdimY)
laplacian : estimated laplacian from data shape = (n, n)
n_dim : the manifold domension
mod_inv : if mode_inv = svd, also returns Hvv, Hsvals,
Gsvals the (transposed) eigenvectors of
H and the singular values of H and G
Returns
----------
mdimG : dimension of G, H
mdimY : dimension of Y
H : dual Riemann metric, shape = (n, mdimY, mdimY)
G : Riemann metric, shape = (n, mdimG, mdimG)
Hvv : (transposed) singular vectors of H, shape = (n, mdimY, mdimY)
Hsvals : singular values of H, shape = (n, mdimY)
Gsvals : singular values of G, shape = (n, mdimG)
detG : determinants of G, shape = (n,1)
Notes
-----
H is always computed at full dimension self.mdimY
G is computed at mdimG (some contradictions persist here)
References
----------
"Non-linear dimensionality reduction: Riemannian metric estimation and
the problem of geometric discovery",
Dominique Perraul-Joncas, Marina Meila, arXiv:1305.7255
"""
def __init__(self, Y, laplacian, n_dim=None, mode_inv='svd'):
# input data
self.Y = Y
self.n, self.mdimY = Y.shape
self.L = laplacian
# input params
if n_dim is None:
self.mdimG = self.mdimY
else:
if n_dim > self.mdimY:
raise ValueError('n_dim must be <= Y.shape[1]')
self.mdimG = n_dim # dimension of the riemann_metric computed
self.mode_inv = mode_inv
if self.mode_inv not in set(('svd', 'inv')):
raise ValueError(("%s is not a valid value. Expected "
"'svd', 'inv'") % self.mode_inv)
# results and outputs
self.H = None
self.G = None
self.Hvv = None
self.Hsvals = None
self.Gsvals = None
self.detG = None
[docs] def get_dual_rmetric( self, invert_h = False, mode_inv = 'svd' ):
"""
Compute the dual Riemannian Metric
This is not satisfactory, because if mdimG<mdimY the shape of H
will not be the same as the shape of G. TODO(maybe): return a (copied)
smaller H with only the rows and columns in G.
"""
if self.H is None:
self.H, self.G, self.Hvv, self.Hsvals, self.Gsvals = riemann_metric(self.Y, self.L, self.mdimG, invert_h = invert_h, mode_inv = mode_inv)
if invert_h:
return self.H, self.G
else:
return self.H
[docs] def get_rmetric( self, mode_inv = 'svd', return_svd = False ):
"""
Compute the Reimannian Metric
"""
if self.H is None:
self.H, self.G, self.Hvv, self.Hsval = riemann_metric(self.Y, self.L, self.mdimG, invert_h = True, mode_inv = mode_inv)
if self.G is None:
self.G, self.Hvv, self.Hsvals, self.Gsvals = compute_G_from_H( self.H, mode_inv = self.mode_inv )
if mode_inv is 'svd' and return_svd:
return self.G, self.Hvv, self.Hsvals, self.Gsvals
else:
return self.G
def get_mdimG(self):
return self.mdimG
def get_detG(self):
if self.G is None:
self.H, self.G, self.Hvv, self.Hsval = riemann_metric(self.Y, self.L, self.mdimG, invert_h = True, mode_inv = self.mode_inv)
if self.detG is None:
if self.mdimG == self.mdimY:
self.detG = 1./np.linalg.det(H)
else:
self.detG = 1./np.linalg.det(H[:,:mdimG,:mdimG])
# could also be prod eigenvalues
# inefficient? shall I do it on G?