# Author: Kyohei Atarashi
# License: BSD-2-Clause
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_random_state, check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.extmath import safe_sparse_dot
from math import sqrt
from .utils import get_random_matrix
def _get_random_matrix(distribution):
return lambda rng, size, p: get_random_matrix(rng, distribution, size, p)
class RandomProjection(BaseEstimator, TransformerMixin):
    """Approximates feature map of the linear product kernel by Random Projection.

    Parameters
    ----------
    n_components : int (default=100)
        Number of Monte Carlo samples per original features.
        Equals the dimensionality of the computed (mapped) feature space.

    distribution : str, callable or None (default="rademacher")
        How the random bases are sampled.
        If str, the name is forwarded to ``get_random_matrix``.
        If callable, it is invoked as
        ``distribution(random_state, size, p_sparse)`` with
        ``size = (n_features, n_components)``, and its return value is
        stored as ``random_weights_``.
        If None, the Rademacher distribution is used.

    p_sparse : float or "auto" (default="auto")
        Sparsity parameter for "sparse_rademacher" distribution.
        Must be "auto" or a real number in [0, 1).
        If p_sparse = 0, "sparse_rademacher" is equivalent to "rademacher".
        The relationship between p_sparse and s in [1] is s = 1/(1-p).
        If auto, p_sparse = 1 - 1/sqrt(n_features), recommended in [1].

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If np.RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    random_weights_ : array, or sparse matrix, shape (n_features, n_components)
        The sampled basis.

    References
    ----------
    [1] Very Sparse Random Projections.
    Ping Li, T. Hastie, and K. W. Church.
    In KDD 2006.
    (https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf)

    """

    def __init__(self, n_components=100, distribution="rademacher",
                 p_sparse="auto", random_state=None):
        self.n_components = n_components
        self.distribution = distribution
        self.p_sparse = p_sparse
        self.random_state = random_state

    def fit(self, X, y=None):
        """Generate random weights according to n_features.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : ignored
            Accepted for API compatibility; not used.

        Returns
        -------
        self : object
            Returns the transformer.

        Raises
        ------
        TypeError
            If ``p_sparse`` is neither "auto" nor a real number.
        ValueError
            If ``p_sparse`` is a real number outside [0, 1).
        """
        random_state = check_random_state(self.random_state)
        X = check_array(X, accept_sparse=True)
        n_features = X.shape[1]
        size = (n_features, self.n_components)
        p_sparse = self._resolve_p_sparse(n_features)

        if self.distribution is None:
            # Documented behaviour: None means the Rademacher distribution.
            distribution = _get_random_matrix("rademacher")
        elif isinstance(self.distribution, str):
            distribution = _get_random_matrix(self.distribution)
        else:
            # User-supplied sampler, called with (random_state, size, p_sparse).
            distribution = self.distribution

        self.random_weights_ = distribution(random_state, size, p_sparse)
        return self

    def _resolve_p_sparse(self, n_features):
        """Validate ``self.p_sparse`` and return the value to sample with."""
        p_sparse = self.p_sparse
        if isinstance(p_sparse, str) and p_sparse == "auto":
            return 1 - 1. / np.sqrt(n_features)

        # Accept any real scalar (e.g. the integer 0 or a NumPy float), not
        # just built-in floats. bool is an int subclass, so reject it
        # explicitly to keep raising TypeError for True/False.
        is_real = (isinstance(p_sparse, (int, float, np.integer, np.floating))
                   and not isinstance(p_sparse, bool))
        if not is_real:
            raise TypeError("p_sparse is 'auto' or float in [0, 1), but "
                            "got type {}".format(type(p_sparse)))
        if not (1. > p_sparse >= 0):
            raise ValueError("p_sparse must be in [0, 1), but got {}"
                             .format(p_sparse))
        return float(p_sparse)

    def _remove_bases(self, indices):
        """Drop the given columns (bases) from ``random_weights_``.

        ``n_components`` is overwritten with the number of remaining columns.

        Parameters
        ----------
        indices : int, slice, or array-like
            Column positions to remove.

        Returns
        -------
        bool
            Always True.
        """
        # Local import: this file does not import scipy at module level.
        from scipy.sparse import issparse

        weights = self.random_weights_
        if issparse(weights):
            # np.delete cannot operate on SciPy sparse matrices, so select
            # the surviving columns with a boolean mask instead and restore
            # the original storage format afterwards.
            keep = np.ones(weights.shape[1], dtype=bool)
            keep[indices] = False
            weights = weights.tocsc()[:, keep].asformat(weights.format)
        else:
            weights = np.delete(weights, indices, axis=1)

        self.random_weights_ = weights
        self.n_components = weights.shape[1]
        return True