# Author: Kyohei Atarashi
# License: BSD-2-Clause
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_random_state, check_array
from sklearn.utils.validation import check_is_fitted
from .utils import next_pow_of_two, get_random_matrix
def _get_random_matrix(distribution):
    """Bind a distribution name to the shared random-matrix sampler.

    Parameters
    ----------
    distribution : str
        Name of the distribution; forwarded unchanged to
        ``get_random_matrix``.

    Returns
    -------
    sampler : callable
        Function with signature ``sampler(rng, size)`` that returns
        ``get_random_matrix(rng, distribution, size)``.
    """
    def sampler(rng, size):
        return get_random_matrix(rng, distribution, size)

    return sampler
[docs]class SubsampledRandomHadamard(BaseEstimator, TransformerMixin):
"""Approximates feature maps of the product between random matrix and
feature vectors by Subsampled Randomized Hadamard Transform.
This class can be used as a sub-routine for approximating the product
between random matrix and feature vectors in some random features.
Subsampled Randomized Hadamard Transform uses diagonal matrices, the
Walsh-Hadamard matrix, and submatrix of the identity matrix for
approximating the matrix-vector product.
Parameters
----------
n_components : int (default=100)
Number of Monte Carlo samples per original features.
Equals the dimensionality of the computed (mapped) feature space.
distribution : str or function (default="rademacher")
A function for sampling random bases.
Its arguments must be random_state and size.
For str, "gaussian" (or "normal"), "rademacher", "laplace", or
"uniform" can be used.
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If np.random.RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
Attributes
----------
random_weights_ : array, shape (n_features)
The sampled basis.
It is sampled using self.distribution, which defaults to the Rademacher
distribution.
random_indices_rows_ : array, shape (n_components)
The indices of rows sampled uniformly from [0, ..., n_features_padded-1],
where n_features_padded is the next power of two of n_features
(as computed by next_pow_of_two).
References
----------
[1] Improved Analysis of the Subsampled Randomized Hadamard Transform.
Joel A Tropp.
Advances in Adaptive Data Analysis,
(https://arxiv.org/pdf/1011.1595.pdf)
"""
def __init__(self, n_components=100, distribution="rademacher",
random_state=None):
self.n_components = n_components
self.distribution = distribution
self.random_state = random_state
def fit(self, X, y=None):
    """Draw the random weights and the sampled row indices.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features. After validation only
        its number of features is used.
    y : ignored
        Not used by this method.

    Returns
    -------
    self : object
        Returns the transformer.

    Raises
    ------
    ValueError
        If n_components is not strictly positive, or if it is bigger than
        the next power of two of n_features.
    """
    random_state = check_random_state(self.random_state)
    X = check_array(X, accept_sparse=True)
    n_features = X.shape[1]

    # A string names a built-in distribution; anything else is assumed to
    # be a callable taking (random_state, size).
    if isinstance(self.distribution, str):
        distribution = _get_random_matrix(self.distribution)
    else:
        distribution = self.distribution

    # Validate n_components before touching the random stream. A negative
    # value would otherwise slice the permutation from the end and silently
    # select the wrong number of rows; zero would select none.
    if self.n_components <= 0:
        raise ValueError("n_components must be strictly positive, got "
                         "{}.".format(self.n_components))
    n_features_padded = next_pow_of_two(n_features)
    if n_features_padded < self.n_components:
        raise ValueError("n_components is bigger than next power of two "
                         "of n_features.")

    # Draw order (weights first, then permutation) is kept fixed so that a
    # given random_state keeps producing the same fitted attributes.
    weights = distribution(random_state, n_features)
    self.random_weights_ = weights.astype(np.float64)

    # Row indices are the first n_components entries of a random
    # permutation of range(n_features_padded), i.e. drawn without
    # replacement.
    perm = random_state.permutation(n_features_padded).astype(np.int32)
    self.random_indices_rows_ = perm[:self.n_components]
    return self