# Source code for pyrfm.random_feature.random_projection

# Author: Kyohei Atarashi
# License: BSD-2-Clause

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_random_state, check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.extmath import safe_sparse_dot
from math import sqrt
from .utils import get_random_matrix


def _get_random_matrix(distribution):
    """Bind ``distribution`` to ``get_random_matrix``.

    Parameters
    ----------
    distribution : object
        Value forwarded unchanged as the ``distribution`` argument of
        ``get_random_matrix``.

    Returns
    -------
    sampler : callable
        Function ``sampler(rng, size, p)`` that returns
        ``get_random_matrix(rng, distribution, size, p)``.
    """
    def sampler(rng, size, p):
        return get_random_matrix(rng, distribution, size, p)

    return sampler


class RandomProjection(BaseEstimator, TransformerMixin):
    """Approximate the feature map of the linear kernel by random projection.

    Parameters
    ----------
    n_components : int (default=100)
        Number of Monte Carlo samples per original features.
        Equals the dimensionality of the computed (mapped) feature space.

    distribution : str, callable or None (default="rademacher")
        How the random basis is sampled. A string is forwarded to
        ``get_random_matrix`` as the distribution name. A callable is
        invoked as ``distribution(random_state, size, p_sparse)`` and must
        return the basis of shape ``size = (n_features, n_components)``.
        If None, the Rademacher distribution is used.

    p_sparse : float or "auto" (default="auto")
        Sparsity parameter for "sparse_rademacher" distribution.
        If p_sparse = 0, "sparse_rademacher" is equivalent to "rademacher".
        The relationship between p_sparse and s in [1] is s = 1/(1-p).
        If "auto", p_sparse = 1 - 1/sqrt(n_features), recommended in [1].
        Must otherwise be a real number in [0, 1).

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If np.RandomState instance, random_state is the random number
        generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    random_weights_ : array, or sparse matrix, shape (n_features, n_components)
        The sampled basis.

    References
    ----------
    [1] Very Sparse Random Projections.
    Ping Li, T. Hastie, and K. W. Church.
    In KDD 2006.
    (https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf)
    """

    def __init__(self, n_components=100, distribution="rademacher",
                 p_sparse="auto", random_state=None):
        # Parameters are stored verbatim; validation happens in ``fit``.
        self.n_components = n_components
        self.distribution = distribution
        self.p_sparse = p_sparse
        self.random_state = random_state

    def _resolve_p_sparse(self, n_features):
        """Validate ``self.p_sparse`` and return the value handed to the sampler.

        Raises
        ------
        ValueError
            If ``p_sparse`` is a number outside [0, 1).
        TypeError
            If ``p_sparse`` is neither "auto" nor a real number.
        """
        value = self.p_sparse
        # Guard with isinstance so array-like values are never compared
        # element-wise against the string.
        if isinstance(value, str) and value == "auto":
            return 1 - 1. / np.sqrt(n_features)

        # Accept ints and NumPy scalars too (the documented ``p_sparse = 0``
        # is an int literal); bool is an int subclass but not a probability.
        is_real = (isinstance(value, (float, int, np.floating, np.integer))
                   and not isinstance(value, bool))
        if not is_real:
            raise TypeError("p_sparse is 'auto' or float in [0, 1), but "
                            "got type {}".format(type(value)))
        if not (1. > value >= 0):
            raise ValueError("p_sparse must be in [0, 1), but got {}"
                             .format(value))
        return float(value)

    def fit(self, X, y=None):
        """Generate random weights according to n_features.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : ignored
            Accepted for API compatibility; not used.

        Returns
        -------
        self : object
            Returns the transformer.

        Raises
        ------
        ValueError
            If ``p_sparse`` is a number outside [0, 1).
        TypeError
            If ``p_sparse`` is neither "auto" nor a real number.
        """
        random_state = check_random_state(self.random_state)
        X = check_array(X, accept_sparse=True)
        n_features = X.shape[1]
        size = (n_features, self.n_components)
        p_sparse = self._resolve_p_sparse(n_features)

        distribution = self.distribution
        if distribution is None:
            # Documented fallback: None means the Rademacher distribution.
            distribution = "rademacher"
        if isinstance(distribution, str):
            distribution = _get_random_matrix(distribution)

        self.random_weights_ = distribution(random_state, size, p_sparse)
        return self

    def transform(self, X):
        """Apply the approximate feature map to X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            ``X`` multiplied by ``random_weights_`` and divided by
            ``sqrt(n_components)``.
        """
        check_is_fitted(self, "random_weights_")
        X = check_array(X, accept_sparse=True)
        # ``dense_output`` is passed by keyword: recent scikit-learn releases
        # make it keyword-only, while older ones accept the keyword as well.
        output = safe_sparse_dot(X, self.random_weights_, dense_output=True)
        return output / sqrt(self.n_components)

    def _remove_bases(self, indices):
        """Drop the given columns of ``random_weights_``.

        ``n_components`` is updated to the number of remaining columns so
        that the scaling in ``transform`` stays consistent.

        Parameters
        ----------
        indices : int, array-like of int, or boolean mask
            Columns (components) of ``random_weights_`` to remove.

        Returns
        -------
        True
        """
        from scipy.sparse import issparse

        weights = self.random_weights_
        if issparse(weights):
            # np.delete cannot operate on scipy sparse matrices, so select
            # the surviving columns with a boolean mask instead.
            keep = np.ones(weights.shape[1], dtype=bool)
            keep[indices] = False
            weights = weights.tocsc()[:, keep].asformat(weights.format)
        else:
            weights = np.delete(weights, indices, axis=1)
        self.random_weights_ = weights
        self.n_components = weights.shape[1]
        return True