Source code for pyoselm.oselm

"""Module to build Online Sequential Extreme Learning Machine (OS-ELM) models"""

# ===================================================
# Author: Leandro Ferrado
# Copyright(c) 2018
# License: Apache License 2.0
# ===================================================

import warnings

import numpy as np
try:
    from scipy.linalg import pinv2
except ImportError:
    # pinv2 was deprecated in SciPy 1.7 and removed in SciPy 1.9;
    # scipy.linalg.pinv is its SVD-based replacement.
    from scipy.linalg import pinv as pinv2
from scipy.sparse import eye
from scipy.special import softmax
from sklearn.base import RegressorMixin, BaseEstimator
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import as_float_array
from sklearn.utils.extmath import safe_sparse_dot

from pyoselm.layer import MLPRandomLayer


# Public API: the names exported by ``from pyoselm.oselm import *``.
__all__ = [
    "OSELMRegressor",
    "OSELMClassifier",
]


def multiple_safe_sparse_dot(*matrices):
    """
    Chain safe_sparse_dot() over a sequence of matrices, left to right.

    The product is evaluated as ``((m0 . m1) . m2) . ...``.

    Parameters
    ----------
    matrices: two or more matrices, given as positional arguments

    Returns
    -------
    dot_product : array or sparse matrix

    Raises
    ------
    ValueError
        If fewer than two matrices are given.
    """
    if len(matrices) < 2:
        raise ValueError("Argument 'matrices' must have at least 2 matrices")

    remaining = iter(matrices)
    # Seed the running product with the left-most matrix ...
    product = next(remaining)
    # ... and fold every following matrix into it from the right.
    for factor in remaining:
        product = safe_sparse_dot(product, factor)

    return product


class OSELMRegressor(BaseEstimator, RegressorMixin):
    """
    OSELMRegressor is a regressor based on Online Sequential Extreme
    Learning Machine (OS-ELM).

    This type of model is an ELM whose output weights can be updated
    incrementally: after an initial batch, every further call to
    fit() / partial_fit() refines the current solution with the new chunk
    of data instead of retraining from scratch. [1][2][3]

    Parameters
    ----------
    `n_hidden` : int, optional (default=20)
        Number of units to generate in the MLPRandomLayer

    `activation_func` : {callable, string} optional (default='sigmoid')
        Function used to transform input activation
        It must be one of 'tanh', 'sine', 'tribas', 'inv_tribase',
        'sigmoid', 'hardlim', 'softlim', 'gaussian', 'multiquadric',
        'inv_multiquadric' or a callable.
        If a callable is given, it will be used to compute the hidden
        unit activations.

    `activation_args` : dictionary, optional (default=None)
        Supplies keyword arguments for a callable activation_func

    `use_woodbury` : bool, optional (default=False)
        Flag to indicate if Woodbury formula should be used for the fit
        step, or just the traditional iterative procedure. Not recommended
        if handling large datasets. It should not be changed between
        learning steps, because the meaning of `P` depends on it.

    `random_state` : int, RandomState instance or None (default=123)
        Control the pseudo random number generator used to generate the
        hidden unit weights at fit time.

    Attributes
    ----------
    `P` : np.array
        Matrix carried from one learning step to the next (None until the
        first fit). With use_woodbury=True it is the pseudo-inverse of the
        accumulated H.T * H, updated through the Woodbury identity; with
        use_woodbury=False it is the accumulated H.T * H itself, which is
        pseudo-inverted on every step.

    `beta` : np.array
        Output weights applied to the hidden layer activations to produce
        predictions (None until the first fit).

    See Also
    --------
    ELMRegressor, MLPRandomLayer

    References
    ----------
    .. [1] http://www.extreme-learning-machines.org
    .. [2] G.-B. Huang, Q.-Y. Zhu and C.-K. Siew, "Extreme Learning Machine:
          Theory and Applications", Neurocomputing, vol. 70, pp. 489-501,
          2006.
    .. [3] N.-Y. Liang, G.-B. Huang, P. Saratchandran and N. Sundararajan,
          "A Fast and Accurate Online Sequential Learning Algorithm for
          Feedforward Networks", IEEE Transactions on Neural Networks,
          vol. 17, no. 6, pp. 1411-1423, 2006.
    """

    def __init__(self,
                 n_hidden=20,
                 activation_func='sigmoid',
                 activation_args=None,
                 use_woodbury=False,
                 random_state=123,):
        self.n_hidden = n_hidden
        self.random_state = random_state
        self.activation_func = activation_func
        self.activation_args = activation_args
        self.use_woodbury = use_woodbury

        self.P = None
        self.beta = None
        # Hidden layer fitted on the first batch. It is kept so that later
        # batches and predictions go through the very same random weights.
        self._random_layer = None

    def _create_random_layer(self):
        """Pass init params to MLPRandomLayer"""

        return MLPRandomLayer(n_hidden=self.n_hidden,
                              random_state=self.random_state,
                              activation_func=self.activation_func,
                              activation_args=self.activation_args)

    def _hidden_activations(self, X):
        """
        Compute the hidden layer activations for X.

        A new random layer is created and fitted only while the model has
        no output weights yet (first learning step), or when no layer has
        been cached (e.g. an instance restored from an older version).
        Otherwise the cached layer is reused, because `beta` is only
        meaningful for the hidden weights it was learnt with; re-drawing
        them would silently invalidate the model whenever `random_state`
        is None or a RandomState instance.
        """
        layer = getattr(self, "_random_layer", None)
        if layer is None or self.beta is None:
            layer = self._create_random_layer()
            H = layer.fit_transform(X)
            self._random_layer = layer
            return H

        # NOTE(review): relies on MLPRandomLayer exposing transform(), as
        # scikit-learn transformers do -- confirm against pyoselm.layer.
        return layer.transform(X)

    def _check_first_batch(self, n_samples):
        """Ensure the initial batch has at least `n_hidden` samples"""
        if n_samples < self.n_hidden:
            raise ValueError("The first time the model is fitted, "
                             "X must have at least equal number of "
                             "samples than n_hidden value!")

    def _fit_woodbury(self, X, y):
        """Compute learning step using Woodbury formula"""
        # compute the hidden layer activations
        H = self._hidden_activations(X)
        y = as_float_array(y, copy=True)
        # shape[0] (unlike len()) is also valid for sparse matrices
        n_samples = H.shape[0]

        if self.beta is None:
            # this is the first time the model is fitted
            self._check_first_batch(n_samples)

            self.P = pinv2(safe_sparse_dot(H.T, H))
            self.beta = multiple_safe_sparse_dot(self.P, H.T, y)
        else:
            if n_samples > 10e3:
                warnings.warn("Large input of %i rows and use_woodbury=True "
                              "may throw OOM errors" % n_samples)

            # M is a dense (n_samples x n_samples) matrix in any case, so a
            # dense identity is used; adding a sparse one to a dense array
            # would produce a np.matrix.
            M = np.eye(n_samples) + multiple_safe_sparse_dot(H, self.P, H.T)
            self.P -= multiple_safe_sparse_dot(self.P, H.T, pinv2(M),
                                               H, self.P)
            # correct beta with the residual of the current model
            e = y - safe_sparse_dot(H, self.beta)
            self.beta += multiple_safe_sparse_dot(self.P, H.T, e)

    def _fit_iterative(self, X, y):
        """Compute learning step using iterative procedure"""
        # compute the hidden layer activations
        H = self._hidden_activations(X)
        y = as_float_array(y, copy=True)

        if self.beta is None:
            # this is the first time the model is fitted
            # shape[0] (unlike len()) is also valid for sparse matrices
            self._check_first_batch(H.shape[0])

            self.P = safe_sparse_dot(H.T, H)
            P_inv = pinv2(self.P)
            self.beta = multiple_safe_sparse_dot(P_inv, H.T, y)
        else:
            # accumulate H.T * H and invert it again on every step
            self.P += safe_sparse_dot(H.T, H)
            P_inv = pinv2(self.P)
            # correct beta with the residual of the current model
            e = y - safe_sparse_dot(H, self.beta)
            self.beta = self.beta + multiple_safe_sparse_dot(P_inv, H.T, e)

    def fit(self, X, y):
        """
        Fit the model using X, y as training data.

        Calling it on an already fitted model performs a further learning
        step on top of the current state (it does not start over).

        Notice that this function could be used for n_samples==1 (online
        learning), except for the first time the model is fitted, where it
        needs at least as many rows as 'n_hidden' configured.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like of shape [n_samples, n_outputs]
            Target values (class labels in classification, real numbers in
            regression)

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If the first batch has fewer samples than 'n_hidden'.
        """
        if self.use_woodbury:
            self._fit_woodbury(X, y)
        else:
            self._fit_iterative(X, y)

        return self

    def partial_fit(self, X, y):
        """
        Fit the model using X, y as training data. Alias for fit() method.

        Notice that this function could be used for n_samples==1 (online
        learning), except for the first time the model is fitted, where it
        needs at least as many rows as 'n_hidden' configured.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like of shape [n_samples, n_outputs]
            Target values (class labels in classification, real numbers in
            regression)

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        return self.fit(X, y)

    @property
    def is_fitted(self):
        """Check if model was fitted

        Returns
        -------
            boolean, True if model is fitted
        """
        return self.beta is not None

    def predict(self, X):
        """
        Predict values using the model

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]

        Returns
        -------
        C : numpy array of shape [n_samples, n_outputs]
            Predicted values.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise ValueError("OSELMRegressor not fitted")

        # compute hidden layer activations with the layer used for training
        H = self._hidden_activations(X)

        # compute output predictions for new hidden activations
        predictions = safe_sparse_dot(H, self.beta)

        return predictions
class OSELMClassifier(OSELMRegressor):
    """
    OSELMClassifier is a classifier based on the Online Sequential Extreme
    Learning Machine (OS-ELM).

    An Extreme Learning Machine (ELM) is a single layer feedforward
    network with a random hidden layer components and ordinary linear
    least squares fitting of the hidden->output weights by default.
    [1][2]

    OSELMClassifier is an OSELMRegressor subclass that first binarizes the
    data, then uses the superclass to compute the decision function that
    is then unbinarized to yield the prediction.

    The params for the RandomLayer used in the input transform are
    exposed in the OSELMClassifier constructor.

    Parameters
    ----------
    `n_hidden` : int, optional (default=20)
        Number of units to generate in the MLPRandomLayer

    `activation_func` : {callable, string} optional (default='sigmoid')
        Function used to transform input activation
        It must be one of 'tanh', 'sine', 'tribas', 'inv_tribase',
        'sigmoid', 'hardlim', 'softlim', 'gaussian', 'multiquadric',
        'inv_multiquadric' or a callable.
        If a callable is given, it will be used to compute the hidden
        unit activations.

    `activation_args` : dictionary, optional (default=None)
        Supplies keyword arguments for a callable activation_func

    `binarizer` : label binarizer or None, optional (default=None)
        Object used to encode the class labels into the numeric targets
        learnt by the regressor and to decode them back. If None, a new
        LabelBinarizer(neg_label=-1, pos_label=1) is created for this
        instance.

    `use_woodbury` : bool, optional (default=False)
        Flag to indicate if Woodbury formula should be used for the fit
        step, or just the traditional iterative procedure. Not recommended
        if handling large datasets.

    `random_state` : int, RandomState instance or None (default=123)
        Control the pseudo random number generator used to generate the
        hidden unit weights at fit time.

    Attributes
    ----------
    `classes_` : numpy array of shape [n_classes]
        Array of class labels, taken from the first batch used to fit the
        model (None until then).

    See Also
    --------
    ELMRegressor, OSELMRegressor, MLPRandomLayer

    References
    ----------
    .. [1] http://www.extreme-learning-machines.org
    .. [2] G.-B. Huang, Q.-Y. Zhu and C.-K. Siew, "Extreme Learning Machine:
          Theory and Applications", Neurocomputing, vol. 70, pp. 489-501,
          2006.
    """

    def __init__(self,
                 n_hidden=20,
                 activation_func='sigmoid',
                 activation_args=None,
                 binarizer=None,
                 use_woodbury=False,
                 random_state=123):

        super(OSELMClassifier, self).__init__(n_hidden=n_hidden,
                                              random_state=random_state,
                                              activation_func=activation_func,
                                              activation_args=activation_args,
                                              use_woodbury=use_woodbury)
        self.classes_ = None
        # A default built in the signature would be a single object shared
        # (and re-fitted) by every classifier created with the defaults, so
        # the default binarizer is created here, once per instance.
        if binarizer is None:
            binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
        self.binarizer = binarizer

    def decision_function(self, X):
        """
        This function return the decision function values related to each
        class on an array of test vectors X.

        Parameters
        ----------
        X : array-like of shape [n_samples, n_features]

        Returns
        -------
        C : array of shape [n_samples, n_classes] or [n_samples,]
            Decision function values related to each class, per sample,
            exactly as produced by the underlying regressor.
        """
        return super(OSELMClassifier, self).predict(X)

    def fit(self, X, y):
        """
        Fit the model using X, y as training data.

        The binarizer is fitted (and `classes_` is set) only the first
        time; later calls encode y with the already fitted binarizer and
        perform a further learning step.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like of shape [n_samples, n_outputs]
            Target values (class labels in classification, real numbers in
            regression)

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        if not self.is_fitted:
            self.classes_ = np.unique(y)
            y_bin = self.binarizer.fit_transform(y)
        else:
            y_bin = self.binarizer.transform(y)

        super(OSELMClassifier, self).fit(X, y_bin)

        return self

    def predict(self, X):
        """
        Predict class values using the model

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]

        Returns
        -------
        C : numpy array of shape [n_samples, n_outputs]
            Predicted class values.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise ValueError("OSELMClassifier not fitted")

        raw_predictions = self.decision_function(X)
        class_predictions = self.binarizer.inverse_transform(raw_predictions)

        return class_predictions

    def predict_proba(self, X):
        """
        Predict probability values using the model

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]

        Returns
        -------
        P : numpy array of shape [n_samples, n_classes]
            Predicted probability values; every row sums to 1. With two
            classes the columns follow the order of `classes_`.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise ValueError("OSELMClassifier not fitted")

        raw_predictions = self.decision_function(X)
        if raw_predictions.ndim == 1:
            raw_predictions = raw_predictions.reshape(-1, 1)

        n_classes = 0 if self.classes_ is None else len(self.classes_)
        if raw_predictions.shape[1] == 1 and n_classes == 2:
            # Two classes are encoded in a single column (score of the
            # positive class). Build the complementary column by mirroring
            # it around the midpoint between both labels, so that the most
            # probable class matches the one returned by predict().
            label_sum = (getattr(self.binarizer, "neg_label", -1)
                         + getattr(self.binarizer, "pos_label", 1))
            raw_predictions = np.hstack([label_sum - raw_predictions,
                                         raw_predictions])

        # using softmax to translate raw predictions into probability values;
        # axis=1 normalizes each sample on its own (the default axis=None
        # would normalize over the whole array)
        proba_predictions = softmax(raw_predictions, axis=1)

        return proba_predictions

    def score(self, X, y, **kwargs):
        """
        Force use of accuracy score since it doesn't inherit from
        ClassifierMixin

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]

        y : array-like of shape [n_samples,]
            True class labels.

        **kwargs :
            Only 'sample_weight' is used (forwarded to accuracy_score);
            any other keyword is ignored.

        Returns
        -------
        score : float
            Accuracy of self.predict(X) with respect to y.
        """
        return accuracy_score(y, self.predict(X),
                              sample_weight=kwargs.get("sample_weight"))