How to use scikit-learn - 10 common examples

To help you get started, we've selected a few scikit-learn examples based on popular ways the library is used in public projects.
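
All of the excerpts below come from a vendored copy of sklearn/linear_model/stochastic_gradient.py in the angadgill/Parallel-SGD project. For context, here is a minimal, self-contained sketch of the public API those internals support; the synthetic data and parameter values are illustrative only, not taken from that project.

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Illustrative synthetic data; any dense or sparse float feature matrix works.
X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# The default loss="hinge" fits a linear SVM by stochastic gradient descent.
clf = SGDClassifier(alpha=1e-4, max_iter=1000, tol=1e-3, random_state=0)
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))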

github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
             intercept_init=None, sample_weight=None):
        if hasattr(self, "classes_"):
            self.classes_ = None

        X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
        n_samples, n_features = X.shape

        # labels can be encoded as float, int, or string literals
        # np.unique sorts in asc order; largest class id is positive class
        classes = np.unique(y)

        if self.warm_start and self.coef_ is not None:
            if coef_init is None:
                coef_init = self.coef_
            if intercept_init is None:
                intercept_init = self.intercept_
        else:
            self.coef_ = None
            self.intercept_ = None

        if self.average > 0:
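
The _fit excerpt above reuses the existing coef_ and intercept_ when warm_start is enabled. From the user's side that corresponds to the public warm_start parameter; the sketch below is illustrative, with made-up data and iteration counts.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

# With warm_start=True a second call to fit() starts from the coefficients
# learned by the first call instead of re-initializing them.
clf = SGDClassifier(warm_start=True, max_iter=5, tol=None, random_state=0)
clf.fit(X, y)
first_coef = clf.coef_.copy()
clf.fit(X, y)  # continues training from the previous solution
print(np.allclose(first_coef, clf.coef_))  # typically False: the weights moved
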
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
                     n_iter, sample_weight,
                     coef_init, intercept_init):
        X, y = check_X_y(X, y, "csr", copy=False, order='C', dtype=np.float64)
        y = astype(y, np.float64, copy=False)

        n_samples, n_features = X.shape

        self._validate_params()

        # Allocate datastructures from input arguments
        sample_weight = self._validate_sample_weight(sample_weight, n_samples)

        if self.coef_ is None:
            self._allocate_parameter_mem(1, n_features,
                                         coef_init, intercept_init)
        elif n_features != self.coef_.shape[-1]:
            raise ValueError("Number of features %d does not match previous "
                             "data %d." % (n_features, self.coef_.shape[-1]))
        if self.average > 0 and self.average_coef_ is None:
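
This regressor-side _partial_fit allocates the coefficient vector on the first call and then updates it incrementally. The public counterpart is SGDRegressor.partial_fit; the out-of-core sketch below is illustrative, with a made-up batching scheme and synthetic data.

import numpy as np
from sklearn.linear_model import SGDRegressor

rng = np.random.RandomState(0)
reg = SGDRegressor(random_state=0)

# Feed the data in small batches, as you would when it does not all fit in memory.
true_w = np.array([1.0, 2.0, 0.0, -1.0, 0.5])
for _ in range(20):
    X_batch = rng.randn(100, 5)
    y_batch = X_batch @ true_w + 0.1 * rng.randn(100)
    reg.partial_fit(X_batch, y_batch)

print(reg.coef_)  # should end up close to true_w
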
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _partial_fit(self, X, y, alpha, C,
                     loss, learning_rate, n_iter,
                     classes, sample_weight,
                     coef_init, intercept_init):
        X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")

        n_samples, n_features = X.shape

        self._validate_params()
        _check_partial_fit_first_call(self, classes)

        n_classes = self.classes_.shape[0]

        # Allocate datastructures from input arguments
        self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                           self.classes_, y)
        sample_weight = self._validate_sample_weight(sample_weight, n_samples)

        if self.coef_ is None or coef_init is not None:
            self._allocate_parameter_mem(n_classes, n_features,
                                         coef_init, intercept_init)
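
The classifier variant additionally runs _check_partial_fit_first_call, which is why the public SGDClassifier.partial_fit needs the full set of labels via classes= on the first call. A minimal sketch with synthetic batches:

import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
all_classes = np.array([0, 1, 2])
clf = SGDClassifier(random_state=0)

for _ in range(10):
    X_batch = rng.randn(50, 4)
    y_batch = rng.choice(all_classes, size=50)
    # classes= must be supplied on the first call so the coefficient matrix can
    # be allocated even if a given batch does not contain every class.
    clf.partial_fit(X_batch, y_batch, classes=all_classes)

print(clf.predict(rng.randn(3, 4)))
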
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _partial_fit(self, X, y, alpha, C,
                     loss, learning_rate, n_iter,
                     classes, sample_weight,
                     coef_init, intercept_init):
        X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")

        n_samples, n_features = X.shape

        self._validate_params()
        _check_partial_fit_first_call(self, classes)

        n_classes = self.classes_.shape[0]

        # Allocate datastructures from input arguments
        self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                           self.classes_, y)
        sample_weight = self._validate_sample_weight(sample_weight, n_samples)

        if self.coef_ is None or coef_init is not None:
            self._allocate_parameter_mem(n_classes, n_features,
                                         coef_init, intercept_init)
        elif n_features != self.coef_.shape[-1]:
            raise ValueError("Number of features %d does not match previous "
                             "data %d." % (n_features, self.coef_.shape[-1]))

        self.loss_function = self._get_loss_function(loss)
        if self.t_ is None:
            self.t_ = 1.0

        # delegate to concrete training procedure
        if n_classes > 2:
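
The excerpt above expands class_weight into per-class multipliers with compute_class_weight. Roughly the same utility is exposed publicly in sklearn.utils.class_weight; the toy labels below are illustrative.

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.utils.class_weight import compute_class_weight

y = np.array([0, 0, 0, 0, 0, 0, 1, 1])  # imbalanced toy labels
classes = np.unique(y)

# "balanced" weights are inversely proportional to class frequencies:
# n_samples / (n_classes * bincount(y)) -> {0: 8/12, 1: 8/4}.
weights = compute_class_weight(class_weight="balanced", classes=classes, y=y)
print(dict(zip(classes, weights)))

# The same option can be passed straight to the estimator.
clf = SGDClassifier(class_weight="balanced", random_state=0)
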
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
                     n_iter, sample_weight,
                     coef_init, intercept_init):
        X, y = check_X_y(X, y, "csr", copy=False, order='C', dtype=np.float64)
        y = astype(y, np.float64, copy=False)

        n_samples, n_features = X.shape

        self._validate_params()

        # Allocate datastructures from input arguments
        sample_weight = self._validate_sample_weight(sample_weight, n_samples)

        if self.coef_ is None:
            self._allocate_parameter_mem(1, n_features,
                                         coef_init, intercept_init)
        elif n_features != self.coef_.shape[-1]:
            raise ValueError("Number of features %d does not match previous "
                             "data %d." % (n_features, self.coef_.shape[-1]))
        if self.average > 0 and self.average_coef_ is None:
            self.average_coef_ = np.zeros(n_features,
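
Several of these excerpts branch on self.average, the averaged-SGD (ASGD) path that maintains both standard and averaged coefficients. From the public API this is the average parameter; a minimal sketch with illustrative values:

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

# average=True keeps a running average of the per-step weight vectors (ASGD);
# an integer such as average=10 starts averaging once 10 samples have been seen.
clf = SGDClassifier(average=10, random_state=0).fit(X, y)
print(clf.coef_.shape)
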
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _partial_fit(self, X, y, alpha, C,
                     loss, learning_rate, n_iter,
                     classes, sample_weight,
                     coef_init, intercept_init):
        X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")

        n_samples, n_features = X.shape

        self._validate_params()
        _check_partial_fit_first_call(self, classes)

        n_classes = self.classes_.shape[0]

        # Allocate datastructures from input arguments
        self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                           self.classes_, y)
        sample_weight = self._validate_sample_weight(sample_weight, n_samples)

        if self.coef_ is None or coef_init is not None:
            self._allocate_parameter_mem(n_classes, n_features,
                                         coef_init, intercept_init)
        elif n_features != self.coef_.shape[-1]:
            raise ValueError("Number of features %d does not match previous "
                             "data %d." % (n_features, self.coef_.shape[-1]))

        self.loss_function = self._get_loss_function(loss)
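
_get_loss_function maps the loss string to a concrete loss object, which is why the public estimators are configured through the loss parameter. A hedged sketch; note that loss names vary slightly across scikit-learn versions (the logistic loss, for instance, has been spelled both "log" and "log_loss"), so only long-stable names are used here.

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=500, random_state=0)

svm_like = SGDClassifier(loss="hinge", random_state=0).fit(X, y)
prob_capable = SGDClassifier(loss="modified_huber", random_state=0).fit(X, y)

# Only the probabilistic losses expose predict_proba; hinge gives margins only.
print(svm_like.decision_function(X[:2]))
print(prob_capable.predict_proba(X[:2]))
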
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
Weights applied to individual samples.
            If not provided, uniform weights are assumed. These weights will
            be multiplied with class_weight (passed through the
            constructor) if class_weight is specified.

        Returns
        -------
        self : returns an instance of self.
        """
        return self._fit(X, y, alpha=self.alpha, C=1.0,
                         loss=self.loss, learning_rate=self.learning_rate,
                         coef_init=coef_init, intercept_init=intercept_init,
                         sample_weight=sample_weight)


class SGDClassifier(BaseSGDClassifier, _LearntSelectorMixin):
    """Linear classifiers (SVM, logistic regression, a.o.) with SGD training.

    This estimator implements regularized linear models with stochastic
    gradient descent (SGD) learning: the gradient of the loss is estimated
    one sample at a time and the model is updated along the way with a
    decreasing strength schedule (aka learning rate). SGD allows minibatch
    (online/out-of-core) learning; see the partial_fit method.
    For best results using the default learning rate schedule, the data should
    have zero mean and unit variance.

    This implementation works with data represented as dense or sparse arrays
    of floating point values for the features. The model it fits can be
    controlled with the loss parameter; by default, it fits a linear support
    vector machine (SVM).

    The regularizer is a penalty added to the loss function that shrinks model
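
The docstring above recommends zero-mean, unit-variance features when using the default learning-rate schedule. A common way to get that is to put a scaler in front of the estimator; the pipeline below is a minimal sketch, not code from this project.

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

# StandardScaler centers and scales each feature before SGD ever sees it.
model = make_pipeline(StandardScaler(), SGDClassifier(random_state=0))
model.fit(X, y)
print(model.score(X, y))
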
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
self.average_intercept_ = np.atleast_1d(self.average_intercept_)
            self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
            self.t_ += n_iter * X.shape[0]

            if self.average <= self.t_ - 1.0:
                self.coef_ = self.average_coef_
                self.intercept_ = self.average_intercept_
            else:
                self.coef_ = self.standard_coef_
                self.intercept_ = self.standard_intercept_

        else:
            if self.n_jobs > 1:
                sgd_result = \
                    Parallel(n_jobs=self.n_jobs, backend='threading')(
                    delayed(plain_sgd)(self.coef_,
                              self.intercept_[0],
                              loss_function,
                              penalty_type,
                              alpha, C,
                              self.l1_ratio,
                              dataset,
                              n_iter/self.n_jobs,
                              int(self.fit_intercept),
                              int(self.verbose),
                              int(self.shuffle),
                              seed,
                              1.0, 1.0,
                              learning_rate_type,
                              self.eta0, self.power_t, self.t_,
                              intercept_decay) for _ in range(self.n_jobs))
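
This fork parallelizes plain_sgd itself with joblib's Parallel and delayed helpers on a threading backend. The same pattern can be sketched in isolation; the partial_sum worker below is a made-up stand-in for one SGD run.

from joblib import Parallel, delayed

def partial_sum(start, stop):
    # Stand-in for one shard of work, e.g. one SGD pass over a slice of the data.
    return sum(range(start, stop))

chunks = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000)]

# backend="threading" shares memory between workers, as in the snippet above.
results = Parallel(n_jobs=4, backend="threading")(
    delayed(partial_sum)(a, b) for a, b in chunks)
print(sum(results) == sum(range(100000)))  # True
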
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
                        sample_weight, n_iter):
        """Fit a multi-class classifier by combining binary classifiers

        Each binary classifier predicts one class versus all others. This
        strategy is called OVA: One Versus All.
        """
        # Use joblib to fit OvA in parallel.
        result = Parallel(n_jobs=self.n_jobs, backend="threading",
                          verbose=self.verbose)(
            delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
                                n_iter, self._expanded_class_weight[i], 1.,
                                sample_weight)
            for i in range(len(self.classes_)))

        for i, (_, intercept) in enumerate(result):
            self.intercept_[i] = intercept

        self.t_ += n_iter * X.shape[0]

        if self.average > 0:
            if self.average <= self.t_ - 1.0:
                self.coef_ = self.average_coef_
                self.intercept_ = self.average_intercept_
            else:
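
_fit_multiclass fits one binary problem per class (one-versus-all) and combines the results. The stock SGDClassifier does this automatically for multiclass targets; the explicit OneVsRestClassifier wrapper shown below is only meant to illustrate the same strategy.

from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier

X, y = load_iris(return_X_y=True)

# Built-in one-vs-all: one row of coef_ per class.
clf = SGDClassifier(random_state=0).fit(X, y)
print(clf.coef_.shape)  # (3, 4) for the three iris classes

# The same strategy made explicit with the generic one-vs-rest wrapper.
ovr = OneVsRestClassifier(SGDClassifier(random_state=0)).fit(X, y)
print(len(ovr.estimators_))  # 3 binary classifiers
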
github angadgill / Parallel-SGD / scikit-learn / sklearn / linear_model / stochastic_gradient.py (view on GitHub)
self.average_intercept_ = np.atleast_1d(self.average_intercept_)
            self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
            self.t_ += n_iter * X.shape[0]

            if self.average <= self.t_ - 1.0:
                self.coef_ = self.average_coef_
                self.intercept_ = self.average_intercept_
            else:
                self.coef_ = self.standard_coef_
                self.intercept_ = self.standard_intercept_

        else:
            if self.n_jobs > 1:
                sgd_result = \
                    Parallel(n_jobs=self.n_jobs, backend='threading')(
                    delayed(plain_sgd)(self.coef_,
                              self.intercept_[0],
                              loss_function,
                              penalty_type,
                              alpha, C,
                              self.l1_ratio,
                              dataset,
                              n_iter/self.n_jobs,
                              int(self.fit_intercept),
                              int(self.verbose),
                              int(self.shuffle),
                              seed,
                              1.0, 1.0,
                              learning_rate_type,
                              self.eta0, self.power_t, self.t_,
                              intercept_decay) for _ in range(self.n_jobs))