Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
intercept_init=None, sample_weight=None):
if hasattr(self, "classes_"):
self.classes_ = None
X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
n_samples, n_features = X.shape
# labels can be encoded as float, int, or string literals
# np.unique sorts in asc order; largest class id is positive class
classes = np.unique(y)
if self.warm_start and self.coef_ is not None:
if coef_init is None:
coef_init = self.coef_
if intercept_init is None:
intercept_init = self.intercept_
else:
self.coef_ = None
self.intercept_ = None
if self.average > 0:
def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
n_iter, sample_weight,
coef_init, intercept_init):
X, y = check_X_y(X, y, "csr", copy=False, order='C', dtype=np.float64)
y = astype(y, np.float64, copy=False)
n_samples, n_features = X.shape
self._validate_params()
# Allocate datastructures from input arguments
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
if self.coef_ is None:
self._allocate_parameter_mem(1, n_features,
coef_init, intercept_init)
elif n_features != self.coef_.shape[-1]:
raise ValueError("Number of features %d does not match previous "
"data %d." % (n_features, self.coef_.shape[-1]))
if self.average > 0 and self.average_coef_ is None:
def _partial_fit(self, X, y, alpha, C,
loss, learning_rate, n_iter,
classes, sample_weight,
coef_init, intercept_init):
X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
n_samples, n_features = X.shape
self._validate_params()
_check_partial_fit_first_call(self, classes)
n_classes = self.classes_.shape[0]
# Allocate datastructures from input arguments
self._expanded_class_weight = compute_class_weight(self.class_weight,
self.classes_, y)
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
if self.coef_ is None or coef_init is not None:
self._allocate_parameter_mem(n_classes, n_features,
coef_init, intercept_init)
def _partial_fit(self, X, y, alpha, C,
loss, learning_rate, n_iter,
classes, sample_weight,
coef_init, intercept_init):
X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
n_samples, n_features = X.shape
self._validate_params()
_check_partial_fit_first_call(self, classes)
n_classes = self.classes_.shape[0]
# Allocate datastructures from input arguments
self._expanded_class_weight = compute_class_weight(self.class_weight,
self.classes_, y)
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
if self.coef_ is None or coef_init is not None:
self._allocate_parameter_mem(n_classes, n_features,
coef_init, intercept_init)
elif n_features != self.coef_.shape[-1]:
raise ValueError("Number of features %d does not match previous "
"data %d." % (n_features, self.coef_.shape[-1]))
self.loss_function = self._get_loss_function(loss)
if self.t_ is None:
self.t_ = 1.0
# delegate to concrete training procedure
if n_classes > 2:
def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
n_iter, sample_weight,
coef_init, intercept_init):
X, y = check_X_y(X, y, "csr", copy=False, order='C', dtype=np.float64)
y = astype(y, np.float64, copy=False)
n_samples, n_features = X.shape
self._validate_params()
# Allocate datastructures from input arguments
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
if self.coef_ is None:
self._allocate_parameter_mem(1, n_features,
coef_init, intercept_init)
elif n_features != self.coef_.shape[-1]:
raise ValueError("Number of features %d does not match previous "
"data %d." % (n_features, self.coef_.shape[-1]))
if self.average > 0 and self.average_coef_ is None:
self.average_coef_ = np.zeros(n_features,
def _partial_fit(self, X, y, alpha, C,
loss, learning_rate, n_iter,
classes, sample_weight,
coef_init, intercept_init):
X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
n_samples, n_features = X.shape
self._validate_params()
_check_partial_fit_first_call(self, classes)
n_classes = self.classes_.shape[0]
# Allocate datastructures from input arguments
self._expanded_class_weight = compute_class_weight(self.class_weight,
self.classes_, y)
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
if self.coef_ is None or coef_init is not None:
self._allocate_parameter_mem(n_classes, n_features,
coef_init, intercept_init)
elif n_features != self.coef_.shape[-1]:
raise ValueError("Number of features %d does not match previous "
"data %d." % (n_features, self.coef_.shape[-1]))
self.loss_function = self._get_loss_function(loss)
Weights applied to individual samples.
If not provided, uniform weights are assumed. These weights will
be multiplied with class_weight (passed through the
constructor) if class_weight is specified
Returns
-------
self : returns an instance of self.
"""
return self._fit(X, y, alpha=self.alpha, C=1.0,
loss=self.loss, learning_rate=self.learning_rate,
coef_init=coef_init, intercept_init=intercept_init,
sample_weight=sample_weight)
class SGDClassifier(BaseSGDClassifier, _LearntSelectorMixin):
"""Linear classifiers (SVM, logistic regression, a.o.) with SGD training.
This estimator implements regularized linear models with stochastic
gradient descent (SGD) learning: the gradient of the loss is estimated
each sample at a time and the model is updated along the way with a
decreasing strength schedule (aka learning rate). SGD allows minibatch
(online/out-of-core) learning, see the partial_fit method.
For best results using the default learning rate schedule, the data should
have zero mean and unit variance.
This implementation works with data represented as dense or sparse arrays
of floating point values for the features. The model it fits can be
controlled with the loss parameter; by default, it fits a linear support
vector machine (SVM).
The regularizer is a penalty added to the loss function that shrinks model
self.average_intercept_ = np.atleast_1d(self.average_intercept_)
self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
self.t_ += n_iter * X.shape[0]
if self.average <= self.t_ - 1.0:
self.coef_ = self.average_coef_
self.intercept_ = self.average_intercept_
else:
self.coef_ = self.standard_coef_
self.intercept_ = self.standard_intercept_
else:
if self.n_jobs > 1:
sgd_result = \
Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(plain_sgd)(self.coef_,
self.intercept_[0],
loss_function,
penalty_type,
alpha, C,
self.l1_ratio,
dataset,
n_iter/self.n_jobs,
int(self.fit_intercept),
int(self.verbose),
int(self.shuffle),
seed,
1.0, 1.0,
learning_rate_type,
self.eta0, self.power_t, self.t_,
intercept_decay) for _ in range(self.n_jobs))
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
sample_weight, n_iter):
"""Fit a multi-class classifier by combining binary classifiers
Each binary classifier predicts one class versus all others. This
strategy is called OVA: One Versus All.
"""
# Use joblib to fit OvA in parallel.
result = Parallel(n_jobs=self.n_jobs, backend="threading",
verbose=self.verbose)(
delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
n_iter, self._expanded_class_weight[i], 1.,
sample_weight)
for i in range(len(self.classes_)))
for i, (_, intercept) in enumerate(result):
self.intercept_[i] = intercept
self.t_ += n_iter * X.shape[0]
if self.average > 0:
if self.average <= self.t_ - 1.0:
self.coef_ = self.average_coef_
self.intercept_ = self.average_intercept_
else:
self.average_intercept_ = np.atleast_1d(self.average_intercept_)
self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
self.t_ += n_iter * X.shape[0]
if self.average <= self.t_ - 1.0:
self.coef_ = self.average_coef_
self.intercept_ = self.average_intercept_
else:
self.coef_ = self.standard_coef_
self.intercept_ = self.standard_intercept_
else:
if self.n_jobs > 1:
sgd_result = \
Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(plain_sgd)(self.coef_,
self.intercept_[0],
loss_function,
penalty_type,
alpha, C,
self.l1_ratio,
dataset,
n_iter/self.n_jobs,
int(self.fit_intercept),
int(self.verbose),
int(self.shuffle),
seed,
1.0, 1.0,
learning_rate_type,
self.eta0, self.power_t, self.t_,
intercept_decay) for _ in range(self.n_jobs))