def _fit_fast(self):
    """Fast ABOD method. Only use n_neighbors for angle calculation.

    Internal use only.
    """
    # make sure n_neighbors is within the valid range (1, n_train_)
    check_parameter(self.n_neighbors, 1, self.n_train_)

    self.tree_ = KDTree(self.X_train_)

    neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
    neigh.fit(self.X_train_)
    ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                               return_distance=False)

    # the angle-based score of each point is computed against
    # its k nearest neighbors only
    for i in range(self.n_train_):
        curr_pt = self.X_train_[i, :]
        X_ind = ind_arr[i, :]
        self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                      self.X_train_,
                                                      X_ind)
    return self
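# ----------------------------------------------------------------------
# A minimal usage sketch (not part of the snippet above) of the
# check_parameter guard used in _fit_fast. Its bounds are exclusive by
# default, so it raises ValueError for values on or outside (low, high).
from pyod.utils.utility import check_parameter

check_parameter(5, low=1, high=100)      # passes: 1 < 5 < 100
try:
    check_parameter(0, low=1, high=100)  # 0 is outside (1, 100)
except ValueError as err:
    print(err)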
self.preprocessing = preprocessing
self.verbose = verbose
self.random_state = random_state

# default values
if self.hidden_neurons is None:
    self.hidden_neurons = [64, 32, 32, 64]

# verify the network design is valid: encoder and decoder layer
# sizes must mirror each other
if not self.hidden_neurons == self.hidden_neurons[::-1]:
    print(self.hidden_neurons)
    raise ValueError("Hidden units should be symmetric")

self.hidden_neurons_ = self.hidden_neurons

check_parameter(dropout_rate, 0, 1, param_name='dropout_rate',
                include_left=True)
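# ----------------------------------------------------------------------
# Sketch of the symmetry check above: an autoencoder's hidden layer
# sizes must read the same forwards and backwards, which the snippet
# tests by comparing the list against its reverse.
hidden_neurons = [64, 32, 32, 64]
assert hidden_neurons == hidden_neurons[::-1]     # symmetric: accepted
assert [64, 32, 16] != [64, 32, 16][::-1]         # asymmetric: rejected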
def _validate_estimator(self, default=None):
    """Check the values of alpha and beta and the clustering algorithm.
    """
    check_parameter(self.alpha, low=0, high=1, param_name='alpha',
                    include_left=False, include_right=False)

    check_parameter(self.beta, low=1, param_name='beta',
                    include_left=False)

    if self.clustering_estimator is not None:
        self.clustering_estimator_ = self.clustering_estimator
    else:
        self.clustering_estimator_ = default

    # make sure the base clustering algorithm is valid
    if self.clustering_estimator_ is None:
        raise ValueError("clustering algorithm cannot be None")

    if self.check_estimator:
        check_estimator(self.clustering_estimator_)
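# ----------------------------------------------------------------------
# Hedged usage sketch, assuming the class above is pyod's CBLOF: any
# sklearn-style clusterer can be passed as clustering_estimator; the
# checks in _validate_estimator run when fit() is called.
from sklearn.cluster import KMeans
from pyod.models.cblof import CBLOF

clf = CBLOF(clustering_estimator=KMeans(n_clusters=8),
            alpha=0.9, beta=5)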
def __init__(self, base_estimators, method='average', threshold=0.5,
             weights=None, pre_fitted=False):
    super(SimpleClassifierAggregator, self).__init__(
        base_estimators=base_estimators, pre_fitted=pre_fitted)

    # validate input parameters
    if method not in ['average', 'maximization', 'majority_vote',
                      'median']:
        raise ValueError("{method} is not a valid parameter.".format(
            method=method))
    self.method = method

    check_parameter(threshold, 0, 1, include_left=False,
                    include_right=False, param_name='threshold')
    self.threshold = threshold

    # set estimator weights
    self._set_weights(weights)
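# ----------------------------------------------------------------------
# Hedged usage sketch for the aggregator above (combo library),
# averaging two sklearn classifiers; the toy data is made up purely
# for illustration.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from combo.models.classifier_comb import SimpleClassifierAggregator

X, y = make_classification(n_samples=200, random_state=42)
clf = SimpleClassifierAggregator(
    base_estimators=[LogisticRegression(), DecisionTreeClassifier()],
    method='average')
clf.fit(X, y)
labels = clf.predict(X)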
    Raw outlier scores. Outliers are assumed to have larger values.

outliers_fraction : float in (0, 1)
    Percentage of outliers.

Returns
-------
outlier_labels : numpy array of shape (n_samples,)
    For each observation, tells whether or not it should be
    considered an outlier according to the fitted model:
    1 for outliers and 0 for inliers.
"""
# check input values
pred_scores = column_or_1d(pred_scores)
check_parameter(outliers_fraction, 0, 1)

threshold = percentile(pred_scores, 100 * (1 - outliers_fraction))
pred_labels = (pred_scores > threshold).astype('int')
return pred_labels
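# ----------------------------------------------------------------------
# Sketch of the thresholding logic above in plain numpy: the cut point
# is the (1 - outliers_fraction) percentile of the scores, and anything
# strictly above it is labeled an outlier.
import numpy as np

scores = np.array([0.1, 0.3, 0.2, 0.9, 0.4])
threshold = np.percentile(scores, 100 * (1 - 0.2))  # top 20% -> outliers
print((scores > threshold).astype('int'))           # [0 0 0 1 0]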
def __init__(self, n_bins=10, alpha=0.1, tol=0.5, contamination=0.1):
    super(HBOS, self).__init__(contamination=contamination)
    self.n_bins = n_bins
    self.alpha = alpha
    self.tol = tol

    check_parameter(alpha, 0, 1, param_name='alpha')
    check_parameter(tol, 0, 1, param_name='tol')
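# ----------------------------------------------------------------------
# Hedged sketch: the two check_parameter calls above reject alpha or
# tol values outside (0, 1) at construction time.
from pyod.models.hbos import HBOS

hbos = HBOS(n_bins=10, alpha=0.1, tol=0.5)  # valid settings
try:
    HBOS(alpha=1.5)                         # alpha outside (0, 1)
except ValueError as err:
    print(err)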
Returns
-------
combined_scores : numpy array of shape (n_samples,)
    The combined outlier scores.
"""
if mode != 'AOM' and mode != 'MOA':
    raise NotImplementedError(
        '{mode} is not implemented'.format(mode=mode))

scores = check_array(scores)
# TODO: add one more parameter for the max number of estimators,
#  or use random_state instead; for now it is fixed at n_estimators / 2
n_estimators = scores.shape[1]
check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')

scores_buckets = np.zeros([scores.shape[0], n_buckets])

if method == 'static':
    n_estimators_per_bucket = int(n_estimators / n_buckets)
    if n_estimators % n_buckets != 0:
        raise ValueError('n_estimators / n_buckets has a remainder. Not '
                         'allowed in static mode.')

    if not bootstrap_estimators:
        # shuffle the estimator order
        shuffled_list = shuffle(list(range(0, n_estimators, 1)),
                                random_state=random_state)

        head = 0
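# ----------------------------------------------------------------------
# Sketch of the static bucketing above: n_estimators must divide evenly
# into n_buckets, and sklearn's shuffle randomizes which estimators
# land in which bucket.
from sklearn.utils import shuffle

n_estimators, n_buckets = 10, 5
assert n_estimators % n_buckets == 0         # required in static mode
order = shuffle(list(range(n_estimators)), random_state=42)
per_bucket = n_estimators // n_buckets       # 2 estimators per bucket
buckets = [order[i:i + per_bucket]
           for i in range(0, n_estimators, per_bucket)]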
    Training data.

y_train : numpy array of shape (n_train,)
    Training ground truth.

X_test : numpy array of shape (n_test, n_features)
    Test data.

y_test : numpy array of shape (n_test,)
    Test ground truth.
"""
# initialize a random state and seeds for the instance
random_state = check_random_state(random_state)

if isinstance(n_clusters, int):
    check_parameter(n_clusters, low=1, param_name='n_clusters')
else:
    raise ValueError("n_clusters should be int, got %s" % n_clusters)

if isinstance(n_features, int):
    check_parameter(n_features, low=1, param_name='n_features')
else:
    raise ValueError("n_features should be int, got %s" % n_features)

if isinstance(contamination, float):
    check_parameter(contamination, low=0, high=0.5,
                    param_name='contamination')
else:
    raise ValueError(
        "contamination should be float, got %s" % contamination)

if isinstance(dist, float):
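# ----------------------------------------------------------------------
# Hedged usage sketch: the checks above guard pyod's synthetic-data
# helper generate_data_clusters; a call with valid types and ranges,
# unpacked in the order the docstring above lists (verify the return
# order against your pyod version), looks like this.
from pyod.utils.data import generate_data_clusters

X_train, y_train, X_test, y_test = generate_data_clusters(
    n_train=1000, n_test=500, n_clusters=2, n_features=2,
    contamination=0.1, random_state=42)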
super(DES_LA, self).__init__(
    base_estimators=base_estimators, pre_fitted=pre_fitted)

# validate input parameters
if not isinstance(local_region_size, int):
    raise ValueError('local_region_size must be an integer variable')
check_parameter(local_region_size, low=2, include_left=True,
                param_name='local_region_size')
self.local_region_size = local_region_size

if n_selected_clfs is None:
    self.n_selected_clfs = int(self.n_base_estimators_ * 0.5)
else:
    if not isinstance(n_selected_clfs, int):
        raise ValueError('n_selected_clfs must be an integer variable')
    check_parameter(n_selected_clfs, low=1,
                    high=self.n_base_estimators_, include_left=True,
                    include_right=True, param_name='n_selected_clfs')
    self.n_selected_clfs = n_selected_clfs

self.use_weights = use_weights

if threshold is not None:
    warnings.warn(
        "DES does not support threshold setting option. "
        "Please set the threshold in classifiers directly.")

if pre_fitted is not None:
    warnings.warn("DES does not support pre_fitted option.")
def __init__(self, base_estimators, meta_clf=None, n_folds=2,
             keep_original=True,
             use_proba=False, shuffle_data=False, random_state=None,
             threshold=None, pre_fitted=None):
    super(Stacking, self).__init__(
        base_estimators=base_estimators, pre_fitted=pre_fitted)

    # validate input parameters
    if not isinstance(n_folds, int):
        raise ValueError('n_folds must be an integer variable')
    check_parameter(n_folds, low=2, include_left=True,
                    param_name='n_folds')
    self.n_folds = n_folds

    if meta_clf is not None:
        self.meta_clf = meta_clf
    else:
        self.meta_clf = LogisticRegression()

    # set flags
    self.keep_original = keep_original
    self.use_proba = use_proba
    self.shuffle_data = shuffle_data
    self.random_state = random_state

    if threshold is not None:
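# ----------------------------------------------------------------------
# Hedged usage sketch for the Stacking initializer above: base
# classifiers feed out-of-fold predictions to a meta classifier
# (LogisticRegression by default, per the snippet).
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from combo.models.classifier_stacking import Stacking

clf = Stacking(base_estimators=[KNeighborsClassifier(),
                                DecisionTreeClassifier()],
               n_folds=4, use_proba=True)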