Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Labels are read from the 'y' entry of the loaded `mat` mapping.
y = mat['y']

# Partition the samples into train and test subsets (test_size=0.4);
# random_state is pinned to 42, presumably for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Standardize both subsets so the data is digestible for most algorithms.
X_train, X_test = standardizer(X_train, X_test)

# Contamination level handed to the detectors: the sum of the labels
# divided by the number of labels.
contamination = y.sum() / len(y)

# Build the estimators used for training and prediction.
base_estimators = get_estimators(contamination=contamination)

##########################################################################
model = SUOD(
    base_estimators=base_estimators,
    rp_flag_global=True,
    approx_clf=approx_clf,
    n_jobs=n_jobs,
    bps_flag=True,
    contamination=contamination,
    approx_flag_global=True,
)

# Time the fit of all models on the training subset.
start = time.time()
model.fit(X_train)
print('Fit time:', time.time() - start)
print()

# Time the model approximation step (conducted only if it is enabled).
start = time.time()
model.approximate(X_train)
print('Approximation time:', time.time() - start)
print()

# Restart the timer and predict labels for the test subset.
start = time.time()
predicted_labels = model.predict(X_test)
LOF(n_neighbors=45, contamination=contamination),
HBOS(contamination=contamination),
PCA(contamination=contamination),
OCSVM(contamination=contamination),
KNN(n_neighbors=5, contamination=contamination),
KNN(n_neighbors=15, contamination=contamination),
KNN(n_neighbors=25, contamination=contamination),
KNN(n_neighbors=35, contamination=contamination),
KNN(n_neighbors=45, contamination=contamination),
IForest(n_estimators=50, contamination=contamination),
IForest(n_estimators=100, contamination=contamination),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)])
]
model = SUOD(
    base_estimators=base_estimators,
    n_jobs=6,
    bps_flag=True,
    contamination=contamination,
    approx_flag_global=True,
)

# Fit all models on the training subset.
model.fit(X_train)
# Conduct model approximation (only if it is enabled).
model.approximate(X_train)

# Collect the three kinds of output for the test subset.
predicted_labels = model.predict(X_test)  # labels
predicted_scores = model.decision_function(X_test)  # scores
predicted_probs = model.predict_proba(X_test)  # probabilities
###########################################################################
# Compare against other combination approaches. Each entry names the
# report label, the combiner to call, and the model output it consumes;
# the combiner is invoked inside the loop so evaluation and printing stay
# interleaved in the listed order.
for combo_name, combiner, detector_output in (
    ('majority vote', majority_vote, predicted_labels),
    ('average', average, predicted_scores),
    ('maximization', maximization, predicted_scores),
):
    evaluate_print(combo_name, y_test, combiner(detector_output))

# Fit a single LOF detector, constructed with its default arguments, on the
# same training subset.
clf = LOF()
clf.fit(X_train)
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
]
# Alternative configuration kept for comparison: identical to the active
# call below except for bps_flag=False.
# model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
# n_jobs=6, bps_flag=False, contamination=contamination,
# approx_flag_global=True)
model = SUOD(
    base_estimators=base_estimators,
    rp_flag_global=True,
    n_jobs=6,
    bps_flag=True,
    contamination=contamination,
    approx_flag_global=True,
)

# Elapsed times are measured with time.perf_counter(): it is monotonic and
# high-resolution, so the reported durations cannot be skewed by system
# clock adjustments the way time.time() differences can.
start = time.perf_counter()
model.fit(X)  # fit all models with X
print('Fit time:', time.perf_counter() - start)
print()

start = time.perf_counter()
model.approximate(X)  # conduct model approximation if it is enabled
print('Approximation time:', time.perf_counter() - start)
print()

start = time.perf_counter()
predicted_labels = model.predict(X)  # predict labels
print('Predict time:', time.perf_counter() - start)
# Features and labels are read from the 'X' and 'y' entries of `mat`.
X = mat['X']
y = mat['y']

# Partition the samples into train and test subsets (test_size=0.4);
# random_state is pinned to 42, presumably for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Standardize both subsets so the data is digestible for most algorithms.
X_train, X_test = standardizer(X_train, X_test)

# Contamination level handed to the detectors: the sum of the labels
# divided by the number of labels.
contamination = y.sum() / len(y)

# Work on a deep copy of whatever get_estimators returns.
base_estimators = deepcopy(get_estimators(contamination=contamination))

##########################################################################
# This run passes a RandomForestRegressor instance as approx_clf.
model = SUOD(
    base_estimators=base_estimators,
    rp_flag_global=True,
    approx_clf=RandomForestRegressor(),
    n_jobs=n_jobs,
    bps_flag=True,
    contamination=contamination,
    approx_flag_global=True,
)

# Time the fit of all models on the training subset.
start = time.time()
model.fit(X_train)
print('Fit time:', time.time() - start)
print()

# Time the model approximation step (conducted only if it is enabled).
start = time.time()
model.approximate(X_train)
print('Approximation time:', time.time() - start)
print()

# Restart the timer and predict labels for the test subset.
start = time.time()
predicted_labels = model.predict(X_test)