Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_classification():
    """Fit an NGBClassifier on the breast-cancer data and check its ROC AUC.

    The data is split 80/20 with a fixed ``random_state`` so the split is
    repeatable, a classifier with ``Dist=Bernoulli`` is fitted on the
    training part, and the ROC AUC of ``ngb.predict`` on the held-out part
    must be at least 0.95.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score

    # ``return_X_y`` must be passed by keyword: current scikit-learn releases
    # declare it keyword-only, so ``load_breast_cancer(True)`` raises TypeError.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )

    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # The AUC is computed from the output of predict(), not from predicted
    # probabilities; this is kept as in the original test.
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
if __name__ == "__main__":
    # Example: binary classification with a Bernoulli output distribution,
    # reporting the ROC AUC on a held-out 20% split.

    # train_test_split is called without random_state below, so this seed of
    # NumPy's global RNG is presumably what makes the split repeatable
    # (NOTE(review): confirm against the scikit-learn version in use).
    np.random.seed(12345)

    # ``return_X_y`` must be passed by keyword: current scikit-learn releases
    # declare it keyword-only, so ``load_breast_cancer(True)`` raises TypeError.
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBClassifier(Dist=Bernoulli)
    ngb.fit(X_train, Y_train)

    # pred_dist returns the fitted predictive distribution object.
    # NOTE(review): probs[1] is presumably the per-row probability of class 1;
    # verify against the ngboost distribution API.
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.probs[1]))
from ngboost import NGBClassifier
from ngboost.distns import k_categorical
from ngboost.learners import default_tree_learner
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
if __name__ == "__main__":
    # Example: multi-class classification with a k-categorical distribution.

    # ``return_X_y`` must be passed by keyword: current scikit-learn releases
    # declare it keyword-only, so ``load_breast_cancer(True)`` raises TypeError.
    X, y = load_breast_cancer(return_X_y=True)

    # artificially make this a 3-class problem instead of a 2-class problem
    y[0:15] = 2

    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

    # tell ngboost that there are 3 possible outcomes
    ngb = NGBClassifier(Dist=k_categorical(3))

    # Y should have only 3 values: {0,1,2}
    ngb.fit(X_train, Y_train)

    # predicted probabilities of class 0, 1, and 2 (columns) for each
    # observation (row)
    preds = ngb.predict_proba(X_test)
if __name__ == "__main__":
    # An example where the base learner is also searched over (this is how
    # you would vary tree depth):

    # ``return_X_y`` must be passed by keyword: current scikit-learn releases
    # declare it keyword-only, so ``load_breast_cancer(True)`` raises TypeError.
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    # Candidate base learners: two regression trees that differ only in depth.
    b1 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=2)
    b2 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)
    # NOTE(review): b3 is constructed but not listed under "Base" below, so it
    # takes no part in the search; add it to the grid or drop it.
    b3 = Ridge(alpha=0.0)

    # Every combination of these values is tried by the grid search.
    param_grid = {
        "n_estimators": [20, 50],
        "minibatch_frac": [1.0, 0.5],
        "Base": [b1, b2],
    }

    ngb = NGBClassifier(natural_gradient=True, verbose=False, Dist=k_categorical(2))

    # 5-fold cross-validated search over param_grid on the training split.
    grid_search = GridSearchCV(ngb, param_grid=param_grid, cv=5)
    grid_search.fit(X_train, Y_train)
    print(grid_search.best_params_)