import autosklearn.classification
import sklearn.metrics
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)
# Fit MLP classifier to the data.
clf = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
    per_run_time_limit=10,
    include_estimators=['mlp'],  # auto-sklearn's short component name for the MLP classifier
)
clf.fit(X_train, y_train)
# Print test accuracy and statistics.
y_pred = clf.predict(X_test)
print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
print(clf.sprint_statistics())
print(clf.show_models())
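# A hedged follow-up, not part of the original example: per-class detail on the same
# held-out predictions using plain sklearn.metrics (already imported above).
print(sklearn.metrics.classification_report(y_test, y_pred))
print(sklearn.metrics.confusion_matrix(y_test, y_pred))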
log_proba = clf_cv.predict_log_proba(Xtn)  # uncommented: log_proba is used below
Py_d = log_proba[:, 1] - np.log(0.5)
vect_prob = np.zeros((Xt.shape[0], 1), dtype='f')
vect_prob[:, 0] = Py_d[:]
Xt_augment = domain_adaptation_baseline.append_features(Xtn, vect_prob)
hw, W = denoising_autoencoders.mDA(Xt_augment.T, noise, 0.05, layer_func=layer_func)
h = hw.T
if not multiclass:
    # TODO: this is fragile if labels 0 and 1 are swapped in the decision rule.
    m_score = sklearn.metrics.accuracy_score(Yt, h[:, -1] > 0.5)
    # m_score = sklearn.metrics.accuracy_score(Yt, (h[:, -1] - np.log(0.5)) > 0)
    model_AUC = sklearn.metrics.roc_auc_score(Yt, h[:, -1])
    baseline_AUC = sklearn.metrics.roc_auc_score(Yt, Py_d)
    print("AUC", baseline_AUC, model_AUC)
    if score == 'AUC':
        return (baseline_AUC, model_AUC)
    else:
        return (no_transfer_acc, m_score)
else:
    hy_reconstruction = h[:, -nclasses:]
    y_pred = np.argmax(hy_reconstruction, axis=1)
    m_score = sklearn.metrics.accuracy_score(Yt, y_pred)
    if score == 'AUC':
        raise NotImplementedError
    else:
        return (no_transfer_acc, m_score)
g = sns.JointGrid(x="Euclidean distance",
                  y="Posterior distance", data=df, height=5)
for _correctness, _df in df.groupby("Correctness"):
    sns.kdeplot(_df["Euclidean distance"],
                ax=g.ax_marg_x, legend=False, shade=True)
    sns.kdeplot(_df["Posterior distance"], ax=g.ax_marg_y,
                vertical=True, legend=False, shade=True)
    sns.kdeplot(_df["Euclidean distance"],
                _df["Posterior distance"], n_levels=10, ax=g.ax_joint)
ax = sns.scatterplot(
    x="Euclidean distance", y="Posterior distance", hue="Correctness",
    data=df.sample(frac=1, random_state=0), s=5, edgecolor=None, alpha=0.5,
    rasterized=True, ax=g.ax_joint
)
ax.set_xlabel(
    f"Euclidean distance (AUC = {sklearn.metrics.roc_auc_score(correctness, -edist):.3f})")
ax.set_ylabel(
    f"Posterior distance (AUC = {sklearn.metrics.roc_auc_score(correctness, -pdist):.3f})")
g.ax_joint.legend(frameon=False)
return ax
def accuracy_eval(label_tr, label_pred):
    """Return overall accuracy, mean per-class precision, Cohen's kappa and the confusion matrix."""
    overall_accuracy = metrics.accuracy_score(label_tr, label_pred)
    average_accuracy = np.mean(metrics.precision_score(label_tr, label_pred, average=None))
    kappa = metrics.cohen_kappa_score(label_tr, label_pred)
    cm = metrics.confusion_matrix(label_tr, label_pred)
    return overall_accuracy, average_accuracy, kappa, cm
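# A hedged usage sketch for accuracy_eval above; the labels are illustrative and the
# imports supply the same `metrics`/`np` names the fragment relies on.
import numpy as np
from sklearn import metrics
oa, aa, kappa, cm = accuracy_eval([0, 1, 1, 0, 1], [0, 1, 0, 0, 1])
print("overall:", oa, "mean per-class precision:", aa, "kappa:", kappa)
print(cm)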
dic['logloss'] = -1
if 'auc' in list_metrics:
    dic['auc'] = metrics.roc_auc_score(y_true, y_prob[:, 1])
if 'pres_0' in list_metrics:
    dic['pres_0'] = metrics.precision_score(y_true, y_pred, pos_label=0)
if 'pres_1' in list_metrics:
    dic['pres_1'] = metrics.precision_score(y_true, y_pred, pos_label=1)
if 'recall_0' in list_metrics:
    dic['recall_0'] = metrics.recall_score(y_true, y_pred, pos_label=0)
if 'recall_1' in list_metrics:
    dic['recall_1'] = metrics.recall_score(y_true, y_pred, pos_label=1)
if 'cm' in list_metrics:
    dic['cm'] = str(metrics.confusion_matrix(y_true, y_pred))
return dic
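# A hedged aside, not from the original code: pos_label selects which class is treated
# as "positive" in precision/recall, shown here with made-up labels.
from sklearn import metrics
y_true_demo = [0, 0, 1, 1]
y_pred_demo = [0, 1, 1, 1]
print(metrics.precision_score(y_true_demo, y_pred_demo, pos_label=1))  # 2/3
print(metrics.precision_score(y_true_demo, y_pred_demo, pos_label=0))  # 1.0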
centers = centers_from_docsets_labels(docs, m, range(m.size()))
predictions = []
for i, datum in enumerate(data):
    candidates = []
    for j, center in enumerate(centers):
        candidates.append(np.linalg.norm(center - datum))
    predictions.append(np.argmin(candidates))
print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f '
      % ("SMH", 0, 0,
         metrics.homogeneity_score(labels, predictions),
         metrics.completeness_score(labels, predictions),
         metrics.v_measure_score(labels, predictions),
         metrics.adjusted_rand_score(labels, predictions),
         metrics.adjusted_mutual_info_score(labels, predictions)))
# metrics.silhouette_score(data, predictions,
#                          metric='euclidean',
#                          sample_size=sample_size)))
bench_k_means(KMeans(init='k-means++', n_clusters=n_digits, n_init=m.size()),
              name="k-means++:" + str(m.size()), data=data)
rng_seed = 40  # control reproducibility
tprs_list: List[ndarray] = []
fprs_list: List[ndarray] = []
rng = np.random.RandomState(rng_seed)
num_possible_scores = len(np.unique(y_score))
while len(tprs_list) < num_bootstraps:
    # bootstrap by sampling with replacement on the prediction indices
    # (randint's upper bound is exclusive, so use len(y_score) to include every index)
    indices = rng.randint(0, len(y_score), len(y_score))
    if len(np.unique(y_true[indices])) < 2:
        # We need at least one positive and one negative sample for ROC AUC
        # to be defined: reject the sample
        continue
    # get ROC data for this bootstrap sample
    fpr, tpr, thresholds = metrics.roc_curve(
        y_true[indices], y_score[indices]
    )
    if len(fpr) < num_possible_scores + 1:
        # if not all scores are represented in this selection then a different
        # number of ROC thresholds will be defined. This causes problems.
        continue
    # remove first and last items - these are just the end points of the ROC
    if exclude_first_last:
        fpr = fpr[1:-1]
        tpr = tpr[1:-1]
    # append these bootstrap values to the lists
    tprs_list.append(tpr)
    fprs_list.append(fpr)
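# A hedged sketch of one possible downstream use, not from the original snippet:
# because every accepted bootstrap sample yields curves of the same length, the TPRs
# can be stacked to form a pointwise 95% band over the ROC curve.
tprs = np.vstack(tprs_list)
tpr_lo, tpr_hi = np.percentile(tprs, [2.5, 97.5], axis=0)
print("mean FPR grid:", np.vstack(fprs_list).mean(axis=0))
print("95% TPR band (low):", tpr_lo)
print("95% TPR band (high):", tpr_hi)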
print "Predictions:"
predictions_y = [min(len(class_mapping_id_to_origtext), x+1) for x in predictions_y]
print predictions_y
print "Classes:"
for (idx,lbl) in sorted_class_mappings:
print "%s - %s" % (idx, lbl)
#Confusion matrix
if has_gold:
print "Implicit confusion matrix"
import sklearn.metrics as skm
conf_matrix = skm.confusion_matrix(relations_y_gold_implicit, predictions_y)
print conf_matrix
print skm.accuracy_score(relations_y_gold_implicit, predictions_y)
# Print accuracy
# correct_predictions = float(sum(predictions_y == y_test))
# print("Total number of test examples: {}".format(len(y_test)))
# print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))
# set predicted labels
#for i, relation_dict in enumerate(implicit_relation_objects_list):
print "predictions_y cnt:%s" % len(predictions_y)
print "implicit_relation_objects_list cnt:%s" % len(implicit_relation_objects_list)
for i in range(0, len(predictions_y)):
# label_binary = predictions_y[i]
# label = next(obj for idx,obj in enumerate(label_binary) if obj == 1)+1
def mean_squared_error(y_test, y_pred):
    """Compute the mean squared error (MSE).

    Args:
        y_test: np.array, 1-D array of true target values.
        y_pred: np.array, 1-D array of predicted values.

    Returns:
        Mean squared error as a float.
    """
    return metrics.mean_squared_error(y_test, y_pred)
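# A hedged usage check of the wrapper above with made-up values; assumes
# `from sklearn import metrics` as the function body requires.
import numpy as np
from sklearn import metrics
print(mean_squared_error(np.array([1.0, 2.0, 3.0]),
                         np.array([1.0, 2.5, 2.0])))  # (0.0 + 0.25 + 1.0) / 3 ≈ 0.417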