continue
chunks = line[:-1].decode('utf-8').split()
flat_true.append(chunks[-2])
sequential_true[-1].append(chunks[-2])
flat_predictions.append(chunks[-1])
sequential_predictions[-1].append(chunks[-1])
# restoring the word-level tags
test_predictions_word, test_tags_word = [], []
for idx, n in enumerate(test_context_correspondence):
    for i in range(n):
        test_predictions_word.append(flat_predictions[idx])
        test_tags_word.append(flat_true[idx])
# sklearn metrics expect the ground truth first: f1_score(y_true, y_pred, ...)
print(f1_score(test_tags_word, test_predictions_word, average=None))
print(f1_score(test_tags_word, test_predictions_word, average='weighted'))
print("Precision: {}, recall: {}".format(precision_score(test_tags_word, test_predictions_word, average=None), recall_score(test_tags_word, test_predictions_word, average=None)))
else:
    train_tags = [tag_map[tag] for tag in train_tags]
    test_tags = [tag_map[tag] for tag in test_tags]
# data_type is 'token' or 'plain'
logger.info('start training...')
classifier_type = import_class(config['learning']['classifier']['module'])
# train the classifier(s)
classifier_map = map_classifiers(train_features, train_tags, classifier_type, data_type=data_type)
logger.info('classifying the test instances')
test_predictions = predict_all(test_features, classifier_map, data_type=data_type)
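
# The helpers above (import_class, map_classifiers, predict_all) are specific
# to that project; a rough stand-in for the same flow with plain scikit-learn,
# using made-up feature arrays, might look like this:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

tag_map = {'OK': 1, 'BAD': 0}
train_tags = [tag_map[t] for t in ['OK', 'BAD', 'OK', 'OK']]
test_tags = [tag_map[t] for t in ['OK', 'BAD']]
train_features = np.array([[0.1, 1.0], [0.9, 0.2], [0.2, 0.8], [0.3, 0.9]])
test_features = np.array([[0.15, 0.95], [0.85, 0.1]])

clf = LogisticRegression().fit(train_features, train_tags)
test_predictions = clf.predict(test_features)
print(f1_score(test_tags, test_predictions))
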
# Forward propagation
y = f_props(layers, x_embs, train=False)
loss = dy.binary_log_loss(y, t)
losses.append(loss)
preds.append(y)
mb_loss = dy.average(losses)
loss_all_valid.append(mb_loss.value())
pred_all_valid.extend(binary_pred(dy.concatenate_to_batch(preds).npvalue()).flatten().tolist())
print('EPOCH: %d, Train Loss: %.3f (F1: %.3f, Acc: %.3f), Valid Loss: %.3f (F1: %.3f, Acc: %.3f), Time: %.3f[s]' % (
    epoch+1,
    np.mean(loss_all_train),
    f1_score(train_y, pred_all_train),
    accuracy_score(train_y, pred_all_train),
    np.mean(loss_all_valid),
    f1_score(valid_y, pred_all_valid),
    accuracy_score(valid_y, pred_all_valid),
    time.time()-start_time,
))
# Save model
if V_STRATEGY in ['rand', 'static', 'non-static']:
    dy.save('./model', [V1] + layers)
else:
    dy.save('./model', [V1, V2] + layers)
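
# binary_pred above is project-specific; presumably it thresholds the sigmoid
# outputs at 0.5. A small NumPy/scikit-learn sketch of that metric computation,
# with made-up scores, would be:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

valid_y = np.array([1, 0, 1, 1, 0])
valid_scores = np.array([0.8, 0.3, 0.6, 0.4, 0.2])  # sigmoid outputs
pred_all_valid = (valid_scores > 0.5).astype(int)   # 0/1 predictions

print('F1: %.3f, Acc: %.3f' % (f1_score(valid_y, pred_all_valid),
                               accuracy_score(valid_y, pred_all_valid)))
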
y_pred3 = []
for istep, xy in enumerate(generate_rows(val_phrases, val_ys, batch_size, embeddings, 1)):
    x = xy[0]
    y = xy[1]['output']
    y_pred = model.predict(x=x, verbose=0)
    for k in range(len(y_pred)):
        y_true2.append(y[k][1])
        y_pred2.append(y_pred[k][1] > y_pred[k][0])
        y_pred3.append(y_pred[k][1])
    if istep >= nb_validation_steps:
        break
# because of the strong class imbalance (most samples have y=0), the quality
# of the resulting model is better evaluated with the f1 score
f1 = sklearn.metrics.f1_score(y_true=y_true2, y_pred=y_pred2)
logging.info('val f1={}'.format(f1))
if False:
    # draw the ROC curve and its AUC
    y_pred3 = y_pred[:, 1]
    fpr, recall, thresholds = sklearn.metrics.roc_curve(y_true=y_true2, y_score=y_pred3)
    roc_auc = sklearn.metrics.auc(fpr, recall)
    plt.title('Premise-question relevance ROC')
    plt.plot(fpr, recall, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
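
# A compact, self-contained version of the ROC plot in the disabled block
# above, using synthetic scores; it follows the same roc_curve/auc/matplotlib
# pattern.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

y_true = [0, 0, 1, 1, 0, 1]
y_score = [0.1, 0.4, 0.35, 0.8, 0.2, 0.7]

fpr, tpr, thresholds = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)

plt.title('ROC')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.legend(loc='lower right')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
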
13: 'Entity-Origin(e1,e2)', 14: 'Entity-Origin(e2,e1)',
15: 'Member-Collection(e1,e2)', 16: 'Member-Collection(e2,e1)',
17: 'Content-Container(e1,e2)', 18: 'Content-Container(e2,e1)'}
output_file = open(FLAGS.output_path, 'w')
target_file = open(FLAGS.target_path, 'w')
for i in range(len(all_predictions)):
output_file.write("{}\t{}\n".format(i, labelsMapping[all_predictions[i]]))
target_file.write("{}\t{}\n".format(i, labelsMapping[y_eval[i]]))
output_file.close()
target_file.close()
correct_predictions = float(sum(all_predictions == y_eval))
print("\nTotal number of test examples: {}".format(len(y_eval)))
print("Accuracy: {:g}".format(correct_predictions / float(len(y_eval))))
print("(2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}".format(
f1_score(y_eval, all_predictions, labels=np.array(range(1, 19)), average="macro")))
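
# Passing labels= restricts which classes enter the macro average; that is how
# the SemEval-style score above leaves the 'Other' class (label 0) out. A tiny
# illustration with three relation labels plus 0 as 'Other':
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 3, 1, 0, 2]
y_pred = [0, 1, 2, 1, 1, 3, 2]
print(f1_score(y_true, y_pred, labels=[1, 2, 3], average='macro'))
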
def dev_step(dev_x, dev_y):
    """
    Evaluates the model on a dev set
    """
    feed_dict = {
        leam.input_x: np.array(dev_x),
        leam.input_y: np.array(dev_y),
        leam.drop_out_prob: 1.0,
        leam.seq_length: np.array(self.get_length(dev_x))
    }
    dev_cost, dev_accuracy, predictions = sess.run([leam.loss, leam.accuracy, leam.predictions], feed_dict)
    y_true = [np.nonzero(x)[0][0] for x in dev_y]
    f1 = f1_score(np.array(y_true), predictions, average='micro')
    print("dev set: loss {:g}, acc {:g}, f1 {:g}\n".format(dev_cost, dev_accuracy, f1))
    return dev_cost, f1
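
# The np.nonzero(x)[0][0] trick above recovers the class index from a one-hot
# row; np.argmax does the same thing. A standalone sketch with fake one-hot
# labels and integer predictions:
import numpy as np
from sklearn.metrics import f1_score

dev_y = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]])  # one-hot labels
predictions = np.array([0, 1, 2, 2])                            # predicted class indices
y_true = np.argmax(dev_y, axis=1)
print(f1_score(y_true, predictions, average='micro'))
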
c_d_preds, c_d_loss = self._eval_predict(x, y)
# This converts the labels back into the original format. I.e. [0,1,1,0] will become [0,2,2,0] again if
# 1 didn't exist in the dataset.
# c_d_preds = [exp_config.label_list[int(pp)] for pp in c_d_preds]
# y_gts = [exp_config.label_list[pp] for pp in y]
num_batches += 1
predictions_diag += list(c_d_preds)
diag_loss_ii += c_d_loss
predictions_diag_gt += list(y)
avg_loss = (diag_loss_ii / num_batches)
average_mode = 'binary' if self.nlabels == 2 else 'micro'
f1_diag_score = f1_score(np.asarray(predictions_diag_gt), np.asarray(predictions_diag), average=average_mode)
logging.info(' Average loss: %0.04f, diag f1_score: %0.04f' % (avg_loss, f1_diag_score, ))
return avg_loss, f1_diag_score
def evaluate(self, x, y):
    # evaluate the f1 score
    last, out = self.model(x)
    y_pred = nn.LogSoftmax(dim=1)(out)
    # multiclass f1 score over classes [-1, 0, 1] (encoded as 0, 1, 2 for sklearn)
    return f1_score(y, torch.argmax(y_pred, 1).data.numpy(), average='macro')
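
# A self-contained version of the same pattern: turn a batch of logits into
# class indices with torch.argmax and score them with scikit-learn. The tensors
# below are made up. (argmax of the raw logits equals argmax of the log-softmax,
# so the softmax step is optional when only the predicted class is needed.)
import torch
from sklearn.metrics import f1_score

logits = torch.tensor([[2.0, 0.1, -1.0],
                       [0.2, 1.5, 0.3],
                       [-0.5, 0.1, 2.2]])
y = [0, 1, 1]  # true classes (e.g. -1/0/1 remapped to 0/1/2)
y_pred = torch.argmax(logits, dim=1).numpy()
print(f1_score(y, y_pred, average='macro'))
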
Return a summary string together with the accuracy, the weighted F1 score, and the loss.
Batch evaluation saves memory and enables this to run on smaller GPUs.
sess: the session in which the model has been trained.
op: the Tensor that returns the number of correct predictions.
data: size N x M
    N: number of signals (samples)
    M: number of vertices (features)
labels: size N
    N: number of signals (samples)
"""
t_wall = time.time()
predictions, loss = self.predict(data, labels, sess)
ncorrects = sum(predictions == labels)
accuracy = 100 * sklearn.metrics.accuracy_score(labels, predictions)
f1 = 100 * sklearn.metrics.f1_score(labels, predictions, average='weighted')
string = 'accuracy: {:.2f} ({:d} / {:d}), f1 (weighted): {:.2f}, loss: {:.2e}'.format(
    accuracy, ncorrects, len(labels), f1, loss)
if sess is None:
    string += '\ntime: {:.0f}s '.format(time.time()-t_wall)
return string, accuracy, f1, loss
def _scorer(clf, X, y):
    n_class = len(np.unique(y))
    if n_class == 2:
        if hasattr(clf, 'predict_proba'):
            ypred = clf.predict_proba(X)[:, 1]
        elif hasattr(clf, 'decision_function'):
            ypred = clf.decision_function(X)
        else:
            ypred = clf.predict(X)
        score = roc_auc_score(y, ypred)
    else:
        # the default average='binary' raises an error with more than two classes
        score = f1_score(y, clf.predict(X), average='macro')
    return score
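
# _scorer has the (estimator, X, y) signature scikit-learn accepts for a custom
# scoring callable, so it can be passed straight to cross_val_score. A small
# usage sketch on a toy multiclass problem, assuming numpy and the sklearn
# metrics used inside _scorer are already imported:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)
print(cross_val_score(clf, X, y, cv=3, scoring=_scorer))
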
----------
y_test : numpy.array or list
    target results
y_predict : numpy.array or list
    predicted results
n_classes : int
    number of classes

Examples
--------
>>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes)
"""
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
c_mat = confusion_matrix(y_test, y_predict, labels=[x for x in range(n_classes)])
f1 = f1_score(y_test, y_predict, average=None, labels=[x for x in range(n_classes)])
f1_macro = f1_score(y_test, y_predict, average='macro')
acc = accuracy_score(y_test, y_predict)
print('confusion matrix: \n', c_mat)
print('f1-score:', f1)
print('f1-score(macro):', f1_macro)  # same output as f1_score(y_true, y_pred, average='macro')
print('accuracy-score:', acc)
return c_mat, f1, acc, f1_macro
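
# A quick usage sketch for the evaluation() helper documented above, with a
# made-up 3-class toy example (the input arrays are illustrative only):
import numpy as np

y_test = np.array([0, 1, 2, 2, 1, 0])
y_predict = np.array([0, 2, 2, 2, 1, 1])
c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, 3)
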