How to use the snorkel.analysis.metric_score function in snorkel

To help you get started, we’ve selected a few examples of metric_score, based on popular ways the function is used in public projects.
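
Broadly, metric_score takes gold labels plus either hard predictions (preds) or predicted probabilities (probs), the name of a metric, and an optional filter_dict for dropping values such as abstains (-1). Below is a minimal sketch pieced together from the call patterns in the excerpts that follow; the toy arrays and the probs_to_preds helper (imported here from snorkel.utils) are assumptions added for illustration, not taken from the examples themselves.

import numpy as np

from snorkel.analysis import metric_score
from snorkel.utils import probs_to_preds  # assumed import path for the helper

# Toy binary task: 5 data points, classes {0, 1}.
golds = np.array([0, 0, 1, 1, 1])        # ground-truth labels
probs = np.array(                        # predicted [P(class 0), P(class 1)] per point
    [[0.9, 0.1], [0.6, 0.4], [0.2, 0.8], [0.7, 0.3], [0.1, 0.9]]
)
preds = probs_to_preds(probs)            # hard predictions via argmax

# Label-based metrics use golds and preds.
acc = metric_score(golds, preds, probs=None, metric="accuracy")
f1 = metric_score(golds, preds=preds, metric="f1")

# Probability-based metrics such as roc_auc use probs instead of preds.
auc = metric_score(golds, probs=probs, metric="roc_auc")

print(f"accuracy={acc:.3f}  f1={f1:.3f}  roc_auc={auc:.3f}")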

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_fbeta(self):
        golds = np.array([0, 0, 0, 0, 1])
        preds = np.array([1, 1, 0, 0, 1])
        pre = metric_score(golds, preds, probs=None, metric="precision")
        rec = metric_score(golds, preds, probs=None, metric="recall")
        self.assertAlmostEqual(
            pre,
            metric_score(golds, preds, probs=None, metric="fbeta", beta=1e-6),
            places=2,
        )
        self.assertAlmostEqual(
            rec,
            metric_score(golds, preds, probs=None, metric="fbeta", beta=1e6),
            places=2,
        )

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_matthews(self):
        golds = np.array([0, 0, 0, 0, 1])
        preds = np.array([1, 0, 0, 0, 0])
        mcc = metric_score(golds, preds, probs=None, metric="matthews_corrcoef")
        self.assertAlmostEqual(mcc, -0.25)

        golds = np.array([0, 0, 0, 0, 1])
        preds = np.array([0, 0, 0, 0, 1])
        mcc = metric_score(golds, preds, probs=None, metric="matthews_corrcoef")
        self.assertAlmostEqual(mcc, 1.0)

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_f1_multiclass(self):
        golds = np.array([0, 0, 1, 1, 2])
        preds = np.array([1, 1, 0, 1, 2])
        score = metric_score(golds, preds, probs=None, metric="f1_micro")
        self.assertAlmostEqual(score, 0.4)

        score = metric_score(golds, preds, probs=None, metric="f1_macro")
        self.assertAlmostEqual(score, 0.47, places=2)

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_ignores(self):
        golds = np.array([0, 0, 0, 1, 1])
        preds = np.array([0, -1, 0, 1, 0])
        score = metric_score(golds, preds, probs=None, metric="accuracy")
        self.assertAlmostEqual(score, 0.6)
        score = metric_score(
            golds, preds, probs=None, metric="accuracy", filter_dict={"preds": [-1]}
        )
        self.assertAlmostEqual(score, 0.75)
        score = metric_score(
            golds, preds, probs=None, metric="accuracy", filter_dict={"golds": [0]}
        )
        self.assertAlmostEqual(score, 0.5)
        score = metric_score(
            golds,
            preds,
            probs=None,
            metric="accuracy",
            filter_dict={"golds": [1], "preds": [-1]},
        )
        self.assertAlmostEqual(score, 1.0)

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_accuracy_basic(self):
        golds = np.array([0, 0, 0, 1, 1])
        preds = np.array([0, 0, 0, 1, 0])
        score = metric_score(golds, preds, probs=None, metric="accuracy")
        self.assertAlmostEqual(score, 0.8)

snorkel-team / snorkel / test / analysis / test_metrics.py
def test_coverage(self):
        golds = np.array([0, 0, 0, 0, 1])
        preds = np.array([-1, -1, 0, 0, 0])
        score = metric_score(golds, preds, probs=None, metric="coverage")
        self.assertAlmostEqual(score, 0.6)
        score = metric_score(
            golds, preds, probs=None, filter_dict={"golds": [1]}, metric="coverage"
        )
        self.assertAlmostEqual(score, 0.5)

snorkel-team / snorkel-tutorials / spouse / spouse_demo.py
from utils import get_n_epochs

X_train = get_feature_arrays(df_train_filtered)
model = get_model()
batch_size = 64
model.fit(X_train, probs_train_filtered, batch_size=batch_size, epochs=get_n_epochs())

# %% [markdown]
# Finally, we evaluate the trained model by measuring its F1 score and ROC-AUC.

# %%
X_test = get_feature_arrays(df_test)
probs_test = model.predict(X_test)
preds_test = probs_to_preds(probs_test)
print(
    f"Test F1 when trained with soft labels: {metric_score(Y_test, preds=preds_test, metric='f1')}"
)
print(
    f"Test ROC-AUC when trained with soft labels: {metric_score(Y_test, probs=probs_test, metric='roc_auc')}"
)

snorkel-team / snorkel-tutorials / crowdsourcing / crowdsourcing_tutorial.py
# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling.model import LabelModel

# Train LabelModel.
label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=100, seed=123, log_freq=20, l2=0.1, lr=0.01)

# %% [markdown]
# As a spot-check for the quality of our LabelModel, we'll score it on the dev set.

# %%
from snorkel.analysis import metric_score

preds_dev = label_model.predict(L_dev)

acc = metric_score(Y_dev, preds_dev, probs=None, metric="accuracy")
print(f"LabelModel Accuracy: {acc:.3f}")

# %% [markdown]
# We see that we get very high accuracy on the development set.
# This is due to the abundance of high quality crowdworker labels.
# **Since we don't have these high quality crowdsourcing labels for the
# test set or new incoming data points, we can't use the LabelModel reliably
# at inference time.**
# In order to run inference on new incoming data points, we need to train a
# discriminative model over the tweets themselves.
# Let's generate a set of labels for that training set.

# %%
preds_train = label_model.predict(L_train)

# %% [markdown]
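
The crowdsourcing excerpt stops before the discriminative model is actually trained. As a rough, hypothetical sketch of how the LabelModel-generated preds_train and metric_score could fit together downstream (the bag-of-words features, the logistic regression classifier, and the train_texts, test_texts, and Y_test names are assumptions for illustration, not the tutorial's actual model):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

from snorkel.analysis import metric_score

# Placeholder names: train_texts / test_texts hold raw tweet text and Y_test
# holds gold test labels; none of these come from the excerpt above.
vectorizer = CountVectorizer(ngram_range=(1, 2))
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Drop any points the LabelModel abstained on (predicted -1) before training.
mask = preds_train != -1
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train[mask], preds_train[mask])

preds_test = clf.predict(X_test)
probs_test = clf.predict_proba(X_test)    # shape (n, 2) for this binary task

acc = metric_score(Y_test, preds_test, probs=None, metric="accuracy")
auc = metric_score(Y_test, probs=probs_test, metric="roc_auc")
print(f"Classifier accuracy: {acc:.3f}  ROC-AUC: {auc:.3f}")

Because this classifier only needs the generated labels at training time, it can be applied to new incoming tweets at inference time, which is exactly the gap the markdown cell above describes.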