Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
LF_voltage_row_temp,
LF_voltage_row_part,
LF_typ_row,
LF_complement_left_row,
LF_too_many_numbers_row,
LF_temp_on_high_page_num,
LF_temp_outside_table,
LF_not_temp_relevant,
]
labeler.update(split=0, lfs=[stg_temp_lfs_2, ce_v_max_lfs], parallelism=PARALLEL)
assert session.query(Label).count() == 6478
assert session.query(LabelKey).count() == 16
L_train = labeler.get_label_matrices(train_cands)
assert L_train[0].shape == (3493, 16)
gen_model = LabelModel()
gen_model.fit(L_train=L_train[0], n_epochs=500, log_freq=100)
train_marginals = gen_model.predict_proba(L_train[0])
diffs = train_marginals.max(axis=1) - train_marginals.min(axis=1)
train_idxs = np.where(diffs > 1e-6)[0]
train_dataloader = EmmentalDataLoader(
task_to_label_dict={ATTRIBUTE: "labels"},
dataset=FonduerDataset(
ATTRIBUTE,
train_cands[0],
F_train[0],
emb_layer.word2id,
train_marginals,
train_idxs,
def test_model_loss(self):
    """Training longer should not increase the loss; an absurd lr must diverge to NaN."""
    votes = np.array([[0, -1, 0], [0, 1, 0]])
    model = LabelModel(cardinality=2, verbose=False)
    # Baseline loss after a single epoch.
    model.fit(votes, n_epochs=1)
    loss_one_epoch = model._loss_mu().item()
    # Ten epochs should do at least as well.
    model.fit(votes, n_epochs=10)
    loss_ten_epochs = model._loss_mu().item()
    self.assertLessEqual(loss_ten_epochs, loss_one_epoch)
    # A huge learning rate should blow the loss up to NaN and raise.
    with self.assertRaisesRegex(Exception, "Loss is NaN."):
        model.fit(votes, n_epochs=10, lr=1e8)
def test_score(self):
    """Check accuracy/coverage metrics from score() against hand-computed values."""
    votes = np.array([[1, 1, 0], [-1, -1, -1], [1, 0, 1]])
    gold = np.array([1, 0, 1])
    model = LabelModel(cardinality=2, verbose=False)
    model.fit(votes, n_epochs=100)
    results = model.score(votes, gold, metrics=["accuracy", "coverage"])
    # The middle row is all abstains, so prediction there is -1.
    np.testing.assert_array_almost_equal(model.predict(votes), np.array([1, -1, 1]))
    self.assertEqual(results, dict(accuracy=1.0, coverage=2 / 3))

    # With clamped (near-uniform) parameters accuracy drops to chance.
    votes = np.array([[1, 0, 1], [1, 0, 1]])
    model = self._set_up_model(votes)
    model.mu = nn.Parameter(model.mu_init.clone().clamp(0.01, 0.99))
    results = model.score(votes, Y=np.array([0, 1]))
    self.assertEqual(results, dict(accuracy=0.5))
def test_scheduler_init(self):
    """Each lr_scheduler option should instantiate the matching torch scheduler."""
    votes = np.array([[0, -1, 0], [0, 1, 0]])
    model = LabelModel()
    # "constant" means no scheduler object at all.
    model.fit(votes, lr_scheduler="constant", n_epochs=1)
    self.assertIsNone(model.lr_scheduler)
    # The remaining options map onto concrete torch.optim.lr_scheduler classes.
    model.fit(votes, lr_scheduler="linear", n_epochs=1)
    self.assertIsInstance(model.lr_scheduler, optim.lr_scheduler.LambdaLR)
    model.fit(votes, lr_scheduler="exponential", n_epochs=1)
    self.assertIsInstance(model.lr_scheduler, optim.lr_scheduler.ExponentialLR)
    model.fit(votes, lr_scheduler="step", n_epochs=1)
    self.assertIsInstance(model.lr_scheduler, optim.lr_scheduler.StepLR)
def test_label_model_sparse(self) -> None:
    """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

    This tests the common setting where LFs abstain most of the time, which can
    cause issues for example if parameter clamping set too high (e.g. see Issue
    #1422).
    """
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.n, self.m, self.cardinality, abstain_multiplier=1000.0
    )

    # Fit the model on the sparse matrix.
    model = LabelModel(cardinality=self.cardinality, verbose=False)
    model.fit(L, n_epochs=1000, lr=0.01, seed=123)

    # Estimated LF conditional probabilities should be close to the truth.
    np.testing.assert_array_almost_equal(P, model.get_conditional_probs(), decimal=2)

    # Accuracy is measured only over non-abstained predictions.
    preds = model.predict(L, tie_break_policy="abstain")
    (voted,) = np.where(preds != -1)
    hits = np.where(preds[voted] == Y[voted], 1, 0).sum()
    self.assertGreaterEqual(hits / len(voted), 0.65)

    # Per issue #1422: any row where at least one LF voted must get a label.
    rows_with_votes = np.where((L + 1).sum(axis=1) != 0, 1, 0).sum()
    self.assertEqual(len(voted), rows_with_votes)
def test_optimizer(self):
    """All supported optimizer names fit cleanly; an unknown name raises ValueError."""
    votes = np.array([[0, -1, 0], [0, 1, 0]])
    model = LabelModel(cardinality=2, verbose=False)
    for opt_name in ("sgd", "adam", "adamax"):
        model.fit(votes, n_epochs=1, optimizer=opt_name)
    with self.assertRaisesRegex(ValueError, "Unrecognized optimizer option"):
        model.fit(votes, n_epochs=1, optimizer="bad_opt")
def test_lr_scheduler(self):
    """All supported scheduler names fit cleanly; an unknown name raises ValueError."""
    votes = np.array([[0, -1, 0], [0, 1, 0]])
    model = LabelModel(cardinality=2, verbose=False)
    # Default (no scheduler argument) must also work.
    model.fit(votes, n_epochs=1)
    for sched_name in ("constant", "linear", "exponential", "step"):
        model.fit(votes, n_epochs=1, lr_scheduler=sched_name)
    with self.assertRaisesRegex(ValueError, "Unrecognized lr scheduler option"):
        model.fit(votes, n_epochs=1, lr_scheduler="bad_scheduler")
def test_save_and_load(self):
    """A saved-then-reloaded model must reproduce the original predictions.

    Fixes two defects in the original:
    - ``dir_path + "label_model.pkl"`` lacked a path separator, so the pickle
      was written *beside* the temp directory (e.g. ``/tmp/tmpXYZlabel_model.pkl``)
      and ``shutil.rmtree(dir_path)`` left it behind as a leaked file.
      ``os.path.join`` places it inside the directory instead.
    - Cleanup now runs in ``finally`` so the temp dir is removed even if an
      assertion or save/load call raises.
    """
    import os

    L = np.array([[0, -1, 0], [0, 1, 1]])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=1)
    original_preds = label_model.predict(L)

    dir_path = tempfile.mkdtemp()
    try:
        save_path = os.path.join(dir_path, "label_model.pkl")
        label_model.save(save_path)
        label_model_new = LabelModel(cardinality=2, verbose=False)
        label_model_new.load(save_path)
        loaded_preds = label_model_new.predict(L)
    finally:
        # Remove the temp dir (and the pickle inside it) unconditionally.
        shutil.rmtree(dir_path)
    np.testing.assert_array_equal(loaded_preds, original_preds)
def test_optimizer_init(self):
    """Each optimizer option should instantiate the matching torch optimizer class."""
    votes = np.array([[0, -1, 0], [0, 1, 0]])
    model = LabelModel()
    # Map each accepted option string to the torch class it must produce.
    expected = (
        ("sgd", optim.SGD),
        ("adam", optim.Adam),
        ("adamax", optim.Adamax),
    )
    for opt_name, opt_cls in expected:
        model.fit(votes, optimizer=opt_name, n_epochs=1)
        self.assertIsInstance(model.optimizer, opt_cls)
    with self.assertRaisesRegex(ValueError, "Unrecognized optimizer"):
        model.fit(votes, optimizer="bad_optimizer", n_epochs=1)
[0.20, 0.75, 0.3],
[0.05, 0.10, 0.6],
# LF 1
[0.25, 0.55, 0.3],
[0.15, 0.45, 0.4],
[0.20, 0.00, 0.3],
# LF 2
[0.5, 0.15, 0.2],
[0.3, 0.65, 0.2],
[0.2, 0.20, 0.6],
]
)
mu = mu[:, [1, 2, 0]]
# First test: Two "good" LFs
label_model = LabelModel(cardinality=3, verbose=False)
label_model._set_class_balance(None, None)
label_model.m = 3
label_model.mu = nn.Parameter(torch.from_numpy(mu))
label_model._break_col_permutation_symmetry()
self.assertEqual(label_model.mu.data[0, 0], 0.75)
self.assertEqual(label_model.mu.data[1, 1], 0.75)
# Test with non-uniform class balance
# It should not consider the "correct" permutation as it does not commute
label_model = LabelModel(cardinality=3, verbose=False)
label_model._set_class_balance([0.7, 0.2, 0.1], None)
label_model.m = 3
label_model.mu = nn.Parameter(torch.from_numpy(mu))
label_model._break_col_permutation_symmetry()
self.assertEqual(label_model.mu.data[0, 0], 0.15)
self.assertEqual(label_model.mu.data[1, 1], 0.3)