# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Regression test: calling .tocsr on a COO matrix with duplicate entries
# changes its data arrays in-place, leading to out-of-bounds
# array accesses in the WARP code.
# Reported in https://github.com/lyst/lightfm/issues/117.
rows, cols = (1000, 100)
mat = sp.random(rows, cols)
mat.data[:] = 1
# Duplicate entries in the COO matrix
mat.data = np.concatenate((mat.data, mat.data[:1000]))
mat.row = np.concatenate((mat.row, mat.row[:1000]))
mat.col = np.concatenate((mat.col, mat.col[:1000]))
for loss in ("warp", "bpr", "warp-kos"):
    # Bug fix: use the loop variable rather than the hard-coded "warp",
    # otherwise the bpr and warp-kos losses are never exercised.
    model = LightFM(loss=loss)
    model.fit(mat)
def test_movielens_accuracy_fit():
    """Fitting on the MovieLens train split should reach known AUC thresholds."""
    model = LightFM(random_state=SEED)
    model.fit(train, epochs=10)

    predictions = {
        name: model.predict(split.row, split.col)
        for name, split in (("train", train), ("test", test))
    }

    assert roc_auc_score(train.data, predictions["train"]) > 0.84
    assert roc_auc_score(test.data, predictions["test"]) > 0.76
def test_random_train_test_split(test_percentage):
    """random_train_test_split should honour the requested split ratio and
    produce disjoint train/test interaction sets."""
    interactions = fetch_movielens()["train"]
    train, test = random_train_test_split(
        interactions, test_percentage=test_percentage
    )

    observed_ratio = test.nnz / float(interactions.nnz)
    assert observed_ratio == test_percentage
    _assert_disjoint(train, test)
def test_fitting():
    """Dataset.fit should size interaction and identity-feature matrices
    from the supplied user/item id iterables."""
    n_users, n_items = 10, 100
    ds = Dataset()
    ds.fit(range(n_users), range(n_items))

    assert ds.interactions_shape() == (n_users, n_items)
    # Identity features are on by default: one feature per user/item.
    assert ds.user_features_shape() == (n_users, n_users)
    assert ds.item_features_shape() == (n_items, n_items)

    assert ds.build_interactions([])[0].shape == (n_users, n_items)
    assert ds.build_user_features([]).getnnz() == n_users
    assert ds.build_item_features([]).getnnz() == n_items
def test_exceptions():
    """Unknown user/item ids must be rejected until registered via fit_partial."""
    n_users, n_items = 10, 100
    ds = Dataset()
    ds.fit(range(n_users), range(n_items))

    # Ids outside the fitted ranges raise ValueError.
    for interaction in ((n_users + 1, 0), (0, n_items + 1)):
        with pytest.raises(ValueError):
            ds.build_interactions([interaction])

    # After registering the new ids, the same interactions are accepted.
    ds.fit_partial([n_users + 1], [n_items + 1])
    ds.build_interactions([(n_users + 1, 0)])
    ds.build_interactions([(0, n_items + 1)])
def test_build_features():
    """build_user_features should produce one entry per (user, feature) pair."""
    n_users, n_items = 10, 100
    ds = Dataset(user_identity_features=False, item_identity_features=False)

    user_feature_names = ["user:{}".format(x) for x in range(n_users)]
    item_feature_names = ["item:{}".format(x) for x in range(n_items)]
    ds.fit(
        range(n_users),
        range(n_items),
        user_feature_names,
        item_feature_names,
    )

    # Build from lists: every user is given every user feature.
    user_features = ds.build_user_features(
        [(user_id, user_feature_names) for user_id in range(n_users)]
    )
    assert user_features.getnnz() == n_users ** 2
def test_fitting_no_identity():
    """With identity features disabled, the feature matrices are empty."""
    n_users, n_items = 10, 100
    ds = Dataset(user_identity_features=False, item_identity_features=False)
    ds.fit(range(n_users), range(n_items))

    assert ds.interactions_shape() == (n_users, n_items)
    # No identity features means zero feature columns.
    assert ds.user_features_shape() == (n_users, 0)
    assert ds.item_features_shape() == (n_items, 0)

    assert ds.build_interactions([])[0].shape == (n_users, n_items)
    assert ds.build_user_features([], normalize=False).getnnz() == 0
    assert ds.build_item_features([], normalize=False).getnnz() == 0
def test_param_sanity():
    """Invalid hyperparameters should be rejected at construction time."""
    # Negative component counts and regularization weights trip assertions.
    for bad_kwargs in (
        {"no_components": -1},
        {"user_alpha": -1.0},
        {"item_alpha": -1.0},
    ):
        with pytest.raises(AssertionError):
            LightFM(**bad_kwargs)

    # max_sampled is validated explicitly and raises ValueError instead.
    with pytest.raises(ValueError):
        LightFM(max_sampled=-1.0)
def test_intersections_check():
    """Evaluation functions must raise ValueError when train and test
    interactions overlap and check_intersections is enabled."""
    no_users, no_items = (10, 100)
    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        evaluation.auc_score(
            model, train, train_interactions=train, check_intersections=True
        )
    with pytest.raises(ValueError):
        evaluation.recall_at_k(
            model, train, train_interactions=train, check_intersections=True
        )
    with pytest.raises(ValueError):
        evaluation.precision_at_k(
            model, train, train_interactions=train, check_intersections=True
        )  # bug fix: restore the closing parenthesis dropped from this call
def test_warp_precision_high_interaction_values():
    """WARP accuracy should hold up when interaction weights are scaled up."""
    model = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)

    scaled_train = train.copy()
    scaled_train.data = scaled_train.data * 5

    model.fit_partial(scaled_train, epochs=10)

    train_precision, test_precision, full_train_auc, full_test_auc = _get_metrics(
        model, scaled_train, test
    )

    assert train_precision > 0.45
    assert test_precision > 0.07
    assert full_train_auc > 0.93
    assert full_test_auc > 0.9