How to use the lightfm.datasets.fetch_movielens function in lightfm

To help you get started, we’ve selected a few lightfm examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lyst / lightfm / tests / test_cross_validation.py View on Github external
def test_random_train_test_split(test_percentage):
    """Check the size and disjointness of a random train/test split.

    The MovieLens training interactions are split with
    ``random_train_test_split``; the share of stored entries landing in the
    test part must equal ``test_percentage`` and the two parts must satisfy
    ``_assert_disjoint``.
    """
    interactions = fetch_movielens()["train"]

    split = random_train_test_split(interactions, test_percentage=test_percentage)
    train, test = split

    # NOTE(review): this is an exact float comparison; it only holds when the
    # split size divides cleanly for the given percentage.
    held_out_fraction = test.nnz / float(interactions.nnz)
    assert held_out_fraction == test_percentage
    _assert_disjoint(train, test)
github lyst / lightfm / tests / test_movielens.py View on Github external
def test_movielens_genre_accuracy():
    """Train with genre-only item features and check ROC AUC lower bounds.

    Item features are fetched with indicator features switched off and genre
    features switched on, then used both for fitting and for prediction on
    the module-level ``train`` and ``test`` interactions.
    """
    movielens_data = fetch_movielens(indicator_features=False, genre_features=True)
    item_features = movielens_data["item_features"]

    # With genre features only, there must be fewer columns than rows.
    column_count = item_features.shape[1]
    row_count = item_features.shape[0]
    assert column_count < row_count

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=10)

    # Score the train split first, then the test split.
    train_scores, test_scores = (
        model.predict(split.row, split.col, item_features=item_features)
        for split in (train, test)
    )

    assert roc_auc_score(train.data, train_scores) > 0.75
    assert roc_auc_score(test.data, test_scores) > 0.69
github jfkirk / tensorrec / test / datasets.py View on Github external
def get_movielens_100k(min_positive_score=4, negative_value=0):
    """Fetch MovieLens through ``datasets.fetch_movielens`` and binarize the ratings.

    Both the ``'train'`` and ``'test'`` matrices have their stored ratings
    replaced: a rating of at least ``min_positive_score`` becomes ``1`` and
    every other stored rating becomes ``negative_value``.

    NOTE(review): this excerpt ends before any ``return`` statement, so the
    value handed back to callers is not visible here.
    """
    # Request both indicator and genre item features from the fetcher.
    movielens_100k_dict = datasets.fetch_movielens(indicator_features=True, genre_features=True)

    def flip_ratings(ratings_matrix):
        # Rebinds ``.data`` on the matrix that was passed in to a newly built
        # array of 1 / negative_value entries, then returns that same matrix.
        ratings_matrix.data = np.array([1 if rating >= min_positive_score else negative_value
                                        for rating in ratings_matrix.data])
        return ratings_matrix

    test_interactions = flip_ratings(movielens_100k_dict['test'])
    train_interactions = flip_ratings(movielens_100k_dict['train'])

    # Create indicator features for all users: an identity matrix with one
    # row/column per row of the training interactions.
    num_users = train_interactions.shape[0]
    user_features = sp.identity(num_users)

    # Movie titles, taken from the fetcher's 'item_labels' entry.
    titles = movielens_100k_dict['item_labels']
github lyst / lightfm / tests / test_movielens.py View on Github external
user_features = sp.identity(no_users, dtype=np.int32).tocsr()
    item_features = sp.identity(no_items, dtype=np.int32).tocsr()

    return (user_features.tocsr(), item_features.tocsr())


def _binarize(dataset):
    """Turn the stored ratings of ``dataset`` into +1 / -1 labels, in place.

    Entries of ``dataset.data`` that are at least 4.0 become 1.0 and all
    remaining entries become -1.0. The same ``dataset`` object is returned.
    """
    ratings = dataset.data

    # Compute the mask once, before any value is overwritten.
    is_positive = ratings >= 4.0

    ratings[~is_positive] = -1.0
    ratings[is_positive] = 1.0

    return dataset


# Module-level data used by the tests in this module: the MovieLens splits
# with their ratings passed through _binarize, plus the feature matrices
# built from each split by _get_feature_matrices.
movielens = fetch_movielens()
train = _binarize(movielens["train"])
test = _binarize(movielens["test"])

train_user_features, train_item_features = _get_feature_matrices(train)
test_user_features, test_item_features = _get_feature_matrices(test)


def test_movielens_accuracy():
    """Fit a LightFM model on ``train`` and check its training ROC AUC.

    The model is built with only ``random_state`` set and trained for 10
    epochs without any feature matrices.

    NOTE(review): ``test_predictions`` is computed but never asserted on in
    this excerpt; the snippet may be cut off before a test-set assertion.
    """

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    # Predict a score for every stored (row, col) pair of each split.
    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
github lyst / lightfm / tests / test_movielens.py View on Github external
def test_movielens_both_accuracy():
    """
    Combining genre metadata with per-item indicator features should
    give accuracy at least as good as per-item features alone
    (possibly at the cost of more training epochs).
    """
    movielens_data = fetch_movielens(indicator_features=True, genre_features=True)
    item_features = movielens_data["item_features"]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=15)

    # Score the train split first, then the test split.
    train_scores, test_scores = (
        model.predict(split.row, split.col, item_features=item_features)
        for split in (train, test)
    )

    assert roc_auc_score(train.data, train_scores) > 0.84
    assert roc_auc_score(test.data, test_scores) > 0.75
github lyst / lightfm / tests / test_datasets.py View on Github external
def test_basic_fetching_movielens():
    """Check the types, shapes and labels returned by ``fetch_movielens``.

    Covers the default call, the ``genre_features=True`` call, and the
    ``ValueError`` expected when both feature kinds are disabled.
    """
    data = fetch_movielens()
    train_split = data["train"]
    test_split = data["test"]

    for split in (train_split, test_split):
        assert isinstance(split, sp.coo_matrix)

    assert train_split.shape == test_split.shape
    assert train_split.shape == (943, 1682)
    assert (train_split.getnnz() + test_split.getnnz()) == 100000

    # Default call: the feature labels are the very same object as the item labels.
    default_labels = data["item_feature_labels"]
    assert data["item_features"].shape == (1682, 1682)
    assert len(default_labels) == 1682
    assert default_labels is data["item_labels"]

    data = fetch_movielens(genre_features=True)
    genre_labels = data["item_feature_labels"]

    # With genre features the label list is a separate object and fixes the column count.
    assert data["item_features"].shape == (1682, len(genre_labels))
    assert genre_labels is not data["item_labels"]

    # Disabling both feature kinds must be rejected.
    with pytest.raises(ValueError):
        fetch_movielens(indicator_features=False, genre_features=False)
github lyst / lightfm / tests / test_datasets.py View on Github external
def test_basic_fetching_movielens():
    """Check the types, shapes and labels returned by ``fetch_movielens``.

    NOTE(review): this excerpt appears to be a shortened copy of the same
    test; it stops after the ``genre_features=True`` assertions.
    """

    data = fetch_movielens()

    # Both splits are COO sparse matrices.
    assert isinstance(data["train"], sp.coo_matrix)
    assert isinstance(data["test"], sp.coo_matrix)

    # Both splits share one shape and together hold 100000 stored entries.
    assert data["train"].shape == data["test"].shape
    assert data["train"].shape == (943, 1682)
    assert (data["train"].getnnz() + data["test"].getnnz()) == 100000

    # Default call: square item-feature matrix, and the feature labels are
    # the very same object as the item labels.
    assert data["item_features"].shape == (1682, 1682)
    assert len(data["item_feature_labels"]) == 1682
    assert data["item_feature_labels"] is data["item_labels"]

    data = fetch_movielens(genre_features=True)

    # With genre features the column count follows the feature-label list,
    # which is now a separate object from the item labels.
    assert data["item_features"].shape == (1682, len(data["item_feature_labels"]))
    assert data["item_feature_labels"] is not data["item_labels"]
github lyst / lightfm / tests / test_datasets.py View on Github external
assert data["train"].shape == data["test"].shape
    assert data["train"].shape == (943, 1682)
    assert (data["train"].getnnz() + data["test"].getnnz()) == 100000

    assert data["item_features"].shape == (1682, 1682)
    assert len(data["item_feature_labels"]) == 1682
    assert data["item_feature_labels"] is data["item_labels"]

    data = fetch_movielens(genre_features=True)

    assert data["item_features"].shape == (1682, len(data["item_feature_labels"]))
    assert data["item_feature_labels"] is not data["item_labels"]

    with pytest.raises(ValueError):
        data = fetch_movielens(indicator_features=False, genre_features=False)
github llSourcell / recommender_system_challenge / demo.py View on Github external
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

#CHALLENGE part 1 of 3 - write your own fetch and format method for a different recommendation
#dataset. Here are a good few: https://gist.github.com/entaroadun/1653794
#Also take a look at the fetch_movielens method to see what it's doing.
#

# Fetch the MovieLens data, passing min_rating=4.0 to fetch_movielens
# (presumably this keeps only ratings of 4.0 and above - confirm against the lightfm docs).
data = fetch_movielens(min_rating=4.0)

# Print the repr of the 'train' and 'test' entries of the fetched data.
print(repr(data['train']))
print(repr(data['test']))


#CHALLENGE part 2 of 3 - use 3 different loss functions (so 3 different models), compare results, print results for
#the best one. - Available loss functions are warp, logistic, bpr, and warp-kos.

# Create a LightFM model configured with the 'warp' loss.
model = LightFM(loss='warp')
# Train on the 'train' entry for 30 epochs, using 2 threads.
model.fit(data['train'], epochs=30, num_threads=2)


#CHALLENGE part 3 of 3 - Modify this function so that it parses your dataset correctly to retrieve