How to use the implicit.datasets._download module in implicit

To help you get started, we’ve selected a few implicit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github benfred / implicit / implicit / datasets / million_song_dataset.py View on Github external
def get_msd_taste_profile():
    """Load the taste profile subset of the million song dataset.

    Dataset homepage:
    https://labrosa.ee.columbia.edu/millionsong/tasteprofile

    The HDF5 file is looked up in the local cache directory and is
    downloaded from ``URL`` first when it is not already there.

    Returns a tuple of (trackinfo, users, plays):

    * trackinfo is an array of tuples (trackid, artist, album, song name),
      where the position matches the row id of the plays matrix.
    * users is an array of the user identifiers.
    * plays is a CSR matrix whose rows are tracks, whose columns are users
      and whose values are the number of plays.
    """
    cache_path = os.path.join(_download.LOCAL_CACHE_DIR, "msd_taste_profile.hdf5")

    if os.path.isfile(cache_path):
        log.info("Using cached dataset at '%s'", cache_path)
    else:
        log.info("Downloading dataset to '%s'", cache_path)
        _download.download_file(URL, cache_path)

    with h5py.File(cache_path, 'r') as hdf:
        # The sparse matrix is stored as its three CSR component arrays.
        group = hdf.get('track_user_plays')
        plays = csr_matrix(tuple(group.get(part) for part in ('data', 'indices', 'indptr')))
        tracks = np.array(hdf['track'])
        users = np.array(hdf['user'])

    return tracks, users, plays
github benfred / implicit / implicit / datasets / movielens.py View on Github external
---------
    variant : string
        Which version of the movielens dataset to download. Should be one of '20m', '10m',
        '1m' or '100k'.

    Returns
    -------
    movies : ndarray
        An array of the movie titles.
    ratings : csr_matrix
        A sparse matrix where the row is the movieId, the column is the userId and the value is
        the rating.
    """
    filename = "movielens_%s.hdf5" % variant

    path = os.path.join(_download.LOCAL_CACHE_DIR, filename)
    if not os.path.isfile(path):
        log.info("Downloading dataset to '%s'", path)
        _download.download_file(URL_BASE + filename, path)
    else:
        log.info("Using cached dataset at '%s'", path)

    with h5py.File(path, 'r') as f:
        m = f.get('movie_user_ratings')
        plays = csr_matrix((m.get('data'), m.get('indices'), m.get('indptr')))
        return np.array(f['movie']), plays
github benfred / implicit / implicit / datasets / lastfm.py View on Github external
def get_lastfm():
    """Load the lastfm360k dataset, downloading it locally if necessary.

    The HDF5 file is looked up in the local cache directory; when it is
    missing it is downloaded from ``URL`` before being read.

    Returns a tuple of (artistids, userids, plays) where plays is a CSR
    matrix assembled from the file's 'artist_user_plays' group.
    """
    cache_path = os.path.join(_download.LOCAL_CACHE_DIR, "lastfm_360k.hdf5")

    if os.path.isfile(cache_path):
        log.info("Using cached dataset at '%s'", cache_path)
    else:
        log.info("Downloading dataset to '%s'", cache_path)
        _download.download_file(URL, cache_path)

    with h5py.File(cache_path, 'r') as hdf:
        # The sparse matrix is stored as its three CSR component arrays.
        group = hdf.get('artist_user_plays')
        values = group.get('data')
        col_indices = group.get('indices')
        row_pointers = group.get('indptr')
        plays = csr_matrix((values, col_indices, row_pointers))

        artists = np.array(hdf['artist'])
        users = np.array(hdf['user'])

    return artists, users, plays