How to use the implicit.nearest_neighbours.bm25_weight function in implicit

To help you get started, we’ve selected a few implicit examples based on popular ways the library is used in public projects.

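Before the project snippets below, a minimal sketch of the function in isolation may help; the toy matrix here is made up, and the exact matrix orientation implicit expects (users-by-items vs. items-by-users) depends on the library version:

import implicit
import numpy as np
from scipy.sparse import csr_matrix

# toy play-count matrix; real data would come from a loader such as
# implicit.datasets.lastfm.get_lastfm (orientation depends on implicit version)
plays = csr_matrix(np.array([[3, 0, 1],
                             [0, 5, 0],
                             [2, 0, 4]], dtype=np.float64))

# reweight the raw counts with BM25: K1 controls how quickly repeated plays
# saturate, B controls length normalization
weighted = implicit.nearest_neighbours.bm25_weight(plays, K1=100, B=0.8)

# the result is sparse as well - convert to CSR and fit a model on it
model = implicit.als.AlternatingLeastSquares(factors=8)
model.fit(weighted.tocsr())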

github erikbern / ann-benchmarks / ann_benchmarks / datasets.py
    # http://www.benfrederickson.com/approximate-nearest-neighbours-for-recommender-systems/  # noqa

    # The dataset is from "Last.fm Dataset - 360K users":
    # http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-360K.html  # noqa

    # This requires the implicit package to generate the factors
    # (on my desktop/gpu this only takes 4-5 seconds to train - but
    # could take 1-2 minutes on a laptop)
    from implicit.datasets.lastfm import get_lastfm
    from implicit.approximate_als import augment_inner_product_matrix
    import implicit
    import numpy

    # train an als model on the lastfm data
    _, _, play_counts = get_lastfm()
    model = implicit.als.AlternatingLeastSquares(factors=n_dimensions)
    model.fit(implicit.nearest_neighbours.bm25_weight(
        play_counts, K1=100, B=0.8))

    # transform item factors so that each one has the same norm,
    # and transform the user factors by appending a zero column
    _, item_factors = augment_inner_product_matrix(model.item_factors)
    user_factors = numpy.append(model.user_factors,
                                numpy.zeros((model.user_factors.shape[0], 1)),
                                axis=1)

    # only query the first 50k users (speeds things up significantly
    # without changing results)
    user_factors = user_factors[:test_size]

    # after that transformation a cosine lookup will return the same results
    # as the inner product on the untransformed data
    write_output(item_factors, user_factors, out_fn, 'angular')
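As a hedged aside on the augmentation step above (random data, not the benchmark dataset): once augment_inner_product_matrix pads every item vector to a common norm and a zero is appended to each query, ranking by cosine distance matches ranking by the original inner product.

import numpy as np
from implicit.approximate_als import augment_inner_product_matrix

item_factors = np.random.rand(100, 32).astype(np.float32)
user = np.random.rand(32).astype(np.float32)

# every augmented item row has the same norm, so cosine ordering reduces
# to inner-product ordering
_, augmented_items = augment_inner_product_matrix(item_factors)
augmented_user = np.append(user, 0).astype(np.float32)

by_inner_product = np.argsort(-item_factors.dot(user))
by_cosine = np.argsort(-augmented_items.dot(augmented_user)
                       / np.linalg.norm(augmented_items, axis=1))

print((by_inner_product == by_cosine).all())  # expected: True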

github benfred / implicit / benchmarks / benchmark_qmf.py
def run_benchmark(args):
    plays = bm25_weight(scipy.io.mmread(args.inputfile))

    qmf_time = benchmark_qmf(args.qmfpath, plays, args.factors, args.regularization,
                             args.iterations)

    implicit_time = benchmark_implicit(plays, args.factors, args.regularization, args.iterations)

    print("QMF finished in", qmf_time)
    print("Implicit finished in", implicit_time)
    print("Implicit is %s times faster" % (qmf_time / implicit_time))

github benfred / implicit / examples / lastfm.py
def calculate_similar_artists(output_filename, model_name="als"):
    """ generates a list of similar artists in lastfm by utiliizing the 'similar_items'
    api of the models """
    artists, users, plays = get_lastfm()

    # create a model from the input data
    model = get_model(model_name)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # let's weight these models by bm25_weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", model_name)
    start = time.time()
    model.fit(plays)
    logging.debug("trained model '%s' in %0.2fs", model_name, time.time() - start)

    # write out similar artists by popularity
    start = time.time()
    logging.debug("calculating top artists")

github benfred / implicit / examples / lastfm.py
def calculate_recommendations(output_filename, model_name="als"):
    """ Generates artist recommendations for each user in the dataset """
    # train the model based off input params
    artists, users, plays = get_lastfm()

    # create a model from the input data
    model = get_model(model_name)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # let's weight these models by bm25_weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building the approximate similar_items index
        model.approximate_similar_items = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", model_name)
    start = time.time()
    model.fit(plays)
    logging.debug("trained model '%s' in %0.2fs", model_name, time.time() - start)

    # generate recommendations for each user and write out to a file
    start = time.time()
    user_plays = plays.T.tocsr()
    with tqdm.tqdm(total=len(users)) as progress:
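The loop body is truncated here; a rough sketch of the recommend call it builds on follows, written against the older implicit API this example uses (item-by-user input, with recommend(userid, user_items) taking the full users-by-items CSR). Recent releases instead take a single user row and return separate id/score arrays, so treat this as illustrative only.

import numpy as np
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import bm25_weight

# toy items-by-users play counts, mirroring the snippet above
plays = csr_matrix(np.random.poisson(1.0, size=(20, 50)).astype(np.float64))

model = AlternatingLeastSquares(factors=8)
model.fit(bm25_weight(plays, K1=100, B=0.8).tocsr())

user_plays = plays.T.tocsr()  # users x items, as in the snippet
userid = 0
print(model.recommend(userid, user_plays, N=10))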

github benfred / implicit / benchmarks / benchmark_als.py
    parser.add_argument('--input', type=str, required=True,
                        dest='inputfile', help='dataset file in matrix market format')
    parser.add_argument('--graph', help='generates graphs',
                        action="store_true")
    parser.add_argument('--loss', help='test training loss',
                        action="store_true")
    parser.add_argument('--speed', help='test training speed',
                        action="store_true")

    args = parser.parse_args()
    if not (args.speed or args.loss):
        print("must specify at least one of --speed or --loss")
        parser.print_help()

    else:
        plays = bm25_weight(scipy.io.mmread(args.inputfile)).tocsr()
        logging.basicConfig(level=logging.DEBUG)

        if args.loss:
            acc = benchmark_accuracy(plays)
            json.dump(acc, open("als_accuracy.json", "w"))
            if args.graph:
                generate_loss_graph(acc, "als_accuracy.png")

        if args.speed:
            speed = benchmark_times(plays)
            json.dump(speed, open("als_speed.json", "w"))
            if args.graph:
                generate_speed_graph(speed, "als_speed.png")

github DomainGroupOSS / ml-recsys-tools / ml_recsys_tools / recommenders / implib_recommenders.py
    def _set_implib_train_mat(self, train_mat):
        # implib ALS expects matrix in items x users format
        self.implib_train_mat = train_mat.T
        if self.fit_params['use_bm25']:
            self.implib_train_mat = bm25_weight(
                self.implib_train_mat,
                K1=self.fit_params['bm25_k1'],
                B=self.fit_params['bm25_b'])
        self.model.regularization = \
            self.fit_params['regularization'] * self.implib_train_mat.nnz

github benfred / implicit / examples / movielens.py
    # remove things < min_rating, and convert to implicit dataset
    # by considering ratings as a binary preference only
    ratings.data[ratings.data < min_rating] = 0
    ratings.eliminate_zeros()
    ratings.data = np.ones(len(ratings.data))

    log.info("read data file in %s", time.time() - start)

    # generate a recommender model based off the input params
    if model_name == "als":
        model = AlternatingLeastSquares()

        # let's weight these models by bm25_weight.
        log.debug("weighting matrix by bm25_weight")
        ratings = (bm25_weight(ratings, B=0.9) * 5).tocsr()

    elif model_name == "bpr":
        model = BayesianPersonalizedRanking()

    elif model_name == "lmf":
        model = LogisticMatrixFactorization()

    elif model_name == "tfidf":
        model = TFIDFRecommender()

    elif model_name == "cosine":
        model = CosineRecommender()

    elif model_name == "bm25":
        model = BM25Recommender(B=0.2)
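The BM25Recommender built in that last branch is based on the same bm25_weight transform internally, so the neighbourhood-based path can be exercised directly; a short sketch on toy data:

import numpy as np
from scipy.sparse import csr_matrix
from implicit.nearest_neighbours import BM25Recommender

# toy ratings matrix standing in for the MovieLens data loaded above
ratings = csr_matrix(np.array([[5, 0, 3, 0],
                               [0, 4, 0, 2],
                               [2, 0, 5, 0]], dtype=np.float64))

# K1 and B are the same knobs passed to bm25_weight elsewhere on this page
model = BM25Recommender(K1=100, B=0.2)
model.fit(ratings)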