import time
import implicit

def benchmark_implicit(ratings, factors, iterations=5, use_gpu=False):
    ratings = ratings.tocsr()
    times = {}
    for rank in factors:
        model = implicit.als.AlternatingLeastSquares(factors=rank,
                                                     iterations=iterations,
                                                     use_gpu=use_gpu)
        start = time.time()
        model.fit(ratings)
        elapsed = time.time() - start
        # take the average time over iterations to be consistent with the spark timings
        times[rank] = elapsed / iterations
        print("implicit. factors=%i took %.3f" % (rank, elapsed / iterations))
    return times
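
# Usage sketch for benchmark_implicit (not part of the original snippet): time a
# couple of factor sizes on a small random sparse matrix. The matrix shape,
# density and factor list below are made up purely for illustration.
import scipy.sparse as sp

toy_ratings = sp.random(1000, 500, density=0.01, format='coo', random_state=42)
results = benchmark_implicit(toy_ratings, factors=[32, 64], iterations=2)
print(results)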
def build(self):
    # define iALS model instance
    self._model = implicit.als.AlternatingLeastSquares(factors=self.rank,
                                                       regularization=self.regularization,
                                                       iterations=self.num_epochs,
                                                       num_threads=self.num_threads)
    # prepare input matrix for learning the model
    matrix = self.get_training_matrix()  # user_by_item sparse matrix
    matrix.data = self.confidence(matrix.data, alpha=self.alpha,
                                  weight=self.weight_func, epsilon=self.epsilon)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        # build the model
        # implicit expects an item_by_user matrix as input, so transpose
        self._model.fit(matrix.T)
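
# A minimal sketch of the confidence transform assumed by build() above. Only the
# argument names (alpha, weight, epsilon) are mirrored from the call; the linear and
# log weightings follow the Hu/Koren iALS paper and may differ from this project's
# actual confidence() implementation.
import numpy as np

def confidence(data, alpha=1.0, weight=None, epsilon=1.0):
    # linear confidence: c = alpha * r
    if weight is None or weight == 'linear':
        return alpha * data
    # log confidence: c = alpha * log(1 + r / epsilon)
    return alpha * np.log(1.0 + data / epsilon)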
# This hopefully replicates the experiments done in this post:
# http://www.benfrederickson.com/approximate-nearest-neighbours-for-recommender-systems/ # noqa
# The dataset is from "Last.fm Dataset - 360K users":
# http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-360K.html # noqa
# This requires the implicit package to generate the factors
# (on my desktop/gpu this only takes 4-5 seconds to train - but
# could take 1-2 minutes on a laptop)
import numpy
import implicit
from implicit.approximate_als import augment_inner_product_matrix
from implicit.datasets.lastfm import get_lastfm

# train an ALS model on the lastfm data
_, _, play_counts = get_lastfm()
model = implicit.als.AlternatingLeastSquares(factors=n_dimensions)
model.fit(implicit.nearest_neighbours.bm25_weight(play_counts, K1=100, B=0.8))

# transform the item factors so that each one has the same norm,
# and transform the user factors by appending a zero column
_, item_factors = augment_inner_product_matrix(model.item_factors)
user_factors = numpy.append(model.user_factors,
                            numpy.zeros((model.user_factors.shape[0], 1)),
                            axis=1)

# only query the first 50k users (speeds things up significantly
# without changing results)
user_factors = user_factors[:test_size]

# after that transformation a cosine lookup will return the same results
# as the inner product on the untransformed data
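
# Hedged sketch of the approximate lookup the comments above describe, using Annoy.
# This is not part of the original snippet; the referenced post benchmarks several
# ANN libraries, and Annoy is just one possible choice here.
from annoy import AnnoyIndex

index = AnnoyIndex(item_factors.shape[1], 'angular')  # angular distance ~ cosine ranking
for item_id, vector in enumerate(item_factors):
    index.add_item(item_id, vector)
index.build(50)  # 50 trees

# top-10 items for the first user; with the augmented factors this matches
# the exact inner-product ranking on the untransformed factors
recommended = index.get_nns_by_vector(user_factors[0], 10)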
import numpy as np
import implicit
from sklearn.neighbors import kneighbors_graph
from sklearn.linear_model import ElasticNet

# learn a model
# clus_model, train_X, train_Y, i and params are defined by the surrounding script
data_idx = np.nonzero(clus_model.labels_ == i)[0]
X = train_X[data_idx, :]
Y = train_Y[data_idx, :]

print('embedding learning: building kNN graph')
# build the kNN graph over the label vectors
graph = kneighbors_graph(Y, params.SVP_neigh, mode='distance', metric='cosine',
                         include_self=True,
                         n_jobs=-1)
graph.data = 1 - graph.data  # convert cosine distances to similarities

print('embedding learning: ALS')
# learn the local embedding by factorizing the similarity graph
als_model = implicit.als.AlternatingLeastSquares(factors=params.out_Dim,
                                                 regularization=params.embedding_lambda)
als_model.fit(graph)

# the embedding
# shape: #instances x embedding dim
Z = als_model.item_factors

print('linear regressor training')
# learn the linear regressor that maps features to the embedding
# regressor = Ridge(fit_intercept=True, alpha=params.regressor_lambda2)
regressor = ElasticNet(alpha=0.1, l1_ratio=0.001)
regressor.fit(X, Z)
# shape: embedding dim x feature dim
V = regressor.coef_
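
# Hedged usage sketch (not from the source): project a held-out feature vector into
# the embedding space with the learned regressor and find its nearest training
# instances in Z. The name `test_X` and the cosine-similarity lookup are assumptions.
from sklearn.metrics.pairwise import cosine_similarity

z_query = test_X[0:1, :] @ V.T          # shape: 1 x embedding dim
scores = cosine_similarity(z_query, Z)[0]
nearest = np.argsort(-scores)[:10]       # indices of the 10 closest training points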
from . import als, approximate_als, bpr, lmf, nearest_neighbours
from .als import alternating_least_squares

__version__ = '0.4.0'

__all__ = ['alternating_least_squares', 'als', 'approximate_als', 'bpr',
           'nearest_neighbours', 'lmf', '__version__']