import scipy
import numpy as np
import pandas as pd
from tqdm.auto import trange
import multiprocessing
from cornac.models import Recommender
from cornac.utils import get_rng, fast_dot
from cornac.utils.init_utils import normal, zeros
from cornac.exception import ScoreException
from implicit.als import AlternatingLeastSquares as als
# ALS recommender built on the `implicit` library's AlternatingLeastSquares solver.
class ALS(Recommender):
    """Alternating Least Squares matrix factorization for implicit feedback.

    Training is delegated to ``implicit.als.AlternatingLeastSquares``; the
    learned user/item factors are copied back as float64 NumPy arrays and
    used for scoring with a plain dot product.

    Parameters
    ----------
    name: string, optional, default: 'ALS'
        The name of the recommender model.

    k: int, optional, default: 10
        The dimension of the latent factors.

    max_iter: int, optional, default: 20
        Number of ALS iterations.

    lambda_reg: float, optional, default: 0.02
        The lambda value used for regularization.

    alpha: float, optional, default: 1.0
        The rate of confidence increase.

    num_threads: int, optional, default: 0
        Number of parallel threads for training. If num_threads=0 (or not
        smaller than the number of CPU cores), all CPU cores will be utilized.
        If seed is not None, num_threads=1 to remove randomness from
        parallelization.

    trainable: boolean, optional, default: True
        When False, the model will not be re-trained, and input of pre-trained
        parameters are required.

    verbose: boolean, optional, default: False
        When True, running logs (progress bar and training loss) are displayed.

    init_params: dictionary, optional, default: None
        Initial parameters, e.g., init_params = {'U': user_factors, 'V': item_factors}.
        These are used as-is when ``trainable=False``; when the model is
        trained they are replaced by the factors learned by the solver.

    seed: int, optional, default: None
        Random seed for weight initialization.
        If specified, training will take longer because of single-thread
        (no parallelization).

    References
    ----------
    [1] Y. Hu, Y. Koren, and C. Volinsky, “Collaborative Filtering for Implicit Feedback Datasets,” in 2008 Eighth IEEE International Conference on Data Mining, Pisa, Italy: IEEE, Dec. 2008, pp. 263-272. doi: 10.1109/ICDM.2008.22.
    [2] implicit library: https://pypi.org/project/implicit/
    """

    def __init__(
        self,
        name='ALS',
        k=10,
        max_iter=20,
        lambda_reg=0.02,
        alpha=1.0,
        num_threads=0,
        trainable=True,
        verbose=False,
        init_params=None,
        seed=None
    ):
        super().__init__(name=name, trainable=trainable, verbose=verbose)
        self.k = k
        self.max_iter = max_iter
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.seed = seed

        # A fixed seed forces single-threaded training so runs are repeatable;
        # otherwise clamp the request to the number of available cores.
        n_cores = multiprocessing.cpu_count()
        if seed is not None:
            self.num_threads = 1
        elif 0 < num_threads < n_cores:
            self.num_threads = num_threads
        else:
            self.num_threads = n_cores

        # Init params if provided
        self.init_params = {} if init_params is None else init_params
        self.u_factors = self.init_params.get('U', None)
        self.i_factors = self.init_params.get('V', None)
        self.global_mean = 0.0
        self.Cui = None
        self.Ciu = None

    def _init(self):
        """Fill in any missing factor matrix with small random normal values."""
        rng = get_rng(self.seed)
        n_users, n_items = self.train_set.num_users, self.train_set.num_items

        if self.u_factors is None:
            self.u_factors = normal([n_users, self.k], std=0.01, random_state=rng)
        if self.i_factors is None:
            self.i_factors = normal([n_items, self.k], std=0.01, random_state=rng)
        self.global_mean = 0.0

    @staticmethod
    def _to_numpy(factors):
        """Return ``factors`` as a float64 NumPy array.

        NOTE(review): implicit's GPU models presumably expose their factors as
        device matrices offering ``to_numpy()``; the guard below is a no-op
        for plain NumPy arrays returned by the CPU solver.
        """
        if hasattr(factors, "to_numpy"):
            factors = factors.to_numpy()
        return np.array(factors, dtype=np.float64)

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)
        self._init()

        if self.trainable:
            solver = als(
                factors=self.k,
                regularization=self.lambda_reg,
                alpha=self.alpha,
                iterations=self.max_iter,
                # The loss is only reported through the progress bar, so it
                # is computed only when that bar is actually shown.
                calculate_training_loss=self.verbose,
                num_threads=self.num_threads,
                random_state=self.seed,
            )
            # Honor the model's verbose flag instead of always printing progress.
            solver.fit(self.train_set.matrix, show_progress=self.verbose)

            self.u_factors = self._to_numpy(solver.user_factors)
            self.i_factors = self._to_numpy(solver.item_factors)

        return self

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items.
            For an unknown user with ``item_idx=None`` an all-zero array is returned.

        Raises
        ------
        ScoreException
            If ``item_idx`` is given and either the user or the item is unknown.
        """
        unk_user = self.is_unknown_user(user_idx)

        if item_idx is None:
            known_item_scores = np.zeros(self.train_set.num_items)
            if not unk_user:
                # np.dot works for any float dtype of the factors (e.g. float32
                # pre-trained or randomly initialised ones) while the result is
                # still accumulated into the float64 output array.
                known_item_scores += np.dot(self.i_factors, self.u_factors[user_idx])
            return known_item_scores

        unk_item = self.is_unknown_item(item_idx)
        if unk_user or unk_item:
            raise ScoreException("Can't make score prediction for (user_id=%d, item_id=%d)" % (user_idx, item_idx))
        return np.dot(self.u_factors[user_idx], self.i_factors[item_idx])