Source code for cornac.models.als.recom_als

import scipy
import numpy as np
import pandas as pd
from tqdm.auto import trange

import multiprocessing

from cornac.models import Recommender
from cornac.utils import get_rng, fast_dot
from cornac.utils.init_utils import normal, zeros
from cornac.exception import ScoreException

from implicit.als import AlternatingLeastSquares as als


[docs]
class ALS(Recommender):
    """Alternating Least Squares matrix factorization.

    Training is delegated to ``implicit.als.AlternatingLeastSquares``; the
    learned user and item factors are copied onto this object (``u_factors``
    and ``i_factors``) and used for scoring.

    Parameters
    ----------
    name: string, optional, default: 'ALS'
        The name of the recommender model.

    k: int, optional, default: 10
        The dimension of the latent factors.

    max_iter: int, optional, default: 20
        Number of ALS iterations.

    lambda_reg: float, optional, default: 0.02
        The lambda value used for regularization.

    alpha: float, optional, default: 1.0
        The rate of confidence increase.

    num_threads: int, optional, default: 0
        Number of parallel threads for training. Any value that is not strictly
        between 0 and the number of CPU cores results in all CPU cores being used.
        If seed is not None, num_threads=1 to remove randomness from parallelization.

    trainable: boolean, optional, default: True
        When False, the model will not be re-trained, and input of pre-trained
        parameters are required.

    verbose: boolean, optional, default: False
        When True, running logs (progress bar and training loss) are displayed.

    init_params: dictionary, optional, default: None
        Initial parameters, e.g., init_params = {'U': user_factors, 'V': item_factors}.
        When ``trainable`` is True, these factors are replaced by the factors
        learned during ``fit``.

    seed: int, optional, default: None
        Random seed for weight initialization.
        If specified, training will take longer because of single-thread
        (no parallelization).

    References
    ----------
    [1] Y. Hu, Y. Koren, and C. Volinsky, "Collaborative Filtering for Implicit
    Feedback Datasets," in 2008 Eighth IEEE International Conference on Data
    Mining, Pisa, Italy: IEEE, Dec. 2008, pp. 263-272. doi: 10.1109/ICDM.2008.22.

    [2] implicit library: https://pypi.org/project/implicit/
    """

    def __init__(
        self,
        name='ALS',
        k=10,
        max_iter=20,
        lambda_reg=0.02,
        alpha=1.0,
        num_threads=0,
        trainable=True,
        verbose=False,
        init_params=None,
        seed=None
    ):
        super().__init__(name=name, trainable=trainable, verbose=verbose)
        self.k = k
        self.max_iter = max_iter
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.seed = seed

        # A fixed seed forces single-threaded training so that runs are
        # reproducible; otherwise fall back to every core unless the caller
        # asked for a smaller, positive number of threads.
        n_cores = multiprocessing.cpu_count()
        if seed is not None:
            self.num_threads = 1
        elif 0 < num_threads < n_cores:
            self.num_threads = num_threads
        else:
            self.num_threads = n_cores

        # Init params if provided
        self.init_params = {} if init_params is None else init_params
        self.u_factors = self.init_params.get('U', None)
        self.i_factors = self.init_params.get('V', None)
        self.global_mean = 0.0

        # Not read by any method of this class; kept so existing attribute
        # access on instances keeps working.
        self.Cui = None
        self.Ciu = None

    @staticmethod
    def _to_numpy(factors):
        """Return ``factors`` as a float64 NumPy array.

        Factor containers that expose a ``to_numpy()`` method (presumably the
        matrix objects of implicit's GPU back end -- TODO confirm) are
        converted through it first; plain arrays are copied directly.
        """
        to_numpy = getattr(factors, "to_numpy", None)
        if callable(to_numpy):
            factors = to_numpy()
        return np.array(factors, dtype=np.float64)

    def _init(self):
        """Create any missing factor matrix and normalise factor dtypes.

        Missing user/item factors are drawn from a normal distribution
        (std=0.01) using a generator seeded with ``self.seed``.
        """
        rng = get_rng(self.seed)
        n_users, n_items = self.train_set.num_users, self.train_set.num_items

        if self.u_factors is None:
            self.u_factors = normal([n_users, self.k], std=0.01, random_state=rng)
        if self.i_factors is None:
            self.i_factors = normal([n_items, self.k], std=0.01, random_state=rng)

        # Trained factors are stored as float64 (see ``fit``); use the same
        # dtype here so that scoring sees one dtype no matter whether the
        # factors were trained, randomly initialised or user-provided.
        self.u_factors = np.asarray(self.u_factors, dtype=np.float64)
        self.i_factors = np.asarray(self.i_factors, dtype=np.float64)

        self.global_mean = 0.0

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)

        self._init()

        if not self.trainable:
            return self

        # The training loss is only ever reported (progress bar / logs), so
        # it is computed only when the caller asked for verbose output.
        show_logs = bool(self.verbose)
        model = als(
            factors=self.k,
            regularization=self.lambda_reg,
            alpha=self.alpha,
            iterations=self.max_iter,
            calculate_training_loss=show_logs,
            num_threads=self.num_threads,
            random_state=self.seed,
        )
        model.fit(self.train_set.matrix, show_progress=show_logs)

        self.u_factors = self._to_numpy(model.user_factors)
        self.i_factors = self._to_numpy(model.item_factors)

        return self

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items.
            When ``item_idx`` is None and the user is unknown, all scores are zero.

        Raises
        ------
        ScoreException
            If ``item_idx`` is given and either the user or the item is unknown.
        """
        unk_user = self.is_unknown_user(user_idx)

        if item_idx is None:
            # Unknown users keep the all-zero score vector.
            known_item_scores = np.zeros(self.train_set.num_items)
            if not unk_user:
                # fast_dot receives the score vector as its output argument.
                fast_dot(self.u_factors[user_idx], self.i_factors, known_item_scores)
            return known_item_scores

        if unk_user or self.is_unknown_item(item_idx):
            raise ScoreException("Can't make score prediction for (user_id=%d, item_id=%d)" % (user_idx, item_idx))
        return np.dot(self.u_factors[user_idx], self.i_factors[item_idx])