Source code for irspack.recommenders.user_knn

from abc import abstractmethod
from typing import Optional, Union

from .._threading import get_n_threads
from ..definitions import InteractionMatrix
from ..optimization.parameter_range import (
    CategoricalRange,
    UniformFloatRange,
    default_tune_range_knn_with_weighting,
)
from ..utils import okapi_BM_25_weight, remove_diagonal, tf_idf_weight
from ._knn import (
    AsymmetricSimilarityComputer,
    CosineSimilarityComputer,
    JaccardSimilarityComputer,
    TverskyIndexComputer,
)
from .base import BaseUserSimilarityRecommender, RecommenderConfig
from .knn import FeatureWeightingScheme


class BaseUserKNNConfig(RecommenderConfig):
    """Configuration fields shared by the user-KNN recommenders in this module.

    The field names and default values mirror the keyword arguments of
    ``BaseUserKNNRecommender.__init__``.
    """

    # Shrinkage value handed to the similarity computer.
    shrinkage: float = 0.0
    # Neighbor limit passed to ``compute_similarity``.
    top_k: int = 100
    # Thread count; ``None`` is resolved by ``get_n_threads`` in the recommender.
    n_threads: Optional[int] = None
    # Name of the feature weighting scheme ("NONE", "TF_IDF" or "BM_25").
    feature_weighting: str = "NONE"
    # BM25 parameters, only read when ``feature_weighting`` is "BM_25".
    bm25_k1: float = 1.2
    bm25_b: float = 0.75


class BaseUserKNNRecommender(BaseUserSimilarityRecommender):
    """Common base for user-based k-nearest-neighbor recommenders.

    Concrete subclasses provide the similarity computer through
    :meth:`_create_computer`. This class stores the shared hyperparameters
    and, in :meth:`_learn`, applies the selected feature weighting, runs the
    computer and stores the diagonal-free result in ``U_``.

    Args:
        X_train_all: Input interaction matrix, forwarded to the parent class.
        shrinkage: Shrinkage value kept on the instance for the computer.
        top_k: Neighbor limit passed to ``compute_similarity``.
        n_threads: Requested thread count, resolved via ``get_n_threads``.
        feature_weighting: Name of a ``FeatureWeightingScheme`` member.
        bm25_k1: ``k1`` argument for ``okapi_BM_25_weight``.
        bm25_b: ``b`` argument for ``okapi_BM_25_weight``.
    """

    def __init__(
        self,
        X_train_all: InteractionMatrix,
        shrinkage: float = 0.0,
        top_k: int = 100,
        n_threads: Optional[int] = None,
        feature_weighting: str = "NONE",
        bm25_k1: float = 1.2,
        bm25_b: float = 0.75,
    ):
        super().__init__(X_train_all)

        # Neighborhood hyperparameters.
        self.shrinkage = shrinkage
        self.top_k = top_k

        # Feature weighting: the string is converted to its enum member here.
        self.feature_weighting = FeatureWeightingScheme(feature_weighting)
        self.bm25_k1 = bm25_k1
        self.bm25_b = bm25_b

        # Thread count is resolved last, from the caller's request.
        self.n_threads = get_n_threads(n_threads)

    @abstractmethod
    def _create_computer(
        self, X: InteractionMatrix
    ) -> Union[
        CosineSimilarityComputer,
        AsymmetricSimilarityComputer,
        JaccardSimilarityComputer,
        TverskyIndexComputer,
    ]:
        """Return the similarity computer built on ``X``; subclasses must override."""
        raise NotImplementedError("")

    def _apply_feature_weighting(self) -> InteractionMatrix:
        """Return the training matrix weighted per ``self.feature_weighting``.

        Raises:
            RuntimeError: If the stored scheme is none of the handled members.
        """
        scheme = self.feature_weighting
        if scheme == FeatureWeightingScheme.NONE:
            return self.X_train_all
        if scheme == FeatureWeightingScheme.TF_IDF:
            return tf_idf_weight(self.X_train_all)
        if scheme == FeatureWeightingScheme.BM_25:
            return okapi_BM_25_weight(self.X_train_all, self.bm25_k1, self.bm25_b)
        raise RuntimeError("Unknown weighting scheme.")

    def _learn(self) -> None:
        """Compute the user-user similarity matrix and store it in ``U_``."""
        similarity_computer = self._create_computer(self._apply_feature_weighting())
        # NOTE(review): the computer is built from the weighted matrix but is
        # queried with the unweighted ``X_train_all`` - confirm this asymmetry
        # is intended.
        top_k_similarity = similarity_computer.compute_similarity(
            self.X_train_all, self.top_k
        )
        self.U_ = remove_diagonal(top_k_similarity)


class CosineUserKNNConfig(BaseUserKNNConfig):
    """Configuration for the cosine user-KNN recommender.

    Adds the ``normalize`` flag on top of the shared user-KNN fields.
    """

    # Whether the similarity is normalized; same default as the recommender's __init__.
    normalize: bool = True


class CosineUserKNNRecommender(BaseUserKNNRecommender):
    r"""K-nearest neighbor recommender system based on cosine similarity. That is, the similarity matrix ``U`` is given by (row-wise top-k restricted)

    .. math::

        \mathrm{U}_{u,v} = \begin{cases}
            \frac{\sum_{i} X_{ui} X_{vi}}{||X_{u*}||_2 ||X_{v*}||_2 + \mathrm{shrinkage}} & (\text{if normalize = True}) \\
            \sum_{i} X_{ui} X_{vi} & (\text{if normalize = False})
        \end{cases}

    Args:
        X_train_all (Union[scipy.sparse.csr_matrix, scipy.sparse.csc_matrix]):
            Input interaction matrix.
        shrinkage (float, optional):
            The shrinkage parameter for regularization. Defaults to 0.0.
        normalize (bool, optional):
            Whether to normalize the similarity. Defaults to True.
        top_k (int, optional):
            Specifies the maximal number of allowed neighbors. Defaults to 100.
        feature_weighting (str, optional):
            Specifies how to weight the feature. Must be one of:

                - "NONE" : no feature weighting
                - "TF_IDF" : TF-IDF weighting
                - "BM_25" : `Okapi BM-25 weighting <https://en.wikipedia.org/wiki/Okapi_BM25>`_

            Defaults to "NONE".
        bm25_k1 (float, optional):
            The k1 parameter for BM25. Ignored if ``feature_weighting`` is not "BM_25". Defaults to 1.2.
        bm25_b (float, optional):
            The b parameter for BM25. Ignored if ``feature_weighting`` is not "BM_25". Defaults to 0.75.
        n_threads (Optional[int], optional):
            Specifies the number of threads to use for the computation.
            If ``None``, the environment variable ``"IRSPACK_NUM_THREADS_DEFAULT"`` will be looked up,
            and if the variable is not set, it will be set to ``os.cpu_count()``. Defaults to None.
    """

    config_class = CosineUserKNNConfig

    # Shared KNN tuning range extended with the ``normalize`` switch.
    default_tune_range = default_tune_range_knn_with_weighting.copy() + [
        CategoricalRange("normalize", [False, True])
    ]

    def __init__(
        self,
        X_train_all: InteractionMatrix,
        shrinkage: float = 0.0,
        normalize: bool = True,
        top_k: int = 100,
        feature_weighting: str = "NONE",
        bm25_k1: float = 1.2,
        bm25_b: float = 0.75,
        n_threads: Optional[int] = None,
    ):
        # The base signature orders its parameters as
        # (X_train_all, shrinkage, top_k, n_threads, ...), hence the reordering.
        super().__init__(
            X_train_all,
            shrinkage,
            top_k,
            n_threads,
            feature_weighting=feature_weighting,
            bm25_k1=bm25_k1,
            bm25_b=bm25_b,
        )
        self.normalize = normalize

    def _create_computer(self, X: InteractionMatrix) -> CosineSimilarityComputer:
        """Build the cosine similarity computer on ``X`` with this instance's settings."""
        return CosineSimilarityComputer(
            X, self.shrinkage, self.normalize, self.n_threads
        )


class AsymmetricCosineUserKNNConfig(BaseUserKNNConfig):
    """Configuration for the asymmetric-cosine user-KNN recommender.

    Adds the ``alpha`` exponent on top of the shared user-KNN fields.
    """

    alpha: float = 0.5


class AsymmetricCosineUserKNNRecommender(BaseUserKNNRecommender):
    r"""K-nearest neighbor recommender system based on asymmetric cosine similarity. That is, the similarity matrix ``U`` is given by (row-wise top-k restricted)

    .. math::

        \mathrm{U}_{u,v} = \frac{\sum_{i} X_{ui} X_{vi}}{||X_{u*}||^{2\alpha}_2 ||X_{v*}||^{2(1-\alpha)}_2 + \mathrm{shrinkage}}

    Args:
        X_train_all (Union[scipy.sparse.csr_matrix, scipy.sparse.csc_matrix]):
            Input interaction matrix.
        shrinkage (float, optional):
            The shrinkage parameter for regularization. Defaults to 0.0.
        alpha (float, optional):
            Specifies :math:`\alpha`. Defaults to 0.5.
        top_k (int, optional):
            Specifies the maximal number of allowed neighbors. Defaults to 100.
        feature_weighting (str, optional):
            Specifies how to weight the feature. Must be one of:

                - "NONE" : no feature weighting
                - "TF_IDF" : TF-IDF weighting
                - "BM_25" : `Okapi BM-25 weighting <https://en.wikipedia.org/wiki/Okapi_BM25>`_

            Defaults to "NONE".
        bm25_k1 (float, optional):
            The k1 parameter for BM25. Ignored if ``feature_weighting`` is not "BM_25". Defaults to 1.2.
        bm25_b (float, optional):
            The b parameter for BM25. Ignored if ``feature_weighting`` is not "BM_25". Defaults to 0.75.
        n_threads (Optional[int], optional):
            Specifies the number of threads to use for the computation.
            If ``None``, the environment variable ``"IRSPACK_NUM_THREADS_DEFAULT"`` will be looked up,
            and if the variable is not set, it will be set to ``os.cpu_count()``. Defaults to None.
    """

    config_class = AsymmetricCosineUserKNNConfig

    # Shared KNN tuning range extended with ``alpha`` over [0, 1].
    default_tune_range = default_tune_range_knn_with_weighting + [
        UniformFloatRange("alpha", 0, 1)
    ]

    def __init__(
        self,
        X_train_all: InteractionMatrix,
        shrinkage: float = 0.0,
        alpha: float = 0.5,
        top_k: int = 100,
        feature_weighting: str = "NONE",
        bm25_k1: float = 1.2,
        bm25_b: float = 0.75,
        n_threads: Optional[int] = None,
    ):
        # The base signature orders its parameters as
        # (X_train_all, shrinkage, top_k, n_threads, ...), hence the reordering.
        super().__init__(
            X_train_all,
            shrinkage,
            top_k,
            n_threads,
            feature_weighting=feature_weighting,
            bm25_k1=bm25_k1,
            bm25_b=bm25_b,
        )
        self.alpha = alpha

    def _create_computer(self, X: InteractionMatrix) -> AsymmetricSimilarityComputer:
        """Build the asymmetric similarity computer on ``X`` with this instance's settings."""
        return AsymmetricSimilarityComputer(
            X, self.shrinkage, self.alpha, self.n_threads
        )