# coding=utf-8
"""
Module containing evaluation functions suitable for judging the performance of
a fitted LightFM model.
"""
import numpy as np
from ._lightfm_fast import CSRMatrix, calculate_auc_from_rank
__all__ = ["precision_at_k", "recall_at_k", "auc_score", "reciprocal_rank"]
[docs]def precision_at_k(
model,
test_interactions,
train_interactions=None,
k=10,
user_features=None,
item_features=None,
preserve_rows=False,
num_threads=1,
check_intersections=True,
):
"""
Measure the precision at k metric for a model: the fraction of known
positives in the first k positions of the ranked list of results.
A perfect score is 1.0.
Parameters
----------
model: LightFM instance
the fitted model to be evaluated
test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
Non-zero entries representing known positives in the evaluation set.
train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
Non-zero entries representing known positives in the train set. These
will be omitted from the score calculations to avoid re-recommending
known positives.
k: integer, optional
The k parameter.
user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
Each row contains that user's weights over features.
item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
Each row contains that item's weights over features.
preserve_rows: boolean, optional
When False (default), the number of rows in the output will be equal
to the number of users with interactions in the evaluation set.
When True, the number of rows in the output will be equal to the
number of users.
num_threads: int, optional
Number of parallel computation threads to use. Should
not be higher than the number of physical cores.
check_intersections: bool, optional, True by default,
Only relevant when train_interactions are supplied.
A flag that signals whether the test and train matrices should be checked
for intersections to prevent optimistic ranks / wrong evaluation / bad data split.
Returns
-------
np.array of shape [n_users with interactions or n_users,]
Numpy array containing precision@k scores for each user. If there are
no interactions for a given user the returned precision will be 0.
"""
if num_threads < 1:
raise ValueError("Number of threads must be 1 or larger.")
ranks = model.predict_rank(
test_interactions,
train_interactions=train_interactions,
user_features=user_features,
item_features=item_features,
num_threads=num_threads,
check_intersections=check_intersections,
)
ranks.data = np.less(ranks.data, k, ranks.data)
precision = np.squeeze(np.array(ranks.sum(axis=1))) / k
if not preserve_rows:
precision = precision[test_interactions.getnnz(axis=1) > 0]
return precision
[docs]def recall_at_k(
model,
test_interactions,
train_interactions=None,
k=10,
user_features=None,
item_features=None,
preserve_rows=False,
num_threads=1,
check_intersections=True,
):
"""
Measure the recall at k metric for a model: the number of positive items in
the first k positions of the ranked list of results divided by the number
of positive items in the test period. A perfect score is 1.0.
Parameters
----------
model: LightFM instance
the fitted model to be evaluated
test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
Non-zero entries representing known positives in the evaluation set.
train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
Non-zero entries representing known positives in the train set. These
will be omitted from the score calculations to avoid re-recommending
known positives.
k: integer, optional
The k parameter.
user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
Each row contains that user's weights over features.
item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
Each row contains that item's weights over features.
preserve_rows: boolean, optional
When False (default), the number of rows in the output will be equal
to the number of users with interactions in the evaluation set.
When True, the number of rows in the output will be equal to the
number of users.
num_threads: int, optional
Number of parallel computation threads to use. Should
not be higher than the number of physical cores.
check_intersections: bool, optional, True by default,
Only relevant when train_interactions are supplied.
A flag that signals whether the test and train matrices should be checked
for intersections to prevent optimistic ranks / wrong evaluation / bad data split.
Returns
-------
np.array of shape [n_users with interactions or n_users,]
Numpy array containing recall@k scores for each user. If there are no
interactions for a given user having items in the test period, the
returned recall will be 0.
"""
if num_threads < 1:
raise ValueError("Number of threads must be 1 or larger.")
ranks = model.predict_rank(
test_interactions,
train_interactions=train_interactions,
user_features=user_features,
item_features=item_features,
num_threads=num_threads,
check_intersections=check_intersections,
)
ranks.data = np.less(ranks.data, k, ranks.data)
retrieved = np.squeeze(test_interactions.getnnz(axis=1))
hit = np.squeeze(np.array(ranks.sum(axis=1)))
if not preserve_rows:
hit = hit[test_interactions.getnnz(axis=1) > 0]
retrieved = retrieved[test_interactions.getnnz(axis=1) > 0]
return hit / retrieved
[docs]def auc_score(
model,
test_interactions,
train_interactions=None,
user_features=None,
item_features=None,
preserve_rows=False,
num_threads=1,
check_intersections=True,
):
"""
Measure the ROC AUC metric for a model: the probability that a randomly
chosen positive example has a higher score than a randomly chosen negative
example.
A perfect score is 1.0.
Parameters
----------
model: LightFM instance
the fitted model to be evaluated
test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
Non-zero entries representing known positives in the evaluation set.
train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
Non-zero entries representing known positives in the train set. These
will be omitted from the score calculations to avoid re-recommending
known positives.
user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
Each row contains that user's weights over features.
item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
Each row contains that item's weights over features.
preserve_rows: boolean, optional
When False (default), the number of rows in the output will be equal
to the number of users with interactions in the evaluation set.
When True, the number of rows in the output will be equal to the
number of users.
num_threads: int, optional
Number of parallel computation threads to use. Should
not be higher than the number of physical cores.
check_intersections: bool, optional, True by default,
Only relevant when train_interactions are supplied.
A flag that signals whether the test and train matrices should be checked
for intersections to prevent optimistic ranks / wrong evaluation / bad data split.
Returns
-------
np.array of shape [n_users with interactions or n_users,]
Numpy array containing AUC scores for each user. If there are no
interactions for a given user the returned AUC will be 0.5.
"""
if num_threads < 1:
raise ValueError("Number of threads must be 1 or larger.")
ranks = model.predict_rank(
test_interactions,
train_interactions=train_interactions,
user_features=user_features,
item_features=item_features,
num_threads=num_threads,
check_intersections=check_intersections,
)
assert np.all(ranks.data >= 0)
auc = np.zeros(ranks.shape[0], dtype=np.float32)
if train_interactions is not None:
num_train_positives = np.squeeze(
np.array(train_interactions.getnnz(axis=1)).astype(np.int32)
)
else:
num_train_positives = np.zeros(test_interactions.shape[0], dtype=np.int32)
# The second argument is modified in-place, but
# here we don't care about the inconsistency
# introduced into the ranks matrix.
calculate_auc_from_rank(
CSRMatrix(ranks), num_train_positives, ranks.data, auc, num_threads
)
if not preserve_rows:
auc = auc[test_interactions.getnnz(axis=1) > 0]
return auc
[docs]def reciprocal_rank(
model,
test_interactions,
train_interactions=None,
user_features=None,
item_features=None,
preserve_rows=False,
num_threads=1,
check_intersections=True,
):
"""
Measure the reciprocal rank metric for a model: 1 / the rank of the highest
ranked positive example. A perfect score is 1.0.
Parameters
----------
model: LightFM instance
the fitted model to be evaluated
test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
Non-zero entries representing known positives in the evaluation set.
train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
Non-zero entries representing known positives in the train set. These
will be omitted from the score calculations to avoid re-recommending
known positives.
user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
Each row contains that user's weights over features.
item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
Each row contains that item's weights over features.
preserve_rows: boolean, optional
When False (default), the number of rows in the output will be equal
to the number of users with interactions in the evaluation set.
When True, the number of rows in the output will be equal to the
number of users.
num_threads: int, optional
Number of parallel computation threads to use. Should
not be higher than the number of physical cores.
check_intersections: bool, optional, True by default,
Only relevant when train_interactions are supplied.
A flag that signals whether the test and train matrices should be checked
for intersections to prevent optimistic ranks / wrong evaluation / bad data split.
Returns
-------
np.array of shape [n_users with interactions or n_users,]
Numpy array containing reciprocal rank scores for each user.
If there are no interactions for a given user the returned value will
be 0.0.
"""
if num_threads < 1:
raise ValueError("Number of threads must be 1 or larger.")
ranks = model.predict_rank(
test_interactions,
train_interactions=train_interactions,
user_features=user_features,
item_features=item_features,
num_threads=num_threads,
check_intersections=check_intersections,
)
ranks.data = 1.0 / (ranks.data + 1.0)
ranks = np.squeeze(np.array(ranks.max(axis=1).todense()))
if not preserve_rows:
ranks = ranks[test_interactions.getnnz(axis=1) > 0]
return ranks