Source code for lightfm.cross_validation

```python
# coding=utf-8
"""
Dataset splitting functions.
"""

import numpy as np
import scipy.sparse as sp

def _shuffle(uids, iids, data, random_state):

shuffle_indices = np.arange(len(uids))
random_state.shuffle(shuffle_indices)

return (uids[shuffle_indices],
iids[shuffle_indices],
data[shuffle_indices])

def random_train_test_split(interactions,
                            test_percentage=0.2,
                            random_state=None):
    """
    Randomly split interactions between training and testing.

    This function takes an interaction set and splits it into
    two disjoint sets, a training set and a test set. Note that
    no effort is made to make sure that all items and users with
    interactions in the test set also have interactions in the
    training set; this may lead to a partial cold-start problem
    in the test set.

    The training set receives the first
    ``int((1 - test_percentage) * n)`` of the ``n`` shuffled
    interactions (truncated towards zero); the test set receives
    the remainder.

    Parameters
    ----------

    interactions: a scipy sparse matrix containing interactions
        The interactions to split.
    test_percentage: float, optional
        The fraction of interactions to place in the test set.
        Must lie in the closed interval [0, 1].
    random_state: np.random.RandomState or int, optional
        The random state used for the shuffle. An integer is used
        as the seed of a new np.random.RandomState; None creates
        an unseeded np.random.RandomState.

    Returns
    -------

    (train, test): (scipy.sparse.coo_matrix,
                    scipy.sparse.coo_matrix)
        A tuple of (train data, test data). Both matrices have the
        same shape and dtype as ``interactions``.

    Raises
    ------

    ValueError
        If ``interactions`` is not a scipy sparse matrix, or if
        ``test_percentage`` is outside [0, 1].
    """

    if not sp.issparse(interactions):
        raise ValueError('Interactions must be a scipy.sparse matrix.')

    # An out-of-range fraction would produce a negative or oversized
    # cutoff and a silently meaningless split. NaN fails this chained
    # comparison as well and is rejected.
    if not 0.0 <= test_percentage <= 1.0:
        raise ValueError('test_percentage must be between 0 and 1.')

    if random_state is None:
        random_state = np.random.RandomState()
    elif isinstance(random_state, (int, np.integer)):
        # Convenience: treat a bare integer as a seed. Any other object
        # is passed through untouched and only needs a shuffle() method.
        random_state = np.random.RandomState(random_state)

    interactions = interactions.tocoo()

    shape = interactions.shape
    uids, iids, data = _shuffle(interactions.row,
                                interactions.col,
                                interactions.data,
                                random_state)

    cutoff = int((1.0 - test_percentage) * len(uids))

    def _subset(idx):
        # Build a COO matrix from one contiguous slice of the shuffled
        # triplets, keeping the original shape and dtype.
        return sp.coo_matrix((data[idx], (uids[idx], iids[idx])),
                             shape=shape,
                             dtype=interactions.dtype)

    train = _subset(slice(None, cutoff))
    test = _subset(slice(cutoff, None))

    return train, test
```