Source code for hidi.matrix
import numpy as np
import pandas as pd
import types
import warnings
from hidi.transform import Transform
from hidi.linalg import dot
from pyvalid import accepts
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
# Catch annoying warnings from nimfa
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import nimfa as nf
class ApplyTransform(Transform):
    """
    Apply an arbitrary function to the transform input.

    :param fn: The function used to transform the input. It is called
        with the input as its only positional argument, followed by
        the keyword arguments that were handed to :code:`transform`.
    :type fn: function
    """

    def __init__(self, fn):
        self.fn = fn

    def transform(self, x, **kwargs):
        """
        Call :code:`fn` on :code:`x`, forwarding :code:`kwargs` to it.

        :param x: The input to the function :code:`fn`.
        :rtype: tuple of whatever :code:`fn` returned and the
            unchanged :code:`kwargs`
        """
        result = self.fn(x, **kwargs)
        return result, kwargs
class SimilarityTransform(Transform):
    """
    Takes the dot product of a link*item matrix with its transpose.

    Returns either a link*link or item*item similarity matrix.
    If axis is :code:`0`, an item*item matrix is returned, if
    axis is :code:`1` a link*link matrix is returned.

    The transform function returns a tuple containing the
    similarity matrix and the keyword arguments, in which
    :code:`items` holds the ids (items or links, depending on the
    axis) that label the similarity matrix.

    :param axis: The axis to perform the dot product for.
    :type axis: int[0,1]
    """

    def __init__(self, axis=0):
        self.axis = axis

    def transform(self, M, items, links, **kwargs):
        """
        :param M: The matrix to create a similarity matrix from
        :type M: numpy ndarray-like
        :param items: Array of :code:`item_ids` in the same order
            that they appear in :code:`M`.
        :type items: array
        :param links: Array of :code:`link_ids` in the same order
            that they appear in :code:`M`.
        :type links: array
        :raises ValueError: if :code:`axis` is neither 0 nor 1.
        :rtype: numpy.ndarray-like
        """
        if self.axis == 0:
            # (item*link) . (link*item) -> item*item
            sim_matrix = dot(M.transpose(), M)
            sim_axis = items
        elif self.axis == 1:
            # (link*item) . (item*link) -> link*link
            sim_matrix = dot(M, M.transpose())
            sim_axis = links
        else:
            # ValueError is still an Exception, so existing handlers
            # that catch Exception keep working.
            raise ValueError('Axis must be either 0 or 1')

        # Downstream transforms read the labels of the similarity
        # matrix from the `items` keyword, whichever axis was used.
        return sim_matrix, self.merge_kwargs(dict(items=sim_axis), kwargs)
class ScalarTransform(Transform):
    """
    Scale the matrix using a function or class method.

    `ScalarTransform` takes an `fn` argument that specifies the
    function that should be applied to the matrix. If `fn` is a string
    the scalar transform will call the method of that name on the
    matrix; if it is a callable, the scalar transform will call it
    with the matrix as input.

    :param fn: The scalar function to use. If :code:`fn` is a string
        then an attribute of that name will be looked up on the
        matrix and called without arguments. If :code:`fn` is a
        callable, it will be called with the input given to
        transform.
    :type fn: str | callable
    """

    def __init__(self, fn=np.log):
        self.fn = fn

    def scale(self, matrix):
        """
        Apply the configured scaling function to :code:`matrix`.

        :raises TypeError: if :code:`fn` is neither a string nor
            callable.
        :rtype: Any
        """
        fn = self.fn
        if isinstance(fn, str):
            return getattr(matrix, fn)()
        # `callable` rather than `types.FunctionType`: NumPy ufuncs such
        # as the default `np.log`, builtins and partials are not plain
        # Python functions and would otherwise be rejected.
        if callable(fn):
            return fn(matrix)
        raise TypeError('%s is not a valid scaling function' % (fn,))

    def transform(self, matrix_to_scale, **kwargs):
        """
        Takes a :code:`matrix_to_scale` as a numpy ndarray-like object
        and performs scaling on it, then returns the result along with
        the unchanged keyword arguments.

        :rtype: Any
        """
        out = self.scale(matrix_to_scale)
        return out, kwargs
class SparseTransform(Transform):
    """
    Make a sparse link*item matrix using SciPy's sparse compressed
    row matrix implementation.
    """

    @accepts(object, pd.DataFrame)
    def transform(self, df, **kwargs):
        """
        Takes a dataframe that has :code:`link_id`, :code:`item_id` and
        :code:`score` columns.

        Returns a SciPy :code:`csr_matrix` with one row per unique
        link and one column per unique item, in order of first
        appearance. The ordered ids are added to the keyword
        arguments as :code:`links` and :code:`items`.

        :param df: The DataFrame to make a sparse matrix from. Must have
            :code:`link_id`, :code:`item_id`, and :code:`score` columns.
        :type df: pandas.DataFrame
        :rtype: scipy.sparse.csr_matrix
        """
        link_u = list(df.link_id.unique())
        item_u = list(df.item_id.unique())

        # `.values` replaces `Series.as_matrix()`, which newer pandas
        # releases no longer provide.
        data = df.score.values

        # Integer position of every id within the unique lists above.
        # `pd.Categorical(..., categories=...)` replaces
        # `astype('category', categories=...)`, which newer pandas
        # releases no longer accept.
        row = pd.Categorical(df.link_id, categories=link_u).codes
        col = pd.Categorical(df.item_id, categories=item_u).codes

        outshape = (len(link_u), len(item_u))
        kwargs = self.merge_kwargs(dict(links=link_u, items=item_u), kwargs)
        return csr_matrix((data, (row, col)), shape=outshape), kwargs
class DenseTransform(Transform):
    """
    Convert a sparse matrix into its dense representation.
    """

    def transform(self, M, **kwargs):
        """
        Densify :code:`M` by calling its :code:`todense` method.

        :param M: a sparse matrix
        :type M: scipy.sparse classes
        :rtype: the dense matrix returned by :code:`M.todense()`
            (presumably a :code:`numpy.matrix` for SciPy sparse
            matrices -- confirm for other inputs)
        """
        dense = M.todense()
        return dense, kwargs
class ItemsMatrixToDFTransform(Transform):
    """
    Wrap a matrix in a Pandas DataFrame that is indexed by items.
    """

    def transform(self, M, items, **kwargs):
        """
        Build a DataFrame from the numpy ndarray-like :code:`M`, using
        the list of item identifiers :code:`items` as its index.

        :rtype: pandas.DataFrame
        """
        frame = pd.DataFrame(M, index=items)
        return frame, kwargs
class KerasEvaluationTransform(Transform):
    """
    Generalized transform for Keras algorithm

    This transform takes a Keras sequential model, a validation matrix and
    its keyword arguments upon initialization.

    :param keras_model: a Keras sequential model which is documented here:
        https://keras.io/getting-started/sequential-model-guide/
    :type keras_model: Keras Sequential model
    :param validation_matrix: A validation matrix is a dataframe that has
        :code:`item_id` index, other 'label' columns. It will be inner
        joined with the M matrix and then fed into the Keras sequential
        model. If it has an :code:`item_id` column instead, an indexed
        copy is used; the caller's dataframe is left untouched.
    :type validation_matrix: pandas.DataFrame
    :param tts_seed: random state seed for :code:`train_test_split`
    :type tts_seed: int
    :param tt_split: the proportion of the dataset to include in the test
        split for :code:`train_test_split`
    :type tt_split: float
    :param keras_kwargs: extra keyword arguments forwarded to the
        model's :code:`fit` method.
    """

    def __init__(self, keras_model, validation_matrix, tts_seed=42,
                 tt_split=0.25, **keras_kwargs):
        self.keras_model = keras_model
        self.keras_kwargs = keras_kwargs
        self.tts_seed = tts_seed
        self.tt_split = tt_split

        # Index by item_id on a copy instead of in place, so the
        # dataframe owned by the caller is not modified.
        if 'item_id' in validation_matrix.columns:
            validation_matrix = validation_matrix.set_index('item_id')
        self.validation_matrix = validation_matrix

    def transform(self, M, **kwargs):
        """
        Takes a dataframe that has :code:`item_id` index, other
        'features' columns for prediction, and fits the Keras
        sequential model on it.

        :param M: a dataframe that has an :code:`item_id` index, and
            "features" columns
        :type M: pandas.DataFrame
        :rtype: a tuple with trained Keras model and its keyword
            arguments
        """
        columns = M.shape[1]

        # After the join, the first `columns` columns are the features
        # from M and the remaining ones are the labels.
        factors = M.merge(self.validation_matrix, left_index=True,
                          right_index=True).values

        x_train, x_test, y_train, y_test = train_test_split(
            factors[:, :columns], factors[:, columns:],
            random_state=self.tts_seed, test_size=self.tt_split)

        # Keras documents `validation_data` as a tuple.
        self.keras_model.fit(
            x_train, y_train, validation_data=(x_test, y_test),
            **self.keras_kwargs)

        return self.keras_model, kwargs
class KerasKfoldTransform(Transform):
    """
    Generalized transform for Keras algorithm with k fold cross validation
    evaluation

    :param keras_model: a Keras sequential model which is documented here:
        https://keras.io/getting-started/sequential-model-guide/
    :type keras_model: Keras Sequential model
    :param validation_matrix: A validation matrix is a dataframe that has
        :code:`item_id` index, other 'label' columns. It will be inner
        joined with the M matrix and then fed into the Keras sequential
        model. If it has an :code:`item_id` column instead, an indexed
        copy is used; the caller's dataframe is left untouched.
    :type validation_matrix: pandas.DataFrame
    :param kfold_n_splits: Number of folds for kfold. Must be at least 2.
    :type kfold_n_splits: int
    :param kfold_seed: random state seed for kfold. Only used when
        :code:`kfold_shuffle` is true.
    :type kfold_seed: None, int or RandomState
    :param kfold_shuffle: Whether to shuffle the data before splitting
        into batches for kfold
    :type kfold_shuffle: boolean
    :param classification: If true, :code:`StratifiedKFold` is used to
        build the folds, otherwise :code:`KFold`.
    :type classification: boolean
    :param keras_kwargs: extra keyword arguments forwarded to the
        model's :code:`fit` method.
    """

    def __init__(self, keras_model, validation_matrix,
                 kfold_n_splits=10, kfold_seed=42, kfold_shuffle=True,
                 classification=False, **keras_kwargs):
        self.keras_model = keras_model
        self.keras_kwargs = keras_kwargs
        self.kfold_n_splits = kfold_n_splits
        self.kfold_seed = kfold_seed
        self.kfold_shuffle = kfold_shuffle
        self.classification = classification

        # Index by item_id on a copy instead of in place, so the
        # dataframe owned by the caller is not modified.
        if 'item_id' in validation_matrix.columns:
            validation_matrix = validation_matrix.set_index('item_id')
        self.validation_matrix = validation_matrix

    def transform(self, M, **kwargs):
        """
        Takes a dataframe that has :code:`item_id` index, other
        'features' columns for prediction, and fits the Keras
        sequential model on every fold of it.

        :param M:
            a dataframe that has an :code:`item_id` index, and
            "features" columns.
        :type M: pandas.DataFrame
        :rtype: a tuple with trained Keras model and its keyword
            arguments
        """
        columns = M.shape[1]

        # After the join, the first `columns` columns are the features
        # from M and the remaining ones are the labels.
        factors = M.merge(self.validation_matrix, left_index=True,
                          right_index=True).values
        X = factors[:, :columns]
        Y = factors[:, columns:]

        splitter = StratifiedKFold if self.classification else KFold
        # The seed only matters when shuffling; newer scikit-learn
        # releases reject a seed combined with shuffle=False.
        seed = self.kfold_seed if self.kfold_shuffle else None
        kfold = splitter(n_splits=self.kfold_n_splits,
                         random_state=seed,
                         shuffle=self.kfold_shuffle)

        # NOTE(review): the same model instance is fitted on every fold,
        # so training presumably continues from the previous fold's
        # weights -- confirm this is intended.
        for train_index, test_index in kfold.split(X, Y):
            # Validate on the held-out fold: features and labels must
            # both be selected with test_index.
            self.keras_model.fit(
                X[train_index], Y[train_index],
                validation_data=(X[test_index], Y[test_index]),
                **self.keras_kwargs)

        return self.keras_model, kwargs
class KerasPredictionTransform(Transform):
    """
    Generalized transform for Keras model prediction

    This transform is initialized with a trained Keras model and applies
    that model to the input when :code:`transform` is called.

    :param model: trained keras model
    :type model: trained keras model
    """

    def __init__(self, model):
        self.model = model

    def transform(self, M, **kwargs):
        """
        Run the trained Keras model's :code:`predict` on :code:`M` and
        return the predictions.

        :param M:
            a dataframe that has an :code:`item_id` index,
            and "features" columns
        :type M: pandas.DataFrame
        :rtype: ndarray-like object with its kwargs
        """
        return self.model.predict(M), kwargs
class SkLearnTransform(Transform):
    """
    Generalized transform for SciKit Learn algorithms.

    The transform is initialized with a SciKit Learn algorithm class
    and the keyword arguments to construct it with. A new instance of
    the algorithm is created and applied to the input each time
    :code:`transform` is called.

    The algorithm to be applied is likely, but not necessarily
    a :code:`sklearn.decomposition` algorithm.
    """

    def __init__(self, SkLearnAlg, **sklearn_args):
        self.SkLearnAlg = SkLearnAlg
        self.sklearn_args = sklearn_args

    def transform(self, M, **kwargs):
        """
        Instantiate the SkLearn algorithm and call its
        :code:`fit_transform` on the numpy ndarray-like :code:`M`.

        The fitted instance is added to the returned keyword
        arguments as :code:`sklearn_fit`.

        :rtype: numpy.ndarray
        """
        estimator = self.SkLearnAlg(**self.sklearn_args)
        result = estimator.fit_transform(M)
        kwargs['sklearn_fit'] = estimator
        return result, kwargs
class SVDTransform(SkLearnTransform):
    """
    Perform Truncated SVD on the matrix.

    A thin wrapper that configures :code:`SkLearnTransform` with SciKit
    Learn's Truncated SVD implementation, which is documented here:
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html

    Every keyword argument passed when initializing
    :code:`SVDTransform` is handed on to
    :code:`sklearn.decomposition.TruncatedSVD`.
    Please reference the `sklearn docs
    <http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html>`_
    when using this transform.
    """

    def __init__(self, **svd_kwargs):
        super(SVDTransform, self).__init__(
            TruncatedSVD,
            **svd_kwargs
        )
class NimfaTransform(Transform):
    """
    Generalized Nimfa transform.

    The transform is initialized with a nimfa algorithm and the keyword
    arguments to construct it with. The algorithm is built around the
    input and run each time :code:`transform` is called.
    """

    def __init__(self, NimfaAlg, **nimfa_kwargs):
        self.NimfaAlg = NimfaAlg
        self.nimfa_kwargs = nimfa_kwargs

    def transform(self, M, **kwargs):
        """
        Construct the nimfa algorithm for :code:`M`, run it, and return
        the basis of the resulting fit.

        The fit itself is added to the returned keyword arguments as
        :code:`nimfa_fit`.

        :rtype: numpy.ndarray
        """
        fit = self.NimfaAlg(M, **self.nimfa_kwargs)()
        kwargs['nimfa_fit'] = fit
        basis = fit.basis()
        return basis, kwargs
class SNMFTransform(NimfaTransform):
    """
    Perform Sparse Nonnegative Matrix Factorization.

    A thin wrapper that configures :code:`NimfaTransform` with nimfa's
    snmf function, which is documented here:
    http://nimfa.biolab.si/nimfa.methods.factorization.snmf.html

    Every keyword argument passed when initializing
    :code:`SNMFTransform` is handed on to :code:`nimfa.Snmf`.
    Please reference the `nimfa docs
    <http://nimfa.biolab.si/nimfa.methods.factorization.snmf.html>`_
    when using this transform.
    """

    def __init__(self, **snmf_kwargs):
        super(SNMFTransform, self).__init__(
            nf.Snmf,
            **snmf_kwargs
        )