/*!
*
*
* \brief Implements a Model using a linear function.
*
*
*
* \author T. Glasmachers, O. Krause
* \date 2010-2017
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef SHARK_MODELS_LINEARMODEL_H
#define SHARK_MODELS_LINEARMODEL_H
#include <shark/Models/AbstractModel.h>
#include <shark/Models/NeuronLayers.h>
#include <shark/Models/Classifier.h>
namespace shark {
///
/// \brief Linear Prediction with optional activation function
///
/// \par
/// This model computes the result of
/// \f$ y = f(x) = g(A x + b) \f$, where g is an arbitrary activation function.
/// By default g is the identity and the model is a simple linear model.
/// Otherwise, this is known as a generalized linear model. There are two important special cases:
/// The output may be a single number, and the offset term b may be
/// dropped.
///
/// The class allows for dense and sparse input vector types. However, it assumes that
/// the weight matrix and the outputs are dense. There are some cases where this is not
/// the desired behavior. See for example Normalizer for a class which is designed for sparse
/// inputs and outputs.
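///
/// A minimal usage sketch (illustrative values; assumes dense RealVector inputs, the default
/// identity activation, and evaluation through the call operator inherited from AbstractModel):
/// \code
/// LinearModel<RealVector> model(3, 2, true); // 3 inputs, 2 outputs, with offset term b
/// RealVector x(3, 1.0);                      // example input
/// RealVector y = model(x);                   // computes y = A x + b
/// \endcode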
template<class InputType = RealVector, class ActivationFunction = LinearNeuron>
class LinearModel : public AbstractModel<
InputType,
blas::vector<typename InputType::value_type, typename InputType::device_type>,//type of output uses same device and precision as input
blas::vector<typename InputType::value_type, typename InputType::device_type>//type of parameters uses same device and precision as input
>{
public:
typedef blas::vector<typename InputType::value_type, typename InputType::device_type> VectorType;
typedef blas::matrix<typename InputType::value_type, blas::row_major, typename InputType::device_type> MatrixType;
private:
typedef AbstractModel<InputType,VectorType,VectorType> base_type;
typedef LinearModel<InputType, ActivationFunction> self_type;
Shape m_inputShape;
Shape m_outputShape;
MatrixType m_matrix;
VectorType m_offset;
ActivationFunction m_activation;
public:
typedef typename base_type::BatchInputType BatchInputType;
typedef typename base_type::BatchOutputType BatchOutputType;//same as MatrixType
typedef typename base_type::ParameterVectorType ParameterVectorType;//same as VectorType
/// Default constructor; use setStructure later
LinearModel(){
this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
}
}
/// Constructor creating a model with given dimensionalities and optional offset term.
LinearModel(Shape const& inputs, Shape const& outputs = 1, bool offset = false)
: m_inputShape(inputs)
, m_outputShape(outputs)
, m_matrix(outputs.numElements(),inputs.numElements(),0.0)
, m_offset(offset?outputs.numElements():0,0.0){
this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
}
}
/// \brief From INameable: return the class name.
std::string name() const
{ return "LinearModel"; }
/// Construction from matrix (and vector)
LinearModel(MatrixType const& matrix, VectorType const& offset = VectorType())
: m_inputShape(matrix.size2())
, m_outputShape(matrix.size1())
, m_matrix(matrix)
, m_offset(offset){
this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
}
}
/// check for the presence of an offset term
bool hasOffset() const{
return m_offset.size() != 0;
}
///\brief Returns the expected shape of the input
Shape inputShape() const{
return m_inputShape;
}
///\brief Returns the shape of the output
Shape outputShape() const{
return m_outputShape;
}
/// obtain the parameter vector: the weight matrix stored row by row, followed by the offset term if present
ParameterVectorType parameterVector() const{
return to_vector(m_matrix) | m_offset;
}
/// overwrite the parameter vector
void setParameterVector(ParameterVectorType const& newParameters){
std::size_t numInputs = inputShape().numElements();
std::size_t numOutputs = outputShape().numElements();
noalias(to_vector(m_matrix)) = subrange(newParameters, 0, numInputs * numOutputs);
noalias(m_offset) = subrange(newParameters, numInputs * numOutputs, newParameters.size());
}
/// return the number of parameters
size_t numberOfParameters() const{
return m_matrix.size1()*m_matrix.size2()+m_offset.size();
}
/// overwrite structure and parameters
void setStructure(Shape const& inputs, Shape const& outputs = 1, bool offset = false){
LinearModel model(inputs,outputs,offset);
*this = model;
}
/// overwrite structure and parameters
void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
LinearModel model(matrix,offset);
*this = model;
}
/// return the weight matrix
MatrixType const& matrix() const{
return m_matrix;
}
MatrixType& matrix(){
return m_matrix;
}
/// return the offset
VectorType const& offset() const{
return m_offset;
}
VectorType& offset(){
return m_offset;
}
/// \brief Returns the activation function.
ActivationFunction const& activationFunction()const{
return m_activation;
}
/// \brief Returns the activation function.
ActivationFunction& activationFunction(){
return m_activation;
}
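/// Creates the internal state object used by the activation function during eval() and the derivative computations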
boost::shared_ptr<State> createState()const{
return boost::shared_ptr<State>(new typename ActivationFunction::State());
}
using base_type::eval;
/// Evaluate the model on a batch of inputs: output = g(matrix * input + offset)
void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
outputs.resize(inputs.size1(),m_matrix.size1());
//we multiply with a set of row vectors from the left
noalias(outputs) = inputs % trans(m_matrix);
if (hasOffset()){
noalias(outputs)+=repeat(m_offset,inputs.size1());
}
m_activation.evalInPlace(outputs);
}
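/// Evaluate the model for a single input: output = g(matrix * input + offset)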
void eval(InputType const& input, VectorType& output)const {
output.resize(m_matrix.size1());
//matrix-vector product
noalias(output) = m_matrix % input;
if (hasOffset()) {
noalias(output) += m_offset;
}
m_activation.evalInPlace(output);
}
/// Evaluate the model as above, additionally storing the activation state needed for the derivative computations
void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
outputs.resize(inputs.size1(),m_matrix.size1());
//we multiply with a set of row vectors from the left
noalias(outputs) = inputs % trans(m_matrix);
if (hasOffset()){
noalias(outputs)+=repeat(m_offset,inputs.size1());
}
m_activation.evalInPlace(outputs, state.toState());
}
///\brief Calculates the first derivative w.r.t. the parameters and sums it over all patterns of the last computed batch
void weightedParameterDerivative(
BatchInputType const& patterns,
BatchOutputType const& outputs,
BatchOutputType const& coefficients,
State const& state,
ParameterVectorType& gradient
)const{
SIZE_CHECK(coefficients.size2()==m_matrix.size1());
SIZE_CHECK(coefficients.size1()==patterns.size1());
gradient.resize(numberOfParameters());
std::size_t numInputs = inputShape().numElements();
std::size_t numOutputs = outputShape().numElements();
gradient.clear();
std::size_t matrixParams = numInputs*numOutputs;
auto weightGradient = blas::to_matrix(subrange(gradient,0,matrixParams), numOutputs,numInputs);
BatchOutputType delta = coefficients;
m_activation.multiplyDerivative(outputs,delta, state.toState());
//sum_i coefficients(output,i)*pattern(i)
noalias(weightGradient) = trans(delta) % patterns;
if (hasOffset()){
noalias(subrange(gradient, matrixParams, matrixParams + numOutputs)) = sum_rows(delta);
}
}
///\brief Calculates the first derivative w.r.t. the inputs and sums it over all patterns of the last computed batch
void weightedInputDerivative(
BatchInputType const & patterns,
BatchOutputType const& outputs,
BatchOutputType const & coefficients,
State const& state,
MatrixType& derivative
)const{
SIZE_CHECK(coefficients.size2() == m_matrix.size1());
SIZE_CHECK(coefficients.size1() == patterns.size1());
//compute chain rule
BatchOutputType delta = coefficients;
m_activation.multiplyDerivative(outputs,delta, state.toState());
derivative.resize(patterns.size1(),patterns.size2());
noalias(derivative) = delta % m_matrix;
}
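///\brief Calculates the derivatives w.r.t. both the parameters and the inputs in a single pass, reusing the shared chain-rule term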
void weightedDerivatives(
BatchInputType const & patterns,
BatchOutputType const& outputs,
BatchOutputType const & coefficients,
State const& state,
ParameterVectorType& parameterDerivative,
MatrixType& inputDerivative
)const{
SIZE_CHECK(coefficients.size2()==m_matrix.size1());
SIZE_CHECK(coefficients.size1()==patterns.size1());
std::size_t numInputs = inputShape().numElements();
std::size_t numOutputs = outputShape().numElements();
//compute chain rule
BatchOutputType delta = coefficients;
m_activation.multiplyDerivative(outputs,delta, state.toState());
//compute input derivative
inputDerivative.resize(patterns.size1(),numInputs);
noalias(inputDerivative) = delta % m_matrix;
//compute parameter derivative
parameterDerivative.resize(numberOfParameters());
parameterDerivative.clear();
std::size_t matrixParams = numInputs*numOutputs;
auto weightGradient = blas::to_matrix(subrange(parameterDerivative,0,matrixParams), numOutputs,numInputs);
auto offsetGradient = subrange(parameterDerivative,matrixParams,parameterDerivative.size());
//sum_i coefficients(output,i)*pattern(i)
noalias(weightGradient) = trans(delta) % patterns;
if (hasOffset()){
noalias(offsetGradient) = sum_rows(delta);
}
}
/// From ISerializable
void read(InArchive& archive){
archive >> m_matrix;
archive >> m_offset;
archive >> m_inputShape;
archive >> m_outputShape;
}
/// From ISerializable
void write(OutArchive& archive) const{
archive << m_matrix;
archive << m_offset;
archive << m_inputShape;
archive << m_outputShape;
}
};
/*! \brief Basic linear classifier.
*
* The LinearClassifier class is a multi-class classifier model
* suited for linear discriminant analysis. For c classes
* \f$ 0, \dots, c-1 \f$ the model computes
*
* \f$ \arg \max_i w_i^T x + b_i \f$
*
* Thus it is a linear model with an arg max computation on top.
* The internal linear model can be queried using decisionFunction().
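*
* A minimal usage sketch (illustrative values; assumes dense RealVector inputs and
* evaluation through the call operator inherited from Classifier):
* \code
* LinearClassifier<RealVector> classifier(5, 3, true); // 5 input features, 3 classes, with bias
* RealVector x(5, 0.5);                                // example input
* unsigned int label = classifier(x);                  // index of the class with the largest score
* \endcode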
*/
template<class VectorType = RealVector>
class LinearClassifier : public Classifier<LinearModel<VectorType> >
{
public:
typedef typename LinearModel<VectorType>::MatrixType MatrixType;
LinearClassifier(){}
/// Constructor creating a model with given dimensionalities and optional offset term.
LinearClassifier(Shape const& inputs, std::size_t numClasses, bool offset = false){
setStructure(inputs, numClasses, offset);
}
/// Constructor from weight matrix (and optional offset).
LinearClassifier(MatrixType const& matrix, VectorType const& offset = VectorType()){
setStructure(matrix, offset);
}
std::string name() const
{ return "LinearClassifier"; }
/// overwrite structure and parameters
void setStructure(Shape const& inputs, std::size_t numClasses, bool offset = false){
this->decisionFunction().setStructure(inputs, numClasses, offset);
}
/// overwrite structure and parameters
void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
this->decisionFunction().setStructure(matrix, offset);
}
};
}
#endif