/*!
 *
 * \brief       Implements a Model using a linear function.
 *
 * \author      T. Glasmachers, O. Krause
 * \date        2010-2017
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_LINEARMODEL_H
#define SHARK_MODELS_LINEARMODEL_H

#include <shark/Models/AbstractModel.h>
#include <shark/Models/NeuronLayers.h>
#include <shark/LinAlg/Base.h>

namespace shark {


///
/// \brief Linear Prediction with optional activation function
///
/// \par
/// This model computes the result of
/// \f$ y = f(x) = g(A x + b) \f$, where g is an arbitrary activation function.
/// By default g is the identity and the model is a simple linear model.
/// Otherwise, this is known as a generalized linear model. There are two important special cases:
/// the output may be a single number, and the offset term b may be dropped.
///
/// The class allows for dense and sparse input vector types. However, it assumes that
/// the weight matrix and the outputs are dense. There are cases where this is not
/// desirable; see for example Normalizer for a class which is designed for sparse
/// inputs and outputs.
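///
/// \par Example
/// A minimal usage sketch (illustrative only; it assumes RealVector inputs and the
/// LogisticNeuron activation from NeuronLayers.h):
/// \code
/// LinearModel<RealVector, LogisticNeuron> model(3, 1, true); // 3 inputs, 1 output, with offset b
/// RealVector x(3, 1.0);                                      // a single input pattern
/// RealVector y;
/// model.eval(x, y);                                          // y = g(A x + b)
/// \endcode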
template <class InputType = RealVector, class ActivationFunction = LinearNeuron>
class LinearModel : public AbstractModel<
	InputType,
	blas::vector<typename InputType::value_type, typename InputType::device_type>,//type of output uses same device and precision as input
	blas::vector<typename InputType::value_type, typename InputType::device_type>//type of parameters uses same device and precision as input
>{
public:
	typedef blas::vector<typename InputType::value_type, typename InputType::device_type> VectorType;
	typedef blas::matrix<typename InputType::value_type, blas::row_major, typename InputType::device_type> MatrixType;
private:
	typedef AbstractModel<InputType, VectorType, VectorType> base_type;
	typedef LinearModel<InputType, ActivationFunction> self_type;

	Shape m_inputShape;
	Shape m_outputShape;
	MatrixType m_matrix;
	VectorType m_offset;
	ActivationFunction m_activation;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;//same as MatrixType
	typedef typename base_type::ParameterVectorType ParameterVectorType;//same as VectorType

	/// Default constructor; use setStructure later
	LinearModel(){
		this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}
	}

	/// Constructor creating a model with given dimensionalities and optional offset term.
	LinearModel(Shape const& inputs, Shape const& outputs = 1, bool offset = false)
	: m_inputShape(inputs)
	, m_outputShape(outputs)
	, m_matrix(outputs.numElements(), inputs.numElements(), 0.0)
	, m_offset(offset? outputs.numElements() : 0, 0.0){
		this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "LinearModel"; }

	/// Construction from matrix (and vector)
	LinearModel(MatrixType const& matrix, VectorType const& offset = VectorType())
	: m_inputShape(matrix.size2())
	, m_outputShape(matrix.size1())
	, m_matrix(matrix)
	, m_offset(offset){
		this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}
	}

	/// check for the presence of an offset term
	bool hasOffset() const{
		return m_offset.size() != 0;
	}

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_inputShape;
	}
	///\brief Returns the shape of the output
	Shape outputShape() const{
		return m_outputShape;
	}

	/// obtain the parameter vector
	ParameterVectorType parameterVector() const{
		return to_vector(m_matrix) | m_offset;
	}

	/// overwrite the parameter vector
	void setParameterVector(ParameterVectorType const& newParameters){
		std::size_t numInputs = inputShape().numElements();
		std::size_t numOutputs = outputShape().numElements();
		noalias(to_vector(m_matrix)) = subrange(newParameters, 0, numInputs * numOutputs);
		noalias(m_offset) = subrange(newParameters, numInputs * numOutputs, newParameters.size());
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return m_matrix.size1() * m_matrix.size2() + m_offset.size();
	}
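	// Note on the parameter layout used by parameterVector()/setParameterVector(): the weight
	// matrix is stored row by row (MatrixType is row-major), followed by the offset if present.
	// Illustrative sketch, assuming a 2x3 model with offset:
	//
	//   LinearModel<> model(3, 2, true);          // 2x3 weight matrix A plus 2 offset entries b
	//   RealVector p = model.parameterVector();   // size 8: A(0,0),A(0,1),A(0,2),A(1,0),A(1,1),A(1,2),b(0),b(1)
	//   p(7) = 1.0;                               // change b(1)
	//   model.setParameterVector(p);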
	/// overwrite structure and parameters
	void setStructure(Shape const& inputs, Shape const& outputs = 1, bool offset = false){
		LinearModel model(inputs, outputs, offset);
		*this = model;
	}

	/// overwrite structure and parameters
	void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
		LinearModel model(matrix, offset);
		*this = model;
	}

	/// return the weight matrix
	MatrixType const& matrix() const{
		return m_matrix;
	}
	MatrixType& matrix(){
		return m_matrix;
	}

	/// return the offset
	VectorType const& offset() const{
		return m_offset;
	}
	VectorType& offset(){
		return m_offset;
	}

	/// \brief Returns the activation function.
	ActivationFunction const& activationFunction()const{
		return m_activation;
	}
	/// \brief Returns the activation function.
	ActivationFunction& activationFunction(){
		return m_activation;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new typename ActivationFunction::State());
	}

	using base_type::eval;

	/// Evaluate the model: output = g(matrix * input + offset)
	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		outputs.resize(inputs.size1(), m_matrix.size1());
		//we multiply with a set of row vectors from the left
		noalias(outputs) = inputs % trans(m_matrix);
		if (hasOffset()){
			noalias(outputs) += repeat(m_offset, inputs.size1());
		}
		m_activation.evalInPlace(outputs);
	}

	void eval(InputType const& input, VectorType& output)const{
		output.resize(m_matrix.size1());
		//we multiply with a set of row vectors from the left
		noalias(output) = m_matrix % input;
		if (hasOffset()){
			noalias(output) += m_offset;
		}
		m_activation.evalInPlace(output);
	}

	/// Evaluate the model: output = g(matrix * input + offset)
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		outputs.resize(inputs.size1(), m_matrix.size1());
		//we multiply with a set of row vectors from the left
		noalias(outputs) = inputs % trans(m_matrix);
		if (hasOffset()){
			noalias(outputs) += repeat(m_offset, inputs.size1());
		}
		m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
	}

	///\brief Calculates the first derivative w.r.t. the parameters, summed over all patterns of the last computed batch
	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size2() == m_matrix.size1());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		gradient.resize(numberOfParameters());
		std::size_t numInputs = inputShape().numElements();
		std::size_t numOutputs = outputShape().numElements();
		gradient.clear();
		std::size_t matrixParams = numInputs * numOutputs;

		auto weightGradient = blas::to_matrix(subrange(gradient, 0, matrixParams), numOutputs, numInputs);

		BatchOutputType delta = coefficients;
		m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());
		//sum_i coefficients(output,i)*pattern(i)
		noalias(weightGradient) = trans(delta) % patterns;

		if (hasOffset()){
			noalias(subrange(gradient, matrixParams, matrixParams + numOutputs)) = sum_rows(delta);
		}
	}

	///\brief Calculates the first derivative w.r.t. the inputs, summed over all patterns of the last computed batch
	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		MatrixType& derivative
	)const{
		SIZE_CHECK(coefficients.size2() == m_matrix.size1());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		//compute chain rule
		BatchOutputType delta = coefficients;
		m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

		derivative.resize(patterns.size1(), patterns.size2());
		noalias(derivative) = delta % m_matrix;
	}

	void weightedDerivatives(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& parameterDerivative,
		MatrixType& inputDerivative
	)const{
		SIZE_CHECK(coefficients.size2() == m_matrix.size1());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		std::size_t numInputs = inputShape().numElements();
		std::size_t numOutputs = outputShape().numElements();

		//compute chain rule
		BatchOutputType delta = coefficients;
		m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

		//compute input derivative
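		// Chain rule: multiplyDerivative has scaled delta(i,j) = coefficients(i,j) * g'(z(i,j)),
		// where z = A x_i + b is reconstructed from the outputs and the stored state. The batch
		// input derivative is then delta % m_matrix, and the weight gradient below is
		// trans(delta) % patterns, i.e. the sum of outer products of deltas with input patterns.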
		inputDerivative.resize(patterns.size1(), numInputs);
		noalias(inputDerivative) = delta % m_matrix;

		//compute parameter derivative
		parameterDerivative.resize(numberOfParameters());
		parameterDerivative.clear();
		std::size_t matrixParams = numInputs * numOutputs;
		auto weightGradient = blas::to_matrix(subrange(parameterDerivative, 0, matrixParams), numOutputs, numInputs);
		auto offsetGradient = subrange(parameterDerivative, matrixParams, parameterDerivative.size());

		//sum_i coefficients(output,i)*pattern(i)
		noalias(weightGradient) = trans(delta) % patterns;
		if (hasOffset()){
			noalias(offsetGradient) = sum_rows(delta);
		}
	}

	/// From ISerializable
	void read(InArchive& archive){
		archive >> m_matrix;
		archive >> m_offset;
		archive >> m_inputShape;
		archive >> m_outputShape;
	}
	/// From ISerializable
	void write(OutArchive& archive) const{
		archive << m_matrix;
		archive << m_offset;
		archive << m_inputShape;
		archive << m_outputShape;
	}
};

/*! \brief Basic linear classifier.
 *
 *  The LinearClassifier class is a multi class classifier model
 *  suited for linear discriminant analysis. For c classes
 *  \f$ 0, \dots, c-1 \f$ the model computes
 *
 *  \f$ \arg \max_i w_i^T x + b_i \f$
 *
 *  Thus it is a linear model with arg max computation.
 *  The internal linear model can be queried using decisionFunction().
 */
template<class VectorType = RealVector>
class LinearClassifier : public Classifier<LinearModel<VectorType> >
{
public:
	typedef typename LinearModel<VectorType>::MatrixType MatrixType;

	LinearClassifier(){}

	/// Constructor creating a model with given dimensionalities and optional offset term.
	LinearClassifier(Shape const& inputs, std::size_t numClasses, bool offset = false){
		setStructure(inputs, numClasses, offset);
	}

	/// Constructor from weight matrix (and optional offset).
	LinearClassifier(MatrixType const& matrix, VectorType const& offset = VectorType()){
		setStructure(matrix, offset);
	}

	std::string name() const
	{ return "LinearClassifier"; }

	/// overwrite structure and parameters
	void setStructure(Shape const& inputs, std::size_t numClasses, bool offset = false){
		this->decisionFunction().setStructure(inputs, numClasses, offset);
	}

	/// overwrite structure and parameters
	void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
		this->decisionFunction().setStructure(matrix, offset);
	}
};

}
#endif