//===========================================================================
/*!
*
*
* \brief Model for conversion of real valued output to class labels
*
* \author T. Glasmachers, O.Krause
* \date 2017
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_CLASSIFIER_H
#define SHARK_MODELS_CLASSIFIER_H
#include
namespace shark {
///
/// \brief Conversion of real-valued or vector valued outputs to class labels
///
/// \par
/// The Classifier is a model converting the
/// real-valued vector output of an underlying decision function to a
/// class label 0, ..., d-1 by means of an arg-max operation.
/// The class returns the argument of the maximal
/// input component as its output. This conversion is adjusted to
/// interpret the output of a linear model, a neural network or a support vector
/// machine for multi-category classification.
///
/// In the special case that d is 1, it is assumed that the model can be represented as
/// a two-dimensional vector whose components have the same magnitude but opposite sign.
/// In consequence, a positive output of the model is interpreted as class 1, and a
/// negative (or zero) output as class 0.
///
/// The underlying decision function is an arbitrary model. It should
/// be default constructible and it can be accessed using decisionFunction().
/// The parameters of the Classifier are the ones of the decision function.
///
/// Optionally the model allows to set bias values which are added on the predicted
/// values of the decision function. Thus adding positive weights on a class makes it
/// more likely to be predicted. In the binary case with a single output, a positive weight
/// makes class one more likely and a negative weight class 0.
template<class Model>
class Classifier : public AbstractModel<
	typename Model::InputType,
	unsigned int,
	typename Model::ParameterVectorType
>{
private:
	/// Batch type produced by the wrapped decision function.
	typedef typename Model::BatchOutputType ModelBatchOutputType;
public:
	typedef Model DecisionFunctionType;
	typedef typename Model::InputType InputType;
	typedef unsigned int OutputType;
	typedef typename Batch<InputType>::type BatchInputType;
	typedef Batch<unsigned int>::type BatchOutputType;
	typedef typename Model::ParameterVectorType ParameterVectorType;

	/// \brief Creates a classifier around a default-constructed decision function and without bias.
	Classifier(){}

	/// \brief Creates a classifier that stores a copy of the given decision function.
	Classifier(Model const& decisionFunction)
	: m_decisionFunction(decisionFunction){}

	/// \brief Returns the name "Classifier<...>" with the decision function's name inserted.
	std::string name() const
	{ return "Classifier<"+m_decisionFunction.name()+">"; }

	/// \brief Returns the parameter vector of the decision function.
	///
	/// The bias is not part of the parameter vector.
	ParameterVectorType parameterVector() const{
		return m_decisionFunction.parameterVector();
	}

	/// \brief Forwards the new parameters to the decision function.
	void setParameterVector(ParameterVectorType const& newParameters){
		m_decisionFunction.setParameterVector(newParameters);
	}

	/// \brief Returns the number of parameters of the decision function.
	std::size_t numberOfParameters() const{
		return m_decisionFunction.numberOfParameters();
	}

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_decisionFunction.inputShape();
	}

	///\brief Returns the shape of the output
	///
	/// For the classifier, Shape is a number representing the number of classes.
	Shape outputShape() const{
		return m_decisionFunction.outputShape().flatten();
	}

	/// \brief Returns the bias added to the outputs of the decision function.
	///
	/// An empty vector means that no bias is applied. Otherwise eval() requires
	/// its size to match the number of outputs of the decision function.
	RealVector const& bias()const{
		return m_bias;
	}

	/// \brief Returns a mutable reference to the bias vector.
	///
	/// An empty vector means that no bias is applied. Otherwise eval() requires
	/// its size to match the number of outputs of the decision function.
	RealVector& bias(){
		return m_bias;
	}

	/// \brief Return the decision function
	Model const& decisionFunction()const{
		return m_decisionFunction;
	}

	/// \brief Return the decision function
	Model& decisionFunction(){
		return m_decisionFunction;
	}

	/// \brief Computes the class labels for a batch of inputs.
	///
	/// The decision function is evaluated on the batch; each row of its result
	/// is converted to one label. With a single output column the label is 1 if
	/// output plus bias is strictly positive and 0 otherwise. With several
	/// columns the label is the arg-max of the (optionally biased) row.
	///
	/// \param input  batch of input patterns
	/// \param output receives one label per pattern; it is resized to the batch size
	void eval(BatchInputType const& input, BatchOutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		ModelBatchOutputType modelResult;
		m_decisionFunction.eval(input,modelResult);

		std::size_t const batchSize = modelResult.size1();
		bool const hasBias = !m_bias.empty();
		output.resize(batchSize);

		if(modelResult.size2() == 1){
			// Binary case with a single output: threshold at zero.
			double const bias = hasBias ? m_bias(0) : 0.0;
			for(std::size_t i = 0; i != batchSize; ++i){
				output(i) = modelResult(i,0) + bias > 0.0;
			}
		}
		else if(hasBias){
			for(std::size_t i = 0; i != batchSize; ++i){
				output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i) + m_bias));
			}
		}
		else{
			for(std::size_t i = 0; i != batchSize; ++i){
				output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i)));
			}
		}
	}

	/// \brief Computes the class labels for a batch of inputs; the state argument is not used.
	void eval(BatchInputType const& input, BatchOutputType& output, State& /*state*/)const{
		eval(input,output);
	}

	/// \brief Computes the class label of a single input pattern.
	///
	/// Applies the same decision rule as the batch version: with a single
	/// output the label is 1 if output plus bias is strictly positive and 0
	/// otherwise; with several outputs the label is the arg-max of the
	/// (optionally biased) output vector.
	///
	/// \param pattern the input pattern
	/// \param output  receives the predicted label
	void eval(InputType const& pattern, OutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		typename Model::OutputType modelResult;
		m_decisionFunction.eval(pattern,modelResult);

		// The decision must be taken regardless of whether a bias is set.
		// Guarding this whole section with m_bias.empty() would leave output
		// unassigned for every classifier that has a bias.
		if(modelResult.size() == 1){
			double const bias = m_bias.empty() ? 0.0 : m_bias(0);
			output = modelResult(0) + bias > 0.0;
		}
		else if(m_bias.empty()){
			output = static_cast<unsigned int>(arg_max(modelResult));
		}
		else{
			output = static_cast<unsigned int>(arg_max(modelResult + m_bias));
		}
	}

	/// From ISerializable
	///
	/// Reads the decision function followed by the bias, matching write().
	void read(InArchive& archive){
		archive >> m_decisionFunction;
		archive >> m_bias;
	}

	/// From ISerializable
	///
	/// Writes the decision function followed by the bias.
	void write(OutArchive& archive) const{
		archive << m_decisionFunction;
		archive << m_bias;
	}

private:
	Model m_decisionFunction; ///< underlying model producing the real-valued outputs
	RealVector m_bias;        ///< optional per-output offset; empty means no bias
};
};
#endif