/*!
 *
 * \brief       Implements the Hinge Loss function for maximum margin classification.
 *
 *
 * \author      Oswin Krause
 * \date        2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H

#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>

namespace shark {

///
/// \brief Hinge-loss for large margin classification
///
/// The hinge loss for two-class problems is defined as \f$ L_i = \max \{ 0 , 1 - y_i f(x_i) \} \f$, where \f$ y_i \in \{-1,1\} \f$ is the label
/// and \f$ f(x_i) \f$ is the prediction of the model for the ith input. The loss introduces the concept of
/// a margin: a point should not only be correctly classified, but should also not lie too close to the
/// decision boundary. Therefore, even correctly classified points may be penalized.
///
/// For multi-class problems, a sum of relative margins is used:
/// \f$ L_i = \sum_{c \neq y_i} \max \{ 0 , 1 - 1/2 (f_{y_i}(x_i) - f_c(x_i)) \} \f$. This loss requires a margin
/// between the outputs of the different classes, and the function needs as many outputs as there are classes. The pre-factor
/// 1/2 ensures that in the two-class, two-output case with a linear function the value of the loss is the same as in the single-output
/// version.
///
/// The loss is implemented for class labels 0,1,...,n, even in the binary case.
///
/// The hinge loss is differentiable except at one point.
/// For points violating the margin, the derivative is -1;
/// for points not violating it, it is 0. Points exactly on the margin count as non-violating.
class HingeLoss : public AbstractLoss<unsigned int, RealVector>
{
public:
    /// constructor
    HingeLoss(){
        m_features |= base_type::HAS_FIRST_DERIVATIVE;
    }

    /// \brief Returns class name "HingeLoss"
    std::string name() const
    { return "HingeLoss"; }

    /// \brief Calculates the sum of the hinge losses over all samples of the batch.
    double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const{
        std::size_t numInputs = labels.size();
        SIZE_CHECK(numInputs == predictions.size1());

        double error = 0;
        //binary case for models with single output
        if(predictions.size2() == 1){
            for(std::size_t i = 0; i != numInputs; ++i){
                SIZE_CHECK(labels(i) < 2);
                double y = 2.0 * labels(i) - 1.0; //map label {0,1} to {-1,+1}
                error += std::max(0.0, 1.0 - y * predictions(i,0));
            }
        }
        else{//multi-class or multiple output case
            for(std::size_t i = 0; i != numInputs; ++i){
                SIZE_CHECK(labels(i) < predictions.size2());
                for(std::size_t o = 0; o != predictions.size2(); ++o){
                    if(o == labels(i)) continue;
                    error += std::max(0.0, 2.0 - predictions(i,labels(i)) + predictions(i,o));
                }
            }
            error /= 2;
        }
        return error;
    }

    /// \brief Calculates the sum of the hinge losses and the derivative w.r.t. the predictions.
    double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const{
        std::size_t numInputs = labels.size();
        std::size_t outputDim = predictions.size2();
        SIZE_CHECK(numInputs == predictions.size1());

        gradient.resize(numInputs, outputDim);
        gradient.clear();
        double error = 0;
        //binary case for models with single output
        if(outputDim == 1){
            for(std::size_t i = 0; i != numInputs; ++i){
                double y = 2.0 * labels(i) - 1.0; //map label {0,1} to {-1,+1}
                double sampleLoss = std::max(0.0, 1.0 - y * predictions(i,0));
                if(sampleLoss > 0)
                    gradient(i,0) = -y;
                error += sampleLoss;
            }
        }
        else{//multi-class or multiple output case
            for(std::size_t i = 0; i != numInputs; ++i){
                SIZE_CHECK(labels(i) < predictions.size2());
                for(std::size_t o = 0; o != predictions.size2(); ++o){
                    if(o == labels(i)) continue;
                    double sampleLoss = std::max(0.0, 2.0 - predictions(i,labels(i)) + predictions(i,o));
                    if(sampleLoss > 0){
                        gradient(i,o) = 0.5;
                        gradient(i,labels(i)) -= 0.5;
                    }
                    error += sampleLoss;
                }
            }
            error /= 2;
        }
        return error;
    }
};

}
#endif
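
// Usage sketch (not part of the original header): a minimal illustration of eval()
// and evalDerivative() for the binary case, assuming Shark's usual batch types for
// this loss (UIntVector labels, RealMatrix predictions) and a standard Shark install.
//
//   #include <shark/ObjectiveFunctions/Loss/HingeLoss.h>
//   using namespace shark;
//
//   HingeLoss loss;
//
//   // Two samples, one output column; labels {0,1} are mapped internally to {-1,+1}.
//   UIntVector labels(2);
//   labels(0) = 1; labels(1) = 0;
//   RealMatrix predictions(2, 1);
//   predictions(0, 0) =  0.5;   // label 1, margin violated: contributes 1 - 0.5 = 0.5
//   predictions(1, 0) = -2.0;   // label 0, margin satisfied: contributes 0
//
//   double error = loss.eval(labels, predictions);        // 0.5
//
//   RealMatrix gradient;                                   // resized by evalDerivative
//   loss.evalDerivative(labels, predictions, gradient);    // gradient(0,0) == -1, gradient(1,0) == 0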