/*!
*
* \brief Implements Tukey's Biweight-loss function for robust regression
*
*
* \author Oswin Krause
* \date 2014
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_TUKEYBIWEIGHTLOSS_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_TUKEYBIWEIGHTLOSS_H
#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>
namespace shark {
/// \brief Tukey's Biweight-loss for robust regression
///
/// Tukey's Biweight-loss is a loss function for robust regression. For predictions close to the
/// correct value it behaves like a convex loss, but as the difference approaches a threshold k
/// it flattens out. For differences greater than k,
/// the function is constant and has gradient 0. This effectively ignores large outliers at the cost
/// of losing the convexity of the loss function.
/// The 1-dimensional loss is defined as
///\f[ f(x)= \frac {x^6}{6k^4} - \frac {x^4} {2k^2}+\frac {x^2} {2} \f]
/// for \f$ x \in [-k,k]\f$. Outside this interval the loss is the constant \f$\frac {k^2}{6}\f$.
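///
/// This polynomial is the expanded form of the standard biweight expression
/// \f$ \frac{k^2}{6}\left(1-\left(1-\frac{x^2}{k^2}\right)^3\right) \f$; at \f$|x|=k\f$
/// both branches take the value \f$\frac{k^2}{6}\f$, so the loss is continuous.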
///
/// For multidimensional problems, x is defined as the two-norm of the difference
/// between the label and the prediction.
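///
/// A minimal usage sketch (assuming \c labels and \c predictions are RealMatrix
/// batches of matching shape; the variable names are illustrative):
/// \code
/// TukeyBiweightLoss loss(2.0); //residuals with norm larger than k=2 count as outliers
/// double error = loss.eval(labels, predictions); //summed loss over the batch
/// RealMatrix gradient;
/// error = loss.evalDerivative(labels, predictions, gradient); //also fills the gradient
/// \endcode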
class TukeyBiweightLoss : public AbstractLoss<RealVector, RealVector>
{
public:
/// constructor
TukeyBiweightLoss(double k = 1.0):m_k(k){
m_features |= base_type::HAS_FIRST_DERIVATIVE;
}
/// \brief Returns class name "TukeyBiweightLoss"
std::string name() const
{ return "TukeyBiweightLoss"; }
///\brief Calculates the sum of the losses over all points in the batch
double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const{
SIZE_CHECK(labels.size1() == predictions.size1());
SIZE_CHECK(labels.size2() == predictions.size2());
std::size_t numInputs = labels.size1();
double error = 0;
double k2 = sqr(m_k);
double k4 = sqr(k2);
double maxErr = k2/6;
for(std::size_t i = 0; i != numInputs;++i){
double norm2 = norm_sqr(row(predictions,i)-row(labels,i));
//check whether we are inside the polynomial region of the loss
if(norm2 <= k2){
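//f(x) = x^2/2 - x^4/(2k^2) + x^6/(6k^4), evaluated with x^2 = norm2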
error += norm2/2+sqr(norm2)/6*(norm2/k4-3/k2);
}
else{
error += maxErr;
}
}
return error;
}
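///\brief Calculates the summed loss and its derivative with respect to the predictions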
double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient)const{
SIZE_CHECK(labels.size1() == predictions.size1());
SIZE_CHECK(labels.size2() == predictions.size2());
std::size_t numInputs = labels.size1();
std::size_t outputDim = predictions.size2();
gradient.resize(numInputs,outputDim);
gradient.clear();
double error = 0;
double k2 = sqr(m_k);
double k4 = sqr(k2);
double maxErr = k2/6;
for(std::size_t i = 0; i != numInputs;++i){
double norm2 = norm_sqr(row(predictions,i)-row(labels,i));
//check whether we are inside the polynomial region of the loss
if(norm2 <= k2){
error += norm2/2+sqr(norm2)/6*(norm2/k4-3/k2);
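//the gradient is (1-norm2/k^2)^2 times the residual, the classic biweight weighting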
noalias(row(gradient,i)) = (1+sqr(norm2)/k4-2*norm2/k2)*(row(predictions,i)-row(labels,i));
}
else{
error += maxErr;
//gradient is initialized to 0!
}
}
return error;
}
private:
double m_k;
};
}
#endif