//===========================================================================
/*!
 *
 *
 * \brief       Pegasos solvers for linear SVMs
 *
 *
 *
 * \author      T. Glasmachers
 * \date        2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_ALGORITHMS_PEGASOS_H
#define SHARK_ALGORITHMS_PEGASOS_H


#include <shark/Core/Exception.h>
#include <shark/Data/Dataset.h>
#include <shark/LinAlg/Base.h>
#include <cmath>
#include <random>


namespace shark {


///
/// \brief Pegasos solver for linear (binary) support vector machines.
///
template <class VectorType>
class Pegasos
{
public:
    /// \brief Solve the primal SVM problem.
    ///
    /// In addition to "standard" Pegasos this solver checks a
    /// meaningful stopping criterion.
    ///
    /// The function returns the number of model predictions
    /// during training (this is comparable to SMO iterations).
    template <class WeightType>
    static std::size_t solve(
            LabeledData<VectorType, unsigned int> const& data,  ///< training data
            double C,                                            ///< SVM regularization parameter
            WeightType& w,                                       ///< weight vector
            std::size_t batchsize = 1,                           ///< number of samples in each gradient estimate
            double varepsilon = 0.001)                           ///< solution accuracy (factor by which the primal gradient should be reduced)
    {
        std::size_t ell = data.numberOfElements();
        double lambda = 1.0 / (ell * C);
        SHARK_ASSERT(batchsize > 0);

        double initialPrimal = 1.0;
        double normbound2 = initialPrimal / lambda;   // upper bound for |sigma * w|^2
        double norm_w2 = 0.0;                         // squared norm of w
        double sigma = 1.0;                           // scaling factor for w
        VectorType gradient(w.size());                // gradient (to be computed in each iteration)
        w = RealZeroVector(w.size());                 // clear() does not work on ublas matrix rows

        // uniform sampling of training points for the stochastic gradient steps
        std::default_random_engine rng;
        std::uniform_int_distribution<std::size_t> uniform(0, ell - 1);

        // pegasos main loop
        std::size_t start = 10;
        std::size_t checkinterval = (2 * ell) / batchsize;
        std::size_t nextcheck = start + ell / batchsize;
        std::size_t predictions = 0;
        for (std::size_t t=start; ; t++)
        {
            // check the stopping criterion: \|gradient\| < epsilon ?
            if (t >= nextcheck)
            {
                // compute the gradient of the primal objective
                // (scaled by the number of training points)
                gradient = (lambda * sigma * (double)ell) * w;
                for (std::size_t i=0; i<ell; i++)
                {
                    double f = sigma * inner_prod(w, data.element(i).input);
                    lg(data.element(i).input, data.element(i).label, f, gradient);
                }
                predictions += ell;

                // stop as soon as the per-example gradient norm
                // has dropped below the target accuracy
                double gradnorm = std::sqrt(inner_prod(gradient, gradient)) / (double)ell;
                if (gradnorm < varepsilon * initialPrimal) break;

                nextcheck = t + checkinterval;
            }

            // stochastic sub-gradient of the loss on a small batch
            gradient = RealZeroVector(w.size());
            bool nonzero = false;
            for (std::size_t b=0; b<batchsize; b++)
            {
                std::size_t i = uniform(rng);
                double f = sigma * inner_prod(w, data.element(i).input);
                if (lg(data.element(i).input, data.element(i).label, f, gradient)) nonzero = true;
                predictions++;
            }

            // gradient step with learning rate eta = 1 / (lambda * t);
            // the regularization part of the step only rescales sigma
            double eta = 1.0 / (lambda * t);
            sigma *= (1.0 - eta * lambda);
            if (nonzero)
            {
                w -= (eta / (sigma * batchsize)) * gradient;
                norm_w2 = inner_prod(w, w);

                // project back onto the ball known to contain the optimum
                double n2 = sigma * sigma * norm_w2;
                if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
            }
        }

        // rescale the solution
        w *= sigma;

        return predictions;
    }

protected:
    // gradient of the loss
    static bool lg(
            VectorType const& x,
            unsigned int y,
            double f,
            VectorType& gradient)
    {
        if (y == 0)
        {
            if (f > -1.0)
            {
                gradient += x;
                return true;
            }
        }
        else if (y == 1)
        {
            if (f < 1.0)
            {
                gradient -= x;
                return true;
            }
        }
        return false;
    }
};

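
// Note on the solver above: the weight vector is stored as sigma * w, so the
// multiplicative part of the Pegasos step
//     v  <-  (1 - eta_t * lambda) * v - (eta_t / batchsize) * g_t,   eta_t = 1 / (lambda * t),
// only touches the scalar sigma, and the projection onto the ball of radius
// sqrt(initialPrimal / lambda) also reduces to rescaling sigma.
//
// Usage sketch (illustration only): training a binary linear SVM with the
// solver above. The data source is a placeholder, and inputDimension() is
// assumed to be Shark's usual dataset dimension query; everything else is the
// interface defined in this file.
//
//   LabeledData<RealVector, unsigned int> data = ...;   // labels in {0, 1}
//   RealVector w(inputDimension(data));                 // zero-initialized by solve()
//   double C = 1.0;                                     // regularization parameter
//   std::size_t predictions = Pegasos<RealVector>::solve(data, C, w);
//   // decision function: f(x) = inner_prod(w, x); predict class 1 if f(x) > 0
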

///
/// \brief Pegasos solver for linear multi-class support vector machines.
///
template <class VectorType>
class McPegasos
{
public:
    /// \brief Multi-class margin type.
    enum eMarginType
    {
        emRelative,
        emAbsolute,
    };

    /// \brief Multi-class loss function type.
    enum eLossType
    {
        elNaiveHinge,
        elDiscriminativeMax,
        elDiscriminativeSum,
        elTotalMax,
        elTotalSum,
    };

    /// \brief Solve the primal multi-class SVM problem.
    ///
    /// In addition to "standard" Pegasos this solver checks a
    /// meaningful stopping criterion.
    ///
    /// The function returns the number of model predictions
    /// during training (this is comparable to SMO iterations).
    template <class WeightType>
    static std::size_t solve(
            LabeledData<VectorType, unsigned int> const& data,  ///< training data
            eMarginType margintype,                              ///< margin function type
            eLossType losstype,                                  ///< loss function type
            bool sumToZero,                                      ///< enforce the sum-to-zero constraint?
            double C,                                            ///< SVM regularization parameter
            std::vector<WeightType>& w,                          ///< class-wise weight vectors
            std::size_t batchsize = 1,                           ///< number of samples in each gradient estimate
            double varepsilon = 0.001)                           ///< solution accuracy (factor by which the primal gradient should be reduced)
    {
        SHARK_ASSERT(batchsize > 0);
        std::size_t ell = data.numberOfElements();
        unsigned int classes = w.size();
        SHARK_ASSERT(classes >= 2);
        double lambda = 1.0 / (ell * C);

        // select the loss gradient function and the primal value at w = 0
        double initialPrimal = -1.0;
        LossGradientFunction lg = NULL;
        if (margintype == emRelative)
        {
            if (losstype == elDiscriminativeMax || losstype == elTotalMax)
            {
                // CS case
                initialPrimal = 1.0;
                lg = lossGradientRDM;
            }
            else if (losstype == elDiscriminativeSum || losstype == elTotalSum)
            {
                // WW case
                initialPrimal = classes - 1.0;
                lg = lossGradientRDS;
            }
        }
        else if (margintype == emAbsolute)
        {
            if (losstype == elNaiveHinge)
            {
                // MMR case
                initialPrimal = 1.0;
                lg = lossGradientANH;
            }
            else if (losstype == elDiscriminativeMax)
            {
                // ADM case
                initialPrimal = 1.0;
                lg = lossGradientADM;
            }
            else if (losstype == elDiscriminativeSum)
            {
                // LLW case
                initialPrimal = classes - 1.0;
                lg = lossGradientADS;
            }
            else if (losstype == elTotalMax)
            {
                // ATM case
                initialPrimal = 1.0;
                lg = lossGradientATM;
            }
            else if (losstype == elTotalSum)
            {
                // ATS/OVA case
                initialPrimal = classes;
                lg = lossGradientATS;
            }
        }
        SHARK_RUNTIME_CHECK(initialPrimal > 0 && lg, "The combination of margin and loss is not implemented");

        double normbound2 = initialPrimal / lambda;   // upper bound for |sigma * w|^2
        double norm_w2 = 0.0;                         // squared norm of w
        double sigma = 1.0;                           // scaling factor for w
        double target = initialPrimal * varepsilon;   // target gradient norm
        std::vector<VectorType> gradient(classes);    // gradient (to be computed in each iteration)
        RealVector f(classes);                        // machine prediction (computed for each example)
        for (unsigned int c=0; c<classes; c++) w[c] = RealZeroVector(w[c].size());   // clear the weight vectors

        // uniform sampling of training points for the stochastic gradient steps
        std::default_random_engine rng;
        std::uniform_int_distribution<std::size_t> uniform(0, ell - 1);

        // pegasos main loop
        std::size_t start = 10;
        std::size_t checkinterval = (2 * ell) / batchsize;
        std::size_t nextcheck = start + ell / batchsize;
        std::size_t predictions = 0;
        for (std::size_t t=start; ; t++)
        {
            // check the stopping criterion
            if (t >= nextcheck)
            {
                // compute the gradient of the primal objective
                // (scaled by the number of training points)
                for (unsigned int c=0; c<classes; c++) gradient[c] = (lambda * sigma * (double)ell) * w[c];
                for (std::size_t i=0; i<ell; i++)
                {
                    VectorType const& x = data.element(i).input;
                    for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
                    lg(x, data.element(i).label, f, gradient, sumToZero);
                }
                predictions += ell;

                // stop as soon as the per-example gradient norm
                // has dropped below the target accuracy
                double norm2 = 0.0;
                for (unsigned int c=0; c<classes; c++) norm2 += inner_prod(gradient[c], gradient[c]);
                if (std::sqrt(norm2) / (double)ell < target) break;

                nextcheck = t + checkinterval;
            }

            // stochastic sub-gradient of the loss on a small batch
            for (unsigned int c=0; c<classes; c++) gradient[c] = RealZeroVector(w[c].size());
            bool nonzero = false;
            for (std::size_t b=0; b<batchsize; b++)
            {
                std::size_t i = uniform(rng);
                VectorType const& x = data.element(i).input;
                for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
                if (lg(x, data.element(i).label, f, gradient, sumToZero)) nonzero = true;
                predictions++;
            }

            // gradient step with learning rate eta = 1 / (lambda * t);
            // the regularization part of the step only rescales sigma
            double eta = 1.0 / (lambda * t);
            sigma *= (1.0 - eta * lambda);
            if (nonzero)
            {
                norm_w2 = 0.0;
                for (unsigned int c=0; c<classes; c++)
                {
                    w[c] -= (eta / (sigma * batchsize)) * gradient[c];
                    norm_w2 += inner_prod(w[c], w[c]);
                }

                // project back onto the ball known to contain the optimum
                double n2 = sigma * sigma * norm_w2;
                if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
            }
        }

        // rescale the solution
        for (unsigned int c=0; c<classes; c++) w[c] *= sigma;

        return predictions;
    }
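
    // Usage sketch (illustration only): training a Crammer-Singer style
    // multi-class SVM, i.e. a relative margin with a max loss, with the
    // sum-to-zero constraint enforced. The data source is a placeholder, and
    // numberOfClasses() / inputDimension() are assumed to be Shark's usual
    // dataset queries; the remaining names are the interface declared above.
    //
    //   LabeledData<RealVector, unsigned int> data = ...;   // labels in {0, ..., classes-1}
    //   std::size_t classes = numberOfClasses(data);
    //   std::vector<RealVector> w(classes, RealVector(inputDimension(data)));
    //   McPegasos<RealVector>::solve(
    //       data,
    //       McPegasos<RealVector>::emRelative,            // margin type
    //       McPegasos<RealVector>::elDiscriminativeMax,   // loss type
    //       true,                                         // sum-to-zero constraint
    //       1.0,                                          // C
    //       w);
    //   // predicted class of x: argmax over c of inner_prod(w[c], x)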

protected:
    // loss gradient function type
    typedef bool(*LossGradientFunction)(VectorType const&, unsigned int, RealVector const&, std::vector<VectorType>&, bool);

    // absolute margin, naive hinge loss
    static bool lossGradientANH(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        if (f(y) < 1.0)
        {
            gradient[y] -= x;
            if (sumToZero)
            {
                VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                for (std::size_t c=0; c<f.size(); c++) if (c != y) gradient[c] += xx;
            }
            return true;
        }
        else return false;
    }

    // relative margin, max loss
    static bool lossGradientRDM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        unsigned int argmax = 0;
        double max = -1e100;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y) continue;
            if (f(c) > max)
            {
                max = f(c);
                argmax = c;
            }
        }
        if (f(y) < 1.0 + max)
        {
            gradient[y] -= x;
            gradient[argmax] += x;
            return true;
        }
        else return false;
    }

    // relative margin, sum loss
    static bool lossGradientRDS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        // the update sums to zero by construction,
        // so sumToZero needs no special treatment here
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y) continue;
            if (f(y) - f(c) < 1.0)
            {
                gradient[y] -= x;
                gradient[c] += x;
                nonzero = true;
            }
        }
        return nonzero;
    }

    // absolute margin, discriminative sum loss
    static bool lossGradientADS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y) continue;
            if (f(c) > -1.0)
            {
                gradient[c] += x;
                nonzero = true;
            }
        }
        if (sumToZero && nonzero)
        {
            VectorType mean = gradient[0];
            for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
            mean /= (double)f.size();
            for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
        }
        return nonzero;
    }

    // absolute margin, discriminative max loss
    static bool lossGradientADM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        double max = -1e100;
        std::size_t argmax = 0;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y) continue;
            if (f(c) > max)
            {
                max = f(c);
                argmax = c;
            }
        }
        if (max > -1.0)
        {
            gradient[argmax] += x;
            if (sumToZero)
            {
                VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
            }
            return true;
        }
        else return false;
    }

    // absolute margin, total sum loss (OVA)
    static bool lossGradientATS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y)
            {
                if (f(c) < 1.0)
                {
                    gradient[c] -= x;
                    nonzero = true;
                }
            }
            else
            {
                if (f(c) > -1.0)
                {
                    gradient[c] += x;
                    nonzero = true;
                }
            }
        }
        if (sumToZero && nonzero)
        {
            VectorType mean = gradient[0];
            for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
            mean /= (double)f.size();
            for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
        }
        return nonzero;
    }

    // absolute margin, total max loss
    static bool lossGradientATM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        double max = -1e100;
        std::size_t argmax = 0;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y)
            {
                if (-f(c) > max)
                {
                    max = -f(c);
                    argmax = c;
                }
            }
            else
            {
                if (f(c) > max)
                {
                    max = f(c);
                    argmax = c;
                }
            }
        }
        if (max > -1.0)
        {
            if (argmax == y)
            {
                gradient[argmax] -= x;
                if (sumToZero)
                {
                    VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                    for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] += xx;
                }
            }
            else
            {
                gradient[argmax] += x;
                if (sumToZero)
                {
                    VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                    for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
                }
            }
            return true;
        }
        else return false;
    }
};


}
#endif