//===========================================================================
/*!
*
*
* \brief Pegasos solvers for linear SVMs
*
*
*
* \author T. Glasmachers
* \date 2012
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_ALGORITHMS_PEGASOS_H
#define SHARK_ALGORITHMS_PEGASOS_H
#include <shark/LinAlg/Base.h>
#include <shark/Data/Dataset.h>
#include <shark/Rng/GlobalRng.h>
#include <cmath>
#include <vector>
namespace shark {
///
/// \brief Pegasos solver for linear (binary) support vector machines.
///
template <class VectorType>
class Pegasos
{
public:
/// \brief Solve the primal SVM problem.
///
/// In addition to "standard" Pegasos this solver checks a
/// meaningful stopping criterion.
///
/// The function returns the number of model predictions
/// during training (this is comparable to SMO iterations).
template <class WeightType>
static std::size_t solve(
LabeledData<VectorType, unsigned int> const& data, ///< training data
double C, ///< SVM regularization parameter
WeightType& w, ///< weight vector
std::size_t batchsize = 1, ///< number of samples in each gradient estimate
double varepsilon = 0.001) ///< solution accuracy (factor by which the primal gradient should be reduced)
{
std::size_t ell = data.numberOfElements();
double lambda = 1.0 / (ell * C);
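// note: with lambda = 1/(ell*C) the Pegasos objective
//   lambda/2 * |w|^2 + (1/ell) * sum_i loss_i(w)
// equals the standard C-SVM primal 1/2 * |w|^2 + C * sum_i loss_i(w), scaled by 1/(ell*C)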
SHARK_ASSERT(batchsize > 0);
double initialPrimal = 1.0;
double normbound2 = initialPrimal / lambda; // upper bound for |sigma * w|^2
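// (at w = 0 the primal equals initialPrimal, so, following the analysis in the Pegasos paper,
// the optimum lies in the ball of radius sqrt(initialPrimal / lambda); the scaled iterate
// sigma * w is projected back onto this ball after each update)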
double norm_w2 = 0.0; // squared norm of w
double sigma = 1.0; // scaling factor for w
VectorType gradient(w.size()); // gradient (to be computed in each iteration)
w = RealZeroVector(w.size()); // clear does not work on matrix rows (ublas sucks!)
// pegasos main loop
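// (the stopping test below needs a full sweep over the data, so it is evaluated only every
// checkinterval iterations, i.e. roughly once per two passes worth of sampled examples)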
std::size_t start = 10;
std::size_t checkinterval = (2 * ell) / batchsize;
std::size_t nextcheck = start + ell / batchsize;
std::size_t predictions = 0;
for (std::size_t t=start; ; t++)
{
// check the stopping criterion: \|gradient\| < epsilon ?
if (t >= nextcheck)
{
// compute the gradient
gradient = (lambda * sigma * (double)ell) * w;
for (std::size_t i=0; i<ell; i++)
{
double f = sigma * inner_prod(w, data.element(i).input);
lg(data.element(i).input, data.element(i).label, f, gradient);
}
predictions += ell;
// stop as soon as the norm of the full primal gradient is small enough
if (std::sqrt(inner_prod(gradient, gradient)) / (double)ell < varepsilon * initialPrimal) break;
nextcheck = t + checkinterval;
}
// stochastic gradient step on a randomly drawn mini-batch
double eta = 1.0 / (lambda * t);
gradient = RealZeroVector(w.size());
bool nonzero = false;
for (std::size_t b=0; b<batchsize; b++)
{
std::size_t active = Rng::discrete(0, ell - 1);
double f = sigma * inner_prod(w, data.element(active).input);
predictions++;
if (lg(data.element(active).input, data.element(active).label, f, gradient)) nonzero = true;
}
sigma *= (1.0 - 1.0 / (double)t);
if (nonzero)
{
w -= (eta / (sigma * (double)batchsize)) * gradient;
norm_w2 = inner_prod(w, w);
// project the implicitly scaled weight vector back onto the ball of radius sqrt(normbound2)
double n2 = sigma * sigma * norm_w2;
if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
}
}
// rescale the solution
w *= sigma;
return predictions;
}
protected:
// (sub)gradient of the hinge loss; returns true iff the example violates the margin
// (label 0 is treated as class -1, label 1 as class +1)
static bool lg(
VectorType const& x,
unsigned int y,
double f,
VectorType& gradient)
{
if (y == 0)
{
if (f > -1.0)
{
gradient += x;
return true;
}
}
else if (y == 1)
{
if (f < 1.0)
{
gradient -= x;
return true;
}
}
return false;
}
};
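// Usage sketch (illustrative only; assumes a binary LabeledData<RealVector, unsigned int>
// named "training" with labels in {0,1} and a dense weight vector):
//
//   RealVector w(inputDimension(training), 0.0);
//   std::size_t predictions = Pegasos<RealVector>::solve(training, 1.0, w);
//   // decision function: f(x) = inner_prod(w, x); predict class 1 if f(x) > 0, class 0 otherwise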
///
/// \brief Pegasos solver for linear multi-class support vector machines.
///
template <class VectorType>
class McPegasos
{
public:
/// \brief Multi-class margin type.
enum eMarginType
{
emRelative,
emAbsolute,
};
/// \brief Multi-class loss function type.
enum eLossType
{
elNaiveHinge,
elDiscriminativeMax,
elDiscriminativeSum,
elTotalMax,
elTotalSum,
};
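// The margin/loss combinations accepted by solve() correspond to well-known multi-class SVMs:
//   emRelative + elDiscriminativeMax / elTotalMax -> Crammer-Singer (CS)
//   emRelative + elDiscriminativeSum / elTotalSum -> Weston-Watkins (WW)
//   emAbsolute + elNaiveHinge                     -> MMR
//   emAbsolute + elDiscriminativeMax              -> ADM
//   emAbsolute + elDiscriminativeSum              -> LLW
//   emAbsolute + elTotalMax                       -> ATM
//   emAbsolute + elTotalSum                       -> ATS / one-versus-all (OVA)
// All other combinations are rejected at runtime.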
/// \brief Solve the primal multi-class SVM problem.
///
/// In addition to "standard" Pegasos this solver checks a
/// meaningful stopping criterion.
///
/// The function returns the number of model predictions
/// during training (this is comparable to SMO iterations).
template <class WeightType>
static std::size_t solve(
LabeledData<VectorType, unsigned int> const& data, ///< training data
eMarginType margintype, ///< margin function type
eLossType losstype, ///< loss function type
bool sumToZero, ///< enforce the sum-to-zero constraint?
double C, ///< SVM regularization parameter
std::vector<WeightType>& w, ///< class-wise weight vectors
std::size_t batchsize = 1, ///< number of samples in each gradient estimate
double varepsilon = 0.001) ///< solution accuracy (factor by which the primal gradient should be reduced)
{
SHARK_ASSERT(batchsize > 0);
std::size_t ell = data.numberOfElements();
unsigned int classes = w.size();
SHARK_ASSERT(classes >= 2);
double lambda = 1.0 / (ell * C);
double initialPrimal = -1.0;
LossGradientFunction lg = NULL;
if (margintype == emRelative)
{
if (losstype == elDiscriminativeMax || losstype == elTotalMax)
{
// CS case
initialPrimal = 1.0;
lg = lossGradientRDM;
}
else if (losstype == elDiscriminativeSum || losstype == elTotalSum)
{
// WW case
initialPrimal = classes - 1.0;
lg = lossGradientRDS;
}
}
else if (margintype == emAbsolute)
{
if (losstype == elNaiveHinge)
{
// MMR case
initialPrimal = 1.0;
lg = lossGradientANH;
}
else if (losstype == elDiscriminativeMax)
{
// ADM case
initialPrimal = 1.0;
lg = lossGradientADM;
}
else if (losstype == elDiscriminativeSum)
{
// LLW case
initialPrimal = classes - 1.0;
lg = lossGradientADS;
}
else if (losstype == elTotalMax)
{
// ATM case
initialPrimal = 1.0;
lg = lossGradientATM;
}
else if (losstype == elTotalSum)
{
// ATS/OVA case
initialPrimal = classes;
lg = lossGradientATS;
}
}
SHARK_RUNTIME_CHECK(initialPrimal > 0 && lg, "The combination of margin and loss is not implemented");
double normbound2 = initialPrimal / lambda; // upper bound for |sigma * w|^2
double norm_w2 = 0.0; // squared norm of w
double sigma = 1.0; // scaling factor for w
double target = initialPrimal * varepsilon; // target gradient norm
std::vector<VectorType> gradient(classes); // gradient (to be computed in each iteration)
RealVector f(classes); // machine prediction (computed for each example)
for (unsigned int c=0; c<classes; c++)
{
w[c] = RealZeroVector(w[c].size()); // clear, as in the binary solver
gradient[c] = RealZeroVector(w[c].size());
}
// pegasos main loop
std::size_t start = 10;
std::size_t checkinterval = (2 * ell) / batchsize;
std::size_t nextcheck = start + ell / batchsize;
std::size_t predictions = 0;
for (std::size_t t=start; ; t++)
{
// check the stopping criterion: \|gradient\| < epsilon ?
if (t >= nextcheck)
{
// compute the gradient
for (unsigned int c=0; c<classes; c++) gradient[c] = (lambda * sigma * (double)ell) * w[c];
for (std::size_t i=0; i<ell; i++)
{
for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], data.element(i).input);
lg(data.element(i).input, data.element(i).label, f, gradient, sumToZero);
}
predictions += ell;
// stop as soon as the norm of the full primal gradient is small enough
double g2 = 0.0;
for (unsigned int c=0; c<classes; c++) g2 += inner_prod(gradient[c], gradient[c]);
if (std::sqrt(g2) / (double)ell < target) break;
nextcheck = t + checkinterval;
}
// stochastic gradient step on a randomly drawn mini-batch
double eta = 1.0 / (lambda * t);
for (unsigned int c=0; c<classes; c++) gradient[c] = RealZeroVector(w[c].size());
bool nonzero = false;
for (std::size_t b=0; b<batchsize; b++)
{
std::size_t active = Rng::discrete(0, ell - 1);
for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], data.element(active).input);
predictions++;
if (lg(data.element(active).input, data.element(active).label, f, gradient, sumToZero)) nonzero = true;
}
sigma *= (1.0 - 1.0 / (double)t);
if (nonzero)
{
norm_w2 = 0.0;
for (unsigned int c=0; c<classes; c++)
{
w[c] -= (eta / (sigma * (double)batchsize)) * gradient[c];
norm_w2 += inner_prod(w[c], w[c]);
}
// project the implicitly scaled weight vectors back onto the ball of radius sqrt(normbound2)
double n2 = sigma * sigma * norm_w2;
if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
}
}
// rescale the solution
for (unsigned int c=0; c<classes; c++) w[c] *= sigma;
return predictions;
}
protected:
// type of the loss gradient computation, depending on margin and loss
typedef bool (*LossGradientFunction)(VectorType const&, unsigned int, RealVector const&, std::vector<VectorType>&, bool);
// absolute margin, naive hinge loss
static bool lossGradientANH(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
if (f(y) < 1.0)
{
gradient[y] -= x;
if (sumToZero)
{
VectorType xx = (1.0 / (f.size() - 1.0)) * x;
for (std::size_t c=0; c<f.size(); c++) if (c != y) gradient[c] += xx;
}
return true;
}
else return false;
}
// relative margin, max loss
static bool lossGradientRDM(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
unsigned int argmax = 0;
double max = -1e100;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y) continue;
if (f(c) > max)
{
max = f(c);
argmax = c;
}
}
if (f(y) < 1.0 + max)
{
gradient[y] -= x;
gradient[argmax] += x;
return true;
}
else return false;
}
// relative margin, sum loss
static bool lossGradientRDS(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
bool nonzero = false;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y) continue;
if (f(y) < 1.0 + f(c))
{
gradient[y] -= x;
gradient[c] += x;
nonzero = true;
}
}
return nonzero;
}
// absolute margin, discriminative sum loss
static bool lossGradientADS(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
bool nonzero = false;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y) continue;
if (f(c) > -1.0)
{
gradient[c] += x;
nonzero = true;
}
}
if (sumToZero && nonzero)
{
VectorType mean = gradient[0];
for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
mean /= (double)f.size();
for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
}
return nonzero;
}
// absolute margin, discriminative max loss
static bool lossGradientADM(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
double max = -1e100;
std::size_t argmax = 0;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y) continue;
if (f(c) > max)
{
max = f(c);
argmax = c;
}
}
if (max > -1.0)
{
gradient[argmax] += x;
if (sumToZero)
{
VectorType xx = (1.0 / (f.size() - 1.0)) * x;
for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
}
return true;
}
else return false;
}
// absolute margin, total sum loss
static bool lossGradientATS(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
bool nonzero = false;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y)
{
if (f(y) < 1.0)
{
gradient[y] -= x;
nonzero = true;
}
}
else
{
if (f(c) > -1.0)
{
gradient[c] += x;
nonzero = true;
}
}
}
if (sumToZero && nonzero)
{
VectorType mean = gradient[0];
for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
mean /= (double)f.size();
for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
}
return nonzero;
}
// absolute margin, total max loss
static bool lossGradientATM(
VectorType const& x,
unsigned int y,
RealVector const& f,
std::vector<VectorType>& gradient,
bool sumToZero)
{
double max = -1e100;
std::size_t argmax = 0;
for (std::size_t c=0; c<f.size(); c++)
{
if (c == y)
{
if (-f(c) > max)
{
max = -f(c);
argmax = c;
}
}
else
{
if (f(c) > max)
{
max = f(c);
argmax = c;
}
}
}
if (max > -1.0)
{
if (argmax == y)
{
gradient[argmax] -= x;
if (sumToZero)
{
VectorType xx = (1.0 / (f.size() - 1.0)) * x;
for (std::size_t c=0; c<f.size(); c++) if (c != y) gradient[c] += xx;
}
}
else
{
gradient[argmax] += x;
if (sumToZero)
{
VectorType xx = (1.0 / (f.size() - 1.0)) * x;
for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
}
}
return true;
}
else return false;
}
};
}
#endif