//===========================================================================
/*!
*
*
* \brief Polynomial kernel.
*
*
*
* \author T. Glasmachers
* \date 2011
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#include <shark/Models/Kernels/AbstractKernelFunction.h>
namespace shark {
/// \brief Polynomial kernel.
///
/// \par
/// The polynomial kernel is defined as
/// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$
/// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset
/// \f$ b \geq 0 \f$. For n=1 and b=0 it matches the linear kernel
/// (standard inner product).
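///
/// \par
/// A minimal usage sketch (assuming shark's RealVector type; evaluation
/// goes through AbstractKernelFunction's operator()):
/// \code
/// PolynomialKernel<> kernel(2, 1.0); // k(x, y) = (<x, y> + 1)^2
/// RealVector x(2), y(2);
/// x(0) = 1.0; x(1) = 2.0;
/// y(0) = 3.0; y(1) = 4.0;
/// double k = kernel(x, y);           // (1*3 + 2*4 + 1)^2 = 144
/// \endcode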
template<class InputType = RealVector>
class PolynomialKernel : public AbstractKernelFunction<InputType>
{
private:
typedef AbstractKernelFunction<InputType> base_type;
struct InternalState: public State{
RealMatrix base;//stores the inner products <x1_i, x2_j> plus the offset
RealMatrix exponentedProd;//pow(base, m_degree)
void resize(std::size_t sizeX1, std::size_t sizeX2){
base.resize(sizeX1, sizeX2);
exponentedProd.resize(sizeX1, sizeX2);
}
};
public:
typedef typename base_type::BatchInputType BatchInputType;
typedef typename base_type::ConstInputReference ConstInputReference;
typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;
/// Constructor.
///
/// \param degree exponent of the polynomial
/// \param offset constant added to the standard inner product
/// \param degree_is_parameter should the degree be a regular model parameter? If yes, the kernel is not differentiable with respect to its parameters, since the degree is discrete.
/// \param unconstrained should the offset internally be represented as exponential of the externally visible parameter?
PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false )
: m_degree( degree ),
m_offset( offset ),
m_degreeIsParam( degree_is_parameter ),
m_unconstrained( unconstrained ) {
SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive");
this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
this->m_features |= base_type::SUPPORTS_VARIABLE_INPUT_SIZE;
if ( !m_degreeIsParam )
this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
}
/// \brief From INameable: return the class name.
std::string name() const
{ return "PolynomialKernel"; }
void setDegree( unsigned int deg ) {
RANGE_CHECK( deg > 0 );
SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter.");
m_degree = deg;
}
unsigned int degree() const {
return m_degree;
}
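/// \brief Returns the parameter vector of the kernel.
///
/// If the degree is treated as a parameter, the vector is (degree, offset),
/// otherwise it holds only the offset. In unconstrained mode the offset is
/// encoded as its logarithm.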
RealVector parameterVector() const {
if ( m_degreeIsParam ) {
RealVector ret(2);
ret(0) = m_degree;
if ( m_unconstrained )
ret(1) = std::log( m_offset );
else
ret(1) = m_offset;
return ret;
} else {
RealVector ret(1);
if ( m_unconstrained )
ret(0) = std::log( m_offset );
else
ret(0) = m_offset;
return ret;
}
}
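/// \brief Sets the parameters of the kernel, using the same encoding as parameterVector().
///
/// A minimal sketch of the encoding (values are illustrative only):
/// \code
/// PolynomialKernel<> kernel(2, 1.0, true, false); // degree and offset are parameters
/// RealVector p(2);
/// p(0) = 3.0; // new degree n = 3 (must be a positive integer)
/// p(1) = 0.5; // new offset b = 0.5 (must be non-negative)
/// kernel.setParameterVector(p);
/// \endcode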
void setParameterVector(RealVector const& newParameters) {
if ( m_degreeIsParam ) {
SIZE_CHECK(newParameters.size() == 2);
SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits<double>::epsilon());
RANGE_CHECK(newParameters(0) >= 1.0);
m_degree = (unsigned int)newParameters(0);
if ( m_unconstrained ) {
m_offset = std::exp(newParameters(1));
} else {
RANGE_CHECK(newParameters(1) >= 0.0);
m_offset = newParameters(1);
}
} else {
SIZE_CHECK(newParameters.size() == 1);
if ( m_unconstrained ) {
m_offset = std::exp(newParameters(0));
} else {
RANGE_CHECK(newParameters(0) >= 0.0);
m_offset = newParameters(0);
}
}
}
std::size_t numberOfParameters() const {
if ( m_degreeIsParam )
return 2;
else
return 1;
}
///\brief creates the internal state of the kernel
boost::shared_ptr<State> createState()const{
return boost::shared_ptr<State>(new InternalState());
}
/////////////////////////EVALUATION//////////////////////////////
/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
double eval(ConstInputReference x1, ConstInputReference x2) const{
SIZE_CHECK(x1.size() == x2.size());
double base = inner_prod(x1, x2) + m_offset;
return std::pow(base,m_degree);
}
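/// \brief Evaluates the kernel for two batches of inputs; result(i, j) = k(x1_i, x2_j).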
void eval(ConstBatchInputReference batchX1,ConstBatchInputReference batchX2, RealMatrix& result) const {
SIZE_CHECK(batchX1.size2() == batchX2.size2());
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
result.resize(sizeX1,sizeX2);
//calculate the inner product
noalias(result) = prod(batchX1,trans(batchX2));
result += m_offset;
//now do exponentiation
if(m_degree != 1)
noalias(result) = pow(result,m_degree);
}
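/// \brief Evaluates the kernel for two batches of inputs and stores the
/// intermediate values needed by the derivative functions in the state object.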
void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
SIZE_CHECK(batchX1.size2() == batchX2.size2());
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
InternalState& s = state.toState<InternalState>();
s.resize(sizeX1,sizeX2);
result.resize(sizeX1,sizeX2);
//calculate the inner product
noalias(s.base) = prod(batchX1,trans(batchX2));
s.base += m_offset;
//now do exponentiation
if(m_degree != 1)
noalias(result) = pow(s.base,m_degree);
else
noalias(result) = s.base;
noalias(s.exponentedProd) = result;
}
/////////////////////DERIVATIVES////////////////////////////////////
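/// \brief Weighted derivative of the kernel with respect to the offset parameter b.
///
/// \f$ \frac{\partial k(x_1, x_2)}{\partial b} = n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \f$
///
/// In unconstrained mode the encoded parameter is \f$ \log b \f$, so the chain
/// rule contributes an additional factor of \f$ b \f$.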
void weightedParameterDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficients,
State const& state,
RealVector& gradient
) const{
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
gradient.resize(1);
InternalState const& s = state.toState<InternalState>();
//internal checks
SIZE_CHECK(batchX1.size2() == batchX2.size2());
SIZE_CHECK(s.base.size1() == sizeX1);
SIZE_CHECK(s.base.size2() == sizeX2);
SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
//m_degree == 1 is easy: d k(x_i, x_j) / d b = 1
if(m_degree == 1){
gradient(0) = sum(coefficients);
if ( m_unconstrained )
gradient(0) *= m_offset;
return;
}
gradient(0) = m_degree * sum(safe_div(s.exponentedProd,s.base,0.0) * coefficients);
if ( m_unconstrained )
gradient(0) *= m_offset;
}
/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
///
/// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot (\langle x_1, x_2 \rangle + b)^{n-1} \right] \cdot x_2 \f$
void weightedInputDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficientsX2,
State const& state,
BatchInputType& gradient
) const{
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
gradient.resize(sizeX1,batchX1.size2());
InternalState const& s = state.toState<InternalState>();
//internal checks
SIZE_CHECK(batchX1.size2() == batchX2.size2());
SIZE_CHECK(s.base.size1() == sizeX1);
SIZE_CHECK(s.base.size2() == sizeX2);
SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
SIZE_CHECK(coefficientsX2.size1() == sizeX1);
SIZE_CHECK(coefficientsX2.size2() == sizeX2);
//again m_degree == 1 is easy: the i-th row of the gradient
//is just sum_j coefficients(i,j) * x2_j
if(m_degree == 1){
noalias(gradient) = prod(coefficientsX2,batchX2);
return;
}
//first calculate weights(i,j) = coefficients(i,j)*exponentedProd(i,j)/base(i,j);
//we have to take the usual division by 0 into account
RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd,s.base,0.0);
//the derivative for input i of batch x1 is then
//g_i = sum_j m_degree*weights(i,j)*x2_j
//we now sum over j which is a matrix-matrix product
noalias(gradient) = m_degree * prod(weights,batchX2);
}
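/// From ISerializable: read the kernel from an archive.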
void read(InArchive& ar){
ar >> m_degree;
ar >> m_offset;
ar >> m_degreeIsParam;
ar >> m_unconstrained;
}
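/// From ISerializable: write the kernel to an archive.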
void write(OutArchive& ar) const{
ar << m_degree;
ar << m_offset;
ar << m_degreeIsParam;
ar << m_unconstrained;
}
protected:
unsigned int m_degree; ///< exponent n
double m_offset; ///< offset b
bool m_degreeIsParam; ///< is the degree a model parameter?
bool m_unconstrained; ///< is the offset internally represented as exponential of the parameter?
};
typedef PolynomialKernel<> DensePolynomialKernel;
typedef PolynomialKernel<CompressedRealVector> CompressedPolynomialKernel;
}
#endif