//=========================================================================== /*! * * * \brief Polynomial kernel. * * * * \author T.Glasmachers * \date 2011 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H #define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H #include namespace shark { /// \brief Polynomial kernel. /// /// \par /// The polynomial kernel is defined as /// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$ /// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset /// \f$ b \geq 0 \f$. For n=1 and b=0 it matches the linear kernel /// (standard inner product). template class PolynomialKernel : public AbstractKernelFunction { private: typedef AbstractKernelFunction base_type; struct InternalState: public State{ RealMatrix base;//stores the inner product of vectors x_1,x_j RealMatrix exponentedProd;//pow(innerProd,m_exponent) void resize(std::size_t sizeX1, std::size_t sizeX2){ base.resize(sizeX1, sizeX2); exponentedProd.resize(sizeX1, sizeX2); } }; public: typedef typename base_type::BatchInputType BatchInputType; typedef typename base_type::ConstInputReference ConstInputReference; typedef typename base_type::ConstBatchInputReference ConstBatchInputReference; /// Constructor. /// /// \param degree exponent of the polynomial /// \param offset constant added to the standard inner product /// \param degree_is_parameter should the degree be a regular model parameter? if yes, the kernel will not be differentiable /// \param unconstrained should the offset internally be represented as exponential of the externally visible parameter? PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false ) : m_degree( degree ), m_offset( offset ), m_degreeIsParam( degree_is_parameter ), m_unconstrained( unconstrained ) { SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive"); this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE; this->m_features |= base_type::SUPPORTS_VARIABLE_INPUT_SIZE; if ( !m_degreeIsParam ) this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE; } /// \brief From INameable: return the class name. std::string name() const { return "PolynomialKernel"; } void setDegree( unsigned int deg ) { RANGE_CHECK( deg > 0 ); SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter."); m_degree = deg; } unsigned int degree() const { return m_degree; } RealVector parameterVector() const { if ( m_degreeIsParam ) { RealVector ret(2); ret(0) = m_degree; if ( m_unconstrained ) ret(1) = std::log( m_offset ); else ret(1) = m_offset; return ret; } else { RealVector ret(1); if ( m_unconstrained ) ret(0) = std::log( m_offset ); else ret(0) = m_offset; return ret; } } void setParameterVector(RealVector const& newParameters) { if ( m_degreeIsParam ) { SIZE_CHECK(newParameters.size() == 2); SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits::epsilon()); RANGE_CHECK(newParameters(0) >= 1.0); m_degree = (unsigned int)newParameters(0); if ( m_unconstrained ) { m_offset = std::exp(newParameters(1)); } else { RANGE_CHECK(newParameters(1) >= 0.0); m_offset = newParameters(1); } } else { SIZE_CHECK(newParameters.size() == 1); if ( m_unconstrained ) { m_offset = std::exp(newParameters(0)); } else { RANGE_CHECK(newParameters(0) >= 0.0); m_offset = newParameters(0); } } } std::size_t numberOfParameters() const { if ( m_degreeIsParam ) return 2; else return 1; } ///\brief creates the internal state of the kernel boost::shared_ptr createState()const{ return boost::shared_ptr(new InternalState()); } /////////////////////////EVALUATION////////////////////////////// /// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$ double eval(ConstInputReference x1, ConstInputReference x2) const{ SIZE_CHECK(x1.size() == x2.size()); double base = inner_prod(x1, x2) + m_offset; return std::pow(base,m_degree); } void eval(ConstBatchInputReference batchX1,ConstBatchInputReference batchX2, RealMatrix& result) const { SIZE_CHECK(batchX1.size2() == batchX2.size2()); std::size_t sizeX1 = batchX1.size1(); std::size_t sizeX2 = batchX2.size1(); result.resize(sizeX1,sizeX2); //calculate the inner product noalias(result) = prod(batchX1,trans(batchX2)); result += m_offset; //now do exponentiation if(m_degree != 1) noalias(result) = pow(result,m_degree); } void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{ SIZE_CHECK(batchX1.size2() == batchX2.size2()); std::size_t sizeX1 = batchX1.size1(); std::size_t sizeX2 = batchX2.size1(); InternalState& s = state.toState(); s.resize(sizeX1,sizeX2); result.resize(sizeX1,sizeX2); //calculate the inner product noalias(s.base) = prod(batchX1,trans(batchX2)); s.base += m_offset; //now do exponentiation if(m_degree != 1) noalias(result) = pow(s.base,m_degree); else noalias(result) = s.base; noalias(s.exponentedProd) = result; } /////////////////////DERIVATIVES//////////////////////////////////// void weightedParameterDerivative( ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix const& coefficients, State const& state, RealVector& gradient ) const{ std::size_t sizeX1 = batchX1.size1(); std::size_t sizeX2 = batchX2.size1(); gradient.resize(1); InternalState const& s = state.toState(); //internal checks SIZE_CHECK(batchX1.size2() == batchX2.size2()); SIZE_CHECK(s.base.size1() == sizeX1); SIZE_CHECK(s.base.size2() == sizeX2); SIZE_CHECK(s.exponentedProd.size1() == sizeX1); SIZE_CHECK(s.exponentedProd.size2() == sizeX2); //m_degree == 1 is easy if(m_degree == 1){//result_ij/base_ij = 1 gradient(0) = sum(coefficients); if ( m_unconstrained ) gradient(0) *= m_offset; return; } gradient(0) = m_degree * sum(safe_div(s.exponentedProd,s.base,0.0) * coefficients); if ( m_unconstrained ) gradient(0) *= m_offset; } /// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$ ///
/// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot (\langle x_1, x_2 \rangle + b)^{n-1} \right] \cdot x_2 \f$ void weightedInputDerivative( ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix const& coefficientsX2, State const& state, BatchInputType& gradient ) const{ std::size_t sizeX1 = batchX1.size1(); std::size_t sizeX2 = batchX2.size1(); gradient.resize(sizeX1,batchX1.size2()); InternalState const& s = state.toState(); //internal checks SIZE_CHECK(batchX1.size2() == batchX2.size2()); SIZE_CHECK(s.base.size1() == sizeX1); SIZE_CHECK(s.base.size2() == sizeX2); SIZE_CHECK(s.exponentedProd.size1() == sizeX1); SIZE_CHECK(s.exponentedProd.size2() == sizeX2); SIZE_CHECK(coefficientsX2.size1() == sizeX1); SIZE_CHECK(coefficientsX2.size2() == sizeX2); //again m_degree == 1 is easy, as it is for the i-th row //just c_i X2; if(m_degree == 1){ noalias(gradient) = prod(coefficientsX2,batchX2); return; } //first calculate weights(i,j) = coeff(i)*exp(i,j)/prod(i,j) //we have to take the usual division by 0 into account RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd,s.base,0.0); //and the derivative of input i of batch x1 is //g = sum_j m_n*weights(i,j)*x2_j //we now sum over j which is a matrix-matrix product noalias(gradient) = m_degree * prod(weights,batchX2); } void read(InArchive& ar){ ar >> m_degree; ar >> m_offset; ar >> m_degreeIsParam; ar >> m_unconstrained; } void write(OutArchive& ar) const{ ar << m_degree; ar << m_offset; ar << m_degreeIsParam; ar << m_unconstrained; } protected: int m_degree; ///< exponent n double m_offset; ///< offset b bool m_degreeIsParam; ///< is the degree a model parameter? bool m_unconstrained; ///< is the degree internally represented as exponential of the parameter? }; typedef PolynomialKernel<> DensePolynomialKernel; typedef PolynomialKernel CompressedPolynomialKernel; } #endif