//===========================================================================
/*!
 *
 *
 * \brief       Radial Gaussian kernel
 *
 *
 *
 * \author      T. Glasmachers, O. Krause, M. Tuma
 * \date        2010-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H
#define SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>

namespace shark{

/// \brief Gaussian radial basis function kernel.
///
/// Gaussian radial basis function kernel
/// \f$ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f$
/// with single bandwidth parameter \f$ \gamma \f$.
/// Optionally, the parameter can be encoded as \f$ \exp(\eta) \f$,
/// which allows for unconstrained optimization.
template class GaussianRbfKernel : public AbstractKernelFunction { private: typedef AbstractKernelFunction base_type; struct InternalState: public State{ RealMatrix norm2; RealMatrix expNorm; void resize(std::size_t sizeX1, std::size_t sizeX2){ norm2.resize(sizeX1, sizeX2); expNorm.resize(sizeX1, sizeX2); } }; public: typedef typename base_type::BatchInputType BatchInputType; typedef typename base_type::ConstInputReference ConstInputReference; typedef typename base_type::ConstBatchInputReference ConstBatchInputReference; GaussianRbfKernel(double gamma = 1.0, bool unconstrained = false){ m_gamma = gamma; m_unconstrained = unconstrained; this->m_features|=base_type::HAS_FIRST_PARAMETER_DERIVATIVE; this->m_features|=base_type::HAS_FIRST_INPUT_DERIVATIVE; this->m_features|=base_type::IS_NORMALIZED; } /// \brief From INameable: return the class name. std::string name() const { return "GaussianRbfKernel"; } RealVector parameterVector() const{ RealVector ret(1); if (m_unconstrained){ ret(0) = std::log(m_gamma); } else{ ret(0) = m_gamma; } return ret; } void setParameterVector(RealVector const& newParameters){ SHARK_RUNTIME_CHECK(newParameters.size() == 1, "[GaussianRbfKernel::setParameterVector] invalid size of parameter vector"); if (m_unconstrained){ m_gamma = std::exp(newParameters(0)); } else{ SHARK_RUNTIME_CHECK(newParameters(0) > 0.0, "[GaussianRbfKernel::setParameterVector] gamma must be positive"); m_gamma = newParameters(0); } } size_t numberOfParameters() const { return 1; } /// Get the bandwidth parameter value. double gamma() const { return m_gamma; } /// Return ``standard deviation'' of Gaussian. double sigma() const{ return 1. / std::sqrt(2 * m_gamma); } /// Set the bandwidth parameter value. /// \throws shark::Exception if gamma <= 0. void setGamma(double gamma){ SHARK_RUNTIME_CHECK(gamma > 0.0, "[GaussianRbfKernel::setGamma] gamma must be positive"); m_gamma = gamma; } /// Set ``standard deviation'' of Gaussian. void setSigma(double sigma){ m_gamma = 1. 
/ (2 * sigma * sigma); } /// From ISerializable. void read(InArchive& ar){ ar >> m_gamma; ar >> m_unconstrained; } /// From ISerializable. void write(OutArchive& ar) const{ ar << m_gamma; ar << m_unconstrained; } ///\brief creates the internal state of the kernel boost::shared_ptr createState()const{ return boost::shared_ptr(new InternalState()); } /// \brief evaluates \f$ k(x_1,x_2)\f$ /// /// Gaussian radial basis function kernel /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f] double eval(ConstInputReference x1, ConstInputReference x2) const{ SIZE_CHECK(x1.size() == x2.size()); double norm2 = distanceSqr(x2, x1); double exponential = std::exp(-m_gamma * norm2); return exponential; } /// \brief evaluates \f$ k(x_1,x_2)\f$ and computes the intermediate value /// /// Gaussian radial basis function kernel /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f] void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{ SIZE_CHECK(batchX1.size2() == batchX2.size2()); std::size_t sizeX1=batchX1.size1(); std::size_t sizeX2=batchX2.size1(); //configure state memory InternalState& s=state.toState(); s.resize(sizeX1,sizeX2); //calculate kernel response noalias(s.norm2)=distanceSqr(batchX1,batchX2); noalias(s.expNorm)=exp(-m_gamma*s.norm2); result=s.expNorm; } void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{ SIZE_CHECK(batchX1.size2() == batchX2.size2()); result = distanceSqr(batchX1,batchX2); noalias(result)=exp(-m_gamma*result); } void weightedParameterDerivative( ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix const& coefficients, State const& state, RealVector& gradient ) const{ std::size_t sizeX1=batchX1.size1(); std::size_t sizeX2=batchX2.size1(); InternalState const& s = state.toState(); //internal checks SIZE_CHECK(batchX1.size2() == batchX2.size2()); SIZE_CHECK(s.norm2.size1() == sizeX1); 
SIZE_CHECK(s.norm2.size2() == sizeX2); SIZE_CHECK(s.expNorm.size1() == sizeX1); SIZE_CHECK(s.expNorm.size2() == sizeX2); gradient.resize(1); gradient(0)= - sum(coefficients *s.expNorm * s.norm2); if(m_unconstrained){ gradient *= m_gamma; } } void weightedInputDerivative( ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix const& coefficientsX2, State const& state, BatchInputType& gradient ) const{ std::size_t sizeX1=batchX1.size1(); std::size_t sizeX2=batchX2.size1(); InternalState const& s = state.toState(); //internal checks SIZE_CHECK(batchX1.size2() == batchX2.size2()); SIZE_CHECK(s.norm2.size1() == sizeX1); SIZE_CHECK(s.norm2.size2() == sizeX2); SIZE_CHECK(s.expNorm.size1() == sizeX1); SIZE_CHECK(s.expNorm.size2() == sizeX2); gradient.resize(sizeX1,batchX1.size2()); RealMatrix W = coefficientsX2*s.expNorm; noalias(gradient) = prod(W,batchX2); RealVector columnSum = sum_columns(coefficientsX2*s.expNorm); for(std::size_t i = 0; i != sizeX1; ++i){ noalias(row(gradient,i)) -= columnSum(i) * row(batchX1,i); } gradient*=2.0*m_gamma; } //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial \gamma}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial \gamma^2}\f$ //~ /// //~ /// Gaussian radial basis function kernel //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial \gamma} = - \| x_1 - x_2 \|^2 \cdot k(x_1,x_2) \f] //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial^2 \gamma^2} = \| x_1 - x_2 \|^4 \cdot k(x_1,x_2) \f] //~ void parameterDerivative(ConstInputReference x1, ConstInputReference x2, Intermediate const& intermediate, RealVector& gradient, RealMatrix& hessian) const{ //~ SIZE_CHECK(x1.size() == x2.size()); //~ SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2)); //~ double norm2 = intermediate[0]; //~ double exponential = intermediate[1]; //~ gradient.resize(1); //~ hessian.resize(1, 1); //~ if (!m_unconstrained){ //~ gradient(0) = -exponential * norm2; //~ hessian(0, 0) = -gradient(0) * norm2; //~ } //~ else{ 
//~ gradient(0) = -exponential * norm2 * m_gamma; //~ hessian(0, 0) = -gradient(0) * norm2 * m_gamma; //~ } //~ } //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial x_1}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial x_1^2}\f$ //~ /// //~ /// Gaussian radial basis function kernel //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial x_1} = -2 \gamma \left( x_1 - x_2 \right)\cdot k(x_1,x_2) \f] //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial^2 x_1^2} =2 \gamma \left[ -k(x_1,x_2) \cdot \mathbb{I} - \frac {\partial k(x_1,x_2)}{\partial x_1} ( x_1 - x_2 )^T\right] \f] //~ void inputDerivative(const InputType& x1, const InputType& x2, Intermediate const& intermediate, InputType& gradient, InputMatrixType& hessian) const{ //~ SIZE_CHECK(x1.size() == x2.size()); //~ SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2)); //~ double exponential = intermediate[1]; //~ gradient.resize(x1.size()); //~ noalias(gradient) = (2.0 * m_gamma * exponential) * (x2 - x1); //~ hessian.resize(x1.size(), x1.size()); //~ noalias(hessian) = 2*m_gamma*outer_prod(gradient,x2 - x1) //~ - RealIdentityMatrix(x1.size())*2*m_gamma*exponential; //~ } protected: double m_gamma; ///< kernel bandwidth parameter bool m_unconstrained; ///< use log storage }; typedef GaussianRbfKernel<> DenseRbfKernel; typedef GaussianRbfKernel CompressedRbfKernel; } #endif