//===========================================================================
/*!
 *
 * \brief       Kernel applying a model as input transformation to another kernel.
 *
 *
 * \author      T. Glasmachers
 * \date        2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_MODEL_KERNEL_H
#define SHARK_MODELS_KERNELS_MODEL_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>
#include <shark/Models/AbstractModel.h>
#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>

namespace shark {
namespace detail{

template<class InputType, class IntermediateType>
class ModelKernelImpl : public AbstractKernelFunction<InputType>
{
private:
	typedef AbstractKernelFunction<InputType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ConstInputReference ConstInputReference;
	typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;
	typedef AbstractKernelFunction<IntermediateType> Kernel;
	typedef AbstractModel<InputType,IntermediateType> Model;
private:
	struct InternalState: public State{
		boost::shared_ptr<State> kernelStateX1X2;
		boost::shared_ptr<State> kernelStateX2X1;
		boost::shared_ptr<State> modelStateX1;
		boost::shared_ptr<State> modelStateX2;
		typename Model::BatchOutputType intermediateX1;
		typename Model::BatchOutputType intermediateX2;
	};
public:
	ModelKernelImpl(Kernel* kernel, Model* model):mpe_kernel(kernel),mpe_model(model){
		//the chain rule needs the kernel's input derivative in addition to both
		//parameter derivatives, so only then can the combined derivative be offered
		if(kernel->hasFirstParameterDerivative()
		&& kernel->hasFirstInputDerivative()
		&& model->hasFirstParameterDerivative())
			this->m_features|=base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ModelKernel"; }

	std::size_t numberOfParameters()const{
		return mpe_kernel->numberOfParameters() + mpe_model->numberOfParameters();
	}
	RealVector parameterVector() const{
		return mpe_kernel->parameterVector() | mpe_model->parameterVector();
	}
	void setParameterVector(RealVector const& newParameters){
		SIZE_CHECK(newParameters.size() == numberOfParameters());
		std::size_t kParams = mpe_kernel->numberOfParameters();
		mpe_kernel->setParameterVector(subrange(newParameters,0,kParams));
		mpe_model->setParameterVector(subrange(newParameters,kParams,newParameters.size()));
	}
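
	// Note: the combined parameter vector concatenates the kernel's parameters
	// first, followed by the model's. setParameterVector() above splits an
	// incoming vector accordingly, so an optimizer can treat kernel and model
	// parameters as a single flat vector.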

	boost::shared_ptr<State> createState()const{
		InternalState* s = new InternalState();
		//create the shared pointer now so that the destructor is called in case of an exception below
		boost::shared_ptr<State> sharedState(s);
		s->kernelStateX1X2 = mpe_kernel->createState();
		s->kernelStateX2X1 = mpe_kernel->createState();
		s->modelStateX1 = mpe_model->createState();
		s->modelStateX2 = mpe_model->createState();
		return sharedState;
	}

	double eval(ConstInputReference x1, ConstInputReference x2) const{
		auto mx1 = (*mpe_model)(x1);
		auto mx2 = (*mpe_model)(x2);
		return mpe_kernel->eval(mx1,mx2);
	}

	void eval(ConstBatchInputReference x1, ConstBatchInputReference x2, RealMatrix& result, State& state) const{
		InternalState& s = state.toState<InternalState>();
		mpe_model->eval(x1,s.intermediateX1,*s.modelStateX1);
		mpe_model->eval(x2,s.intermediateX2,*s.modelStateX2);
		mpe_kernel->eval(s.intermediateX2,s.intermediateX1,result,*s.kernelStateX2X1);
		mpe_kernel->eval(s.intermediateX1,s.intermediateX2,result,*s.kernelStateX1X2);
	}

	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
		auto mx1 = (*mpe_model)(batchX1);
		auto mx2 = (*mpe_model)(batchX2);
		mpe_kernel->eval(mx1,mx2,result);
	}
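
	// Note: the stateful eval above evaluates the inner kernel in both argument
	// orders. weightedInputDerivative() of the kernel interface differentiates
	// only with respect to its first argument, so states for both
	// k(f(x1),f(x2)) and k(f(x2),f(x1)) are needed to assemble the chain rule in
	// weightedParameterDerivative() below; the second eval call overwrites
	// result with the matrix in the requested (X1,X2) order.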

	void weightedParameterDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficients,
		State const& state,
		RealVector& gradient
	) const{
		gradient.resize(numberOfParameters());
		InternalState const& s = state.toState<InternalState>();

		//compute derivative of the kernel wrt. its parameters
		RealVector kernelGrad;
		mpe_kernel->weightedParameterDerivative(
			s.intermediateX1,s.intermediateX2,
			coefficients,*s.kernelStateX1X2,kernelGrad
		);
		//compute derivative of the kernel wrt. its left and right input arguments
		typename Model::BatchOutputType inputDerivativeX1, inputDerivativeX2;
		mpe_kernel->weightedInputDerivative(
			s.intermediateX1,s.intermediateX2,
			coefficients,*s.kernelStateX1X2,inputDerivativeX1
		);
		mpe_kernel->weightedInputDerivative(
			s.intermediateX2,s.intermediateX1,
			trans(coefficients),*s.kernelStateX2X1,inputDerivativeX2
		);

		//propagate the kernel's input derivatives through the model to obtain
		//the derivative wrt. the model parameters
		RealVector modelGradX1,modelGradX2;
		mpe_model->weightedParameterDerivative(batchX1,s.intermediateX1,inputDerivativeX1,*s.modelStateX1,modelGradX1);
		mpe_model->weightedParameterDerivative(batchX2,s.intermediateX2,inputDerivativeX2,*s.modelStateX2,modelGradX2);

		noalias(gradient) = kernelGrad | (modelGradX1+modelGradX2);
	}

	void read(InArchive& ar){
		SHARK_RUNTIME_CHECK(mpe_kernel, "The kernel function is NULL, the kernel needs to be constructed prior to reading it in");
		SHARK_RUNTIME_CHECK(mpe_model, "The model is NULL, the model needs to be constructed prior to reading it in");
		ar >> *mpe_kernel;
		ar >> *mpe_model;
	}

	void write(OutArchive& ar) const{
		ar << *mpe_kernel;
		ar << *mpe_model;
	}
private:
	Kernel* mpe_kernel;
	Model* mpe_model;
};
}


/// \brief Kernel function that uses a model as transformation function for another kernel.
///
/// Using an AbstractModel \f$ f: X \rightarrow X' \f$ and an inner kernel
/// \f$ k: X' \times X' \rightarrow \mathbb{R} \f$, this class defines the kernel
/// \f$ K: X \times X \rightarrow \mathbb{R} \f$ via
/// \f[
///	K(x,y) = k(f(x),f(y)).
///\f]
/// If the inner kernel \f$ k \f$ supports both the input and the parameter derivative
/// and the model also supports the parameter derivative, then \f$ K \f$ supports the
/// first parameter derivative as well, given by the chain rule as
/// \f[
///	\frac{\partial}{\partial \theta} K(x,y) =
///	\frac{\partial}{\partial f(x)} k(f(x),f(y)) \frac{\partial}{\partial \theta} f(x)
///	+ \frac{\partial}{\partial f(y)} k(f(x),f(y)) \frac{\partial}{\partial \theta} f(y).
///\f]
/// This requires the derivative of the kernel with respect to both of its inputs
/// which, due to a limitation of the current kernel interface, requires computing
/// both \f$ k(f(x),f(y)) \f$ and \f$ k(f(y),f(x)) \f$.
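///
/// \par Example
/// A minimal usage sketch. The concrete model and kernel types are chosen for
/// illustration only; any AbstractModel / AbstractKernelFunction pair with a
/// matching intermediate type combines the same way. Note that ModelKernel
/// stores plain pointers, so the model and the inner kernel must outlive it:
/// \code
/// LinearModel<RealVector> model(5, 2);             // f: R^5 -> R^2
/// GaussianRbfKernel<RealVector> innerKernel(0.5);  // k on R^2, gamma = 0.5
/// DenseModelKernel kernel(&innerKernel, &model);   // K(x,y) = k(f(x),f(y))
///
/// RealVector x(5), y(5);                           // fill with data
/// double Kxy = kernel.eval(x, y);
///
/// // batch evaluation with state, followed by the parameter derivative
/// RealMatrix X1(10, 5), X2(20, 5), K, C(10, 20);   // C holds the weights c_ij
/// boost::shared_ptr<State> state = kernel.createState();
/// kernel.eval(X1, X2, K, *state);
/// RealVector gradient;
/// kernel.weightedParameterDerivative(X1, X2, C, *state, gradient);
/// \endcode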
template<class InputType = RealVector>
class ModelKernel: public AbstractKernelFunction<InputType>{
private:
	typedef AbstractKernelFunction<InputType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ConstInputReference ConstInputReference;
	typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

	template<class IntermediateType>
	ModelKernel(
		AbstractKernelFunction<IntermediateType>* kernel,
		AbstractModel<InputType,IntermediateType>* model
	):m_wrapper(new detail::ModelKernelImpl<InputType,IntermediateType>(kernel,model)){
		SHARK_RUNTIME_CHECK(kernel, "The kernel function is not allowed to be NULL");
		SHARK_RUNTIME_CHECK(model, "The model is not allowed to be NULL");
		if(m_wrapper->hasFirstParameterDerivative())
			this->m_features|=base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ModelKernel"; }

	/// \brief Returns the number of parameters.
	std::size_t numberOfParameters()const{
		return m_wrapper->numberOfParameters();
	}
	///\brief Returns the concatenated parameters of kernel and model.
	RealVector parameterVector() const{
		return m_wrapper->parameterVector();
	}
	///\brief Sets the concatenated parameters of kernel and model.
	void setParameterVector(RealVector const& newParameters){
		m_wrapper->setParameterVector(newParameters);
	}

	///\brief Returns the internal state object used for eval and the derivatives.
	boost::shared_ptr<State> createState()const{
		return m_wrapper->createState();
	}

	///\brief Computes K(x,y) for a single input pair.
	double eval(ConstInputReference x1, ConstInputReference x2) const{
		return m_wrapper->eval(x1,x2);
	}

	/// \brief For two batches X1 and X2, computes the matrix k_ij = K(X1_i,X2_j) and stores the state for the derivatives.
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
		m_wrapper->eval(batchX1,batchX2,result,state);
	}

	/// \brief For two batches X1 and X2, computes the matrix k_ij = K(X1_i,X2_j).
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
		m_wrapper->eval(batchX1,batchX2,result);
	}

	///\brief After a call to eval with state, computes the derivative wrt. all parameters of the kernel and the model.
	///
	/// The derivative is computed over the whole kernel matrix k_ij created by eval and summed up using the coefficients c,
	/// so this call returns \f$ \sum_{i,j} c_{ij} \frac{\partial}{\partial \theta} k(x^1_i,x^2_j) \f$.
	void weightedParameterDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficients,
		State const& state,
		RealVector& gradient
	) const{
		m_wrapper->weightedParameterDerivative(batchX1,batchX2,coefficients,state,gradient);
	}

	///\brief Stores the kernel to an Archive.
	void write(OutArchive& ar) const{
		ar << *m_wrapper;
	}

	///\brief Reads the kernel from an Archive.
	void read(InArchive& ar){
		ar >> *m_wrapper;
	}
private:
	boost::scoped_ptr<AbstractKernelFunction<InputType> > m_wrapper;
};

typedef ModelKernel<RealVector> DenseModelKernel;
typedef ModelKernel<CompressedRealVector> CompressedModelKernel;

}
#endif