//===========================================================================
/*!
*
*
* \brief Weighted sum of base kernels.
*
*
*
* \author T.Glasmachers, O. Krause, M. Tuma
* \date 2010, 2011
*
*
* \par Copyright 1995-2017 Shark Development Team
*
* <BR><HR>
* This file is part of Shark.
* <http://shark-ml.org/>
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_WEIGHTED_SUM_KERNEL_H
#define SHARK_MODELS_KERNELS_WEIGHTED_SUM_KERNEL_H
#include <shark/Models/Kernels/AbstractKernelFunction.h>
#include <boost/utility/enable_if.hpp>
namespace shark {
/// \brief Weighted sum of kernel functions
///
/// For a set of positive definite kernels \f$ k_1, \dots, k_n \f$
/// with positive coefficients \f$ w_1, \dots, w_n \f$ the sum
/// \f[ \tilde k(x_1, x_2) := \sum_{i=1}^{n} w_i \cdot k_i(x_1, x_2) \f]
/// is again a positive definite kernel function.
/// Internally, the weights are represented as
/// \f$ w_i = \exp(\xi_i) \f$
/// to allow for unconstrained optimization.
///
/// This variant of the weighted sum kernel eliminates one redundant
/// degree of freedom by fixing the first kernel weight to 1.0.
///
/// The result of the kernel evaluation is divided by the sum of the
/// kernel weights, so that in total, this amounts to fixing the sum
/// of the weights to one.
///
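/// Usage sketch (a minimal example; GaussianRbfKernel and LinearKernel are
/// standard Shark kernels, chosen here purely for illustration):
/// \code
/// GaussianRbfKernel<> rbf(0.5);
/// LinearKernel<> linear;
/// std::vector<AbstractKernelFunction<RealVector>*> kernels;
/// kernels.push_back(&rbf);
/// kernels.push_back(&linear);
/// WeightedSumKernel<> sum(kernels);
/// // one parameter remains: the log-weight of the second kernel
/// RealVector params = sum.parameterVector();
/// \endcode
///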
template<class InputType=RealVector>
class WeightedSumKernel : public AbstractKernelFunction<InputType>
{
private:
typedef AbstractKernelFunction<InputType> base_type;
struct InternalState: public State{
RealMatrix result;
std::vector<RealMatrix> kernelResults;
std::vector<boost::shared_ptr<State> > kernelStates;
InternalState(std::size_t numSubKernels)
:kernelResults(numSubKernels),kernelStates(numSubKernels){}
void resize(std::size_t sizeX1, std::size_t sizeX2){
result.resize(sizeX1, sizeX2);
for(std::size_t i = 0; i != kernelResults.size(); ++i){
kernelResults[i].resize(sizeX1, sizeX2);
}
}
};
public:
typedef typename base_type::BatchInputType BatchInputType;
typedef typename base_type::ConstInputReference ConstInputReference;
typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;
WeightedSumKernel(std::vector<AbstractKernelFunction<InputType>* > const& base){
SHARK_RUNTIME_CHECK( base.size() > 0, "[WeightedSumKernel::WeightedSumKernel] There should be at least one sub-kernel.");
m_base.resize( base.size() );
m_numParameters = m_base.size()-1;
m_adaptWeights = true;
for (std::size_t i=0; i != m_base.size() ; i++) {
SHARK_ASSERT( base[i] != NULL );
m_base[i].kernel = base[i];
m_base[i].weight = 1.0;
m_base[i].adaptive = false;
}
m_weightsum = (double)m_base.size();
// set base class features
bool hasFirstParameterDerivative = true;
for ( unsigned int i=0; i<m_base.size(); i++ ){
	if ( !m_base[i].kernel->hasFirstParameterDerivative() ) {
hasFirstParameterDerivative = false;
break;
}
}
bool hasFirstInputDerivative = true;
for ( unsigned int i=0; i<m_base.size(); i++ ){
	if ( !m_base[i].kernel->hasFirstInputDerivative() ) {
hasFirstInputDerivative = false;
break;
}
}
if ( hasFirstParameterDerivative )
this->m_features|=base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
if ( hasFirstInputDerivative )
this->m_features|=base_type::HAS_FIRST_INPUT_DERIVATIVE;
}
/// \brief From INameable: return the class name.
std::string name() const
{ return "WeightedSumKernel"; }
/// Check whether the base kernel with the given index is adaptive.
bool isAdaptive( std::size_t index ) const {
return m_base[index].adaptive;
}
/// Set adaptivity of the base kernel with the given index.
void setAdaptive( std::size_t index, bool b = true ) {
m_base[index].adaptive = b;
updateNumberOfParameters();
}
/// Set adaptivity of all base kernels.
void setAdaptiveAll( bool b = true ) {
for (std::size_t i=0; i!= m_base.size(); i++)
m_base[i].adaptive = b;
updateNumberOfParameters();
}
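// Example (hypothetical, continuing the sketch from the class documentation):
//   sum.setAdaptive(0, true); // expose the first sub-kernel's parameters
//   sum.setAdaptiveAll();     // expose the parameters of all sub-kernels
// numberOfParameters() then grows by the parameter count of each adaptive sub-kernel.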
/// Get the weight of a kernel
double weight(std::size_t index){
RANGE_CHECK(index < m_base.size());
return m_base[index].weight;
}
/// Set whether the kernel weights are adaptive, i.e., part of the parameter vector.
void setAdaptiveWeights(bool b){
	m_adaptWeights = b;
}
/// Return the parameter vector. The first N-1 entries are the (log-encoded) kernel
/// weights; the parameters of all adaptive sub-kernels are stacked behind them.
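///
/// A layout sketch (hypothetical counts, for illustration only): with three sub-kernels
/// of which only the third is adaptive with two parameters \f$ p_0, p_1 \f$, the
/// parameter vector is \f$ (\log w_2, \log w_3, p_0, p_1) \f$.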
RealVector parameterVector() const {
std::size_t num = numberOfParameters();
RealVector ret(num);
std::size_t index = 0;
for (; index != m_base.size()-1; index++){
ret(index) = std::log(m_base[index+1].weight);
}
for (std::size_t i=0; i != m_base.size(); i++){
if (m_base[i].adaptive){
std::size_t n = m_base[i].kernel->numberOfParameters();
subrange(ret,index,index+n) = m_base[i].kernel->parameterVector();
index += n;
}
}
return ret;
}
///\brief Creates the internal state of the kernel.
boost::shared_ptr<State> createState()const{
InternalState* state = new InternalState(m_base.size());
for(std::size_t i = 0; i != m_base.size(); ++i){
state->kernelStates[i]=m_base[i].kernel->createState();
}
return boost::shared_ptr<State>(state);
}
/// Set the parameter vector. The first N-1 entries are the (log-encoded) kernel
/// weights; the parameters of all adaptive sub-kernels are stacked behind them.
void setParameterVector(RealVector const& newParameters) {
SIZE_CHECK(newParameters.size() == numberOfParameters());
m_weightsum = 1.0;
std::size_t index = 0;
for (; index != m_base.size()-1; index++){
double w = newParameters(index);
m_base[index+1].weight = std::exp(w);
m_weightsum += m_base[index+1].weight;
}
for (std::size_t i=0; i != m_base.size(); i++){
if (m_base[i].adaptive){
std::size_t n = m_base[i].kernel->numberOfParameters();
m_base[i].kernel->setParameterVector(subrange(newParameters,index,index+n));
index += n;
}
}
}
std::size_t numberOfParameters() const {
return m_numParameters;
}
/// Evaluate the weighted sum kernel according to the equation
/// \f$ k(x, y) = \frac{\sum_i w_i k_i(x, y)}{\sum_i w_i} \f$.
double eval(ConstInputReference x1, ConstInputReference x2) const{
double numerator = 0.0;
for (std::size_t i=0; i != m_base.size(); i++){
double result = m_base[i].kernel->eval(x1, x2);
numerator += m_base[i].weight*result;
}
return numerator / m_weightsum;
}
/// Evaluate the kernel according to the equation
/// \f$ k(x, y) = \frac{\sum_i w_i k_i(x, y)}{\sum_i w_i} \f$
/// for two batches of inputs.
void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
std::size_t sizeX1 = batchSize(batchX1);
std::size_t sizeX2 = batchSize(batchX2);
ensure_size(result,sizeX1,sizeX2);
result.clear();
RealMatrix kernelResult(sizeX1,sizeX2);
for (std::size_t i = 0; i != m_base.size(); i++){
m_base[i].kernel->eval(batchX1, batchX2,kernelResult);
result += m_base[i].weight*kernelResult;
}
result /= m_weightsum;
}
/// Evaluate the kernel according to the equation
/// \f$ k(x, y) = \frac{\sum_i w_i k_i(x, y)}{\sum_i w_i} \f$
/// for two batches of inputs.
/// The sub-kernel results are additionally stored in the state object for later derivative computations.
void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
std::size_t sizeX1 = batchSize(batchX1);
std::size_t sizeX2 = batchSize(batchX2);
ensure_size(result,sizeX1,sizeX2);
result.clear();
InternalState& s = state.toState<InternalState>();
s.resize(sizeX1,sizeX2);
for (std::size_t i=0; i != m_base.size(); i++){
m_base[i].kernel->eval(batchX1,batchX2,s.kernelResults[i],*s.kernelStates[i]);
result += m_base[i].weight*s.kernelResults[i];
}
//store summed result
s.result=result;
result /= m_weightsum;
}
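/// Computes the derivative with respect to the parameters, weighted by the
/// coefficient matrix, reusing the sub-kernel results and states stored by eval().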
void weightedParameterDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficients,
State const& state,
RealVector& gradient
) const{
ensure_size(gradient,numberOfParameters());
std::size_t numKernels = m_base.size(); //number of sub-kernels
InternalState const& s = state.toState<InternalState>();
double sumSquared = sqr(m_weightsum); //denominator
//first the derivative with respect to the (log-encoded) weight parameters;
//the first weight is not a parameter and does not need a gradient.
//(Theoretically, we would not need to store its result.)
//calculate the weighted sum over all results
double numeratorSum = sum(coefficients * s.result);
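//Derivation sketch (notation as in the class documentation): with W = sum_j w_j
//and w_i = exp(xi_i), the quotient rule gives
//  d/dxi_i [ (sum_j w_j k_j) / W ] = w_i * (k_i * W - sum_j w_j k_j) / W^2,
//which is evaluated below with k_i summed against the coefficient matrix.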
for (std::size_t i = 1; i != numKernels && m_adaptWeights; i++) {
//calculate the weighted sum over all results of this kernel
double summedK=sum(coefficients * s.kernelResults[i]);
gradient(i-1) = m_base[i].weight * (summedK * m_weightsum - numeratorSum) / sumSquared;
}
std::size_t gradPos = m_adaptWeights ? numKernels-1: 0; //starting position of the sub-kernel gradients
RealVector kernelGrad;
for (std::size_t i=0; i != numKernels; i++) {
if (isAdaptive(i)){
double coeff = m_base[i].weight / m_weightsum;
m_base[i].kernel->weightedParameterDerivative(batchX1,batchX2,coefficients,*s.kernelStates[i],kernelGrad);
std::size_t n = kernelGrad.size();
noalias(subrange(gradient,gradPos,gradPos+n)) = coeff * kernelGrad;
gradPos += n;
}
}
}
/// Input derivative, calculated according to the equation
///
/// \f$ \frac{\partial k(x, y)}{\partial x} = \frac{\sum_i w_i \frac{\partial k_i(x, y)}{\partial x}}{\sum_i w_i} \f$,
///
/// weighted by the coefficients and summed over all elements of the second batch.
void weightedInputDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficientsX2,
State const& state,
BatchInputType& gradient
) const{
SIZE_CHECK(coefficientsX2.size1() == batchSize(batchX1));
SIZE_CHECK(coefficientsX2.size2() == batchSize(batchX2));
weightedInputDerivativeImpl(batchX1,batchX2,coefficientsX2,state,gradient);
}
void read(InArchive& ar){
for(std::size_t i = 0;i != m_base.size(); ++i ){
ar >> m_base[i].weight;
ar >> m_base[i].adaptive;
ar >> *(m_base[i].kernel);
}
ar >> m_weightsum;
ar >> m_numParameters;
}
void write(OutArchive& ar) const{
for(std::size_t i=0;i!= m_base.size();++i){
ar << m_base[i].weight;
ar << m_base[i].adaptive;
//const_cast needed to prevent a compiler warning
ar << const_cast<AbstractKernelFunction<InputType> const&>(*(m_base[i].kernel));
}
ar << m_weightsum;
ar << m_numParameters;
}
protected:
/// structure describing a single base kernel
struct tBase
{
AbstractKernelFunction<InputType>* kernel; ///< pointer to the base kernel object
double weight; ///< weight in the linear combination
bool adaptive; ///< whether the kernel's parameters are part of the WeightedSumKernel's parameter vector
};
void updateNumberOfParameters(){
m_numParameters = m_adaptWeights? m_base.size()-1 : 0;
for (std::size_t i=0; i != m_base.size(); i++)
if (m_base[i].adaptive)
m_numParameters += m_base[i].kernel->numberOfParameters();
}
//We need to choose the correct implementation at compile time to ensure that, in case
//BatchInputType does not implement the needed operations, the implementation is replaced
//by a safe default which throws an exception. For this we use enable_if/disable_if.
//The method is called with BatchInputType as template argument; the real implementation comes first.
template<class T>
void weightedInputDerivativeImpl(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficientsX2,
State const& state,
BatchInputType& gradient,
typename boost::enable_if<boost::is_same<T,RealMatrix> >::type* dummy = 0
)const{
std::size_t numKernels = m_base.size(); //number of sub-kernels
InternalState const& s = state.toState<InternalState>();
//initialize gradient with the first kernel
m_base[0].kernel->weightedInputDerivative(batchX1, batchX2, coefficientsX2, *s.kernelStates[0], gradient);
gradient *= m_base[0].weight / m_weightsum;
BatchInputType kernelGrad;
for (std::size_t i=1; i != numKernels; i++){
m_base[i].kernel->weightedInputDerivative(batchX1, batchX2, coefficientsX2, *s.kernelStates[i], kernelGrad);
double coeff = m_base[i].weight / m_weightsum;
gradient += coeff * kernelGrad;
}
}
template<class T>
void weightedInputDerivativeImpl(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficientsX2,
State const& state,
BatchInputType& gradient,
typename boost::disable_if<boost::is_same<T,RealMatrix> >::type* dummy = 0
)const{
throw SHARKEXCEPTION("[WeightedSumKernel::weightedInputDerivative] The used BatchInputType is not a vector type");
}
std::vector<tBase> m_base; ///< collection of base kernels
double m_weightsum; ///< sum of all weights
std::size_t m_numParameters; ///< total number of parameters
bool m_adaptWeights; ///< whether the weights should be adapted
};
typedef WeightedSumKernel<> DenseWeightedSumKernel;
typedef WeightedSumKernel<CompressedRealVector> CompressedWeightedSumKernel;
}
#endif