//===========================================================================
/*!
*
*
* \brief Special kernel classes for multi-task and transfer learning.
*
*
*
* \author T. Glasmachers, O.Krause
* \date 2012
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
#define SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
#include <cmath>
#include <vector>

#include <shark/Data/Dataset.h>
#include <shark/Models/Kernels/DiscreteKernel.h>
#include <shark/Models/Kernels/ProductKernel.h>

#include "Impl/MklKernelBase.h"
namespace shark {
///
/// \brief Aggregation of input data and task index.
///
/// \par
/// Generic data structure for augmenting arbitrary data
/// with an integer. This integer is typically used as a
/// task identifier in multi-task and transfer learning.
///
template <class InputTypeT>
struct MultiTaskSample : public ISerializable
{
	typedef InputTypeT InputType;

	/// \brief Default constructor.
	///
	/// NOTE(review): leaves input and task uninitialized; callers must
	/// assign or deserialize before use.
	MultiTaskSample()
	{ }

	/// \brief Construction from an input and a task index
	MultiTaskSample(InputType const& i, std::size_t t)
	: input(i), task(t)
	{ }

	/// From ISerializable: restore input and task index from the archive.
	void read(InArchive& ar){
		ar >> input;
		ar >> task;
	}

	/// From ISerializable: store input and task index in the archive.
	void write(OutArchive& ar) const{
		ar << input;
		ar << task;
	}

	InputType input;   ///< input data
	std::size_t task;  ///< task index
};
}
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Adapt MultiTaskSample as a Boost.Fusion sequence so that the generic
// batch machinery can iterate over its (input, task) members.
BOOST_FUSION_ADAPT_TPL_STRUCT(
	(InputType),
	(shark::MultiTaskSample) (InputType),
	(InputType, input)(std::size_t, task)
)

namespace shark {

/// \brief Batch specialization for MultiTaskSample: one batch member per field.
template <class InputType>
struct Batch< MultiTaskSample<InputType> >{
	SHARK_CREATE_BATCH_INTERFACE(
		MultiTaskSample<InputType>,
		(InputType, input)(std::size_t, task)
	)
};

}
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
namespace shark {
///
/// \brief Special "Gaussian-like" kernel function on tasks.
///
/// \par
/// See
/// Learning Marginal Predictors: Transfer to an Unlabeled Task.
/// G. Blanchard, G. Lee, C. Scott.
///
/// \par
/// This class computes a Gaussian kernel based on the distance
/// of empirical distributions in feature space induced by yet
/// another kernel. This is useful for multi-task and transfer
/// learning. It reduces the definition of a kernel on tasks to
/// that of a kernel on inputs, plus a single bandwidth parameter
/// for the Gaussian kernel of distributions.
///
/// \par
/// Given unlabeled data \f$ x_i, t_i \f$ where the x-component
/// is an input and the t-component is a task index, the kernel
/// on tasks t and t' is defined as
/// \f[
///     k(t, t') = \exp \left( -\gamma \cdot \left\| \mu_t - \mu_{t'} \right\|^2 \right)
///     \qquad \text{with} \qquad
///     \mu_t = \frac{1}{\ell_t} \sum_{i | t_i = t} \phi(x_i) ,
/// \f]
/// where \f$ \phi \f$ is the feature map of an arbitrary kernel
/// \f$ k'(x, x') = \langle \phi(x), \phi(x') \rangle \f$ on inputs, so that
/// all inner products \f$ \langle \mu_t, \mu_{t'} \rangle \f$ can be computed
/// from \f$ k' \f$ alone.
///
template
class GaussianTaskKernel : public DiscreteKernel
{
private:
typedef DiscreteKernel base_type;
public:
typedef InputTypeT InputType;
typedef MultiTaskSample MultiTaskSampleType;
typedef AbstractKernelFunction KernelType;
/// \brief Construction of a Gaussian kernel on tasks.
///
/// \param data unlabeled data from multiple tasks
/// \param tasks number of tasks in the problem
/// \param inputkernel kernel on inputs based on which task similarity is defined
/// \param gamma Gaussian bandwidth parameter (also refer to the member functions setGamma and setSigma).
GaussianTaskKernel(
Data const& data,
std::size_t tasks,
KernelType& inputkernel,
double gamma)
: DiscreteKernel(RealMatrix(tasks, tasks,0.0))
, m_data(data)
, mpe_inputKernel(&inputkernel)
, m_gamma(gamma){
computeMatrix();
}
/// \brief From INameable: return the class name.
std::string name() const
{ return "GaussianTaskKernel"; }
RealVector parameterVector() const{
return mpe_inputKernel->parameterVector() | m_gamma;
}
void setParameterVector(RealVector const& newParameters){
std::size_t kParams = mpe_inputKernel->numberOfParameters();
mpe_inputKernel->setParameterVector(subrange(newParameters,0,kParams));
m_gamma = newParameters.back();
computeMatrix();
}
std::size_t numberOfParameters() const{
return mpe_inputKernel->numberOfParameters() + 1;
}
std::size_t numberOfTasks() const
{ return size(); }
/// \brief Kernel bandwidth parameter.
double gamma() const
{ return m_gamma; }
/// \brief Kernel width parameter, equivalent to the bandwidth parameter.
///
/// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
double sigma() const
{ return (1.0 / std::sqrt(2 * m_gamma)); }
// \brief Set the kernel bandwidth parameter.
void setGamma(double gamma)
{
SHARK_ASSERT(gamma > 0.0);
m_gamma = gamma;
}
/// \brief Set the kernel width (equivalent to setting the bandwidth).
///
/// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
void setWidth(double sigma)
{
SHARK_ASSERT(sigma > 0.0);
m_gamma = 1.0 / (2.0 * sigma * sigma);
}
/// From ISerializable.
void read(InArchive& ar)
{
base_type::read(ar);
ar >> m_gamma;
}
/// From ISerializable.
void write(OutArchive& ar) const
{
base_type::write(ar);
ar << m_gamma;
}
protected:
/// \brief Compute the Gram matrix of the task kernel.
///
/// \par
/// Here is the real meat. This function implements the
/// kernel function defined in
/// Learning Marginal Predictors: Transfer to an Unlabeled Task.
/// G. Blanchard, G. Lee, C. Scott.
///
/// \par
/// In a first step the function computes the inner products
/// of the task-wise empirical distributions, represented by
/// their mean elements in the kernel-induced feature space.
/// In a second step this information is used for the computation
/// of squared distances between empirical distribution, which
/// allows for the straightforward computation of a Gaussian
/// kernel.
void computeMatrix()
{
// count number of examples for each task
const std::size_t tasks = numberOfTasks();
std::size_t elements = m_data.numberOfElements();
std::vector ell(tasks, 0);
for (std::size_t i=0; ieval(m_data.element(i).input, m_data.element(j).input);
base_type::m_matrix(task_i, task_j) += k;
base_type::m_matrix(task_j, task_i) += k;
}
const double k = mpe_inputKernel->eval(m_data.element(i).input, m_data.element(i).input);
base_type::m_matrix(task_i, task_i) += k;
}
for (std::size_t i=0; i const& m_data; ///< multi-task data
KernelType* mpe_inputKernel; ///< kernel on inputs
double m_gamma; ///< bandwidth of the Gaussian task kernel
};
///
/// \brief Special kernel function for multi-task and transfer learning.
///
/// \par
/// This class is a convenience wrapper for the product of an
/// input kernel and a kernel on tasks. It also encapsulates
/// the projection from multi-task learning data (see class
/// MultiTaskSample) to inputs and task indices.
///
template <class InputTypeT>
class MultiTaskKernel
: private detail::MklKernelBase< MultiTaskSample<InputTypeT> >
, public ProductKernel< MultiTaskSample<InputTypeT> >
{
private:
	typedef detail::MklKernelBase< MultiTaskSample<InputTypeT> > base_type1;
	typedef ProductKernel< MultiTaskSample<InputTypeT> > base_type2;
public:
	typedef AbstractKernelFunction<InputTypeT> InputKernelType;

	/// \brief Constructor.
	///
	/// Stores non-owning pointers to both kernels; they must outlive this object.
	///
	/// \param inputkernel kernel on inputs
	/// \param taskkernel kernel on task indices
	MultiTaskKernel(
		InputKernelType* inputkernel,
		DiscreteKernel* taskkernel)
	:base_type1(boost::fusion::make_vector(inputkernel,taskkernel))
	,base_type2(base_type1::makeKernelVector())
	{}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "MultiTaskKernel"; }
};
} // namespace shark {
#endif