//=========================================================================== /*! * * * \brief Implements the Ensemble Model that can be used to merge predictions from weighted models * * \author O. Krause * \date 2018 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_MODELS_ENSEMBLE_H #define SHARK_MODELS_ENSEMBLE_H #include #include #include namespace shark { namespace detail{ template class EnsembleImpl: public AbstractModel< typename std::remove_pointer::type::InputType, VectorType, typename std::remove_pointer::type::ParameterVectorType >{ public: typedef typename std::remove_pointer::type ModelType; private: typedef AbstractModel Base; // the following functions are returning a reference to the model // independent of whether a pointer to the model or the model itself // is stored. ModelType& derefIfPtr(ModelType& model)const{ return model; } ModelType const& derefIfPtr(ModelType const& model)const{ return model; } ModelType& derefIfPtr(ModelType* model)const{ return *model; } //implements the pooling operation which creates a vector from the model responses to the given patterns template struct tag{}; template void pool(InputBatch const& patterns, blas::matrix& outputs, tag >)const{ for(std::size_t i = 0; i != numberOfModels(); i++){ noalias(outputs) += weight(i) * model(i)(patterns); } outputs /= sumOfWeights(); } template void pool(InputBatch const& patterns, OutputBatch& outputs, tag)const{ blas::vector responses; for(std::size_t i = 0; i != numberOfModels(); ++i){ model(i).eval(patterns, responses); for(std::size_t p = 0; p != patterns.size1(); ++p){ outputs(p,responses(p)) += weight(i); } } outputs /= sumOfWeights(); } std::vector m_models; RealVector m_weights; public: typedef typename Base::BatchInputType BatchInputType; typedef typename Base::BatchOutputType BatchOutputType; typedef typename Base::ParameterVectorType ParameterVectorType; ParameterVectorType parameterVector() const { return {}; } void setParameterVector(ParameterVectorType const& param) { SHARK_ASSERT(param.size() == 0); } void addModel(BaseModelType const& model, double weight = 1.0){ SHARK_RUNTIME_CHECK(weight > 0, "Weights must be positive"); m_models.push_back(model); m_weights.push_back(weight); } /// \brief Removes all models from the ensemble void clearModels(){ m_models.clear(); m_weights.clear(); } ModelType& model(std::size_t index){ return derefIfPtr(m_models[index]); } ModelType const& model(std::size_t index)const{ return derefIfPtr(m_models[index]); } /// \brief Returns the weight of the i-th model. double const& weight(std::size_t i)const{ return m_weights[i]; } /// \brief Returns the weight of the i-th model. double& weight(std::size_t i){ return m_weights[i]; } /// \brief Returns the total sum of weights used for averaging double sumOfWeights() const{ return sum(m_weights); } /// \brief Returns the number of models. std::size_t numberOfModels()const{ return m_models.size(); } ///\brief Returns the expected shape of the input Shape inputShape() const{ return m_models.empty() ? Shape(): model(0).inputShape(); } ///\brief Returns the shape of the output Shape outputShape() const{ return m_models.empty() ? Shape(): model(0).outputShape(); } using Base::eval; void eval(BatchInputType const& patterns, BatchOutputType& outputs)const{ outputs.resize(patterns.size1(), outputShape().numElements()); outputs.clear(); pool(patterns,outputs, tag()); } void eval(BatchInputType const& patterns, BatchOutputType& outputs, State&)const{ eval(patterns,outputs); } void read(InArchive& archive){ std::size_t numModels; archive >> numModels; m_models.resize(numModels); for(std::size_t i = 0; i != numModels; ++i){ archive >> model(i); } archive >> m_weights; } void write(OutArchive& archive)const{ std::size_t numModels = m_models.size(); archive << numModels; for(std::size_t i = 0; i != numModels; ++i){ archive << model(i); } archive << m_weights; } }; //the following creates an ensemble base depending on whether the ensemble should be a classifier or not. template struct EnsembleBase : public detail::EnsembleImpl{ private: typedef typename std::remove_pointer::type::OutputType ModelOutputType; protected: detail::EnsembleImpl& impl(){ return *this;}; detail::EnsembleImpl const& impl() const{ return *this;}; }; //if the output type is unsigned int, this is a classifier template struct EnsembleBase : public Classifier::type::ParameterVectorType> >{ private: typedef typename std::remove_pointer::type::ParameterVectorType PoolingVectorType; protected: detail::EnsembleImpl& impl() { return this->decisionFunction();} detail::EnsembleImpl const& impl() const { return this->decisionFunction();} }; //if the OutputType is void, this is treated as choosing it as the OutputType of the model template struct EnsembleBase : public EnsembleBase::type::OutputType>{}; } /// \brief Represents en weighted ensemble of models. /// /// In an ensemble, each model computes a response for an input independently. The responses are then pooled /// to form a single label. The hope is that models in an ensemble do not produce the same type of errors /// and thus the averaged response is more reliable. An example for this is AdaBoost, where a series /// of weak models is trained and weighted to create one final prediction. /// /// There are two orthogonal aspects to consider in the Ensemble. The pooling function, which is chosen /// based on the output type of the ensemble models, and the mapping of the output of the pooling function /// to the model output. /// /// If the ensemble consists of models returning vectors, pooling is implemented /// using weighted averaging. If the models return class labels, those are first transformed /// into a one-hot encoding before averaging. Thus the output can be interpreted /// as the probability of a class label when picking a member of the emsemble randomly with probability /// proportional to its weights. /// /// The final mapping to the output is based on the OutputType template parameter, which by default /// is the same as the output type of the model. If it is unsigned int, the Ensemble is treated as Classifier /// with decision function being the result of the pooling function (i.e. the class with maximum response in /// the weighted average is chosen). In this case, Essemble is derived from Classifier<>. /// Otherwise the weighted average is returned. /// /// Note that there is a decision in algorihm design tot ake for classifiers: /// We can either let each member of the Ensemble predict /// a class-label and then pool the labels as described above, or we can create an ensemble of /// decision functions and weight them into one decision function to produce the class-label. /// Those approaches will lead to different results. For example if the underlying models /// produce class probabilities, the class with the largest average probability /// might not be the same as the class with most votes from the individual models. /// /// Models are added using addModel. /// The ModelType is allowed to be either a concrete model like LinearModel<>, in which /// case a copy of each added model is stored. If the ModelType is a pointer, for example /// AbstractModel<...>*, only pointers are stored and all added models /// must outlive the lifetime of the ensemble. This also entails differences in serialization. /// In the first case, the model can be serialized completely without any setup. In the second /// case before deserializing, the models must be constructed and added. template class Ensemble: public detail::EnsembleBase{ public: std::string name() const { return "Ensemble"; } /// \brief Adds a new model to the ensemble. /// /// \param model the new model /// \param weight weight of the model. must be > 0 void addModel(ModelType const& model, double weight = 1.0){ this->impl().addModel(model,weight); } /// \brief Removes all models from the ensemble void clearModels(){ this->impl().clearModels(); } /// \brief Returns the number of models. std::size_t numberOfModels()const{ return this->impl().numberOfModels(); } /// \brief Returns a reference to the i-th model. /// /// \param i model index. typename std::remove_pointer::type& model(std::size_t i){ return this->impl().model(i); } /// \brief Returns a const reference to the i-th model. /// /// \param i model index. typename std::remove_pointer::type const& model(std::size_t i)const{ return this->impl().model(i); } /// \brief Returns the weight of the i-th model. /// /// \param i model index. double const& weight(std::size_t i)const{ return this->impl().weight(i); } /// \brief Returns the weight of the i-th model. /// /// \param i model index. double& weight(std::size_t i){ return this->impl().weight(i); } /// \brief Returns the total sum of weights used for averaging double sumOfWeights() const{ return this->impl().sumOfWeights(); } }; } #endif