//===========================================================================
/*!
 *
 * \brief       Base class for all models, as well as a specialized differentiable model
 *
 * \author      T. Glasmachers, O. Krause
 * \date        2010
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_MODELS_ABSTRACTMODEL_H
#define SHARK_MODELS_ABSTRACTMODEL_H

#include <shark/Core/Flags.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Core/INameable.h>
#include <shark/Core/ISerializable.h>
#include <shark/Core/Random.h>
#include <shark/Core/Shape.h>
#include <shark/Core/State.h>
#include <shark/Data/Dataset.h>

namespace shark {

///\brief Base class for all Models.
///
/// \par
/// A model is one of the three pillars of supervised learning: model, error measure,
/// and optimization algorithm.
/// It represents a function which performs a mapping \f$ x \rightarrow f_w(x)\f$.
/// In contrast to an error function it has two sets of parameters:
/// the first is the current point to map, \f$x\f$; the others are the internal model parameters \f$w\f$
/// which define the mapping.
/// Often a model is used to find an optimal mapping for a problem, for example a function which
/// best fits the points of a given dataset. Therefore, AbstractModel does not only offer
/// the mapping itself, but also a set of special derivatives with respect to \f$ x \f$ and \f$ w \f$.
/// Most of the time only the derivative with respect to \f$ w \f$ is needed, but in some special problems,
/// like finding optimal stimuli or stacking models, the input derivative is needed as well.
///
/// \par
/// Models are optimized for batch processing. This means that, instead of processing only one data point
/// at a time, a model can evaluate a whole batch of inputs at once, using optimized routines for this task.
///
/// \par
/// The derivatives are weighted, which means that the derivatives of every single output are added together,
/// weighted by coefficients (see #weightedParameterDerivative). This is an optimization for the chain rule
/// which is very efficient to calculate most of the time.
///
/// \par
/// It is allowed to store intermediate values during #eval and use them to speed up the calculation of
/// derivatives. It must therefore be guaranteed that eval() is called before calculating derivatives.
/// This is no restriction, since typical error measures need the mapping itself and not only the derivative.
///
/// \par
/// Models have names, have parameters, and can be serialized. The type of the parameter vector
/// can be set as the third template argument. By default, this is RealVector.
template<class InputTypeT, class OutputTypeT, class ParameterVectorType = RealVector>
class AbstractModel : public IParameterizable<ParameterVectorType>, public INameable, public ISerializable
{
public:
    /// \brief Defines the input type of the model.
    typedef InputTypeT InputType;
    /// \brief Defines the output type of the model.
    typedef OutputTypeT OutputType;
    /// \brief Defines the output type of the model, compatible with standard functors.
    typedef OutputType result_type;
    ///\brief Defines the base type used by the model (this type). Useful for creating derived models.
    typedef AbstractModel<InputTypeT, OutputTypeT, ParameterVectorType> ModelBaseType;

    /// \brief Defines the batch type of the input type.
    ///
    /// This could for example be std::vector<InputType>, but for example for RealVector it could be RealMatrix.
    typedef typename Batch<InputType>::type BatchInputType;
    /// \brief Defines the batch type of the output type.
    typedef typename Batch<OutputType>::type BatchOutputType;
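    // Illustration (not part of the interface): for a model declared as
    // AbstractModel<RealVector, unsigned int>, inputs are batched as
    // BatchInputType = RealMatrix (one input per row) and outputs as
    // BatchOutputType = Batch<unsigned int>::type, i.e. a vector of labels.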
    AbstractModel() { }

    virtual ~AbstractModel() { }

    enum Feature {
        HAS_FIRST_PARAMETER_DERIVATIVE = 1,
        HAS_FIRST_INPUT_DERIVATIVE = 4,
    };
    SHARK_FEATURE_INTERFACE;

    /// \brief Returns true when the first parameter derivative is implemented.
    bool hasFirstParameterDerivative()const{
        return m_features & HAS_FIRST_PARAMETER_DERIVATIVE;
    }
    /// \brief Returns true when the first input derivative is implemented.
    bool hasFirstInputDerivative()const{
        return m_features & HAS_FIRST_INPUT_DERIVATIVE;
    }

    ///\brief Returns the expected shape of the input.
    virtual Shape inputShape() const = 0;

    ///\brief Returns the shape of the output.
    virtual Shape outputShape() const = 0;

    ///\brief Creates an internal state of the model.
    ///
    /// The state is needed when the derivatives are to be
    /// calculated. Eval can store a state which is then reused to speed up
    /// the calculations of the derivatives. This also allows eval to be
    /// evaluated in parallel!
    virtual boost::shared_ptr<State> createState() const {
        if (hasFirstParameterDerivative() || hasFirstInputDerivative()) {
            throw SHARKEXCEPTION("[AbstractModel::createState] createState must be overridden by models with derivatives");
        }
        return boost::shared_ptr<State>(new EmptyState());
    }

    /// \brief From ISerializable, reads a model from an archive.
    virtual void read( InArchive & archive ){
        m_features.read(archive);
        ParameterVectorType p;
        archive & p;
        this->setParameterVector(p);
    }

    /// \brief Writes a model to an archive.
    ///
    /// The default implementation just saves the parameters, not the structure!
    virtual void write( OutArchive & archive ) const{
        m_features.write(archive);
        ParameterVectorType p = this->parameterVector();
        archive & p;
    }

    /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
    ///
    /// \param patterns the inputs of the model
    /// \param outputs the predictions or responses of the model to every pattern
    virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs) const{
        boost::shared_ptr<State> state = createState();
        eval(patterns,outputs,*state);
    }

    /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
    ///
    /// \param patterns the inputs of the model
    /// \param outputs the predictions or responses of the model to every pattern
    /// \param state intermediate results stored by eval which can be reused for derivative computation
    virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs, State& state) const = 0;

    /// \brief Standard interface for evaluating the response of the model to a single pattern.
    ///
    /// \param pattern the input of the model
    /// \param output the prediction or response of the model to the pattern
    virtual void eval(InputType const & pattern, OutputType& output)const{
        BatchInputType patternBatch = Batch<InputType>::createBatch(pattern);
        getBatchElement(patternBatch,0) = pattern;
        BatchOutputType outputBatch;
        eval(patternBatch,outputBatch);
        output = getBatchElement(outputBatch,0);
    }

    /// \brief Model evaluation as an operator for a whole dataset. This is a convenience function.
    ///
    /// \param patterns the inputs of the model
    /// \returns the responses of the model
    Data<OutputType> operator()(Data<InputType> const& patterns)const{
        return transform(patterns,*this);
    }
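    // Usage sketch (illustrative only; MyModel stands for any concrete subclass and the
    // data are assumed to exist, neither is defined in this header):
    //
    //   MyModel model;                                   // a trained model
    //   Data<RealVector> inputs = ...;                   // some unlabeled data
    //   Data<RealVector> predictions = model(inputs);    // whole dataset, evaluated batch-wise
    //   RealVector x = ...;                              // a single input pattern
    //   RealVector y = model(x);                         // single-pattern convenience overload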
    /// \brief Model evaluation as an operator for a single pattern. This is a convenience function.
    ///
    /// \param pattern the input of the model
    /// \returns the response of the model
    OutputType operator()(InputType const & pattern)const{
        OutputType output;
        eval(pattern,output);
        return output;
    }

    /// \brief Model evaluation as an operator for a batch of patterns. This is a convenience function.
    ///
    /// \param patterns the inputs of the model
    /// \returns the responses of the model
    BatchOutputType operator()(BatchInputType const & patterns)const{
        BatchOutputType output;
        eval(patterns,output);
        return output;
    }

    /// \brief Calculates the weighted sum of derivatives w.r.t. the parameters.
    ///
    /// \param pattern the batch of patterns to evaluate
    /// \param outputs the outputs of the model for the patterns, as computed by eval
    /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
    /// \param state intermediate results stored by eval to speed up the calculation of the derivatives
    /// \param derivative the calculated derivative as sum over all derivatives of all patterns
    virtual void weightedParameterDerivative(
        BatchInputType const & pattern,
        BatchOutputType const& outputs,
        BatchOutputType const & coefficients,
        State const& state,
        ParameterVectorType& derivative
    )const{
        SHARK_FEATURE_EXCEPTION(HAS_FIRST_PARAMETER_DERIVATIVE);
    }

    ///\brief Calculates the weighted sum of derivatives w.r.t. the inputs.
    ///
    /// \param pattern the batch of patterns to evaluate
    /// \param outputs the outputs of the model for the patterns, as computed by eval
    /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
    /// \param state intermediate results stored by eval to speed up the calculation of the derivatives
    /// \param derivative the calculated derivative for every pattern
    virtual void weightedInputDerivative(
        BatchInputType const & pattern,
        BatchOutputType const& outputs,
        BatchOutputType const & coefficients,
        State const& state,
        BatchInputType& derivative
    )const{
        SHARK_FEATURE_EXCEPTION(HAS_FIRST_INPUT_DERIVATIVE);
    }

    ///\brief Calculates the weighted input and parameter derivative at the same time.
    ///
    /// Sometimes both derivatives are needed at the same time, and in some cases the input
    /// derivative can be calculated for free while computing the weighted parameter derivative.
    /// This is, for example, true for feed-forward neural networks. However, there exists the
    /// obvious default implementation, which simply calculates the derivatives one after another.
    /// \param patterns the patterns to evaluate
    /// \param outputs the outputs of the model for the patterns, as computed by eval
    /// \param coefficients the coefficients which are used to calculate the weighted sum
    /// \param state intermediate results stored by eval to speed up the calculation of the derivatives
    /// \param parameterDerivative the calculated parameter derivative as sum over all derivatives of all patterns
    /// \param inputDerivative the calculated derivative for every pattern
    virtual void weightedDerivatives(
        BatchInputType const & patterns,
        BatchOutputType const& outputs,
        BatchOutputType const & coefficients,
        State const& state,
        ParameterVectorType& parameterDerivative,
        BatchInputType& inputDerivative
    )const{
        weightedParameterDerivative(patterns, outputs, coefficients, state, parameterDerivative);
        weightedInputDerivative(patterns, outputs, coefficients, state, inputDerivative);
    }
};
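// A minimal concrete model only needs to implement the stateful batch eval() plus
// inputShape()/outputShape(); parameter handling comes from IParameterizable. The sketch
// below is purely illustrative and not part of Shark: the class ScalingModel and its
// single scaling parameter are hypothetical. It maps x to w*x.
//
//   class ScalingModel : public AbstractModel<RealVector, RealVector>{
//   public:
//       explicit ScalingModel(std::size_t dim): m_dim(dim), m_scale(1.0){
//           m_features |= HAS_FIRST_PARAMETER_DERIVATIVE;
//       }
//       std::string name() const{ return "ScalingModel"; }
//       Shape inputShape() const{ return Shape({m_dim}); }
//       Shape outputShape() const{ return Shape({m_dim}); }
//       std::size_t numberOfParameters() const{ return 1; }
//       RealVector parameterVector() const{ RealVector p(1); p(0) = m_scale; return p; }
//       void setParameterVector(RealVector const& p){ m_scale = p(0); }
//       boost::shared_ptr<State> createState() const{
//           return boost::shared_ptr<State>(new EmptyState()); // nothing to cache for derivatives
//       }
//       using AbstractModel<RealVector, RealVector>::eval;
//       void eval(BatchInputType const& patterns, BatchOutputType& outputs, State&) const{
//           outputs.resize(patterns.size1(), patterns.size2());
//           noalias(outputs) = m_scale * patterns;
//       }
//       void weightedParameterDerivative(
//           BatchInputType const& patterns, BatchOutputType const&,
//           BatchOutputType const& coefficients, State const&, RealVector& derivative
//       ) const{
//           // d f_w(x)/d w = x, weighted by the coefficients and summed over the batch
//           derivative.resize(1);
//           derivative(0) = 0.0;
//           for(std::size_t i = 0; i != patterns.size1(); ++i)
//               for(std::size_t j = 0; j != patterns.size2(); ++j)
//                   derivative(0) += coefficients(i,j) * patterns(i,j);
//       }
//   private:
//       std::size_t m_dim;
//       double m_scale;
//   };
//
// With such a model, the derivative protocol described above is: createState() ->
// eval(patterns, outputs, state) -> weightedParameterDerivative(patterns, outputs,
// coefficients, state, derivative).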
/**
 * \ingroup shark_globals
 *
 * @{
 */

/// \brief Initialize model parameters normally distributed.
///
/// \param model model to be initialized
/// \param s variance of the mean-free normal distribution
template<class InputType, class OutputType, class ParameterVectorType>
void initRandomNormal(AbstractModel<InputType, OutputType, ParameterVectorType>& model, double s){
    typedef typename ParameterVectorType::value_type Float;
    typedef typename ParameterVectorType::device_type Device;
    auto weights = blas::normal(random::globalRng, model.numberOfParameters(), Float(0), Float(s), Device());
    model.setParameterVector(weights);
}

/// \brief Initialize model parameters uniformly at random.
///
/// \param model model to be initialized
/// \param lower lower bound of the initialization interval
/// \param upper upper bound of the initialization interval
template<class InputType, class OutputType, class ParameterVectorType>
void initRandomUniform(AbstractModel<InputType, OutputType, ParameterVectorType>& model, double lower, double upper){
    typedef typename ParameterVectorType::value_type Float;
    typedef typename ParameterVectorType::device_type Device;
    auto weights = blas::uniform(random::globalRng, model.numberOfParameters(), Float(lower), Float(upper), Device());
    model.setParameterVector(weights);
}

/** @}*/

namespace detail{
//Required for correct shape inference of transform
template<class InputType, class OutputType, class ParameterVectorType>
struct InferShape<AbstractModel<InputType, OutputType, ParameterVectorType> >{
    static Shape infer(AbstractModel<InputType, OutputType, ParameterVectorType> const& f){
        return f.outputShape();
    }
};
}

}
#endif
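// Usage sketch for the initialization helpers above (illustrative only; MyModel stands
// for any concrete AbstractModel subclass and is not defined in this header):
//
//   MyModel model;
//   initRandomNormal(model, 0.01);        // parameters drawn from N(0, 0.01)
//   initRandomUniform(model, -0.1, 0.1);  // parameters drawn from U(-0.1, 0.1)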