//===========================================================================
/*!
 *
 *
 * \brief       Concatenation of two models, with type erasure
 *
 *
 *
 * \author      O. Krause
 * \date        2010-2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_MODEL_CONCATENATEDMODEL_H
#define SHARK_MODEL_CONCATENATEDMODEL_H

#include <shark/Models/AbstractModel.h>
#include <boost/shared_ptr.hpp>
#include <vector>

namespace shark {

///\brief ConcatenatedModel concatenates two models such that the output of the first model is input to the second.
///
///Sometimes a series of models is needed to generate the desired output. For example when input data needs to be
///normalized before it can be put into the trained model. In this case, the ConcatenatedModel can be used to
///represent this series as one model.
///The easiest way to do this is using the operator >> of AbstractModel:
///    ConcatenatedModel<VectorType> model = model1 >> model2;
///The input of the concatenated model is the input of model1 and its output is the output of model2; the output
///of model1 and the input of model2 must match. Alternatively, the model can be built explicitly by adding the
///submodels in order:
///    ConcatenatedModel<VectorType> model; model.add(&model1, true); model.add(&model2, true);
///warning: model1 and model2 must outlive model. When they are destroyed first, behavior is undefined.
template<class VectorType>
class ConcatenatedModel: public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ConcatenatedModel"; }

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_layers.front().model->inputShape();
	}
	///\brief Returns the shape of the output
	Shape outputShape() const{
		return m_layers.back().model->outputShape();
	}

	void add(AbstractModel<VectorType, VectorType, VectorType>* layer, bool optimize){
		m_layers.push_back({layer,optimize});
		enableModelOptimization(m_layers.size()-1, optimize);//recompute capabilities
	}

	///\brief Sets whether the parameters of the index-th model should be optimized.
	///
	/// If the model has non-differentiable submodels, disabling those will make
	/// the whole model differentiable.
	/// Note that the models are ordered as model0 >> model1 >> model2 >> ...
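	///
	/// A minimal usage sketch (hypothetical: `model` is assumed to have been built from two
	/// submodels, e.g. `model = normalizer >> network`, with the normalizer added first):
	///    model.enableModelOptimization(0, false); // freeze the normalizer's parameters
	///    model.enableModelOptimization(1, true);  // keep optimizing the network
	/// Afterwards parameterVector(), setParameterVector() and numberOfParameters() only cover
	/// the submodels that are still marked for optimization.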
	void enableModelOptimization(std::size_t index, bool opt){
		SIZE_CHECK(index < m_layers.size());
		m_layers[index].optimize = opt;
		this->m_features.reset();
		bool inputDerivative = true;
		bool parameterDerivative = true;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			auto const& layer = m_layers[m_layers.size() - k - 1];//we iterate backwards through the layers
			if( layer.optimize && (!layer.model->hasFirstParameterDerivative() || !inputDerivative)){
				parameterDerivative = false;
			}
			if( !layer.model->hasFirstInputDerivative()){
				inputDerivative = false;
			}
		}

		if (parameterDerivative){
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		}
		if (inputDerivative){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}
	}

	ParameterVectorType parameterVector() const {
		ParameterVectorType params(numberOfParameters());
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = layer.model->parameterVector();
			noalias(subrange(params,pos,pos+layerParams.size())) = layerParams;
			pos += layerParams.size();
		}
		return params;
	}

	void setParameterVector(ParameterVectorType const& newParameters) {
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = subrange(newParameters,pos,pos+layer.model->numberOfParameters());
			layer.model->setParameterVector(layerParams);
			pos += layerParams.size();
		}
	}

	std::size_t numberOfParameters() const{
		std::size_t numParams = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			numParams += layer.model->numberOfParameters();
		}
		return numParams;
	}

	boost::shared_ptr<State> createState()const{
		InternalState* state = new InternalState;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			state->state.push_back(m_layers[i].model->createState());
			state->intermediates.push_back(BatchOutputType());
		}
		return boost::shared_ptr<State>(state);
	}

	BatchOutputType const& hiddenResponses(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return s.intermediates[index];
	}

	State const& hiddenState(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return *s.state[index];
	}

	using base_type::eval;
	void eval(BatchInputType const& patterns, BatchOutputType& outputs)const {
		BatchOutputType intermediates;
		outputs = patterns;
		for(auto layer: m_layers){
			swap(intermediates,outputs);
			layer.model->eval(intermediates,outputs);
		}
	}
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
		InternalState& s = state.toState<InternalState>();
		outputs = patterns;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			if(i == 0)
				m_layers[i].model->eval(patterns,s.intermediates[i], *s.state[i]);
			else
				m_layers[i].model->eval(s.intermediates[i-1],s.intermediates[i], *s.state[i]);
		}
		outputs = s.intermediates.back();
	}

	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		BatchOutputType inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer does not need to be optimized, we just check whether we have to compute the chain rule
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				if(i != 0) //check, if we are done, the input layer does not need to compute anything
					m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				ParameterVectorType paramDerivative;
				if(i != 0){//if we are in an intermediate layer, compute the chain rule
					m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				}
				else{//lowest layer only needs to compute the parameter derivative
					m_layers[i].model->weightedParameterDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative);
				}
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}

	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchOutputType& derivatives
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType derivativeLast;
		derivatives = coefficients;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(derivativeLast,derivatives);
			m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], derivativeLast, *s.state[i], derivatives);
		}
	}

	virtual void weightedDerivatives(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient,
		BatchInputType& inputDerivative
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer does not need to be optimized, we just check whether we have to compute the chain rule
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				ParameterVectorType paramDerivative;
				m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}

	/// From ISerializable
	void read( InArchive & archive ){
		for(auto& layer: m_layers){//reference, so that the optimize flag is actually restored
			archive >> *layer.model;
			archive >> layer.optimize;
		}
	}

	/// From ISerializable
	void write( OutArchive & archive ) const{
		for(auto layer: m_layers){
			archive << *layer.model;
			archive << layer.optimize;
		}
	}
private:
	struct Layer{
		AbstractModel<VectorType, VectorType, VectorType>* model;
		bool optimize;
	};
	std::vector<Layer> m_layers;

	struct InternalState: State{
		std::vector<boost::shared_ptr<State> > state;
		std::vector<BatchOutputType> intermediates;
	};
};

///\brief Connects two AbstractModels so that the output of the first model is the input of the second.
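///
/// A minimal usage sketch (hypothetical names: `normalizer` and `network` stand for any two
/// models with matching shapes, i.e. AbstractModel<RealVector, RealVector, RealVector> instances):
///    ConcatenatedModel<RealVector> model = normalizer >> network;
///    model.eval(inputs, outputs); // applies normalizer first, then network
/// The submodels are stored by pointer, so normalizer and network must outlive model.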
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	AbstractModel<VectorType, VectorType, VectorType>& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence;
	sequence.add(&firstModel, true);
	sequence.add(&secondModel, true);
	return sequence;
}

template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	ConcatenatedModel<VectorType> const& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence = firstModel;
	sequence.add(&secondModel, true);
	return sequence;
}

}
#endif