/*!
 *
 * \brief       -
 *
 * \author      O.Krause
 * \date        2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *

 * This file is part of Shark.
 *
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef MODELS_NEURONS_H
#define MODELS_NEURONS_H

#include <shark/LinAlg/Base.h>
#include <shark/Models/AbstractModel.h>

namespace shark{

///\brief Neuron which computes the hyperbolic tangent with range [-1,1].
///
///The Tanh function is
///\f[ f(x)=\tanh(x) = \frac{2}{1+e^{-2x}}-1 \f]
///its derivative can be computed as
///\f[ f'(x)= 1-f(x)^2 \f]
struct TanhNeuron{
	typedef EmptyState State;

	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = tanh(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const&)const{
		noalias(der) *= typename Output::value_type(1) - sqr(output);
	}
};

///\brief Neuron which computes the Logistic (sigmoid) function with range [0,1].
///
///The Logistic function is
///\f[ f(x)=\frac{1}{1+e^{-x}} \f]
///its derivative can be computed as
///\f[ f'(x)= f(x)(1-f(x)) \f]
struct LogisticNeuron{
	typedef EmptyState State;

	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = sigmoid(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= output * (typename Output::value_type(1) - output);
	}
};

///\brief Fast sigmoidal function, which does not need to compute an exponential function.
///
///It is defined as
///\f[ f(x)=\frac{x}{1+|x|} \f]
///its derivative can be computed as
///\f[ f'(x)= (1 - |f(x)|)^2 \f]
struct FastSigmoidNeuron{
	typedef EmptyState State;

	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) /= typename Arg::value_type(1)+abs(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= sqr(typename Output::value_type(1) - abs(output));
	}
};
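
// The neuron functors above and below share the same minimal interface:
// evalInPlace() applies the activation to a whole batch in place, and
// multiplyDerivative() multiplies back-propagated coefficients by f'(x),
// expressed through the already computed outputs. A rough usage sketch;
// the matrix sizes and values are made up for illustration:
//
//   RealMatrix activations(2, 3, 0.5); // batch of 2 patterns with 3 units each
//   TanhNeuron tanh_neuron;
//   tanh_neuron.evalInPlace(activations);            // activations = tanh(x)
//
//   RealMatrix delta(2, 3, 1.0);                     // incoming coefficients
//   TanhNeuron::State state;                         // EmptyState, nothing stored
//   tanh_neuron.multiplyDerivative(activations, delta, state); // delta *= 1 - f(x)^2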

///\brief Linear activation neuron f(x) = x.
struct LinearNeuron{
	typedef EmptyState State;

	template<class Arg>
	void evalInPlace(Arg&)const{}

	template<class Arg>
	void evalInPlace(Arg& arg, State const&)const{}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{}
};

///\brief Rectifier Neuron f(x) = max(0,x)
struct RectifierNeuron{
	typedef EmptyState State;

	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = max(arg,typename Arg::value_type(0));
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		//~ noalias(der) *= heaviside(output);
		//~ for(std::size_t i = 0; i != output.size1(); ++i){
		//~ 	for(std::size_t j = 0; j != output.size2(); ++j){
		//~ 		der(i,j) *= output(i,j) > 0? 1.0:0.0;
		//~ 	}
		//~ }
		noalias(der) *= output > 0;
	}
};

///\brief Neuron which normalizes each input pattern so that its components sum to one.
template<class VectorType = RealVector>
struct NormalizerNeuron{
	struct State: public shark::State{
		VectorType norm;

		void resize(std::size_t patterns){
			norm.resize(patterns);
		}
	};

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(trans(arg)) /= blas::repeat(sum_columns(arg),arg().size2());
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State& state)const{
		state.norm.resize(arg().size1());
		noalias(state.norm) = sum_columns(arg);
		noalias(arg) /= trans(blas::repeat(state.norm,arg().size2()));
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		for(std::size_t i = 0; i != output.size1(); ++i){
			double constant=inner_prod(row(der,i),row(output,i));
			noalias(row(der,i))= (row(der,i)-constant)/s.norm(i);
		}
	}
};

///\brief Neuron which computes the softmax function, mapping each input pattern to a probability vector.
template<class VectorType = RealVector>
struct SoftmaxNeuron{
	typedef EmptyState State;

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= trans(blas::repeat(sum_columns(arg),arg().size2()));
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		for(std::size_t i = 0; i != output.size1(); ++i){
			double mass=inner_prod(row(der,i),row(output,i));
			noalias(row(der,i)) = (row(der,i) - mass) *row(output,i);
		}
	}
};
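
///\brief Layer which applies a neuron activation function element-wise to its input.
///
/// The layer is parameter-free; input and output share the same Shape. A rough usage
/// sketch follows (the shape size and input values are made up for illustration, and it
/// is assumed that Shape can be constructed from an initializer list):
/// \code
/// NeuronLayer<RectifierNeuron> relu({100});  // layer of 100 rectified linear units
/// RealMatrix inputs(10, 100, -0.5);          // batch of 10 patterns
/// RealMatrix outputs;
/// relu.eval(inputs, outputs);                // outputs(i,j) = max(0, inputs(i,j))
/// \endcode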
template <class NeuronType, class VectorType = RealVector>
class NeuronLayer : public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;

	NeuronType m_neuron;
	Shape m_shape;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	NeuronLayer(Shape const& shape = Shape()): m_shape(shape){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const{
		return "NeuronLayer";
	}

	NeuronType const& neuron()const{ return m_neuron;}
	NeuronType& neuron(){ return m_neuron;}

	Shape inputShape() const{
		return m_shape;
	}

	Shape outputShape() const{
		return m_shape;
	}

	/// obtain the parameter vector
	ParameterVectorType parameterVector() const{
		return ParameterVectorType();
	}

	/// overwrite the parameter vector
	void setParameterVector(ParameterVectorType const& newParameters){
		SIZE_CHECK(newParameters.size() == 0);
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return 0;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new typename NeuronType::State());
	}

	using base_type::eval;

	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs);
	}

	void eval(VectorType const& input, VectorType& output)const{
		SIZE_CHECK(input.size() == m_shape.numElements());
		output.resize(input.size());
		noalias(output) = input;
		m_neuron.evalInPlace(output);
	}

	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs, state.toState<typename NeuronType::State>());
	}

	///\brief Calculates the first derivative w.r.t. the parameters, summed over all inputs of the last computed batch.
	void weightedParameterDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size1()==inputs.size1());
		SIZE_CHECK(coefficients.size2()==inputs.size2());
	}

	///\brief Calculates the first derivative w.r.t. the inputs, summed over all inputs of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SIZE_CHECK(coefficients.size1() == inputs.size1());
		SIZE_CHECK(coefficients.size2() == inputs.size2());

		derivative.resize(inputs.size1(),inputs.size2());
		noalias(derivative) = coefficients;
		m_neuron.multiplyDerivative(outputs, derivative, state.toState<typename NeuronType::State>());
	}

	/// From ISerializable
	void read(InArchive& archive){ archive >> m_shape;}
	/// From ISerializable
	void write(OutArchive& archive) const{ archive << m_shape;}
};

}
#endif