/*!
 *
 * \brief Objective function for single and double poles with partial state information (non-Markovian task)
 *
 * Class for balancing one or two poles on a cart using a fitness
 * function that decreases the longer the pole(s) balance(s). Based
 * on code written by Verena Heidrich-Meisner for the paper
 *
 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
 * episodic reinforcement learning. Journal of Algorithms,
 * 64(4):152–168, 2009.
 *
 * \author Johan Valentin Damgaard
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION
#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION

#include <iostream>
#include <stdexcept>
#include <cstdlib>

// NOTE: the Shark header paths below are assumed from the standard Shark 3.x layout.
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Models/OnlineRNNet.h>
#include <shark/LinAlg/Base.h>
#include <shark/ObjectiveFunctions/Benchmarks/PoleSimulators/SinglePole.h>
#include <shark/ObjectiveFunctions/Benchmarks/PoleSimulators/DoublePole.h>

namespace shark {

//! \brief Objective function for single and double non-Markov poles
//!
//! Class for balancing one or two poles on a cart using a fitness function
//! that decreases the longer the pole(s) balance(s).
//! Based on code written by Verena Heidrich-Meisner for the paper
//!
//! V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic
//! reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
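//!
//! \par Example
//! A minimal usage sketch, assuming Shark's CMA-ES implementation in
//! shark/Algorithms/DirectSearch/CMA.h; the exact optimizer calls may differ
//! between Shark versions:
//! \code
//! #include <shark/Algorithms/DirectSearch/CMA.h>
//!
//! NonMarkovPole objective(true, 4, true); // single pole, 4 hidden neurons, with bias
//! CMA cma;
//! cma.init(objective, objective.proposeStartingPoint());
//! while(cma.solution().value > 0) { // 0 means the pole balanced for the full episode
//!     cma.step(objective);
//! }
//! RealVector bestWeights = cma.solution().point;
//! \endcode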
class NonMarkovPole : public SingleObjectiveFunction {

public:
    //! \param single Is this an instance of the single pole problem?
    //! \param hidden Number of hidden neurons in underlying neural network
    //! \param bias Whether to use bias in neural network
    //! \param sigmoidType Activation sigmoid function for neural network
    //! \param normalize Whether to normalize input before use in neural network
    //! \param max_pole_evaluations Balance goal of the function, i.e. number of steps that the pole should be able to balance without failure
    NonMarkovPole(bool single, std::size_t hidden, bool bias,
                  RecurrentStructure::SigmoidType sigmoidType = RecurrentStructure::FastSigmoid,
                  bool normalize = true,
                  std::size_t max_pole_evaluations = 100000)
        : m_single(single),
          m_maxPoleEvals(max_pole_evaluations),
          m_normalize(normalize) {

        if (sigmoidType == RecurrentStructure::Linear) {
            std::cerr << "Cannot use linear activation function for pole balancing." << std::endl;
            exit(EXIT_FAILURE);
        }

        // number of inputs should be 2 for single pole, 3 for double.
        std::size_t inputs = 0;
        if (single) {
            inputs = 2;
        }
        else {
            inputs = 3;
        }

        // set features
        m_features |= CAN_PROPOSE_STARTING_POINT;

        // set number of variables/weights.
        // number of outputs is always 1.
        // dimensions depend on whether we use bias
        if (bias) {
            m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1) + hidden + 1;
        }
        else {
            m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1);
        }
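        // e.g. with bias, a single pole (2 inputs) and 4 hidden neurons give
        // (4+1)*(4+1) + 2*(4+1) + 4+1 = 25 + 10 + 5 = 40 weights, i.e. each of the
        // hidden+1 non-input neurons receives one weight from every non-input
        // neuron, every input, and the bias.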
        // make RNNet
        mp_struct = new RecurrentStructure();
        mp_struct->setStructure(inputs, hidden, 1, bias, sigmoidType);
        mp_net = new PoleRNNet(mp_struct);

        // check dimensions match
        if(m_dimensions != mp_net->numberOfParameters()) {
            std::cerr << "Non-Markov pole RNNet: Dimensions do not match, "
                      << m_dimensions << " != " << mp_net->numberOfParameters() << std::endl;
            exit(EXIT_FAILURE);
        }

        // set eval count
        m_evaluationCounter = 0;
    }

    ~NonMarkovPole(){
        delete mp_struct;
        delete mp_net;
    }

    std::string name() const {
        return "Objective Function for Non-Markovian pole balancing.";
    }

    //! \brief Returns degrees of freedom
    std::size_t numberOfVariables() const{
        return m_dimensions;
    }

    //! \brief Always proposes to start in a zero vector with appropriate degrees of freedom
    SearchPointType proposeStartingPoint() const{
        SearchPointType startingPoint(m_dimensions);
        for(std::size_t i = 0; i != m_dimensions; i++) {
            startingPoint(i) = 0.0;
        }
        return startingPoint;
    }

    //! \brief Evaluates weight vector on fitness function
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType eval(const SearchPointType &input) const{
        SIZE_CHECK(input.size() == m_dimensions);
        m_evaluationCounter++;

        if(m_single) {
            return evalSingle(input);
        }
        else {
            return evalDouble(input);
        }
    }

private:

    // Private class for the recurrent neural network. Not to be used outside the main class.
    class PoleRNNet : public OnlineRNNet {
    public:
        PoleRNNet(RecurrentStructure* structure) : OnlineRNNet(structure){}

        boost::shared_ptr<State> createState() const{
            throw std::logic_error("State not available for PoleRNNet.");
        }

        void eval(BatchInputType const & patterns, BatchOutputType &outputs, State& state) const{
            throw std::logic_error("Batch not available for PoleRNNet.");
        }
    };

    //! \brief Converts neural network output for use with pole simulator
    //! \param output Output of the neural network.
    //! \return double precision floating point between 0 and 1.
    double convertToPoleMovement(double output) const{
        switch(mp_struct->sigmoidType()) {
        case RecurrentStructure::Logistic:
            return output;
        case RecurrentStructure::FastSigmoid:
            return (output + 1.) / 2.;
        case RecurrentStructure::Tanh:
            return (output + 1.) / 2.;
        default:
            std::cerr << "Unsupported activation function for pole balancing." << std::endl;
            exit(EXIT_FAILURE);
        }
    }

    //! \brief Fitness function for single poles. Gets lower as the pole balances for longer.
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType evalSingle(const SearchPointType &input) const{
        double init_angle = 0.07;
        SinglePole pole(false, m_normalize);
        RealVector state(2);
        RealMatrix output(1,1);
        RealMatrix inState(1,2);
        std::size_t eval_count = 0;
        bool failed = false;

        pole.init(init_angle);
        mp_net->resetInternalState();
        mp_net->setParameterVector(input);

        while(!failed && eval_count < m_maxPoleEvals) {
            pole.getState(state);
            row(inState,0) = state;
            mp_net->eval(inState,output);
            pole.move(convertToPoleMovement(output(0,0)));
            failed = pole.failure();
            eval_count++;
        }

        // gets lower as number of evaluations grows. min = 0
        return m_maxPoleEvals - eval_count;
    }

    //! \brief Fitness function for double poles. Gets lower as the poles balance for longer.
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType evalDouble(const SearchPointType &input) const{
        double init_angle = 0.07;
        DoublePole pole(false, m_normalize);
        RealVector state(3);
        RealMatrix output(1,1);
        RealMatrix inState(1,3);
        std::size_t eval_count = 0;
        bool failed = false;

        pole.init(init_angle);
        mp_net->resetInternalState();
        mp_net->setParameterVector(input);

        while(!failed && eval_count < m_maxPoleEvals) {
            pole.getState(state);
            row(inState,0) = state;
            mp_net->eval(inState,output);
            pole.move(convertToPoleMovement(output(0,0)));
            failed = pole.failure();
            eval_count++;
        }

        // gets lower as number of evaluations grows. min = 0
        return m_maxPoleEvals - eval_count;
    }

    //! True if this is a single pole, false if double pole.
    bool m_single;

    //! True if neural network input is normalized, false otherwise
    bool m_normalize;

    //! Degrees of freedom
    std::size_t m_dimensions;

    //! Balance goal
    std::size_t m_maxPoleEvals;

    //! Neural network
    RecurrentStructure *mp_struct;
    OnlineRNNet *mp_net;
};

}
#endif