/*!
 *
 * \brief Objective function for single and double poles with full state information (Markovian task)
 *
 * Class for balancing one or two poles on a cart using a fitness
 * function that decreases the longer the pole(s) balance(s). Based
 * on code written by Verena Heidrich-Meisner for the paper
 *
 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
 * episodic reinforcement learning. Journal of Algorithms,
 * 64(4):152–168, 2009.
 *
 * \author Johan Valentin Damgaard
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 *
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION
#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION

#include <iostream>
#include <cstdlib>
#include <typeinfo>

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Models/FFNet.h>

#include "PoleSimulators/SinglePole.h"
#include "PoleSimulators/DoublePole.h"

namespace shark {

//! \brief Class for balancing one or two poles on a cart using a fitness function
//! that decreases the longer the pole(s) balance(s).
//!
//! Based on code written by Verena Heidrich-Meisner for the paper
//!
//! V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic
//! reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
//!
//! The class is templated on the neural network activation functions, since
//! FFNet itself uses templates. FastSigmoidNeuron is recommended, as it gives
//! better results overall. If errors are encountered with a specific neuron
//! type, disabling normalization may help; it fixes at least the single-pole
//! LogisticNeuron case.
template<class HiddenNeuron, class OutputNeuron>
class MarkovPole : public SingleObjectiveFunction {
public:
    //! \param single_pole Indicates whether the cart has a single pole (true) or two poles (false)
    //! \param hidden Number of hidden neurons in underlying neural network
    //! \param shortcuts Whether to use shortcuts in neural network
    //! \param bias Whether to use bias in neural network
    //! \param normalize Whether to normalize input before use in neural network
    //! \param max_pole_evaluations Balance goal of the function, i.e. the number of steps the pole should be able to balance without failure
    MarkovPole(bool single_pole, std::size_t hidden, bool shortcuts, bool bias,
               bool normalize = true, std::size_t max_pole_evaluations = 100000)
    : m_single(single_pole),
      m_maxPoleEvals(max_pole_evaluations),
      m_normalize(normalize) {

        // number of inputs should be 4 for single pole, 6 for double.
        std::size_t inputs = 0;
        if (single_pole) {
            inputs = 4;
        }
        else {
            inputs = 6;
        }

        // set features
        m_features |= CAN_PROPOSE_STARTING_POINT;

        // set number of variables/weights.
        // number of outputs is always 1.
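        // For example, a single-pole network (4 inputs, 1 output) with 10 hidden
        // units, bias and no shortcuts has 10*(4+1) + 10 + 1 = 61 weights: each
        // hidden unit has 4 incoming weights plus one weight to the output, and
        // bias adds one parameter per hidden unit plus one for the output.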
        // dimensions depend on whether we use bias and/or shortcuts
        if (bias && shortcuts) {
            m_dimensions = hidden * (inputs + 1) + inputs + hidden + 1;
        }
        else if (shortcuts) {
            m_dimensions = hidden * (inputs + 1) + inputs;
        }
        else if (bias) {
            m_dimensions = hidden * (inputs + 1) + hidden + 1;
        }
        else {
            m_dimensions = hidden * (inputs + 1);
        }

        // make FFNet
        mp_net = new FFNet<HiddenNeuron, OutputNeuron>();
        FFNetStructures::ConnectionType type = shortcuts ?
            FFNetStructures::InputOutputShortcut : FFNetStructures::Normal;
        mp_net->setStructure(inputs, hidden, 1, type, bias);

        // check dimensions match
        if(m_dimensions != mp_net->numberOfParameters()) {
            std::cerr << "Markov pole FFNet: Dimensions do not match, "
                      << m_dimensions << " != " << mp_net->numberOfParameters() << std::endl;
            exit(EXIT_FAILURE);
        }

        // set eval count
        m_evaluationCounter = 0;
    }

    ~MarkovPole() {
        delete mp_net;
    }

    std::string name() {
        return "Objective Function for Markovian pole balancing.";
    }

    //! \brief Returns degrees of freedom
    std::size_t numberOfVariables() const {
        return m_dimensions;
    }

    //! \brief Always proposes to start in a zero vector with appropriate degrees of freedom
    SearchPointType proposeStartingPoint() const {
        SearchPointType startingPoint(m_dimensions);
        for(std::size_t i = 0; i != m_dimensions; i++) {
            startingPoint(i) = 0.0;
        }
        return startingPoint;
    }

    //! \brief Evaluates weight vector on fitness function
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType eval(const SearchPointType &input) const {
        SIZE_CHECK(input.size() == m_dimensions);
        m_evaluationCounter++;

        if(m_single) {
            return evalSingle(input);
        }
        else {
            return evalDouble(input);
        }
    }

private:
    //! \brief Converts neural network output for use with pole simulator
    //! \param output Output of the neural network.
    //! \return double precision floating point between 0 and 1.
    double convertToPoleMovement(double output) const {
        if (typeid(mp_net->outputActivationFunction()) == typeid(LogisticNeuron)) {
            return output;
        }
        else if (typeid(mp_net->outputActivationFunction()) == typeid(FastSigmoidNeuron)) {
            return (output + 1.) / 2.;
        }
        else if (typeid(mp_net->outputActivationFunction()) == typeid(TanhNeuron)) {
            return (output + 1.) / 2.;
        }
        else {
            std::cerr << "Unsupported neuron type in Markov pole FFNet." << std::endl;
            exit(EXIT_FAILURE);
        }
    }

    //! \brief Fitness function for single poles. Gets lower the longer the pole balances.
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType evalSingle(const SearchPointType &input) const {
        double init_angle = 0.07;
        SinglePole pole(true, m_normalize);
        RealVector state(4);
        RealVector output(1);
        std::size_t eval_count = 0;
        bool failed = false;

        pole.init(init_angle);
        mp_net->setParameterVector(input);

        while(!failed && eval_count < m_maxPoleEvals) {
            pole.getState(state);
            mp_net->eval(state, output);
            pole.move(convertToPoleMovement(output(0)));
            failed = pole.failure();
            eval_count++;
        }

        // gets lower as the number of evaluations grows. min = 0
        return m_maxPoleEvals - eval_count;
    }

    //! \brief Fitness function for double poles. Gets lower the longer the poles balance.
    //! \param input Vector to be evaluated.
    //! \return Fitness of vector
    ResultType evalDouble(const SearchPointType &input) const {
        double init_angle = 0.07;
        DoublePole pole(true, m_normalize);
        RealVector state(6);
        RealVector output(1);
        std::size_t eval_count = 0;
        bool failed = false;

        pole.init(init_angle);
        mp_net->setParameterVector(input);

        while(!failed && eval_count < m_maxPoleEvals) {
            pole.getState(state);
            mp_net->eval(state, output);
            pole.move(convertToPoleMovement(output(0)));
            failed = pole.failure();
            eval_count++;
        }

        // gets lower as the number of evaluations grows. min = 0
        return m_maxPoleEvals - eval_count;
    }

    //! True if this is a single pole, false if double pole.
    bool m_single;
    //! True if neural network input is normalized, false otherwise
    bool m_normalize;
    //! Degrees of freedom
    std::size_t m_dimensions;
    //! Balance goal
    std::size_t m_maxPoleEvals;

    //! Neural network
    FFNet<HiddenNeuron, OutputNeuron> *mp_net;

    HiddenNeuron m_hiddenNeuron;
    OutputNeuron m_outputNeuron;
};

}
#endif
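/*
 * Example usage (a minimal sketch, not part of the original header): it shows
 * how this objective function might be minimized with Shark's CMA evolution
 * strategy. The neuron types, the CMA include path and the stopping criterion
 * are assumptions about a typical Shark 3.x setup, not something prescribed by
 * this file; adjust them to the local build.
 *
 *   #include <shark/Algorithms/DirectSearch/CMA.h>
 *
 *   int main() {
 *       // Single pole, 10 hidden neurons, no shortcuts, with bias.
 *       shark::MarkovPole<shark::FastSigmoidNeuron, shark::FastSigmoidNeuron>
 *           objective(true, 10, false, true);
 *
 *       shark::CMA cma;
 *       cma.init(objective, objective.proposeStartingPoint());
 *
 *       // Fitness is max_pole_evaluations minus the number of successful steps,
 *       // so a value of 0 means the pole balanced for the full horizon.
 *       for (std::size_t t = 0; t != 1000 && cma.solution().value > 0.0; ++t) {
 *           cma.step(objective);
 *       }
 *       return 0;
 *   }
 */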