/*!
*
*
* \brief -
*
* \author -
* \date -
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <https://www.gnu.org/licenses/>.
*
*/
#ifndef SHARK_UNSUPERVISED_RBM_RBM_H
#define SHARK_UNSUPERVISED_RBM_RBM_H
#include
#include
#include
#include
#include
namespace shark{
///\brief stub for the RBM class. at the moment it is just a holder of the parameter set and the Energy.
template
class RBM : public AbstractModel{
private:
typedef AbstractModel base_type;
public:
typedef HiddenLayerT HiddenType; ///< type of the hidden layer
typedef VisibleLayerT VisibleType; ///< type of the visible layer
typedef randomT randomType;
typedef Energy > EnergyType;///< Type of the energy function
typedef detail::AverageEnergyGradient GradientType;///< Type of the gradient calculator
typedef typename base_type::BatchInputType BatchInputType;
typedef typename base_type::BatchOutputType BatchOutputType;
private:
/// \brief The weight matrix connecting hidden and visible layer.
RealMatrix m_weightMatrix;
///The layer of hidden Neurons
HiddenType m_hiddenNeurons;
///The Layer of visible Neurons
VisibleType m_visibleNeurons;
randomType* mpe_rng;
bool m_forward;
bool m_evalMean;
///\brief Evaluates the input by propagating the visible input to the hidden neurons.
///
/// Computes the sufficient statistics of p(h|v) for the whole batch and then either
/// returns the mean hidden state (m_evalMean == true) or a sample drawn from it.
///
///@param state batch of states of the visible units, one row per sample
///@param output batch of (mean or sampled) states of the hidden units; resized to fit
void evalForward(BatchInputType const& state,BatchOutputType& output)const{
	std::size_t batchSize=state.size1();
	//sufficient statistics of the conditional distribution of the hidden units
	typename HiddenType::StatisticsBatch statisticsBatch(batchSize,numberOfHN());
	RealMatrix inputBatch(batchSize,numberOfHN());
	output.resize(state.size1(),numberOfHN());
	//propagate the visible state through the weights to obtain the hidden layer's input
	energy().inputHidden(inputBatch,state);
	hiddenNeurons().sufficientStatistics(inputBatch,statisticsBatch,blas::repeat(1.0,batchSize));
	if(m_evalMean){
		//deterministic evaluation: expected hidden state E[h|v]
		noalias(output) = hiddenNeurons().mean(statisticsBatch);
	}
	else{
		//stochastic evaluation: draw one sample per row using the RBM's rng
		//NOTE(review): third argument 0.0 presumably disables extra noise/tempering - confirm against the layer's sample() contract
		hiddenNeurons().sample(statisticsBatch,output,0.0,*mpe_rng);
	}
}
///\brief Evaluates the input by propagating the hidden input to the visible neurons.
///
/// Mirror image of evalForward: computes the sufficient statistics of p(v|h) for the
/// batch and returns either the mean visible state (m_evalMean == true) or a sample.
///
///@param state batch of states of the hidden units, one row per sample
///@param output batch of (mean or sampled) states of the visible units; resized to fit
void evalBackward(BatchInputType const& state,BatchOutputType& output)const{
	std::size_t batchSize = state.size1();
	//sufficient statistics of the conditional distribution of the visible units
	typename VisibleType::StatisticsBatch statisticsBatch(batchSize,numberOfVN());
	RealMatrix inputBatch(batchSize,numberOfVN());
	output.resize(batchSize,numberOfVN());
	//propagate the hidden state through the weights to obtain the visible layer's input
	energy().inputVisible(inputBatch,state);
	visibleNeurons().sufficientStatistics(inputBatch,statisticsBatch,blas::repeat(1.0,batchSize));
	if(m_evalMean){
		//deterministic evaluation: expected visible state E[v|h]
		noalias(output) = visibleNeurons().mean(statisticsBatch);
	}
	else{
		//stochastic evaluation: draw one sample per row using the RBM's rng
		//NOTE(review): third argument 0.0 presumably disables extra noise/tempering - confirm against the layer's sample() contract
		visibleNeurons().sample(statisticsBatch,output,0.0,*mpe_rng);
	}
}
public:
/// \brief Constructs the RBM with the given random number generator.
///
/// Defaults to forward evaluation returning the mean state (see evaluationType()).
///
/// @param rng generator used for sampling; only a pointer is stored, so rng must outlive this RBM
RBM(randomType& rng):mpe_rng(&rng),m_forward(true),m_evalMean(true)
{ }
/// \brief From INameable: return the class name "RBM".
std::string name() const
{ return "RBM"; }
///\brief Returns the total number of parameters of the model.
///
/// The count is the number of weights (visible x hidden) plus the parameters
/// owned by the hidden and visible neuron layers.
std::size_t numberOfParameters()const {
	return numberOfVN()*numberOfHN()
		+ m_hiddenNeurons.numberOfParameters()
		+ m_visibleNeurons.numberOfParameters();
}
///\brief Returns the parameters of the Model as parameter vector.
///
/// Layout: flattened weight matrix first, then the hidden layer's parameters,
/// then the visible layer's parameters (the same order setParameterVector expects).
RealVector parameterVector () const {
	return to_vector(m_weightMatrix)
	| m_hiddenNeurons.parameterVector()
	| m_visibleNeurons.parameterVector();
}
///\brief Sets the parameters of the model.
///
/// @param newParameters vector of parameters
void setParameterVector(const RealVector& newParameters) {
std::size_t endW = numberOfVN()*numberOfHN();
std::size_t endH = endW + m_hiddenNeurons.numberOfParameters();
std::size_t endV = endH + m_visibleNeurons.numberOfParameters();
noalias(to_vector(m_weightMatrix)) = subrange(newParameters,0,endW);
m_hiddenNeurons.setParameterVector(subrange(newParameters,endW,endH));
m_visibleNeurons.setParameterVector(subrange(newParameters,endH,endV));
}
///\brief Creates the structure of the RBM.
///
/// Resizes the weight matrix to (hiddenNeurons x visibleNeurons), zero-initializes
/// it via clear(), and resizes both neuron layers accordingly.
///
///@param visibleNeurons number of visible neurons.
///@param hiddenNeurons number of hidden neurons.
void setStructure(std::size_t visibleNeurons,std::size_t hiddenNeurons){
	m_weightMatrix.resize(hiddenNeurons,visibleNeurons);
	m_weightMatrix.clear();
	m_hiddenNeurons.resize(hiddenNeurons);
	m_visibleNeurons.resize(visibleNeurons);
}
///\brief Returns the layer of hidden neurons (read-only).
HiddenType const& hiddenNeurons()const{
	return m_hiddenNeurons;
}
///\brief Returns the layer of hidden neurons (mutable).
HiddenType& hiddenNeurons(){
	return m_hiddenNeurons;
}
///\brief Returns the layer of visible neurons (mutable).
VisibleType& visibleNeurons(){
	return m_visibleNeurons;
}
///\brief Returns the layer of visible neurons (read-only).
VisibleType const& visibleNeurons()const{
	return m_visibleNeurons;
}
///\brief Returns the weight matrix connecting the layers (rows: hidden, columns: visible; see setStructure).
RealMatrix& weightMatrix(){
	return m_weightMatrix;
}
///\brief Returns the weight matrix connecting the layers (read-only).
RealMatrix const& weightMatrix()const{
	return m_weightMatrix;
}
///\brief Returns a fresh energy function object bound to this RBM.
EnergyType energy()const{
	return EnergyType(*this);
}
///\brief Returns the random number generator associated with this RBM.
randomType& rng(){
	return *mpe_rng;
}
///\brief Sets the type of evaluation, eval will perform.
///
///Eval performs its operation based on the state of this function.
///There are two ways to pass data through an RBM: either forward, setting the states of the
///visible neurons and sampling the hidden states, or backwards, where the state of the hidden is fixed and the visible
///are sampled.
///Instead of the state of the hidden/visible, one often wants the mean of the state \f$ E_{p(h|v)}\left(h\right)\f$.
///By default, the RBM uses the forward evaluation and returns the mean of the state.
///
///@param forward whether the forward view should be used; false=backwards
///@param evalMean whether the mean state should be returned; false=a sample is returned
void evaluationType(bool forward,bool evalMean){
	m_forward = forward;
	m_evalMean = evalMean;
}
/// \brief Returns the shape of eval()'s output: hidden layer size in forward mode, visible layer size otherwise.
Shape outputShape() const{
	return m_forward ? numberOfHN() : numberOfVN();
}
/// \brief Returns the shape of eval()'s input: visible layer size in forward mode, hidden layer size otherwise.
Shape inputShape() const{
	return m_forward ? numberOfVN() : numberOfHN();
}
/// \brief Returns an empty state object, as eval() needs no intermediate state for this model.
boost::shared_ptr<State> createState()const{
	return boost::shared_ptr<State>(new EmptyState());
}
///\brief Passes information through/samples from an RBM in a forward or backward way.
///
///There are two ways to pass data through an RBM: forward, where the visible states are
///set and the hidden states are computed/sampled, and backward, where the hidden states
///are fixed and the visible states are computed/sampled. Instead of a sampled state one
///often wants the mean \f$ E_{p(h|v)}\left(h\right)\f$. By default the RBM evaluates
///forward and returns the mean; other modes can be selected via evaluationType().
///
///@param patterns the batch of (visible or hidden) inputs
///@param outputs the batch of (visible or hidden) outputs
void eval(BatchInputType const& patterns,BatchOutputType& outputs)const{
	//dispatch on the direction chosen via evaluationType()
	if(!m_forward){
		evalBackward(patterns,outputs);
	}
	else{
		evalForward(patterns,outputs);
	}
}
/// \brief Same as eval(patterns,outputs); the State argument is ignored, as this model keeps no intermediate state.
void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	eval(patterns,outputs);
}
///\brief Calculates the input of the hidden neurons given the state of the visible neurons in a batch-wise fashion.
///
///@param inputs the batch of vectors the input of the hidden neurons is stored in
///@param visibleStates the batch of states of the visible neurons
void inputHidden(RealMatrix& inputs, RealMatrix const& visibleStates)const{
	SIZE_CHECK(visibleStates.size1() == inputs.size1());
	SIZE_CHECK(inputs.size2() == m_hiddenNeurons.size());
	SIZE_CHECK( visibleStates.size2() == m_visibleNeurons.size());
	//input = phi(v) * W^T, where phi is the visible layer's feature map
	noalias(inputs) = prod(m_visibleNeurons.phi(visibleStates),trans(m_weightMatrix));
}
///\brief Calculates the input of the visible neurons given the state of the hidden neurons in a batch-wise fashion.
///
///@param inputs the batch of vectors the input of the visible neurons is stored in
///@param hiddenStates the batch of states of the hidden neurons
void inputVisible(RealMatrix& inputs, RealMatrix const& hiddenStates)const{
	SIZE_CHECK(hiddenStates.size1() == inputs.size1());
	//validate the feature dimension as well, mirroring the checks in inputHidden
	SIZE_CHECK(hiddenStates.size2() == m_hiddenNeurons.size());
	SIZE_CHECK(inputs.size2() == m_visibleNeurons.size());
	//input = phi(h) * W, where phi is the hidden layer's feature map
	noalias(inputs) = prod(m_hiddenNeurons.phi(hiddenStates),m_weightMatrix);
}
using base_type::eval;
///\brief Returns the number of hidden neurons.
std::size_t numberOfHN()const{
	return m_hiddenNeurons.size();
}
///\brief Returns the number of visible neurons.
std::size_t numberOfVN()const{
	return m_visibleNeurons.size();
}
/// \brief Reads the network from an archive.
///
/// Restores the weights, both neuron layers and the state of the random number
/// generator, in the exact order write() stores them.
void read(InArchive& archive){
	archive >> m_weightMatrix;
	archive >> m_hiddenNeurons;
	archive >> m_visibleNeurons;
	//serialization of the rng is a bit...complex
	//let's hope that we can remove this hack one time. But we really can't ignore the state of the rng.
	//the rng state was stored as a string; parse it back via the rng's stream operator
	std::string str;
	archive>> str;
	std::stringstream stream(str);
	stream>> *mpe_rng;
}
/// \brief Writes the network to an archive.
///
/// Stores the weights, both neuron layers and the rng state (serialized to a
/// string via the rng's stream operator, mirroring read()).
void write(OutArchive& archive) const{
	archive << m_weightMatrix;
	archive << m_hiddenNeurons;
	archive << m_visibleNeurons;
	std::stringstream stream;
	stream <<*mpe_rng;
	std::string str = stream.str();
	archive <