/*!
*
*
* \brief Defines the Energy struct, the energy function determining the Gibbs distribution of an RBM.
*
* \author -
* \date -
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef SHARK_UNSUPERVISED_RBm_ENERGY_H
#define SHARK_UNSUPERVISED_RBm_ENERGY_H
#include
#include
namespace shark{
/// \brief The Energy function determining the Gibbs distribution of an RBM.
///
///General Energy function which uses the information given by the neurons to automatize
///the calculation of the value of the energy for certain states, the derivative of the energy
///and the factorization of the probability.
///
/// Following (but slightly simplifying from the formulas given by)
/// Welling at al. a general form of an RBM's Energy function is given by
/// \f$ E(\vec v,\vec h)= f_h(\vec h) + f_v(\vec v) + \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$
/// We call \f$ f_h(\vec h) \f$ and \f$ f_v(\vec v) \f$ the term of the Energy (energy term)
/// associated to the hidden or the visible neurons respectively.
/// \f$ \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$ is called the interaction term.
/// In the standard case of an binary RBM we have \f$ f_h(\vec h) = \vec h \vec c \f$
/// and \f$ f_v(\vec v) = \vec v \vec b \f$, where \f$ \vec c \f$ and \f$ \vec b \f$
/// are the vectors of the bias parameters for the hidden and the visible neurons respectively.
/// Furthermore, the interaction term simplifies to \f$ \vec h W \vec v \f$, so we have just
/// one singe 'phi-function' for each layer that is the identity function.
template
struct Energy{
typedef typename RBM::HiddenType HiddenType; //< type of the hidden layer
typedef typename RBM::VisibleType VisibleType; //< type of the visible layer
//typedefs for single element
typedef typename HiddenType::SufficientStatistics HiddenStatistics;
typedef typename VisibleType::SufficientStatistics VisibleStatistics;
//batch typedefs
typedef typename HiddenType::StatisticsBatch HiddenStatisticsBatch;
typedef typename VisibleType::StatisticsBatch VisibleStatisticsBatch;
Energy(RBM const& rbm)
: m_rbm(rbm)
, m_hiddenNeurons(rbm.hiddenNeurons())
, m_visibleNeurons(rbm.visibleNeurons()){}
///\brief Calculates the Energy given the states of batches of hidden and visible variables .
RealVector energy(RealMatrix const& hidden, RealMatrix const& visible)const{
SIZE_CHECK(visible.size1() == hidden.size1());
std::size_t batchSize = visible.size1();
RealMatrix input(batchSize,m_hiddenNeurons.size());
inputHidden( input, visible);
return energyFromHiddenInput( input, hidden, visible);
}
///\brief Calculates the input of the hidden neurons given the state of the visible in a batch-vise fassion.
///
///@param inputs the batch of vectors the input of the hidden neurons is stored in
///@param visibleStates the batch of states of the visible neurons@
///@todo Remove this and replace fully by the rbm method if possible
void inputHidden(RealMatrix& inputs, RealMatrix const& visibleStates)const{
m_rbm.inputHidden(inputs,visibleStates);
}
///\brief Calculates the input of the visible neurons given the state of the hidden.
///
///@param inputs the vector the input of the visible neurons is stored in
///@param hiddenStates the state of the hidden neurons
///@todo Remove this and replace fully by the rbm method if possible
void inputVisible(RealMatrix& inputs, RealMatrix const& hiddenStates)const{
m_rbm.inputVisible(inputs,hiddenStates);
}
///\brief Computes the logarithm of the unnormalized probability of each state of the
/// hidden neurons in a batch by using the precomputed input/activation of the visible neurons.
///
///@param hiddenState the batch of states of the hidden neurons
///@param visibleInput the batch of current inputs for he visible units given hiddenState
///@param beta the inverse temperature
///@return the unnormalized probability
template
RealVector logUnnormalizedProbabilityHidden(
RealMatrix const& hiddenState,
RealMatrix const& visibleInput,
BetaVector const& beta
)const{
SIZE_CHECK(hiddenState.size1()==visibleInput.size1());
SIZE_CHECK(hiddenState.size1()==beta.size());
std::size_t batchSize = hiddenState.size1();
//calculate the energy terms of the hidden neurons for the whole batch
RealVector energyTerms = m_hiddenNeurons.energyTerm(hiddenState,beta);
//calculate resulting probabilities in sequence
RealVector p(batchSize);
for(std::size_t i = 0; i != batchSize; ++i){
p(i) = m_visibleNeurons.logMarginalize(row(visibleInput,i),beta(i))+energyTerms(i);
}
return p;
}
///\brief Computes the logarithm of the unnormalized probability of each state of the
/// visible neurons in a batch by using the precomputed input/activation of the hidden neurons.
///
///@param visibleState the batch of states of the hidden neurons
///@param hiddenInput the batch of current inputs for he visible units given visibleState
///@param beta the inverse temperature
///@return the unnormalized probability
template
RealVector logUnnormalizedProbabilityVisible(
RealMatrix const& visibleState,
RealMatrix const& hiddenInput,
BetaVector const& beta
)const{
SIZE_CHECK(visibleState.size1()==hiddenInput.size1());
SIZE_CHECK(visibleState.size1()==beta.size());
std::size_t batchSize = visibleState.size1();
//calculate the energy terms of the visible neurons for the whole batch
RealVector energyTerms = m_visibleNeurons.energyTerm(visibleState,beta);
RealVector p(batchSize);
for(std::size_t i = 0; i != batchSize; ++i){
p(i) = m_hiddenNeurons.logMarginalize(row(hiddenInput,i),beta(i))+energyTerms(i);
}
return p;
}
///\brief Computes the logarithm of the unnormalized probability for each state of the visible neurons from a batch.
///
///@param visibleStates the batch of states of the hidden neurons
///@param beta the inverse temperature
template
RealVector logUnnormalizedProbabilityVisible(RealMatrix const& visibleStates, BetaVector const& beta)const{
SIZE_CHECK(visibleStates.size1() == beta.size());
RealMatrix hiddenInputs(beta.size(),m_hiddenNeurons.size());
inputHidden(hiddenInputs,visibleStates);
return logUnnormalizedProbabilityVisible(visibleStates, hiddenInputs, beta);
}
///\brief Computes the logarithm of the unnormalized probability of each state of the hidden neurons from a batch.
///
///@param hiddenStates a batch of states of the hidden neurons
///@param beta the inverse temperature
template
RealVector logUnnormalizedProbabilityHidden(RealMatrix const& hiddenStates, BetaVector const& beta)const{
SIZE_CHECK(hiddenStates.size1() == beta.size());
RealMatrix visibleInputs(beta.size(),m_visibleNeurons.size());
inputVisible(visibleInputs,hiddenStates);
return logUnnormalizedProbabilityHidden(hiddenStates, visibleInputs, beta);
}
///\brief Optimization of the calculation of the energy, when the input of the hidden units is already available.
///@param hiddenInput the vector of inputs of the hidden neurons
///@param hidden the states of the hidden neurons
///@param visible the states of the visible neurons
///@return the value of the energy function
RealVector energyFromHiddenInput(
RealMatrix const& hiddenInput,
RealMatrix const& hidden,
RealMatrix const& visible
)const{
RealMatrix const& phiOfH = m_hiddenNeurons.phi(hidden);
std::size_t batchSize = hiddenInput.size1();
RealVector energies(batchSize);
for(std::size_t i = 0; i != batchSize; ++i){
energies(i) = -inner_prod(row(hiddenInput,i),row(phiOfH,i));
}
energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
return energies;
}
///\brief Optimization of the calculation of the energy, when the input of the visible units is already available.
///@param visibleInput the vector of inputs of the visible neurons
///@param hidden the states of the hidden neurons
///@param visible the states of the visible neurons
///@return the value of the energy function
RealVector energyFromVisibleInput(
RealMatrix const& visibleInput,
RealMatrix const& hidden,
RealMatrix const& visible
)const{
RealMatrix const& phiOfV = m_visibleNeurons.phi(visible);
std::size_t batchSize = visibleInput.size1();
RealVector energies(batchSize);
for(std::size_t i = 0; i != batchSize; ++i){
energies(i) = -inner_prod(row(phiOfV,i),row(visibleInput,i));
}
energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
return energies;
}
private:
RBM const& m_rbm;
HiddenType const& m_hiddenNeurons;
VisibleType const& m_visibleNeurons;
};
}
#endif