/*!
*
*
* \brief -
*
* \author O. Krause, A.Fischer
* \date 2012-2014
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see .
*
*/
#ifndef SHARK_UNSUPERVISED_RBM_ANALYTICS_H
#define SHARK_UNSUPERVISED_RBM_ANALYTICS_H
#include "Impl/analytics.h"
#include
#include
namespace shark {
///\brief Calculates the value of the partition function $Z$.
///
///Only useful for small input and theoretical analysis
///
///@param rbm the RBM for which to calculate the function
///@param beta the inverse temperature of the RBM. default is 1
///@return the value of the partition function $Z*e^(-constant)$
template
double logPartitionFunction(RBMType const& rbm, double beta = 1.0) {
//special case: beta=0 is easy to compute and no explicit summation is needed
//as the RBM does not have any cross-terms anymore, the distribution is p(v,h)=p(v)*p(h)
//so we can simply assume v=0 and integrate all h in p(0,h) and then integrate over all v in p(v,0)
//and then multiply (or add the logarithms).
if(beta == 0.0){
RealVector zeroH(rbm.numberOfHN(),0.0);
RealVector zeroV(rbm.numberOfVN(),0.0);
double logPartition = rbm.visibleNeurons().logMarginalize(zeroV,0.0);
logPartition += rbm.hiddenNeurons().logMarginalize(zeroH,0.0);
return logPartition;
}
//choose correct version based on the enumeration tags
typedef typename RBMType::HiddenType::StateSpace::EnumerationTag HiddenTag;
typedef typename RBMType::VisibleType::StateSpace::EnumerationTag VisibleTag;
return detail::logPartitionFunction(rbm,VisibleTag(),HiddenTag(),beta);
}
///\brief Estimates the negative log-likelihood of a set of input vectors under the models distribution using the partition function
///
///Only useful for small input and theoretical analysis
///
///@param rbm the Restricted Boltzmann machine for which the negative log likelihood of the data is to be calculated
///@param inputs the input vectors
///@param logPartition the logarithmic value of the partition function of the RBM.
///@param beta the inverse temperature of the RBM. default is 1
///@return the log-likelihood
template
double negativeLogLikelihoodFromLogPartition(
RBMType const&rbm,
UnlabeledData const& inputs,
double logPartition,
double beta = 1.0
) {
double logP=0;
for(RealMatrix const& batch: inputs.batches()) {
logP += sum(rbm.energy().logUnnormalizedProbabilityVisible(batch, blas::repeat(beta,batch.size1())));
logP -= batch.size1()*logPartition;
}
return -logP;
}
///\brief Estimates the negative log-likelihood of a set of input vectors under the models distribution.
///
///Only useful for small input and theoretical analysis
///
///@param rbm the Restricted Boltzmann machine for which the negative log likelihood of the data is to be calculated
///@param inputs the input vectors
///@param beta the inverse temperature of the RBM. default is 1
///@return the log-likelihood
template
double negativeLogLikelihood(
RBMType const& rbm,
UnlabeledData const& inputs,
double beta = 1.0
) {
double const logPartition = logPartitionFunction(rbm,beta);
return negativeLogLikelihoodFromLogPartition(rbm,inputs,logPartition,beta);
}
enum PartitionEstimationAlgorithm{
AIS,
AISMean,
TwoSidedAISMean,
AcceptanceRatio,
AcceptanceRatioMean
};
inline double estimateLogFreeEnergyFromEnergySamples(
RealMatrix const& energyDiffUp,
RealMatrix const& energyDiffDown,
PartitionEstimationAlgorithm algorithm = AIS
){
std::size_t chains = energyDiffUp.size1();
std::size_t samples = energyDiffUp.size2();
double deltaF = 0;
switch(algorithm){
case AIS:
deltaF = soft_max(-sum_rows(energyDiffUp))-std::log(double(samples));
break;
case AISMean:
for(std::size_t i = chains-1; i != 0; --i){
deltaF += soft_max(-row(energyDiffUp,i))-std::log(double(samples));
}
break;
case TwoSidedAISMean:
for(std::size_t i = chains-1; i != 0; --i){
deltaF += detail::twoSidedAIS(row(energyDiffUp,i),row(energyDiffDown,i-1));
}
break;
case AcceptanceRatioMean:
for(std::size_t i = chains-1; i != 0; --i){
deltaF += detail::acceptanceRatio(row(energyDiffUp,i),row(energyDiffDown,i-1));
}
break;
case AcceptanceRatio:
deltaF = detail::acceptanceRatio(sum_rows(energyDiffUp),sum_rows(energyDiffDown));
}
return deltaF;
}
template
double estimateLogFreeEnergy(
RBMType& rbm, Data const& initDataset,
RealVector const& beta, std::size_t samples,
PartitionEstimationAlgorithm algorithm = AcceptanceRatioMean,
float burnInPercentage =0.1
){
std::size_t chains = beta.size();
RealMatrix energyDiffUp(chains,samples);
RealMatrix energyDiffDown(chains,samples);
detail::sampleEnergies(rbm,initDataset,beta,energyDiffUp,energyDiffDown,burnInPercentage);
return estimateLogFreeEnergyFromEnergySamples(
energyDiffUp,energyDiffDown,algorithm
);
}
template
double annealedImportanceSampling(
RBMType& rbm,RealVector const& beta, std::size_t samples
){
std::size_t chains = beta.size();
RealMatrix energyDiffTempering(chains,samples,0.0);
detail::sampleEnergiesWithTempering(rbm,beta,energyDiffTempering);
return soft_max(-sum_rows(energyDiffTempering))-std::log(double(samples));
}
template
double estimateLogFreeEnergy(
RBMType& rbm, Data const& initDataset,
std::size_t chains, std::size_t samples,
PartitionEstimationAlgorithm algorithm = AIS,
float burnInPercentage =0.1
){
RealVector beta(chains);
for(std::size_t i = 0; i != chains; ++i){
beta(i) = 1.0-i/double(chains-1);
}
return estimateLogFreeEnergy(rbm,initDataset,beta,samples,algorithm,burnInPercentage);
}
template
double annealedImportanceSampling(
RBMType& rbm,std::size_t chains, std::size_t samples
){
RealVector beta(chains);
for(std::size_t i = 0; i != chains; ++i){
beta(i) = 1.0-i/double(chains-1);
}
return annealedImportanceSampling(rbm,beta,samples);
}
}
#endif