/*!
 *
 * \brief       Negative Log Likelihood error function
 *
 * \author      O.Krause
 * \date        2014
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H
#define SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H

#include <shark/Models/AbstractModel.h>
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/DataView.h>
#include <shark/Core/OpenMP.h>

namespace shark{

/// \brief Computes the negative log likelihood of a dataset under a model
///
/// The negative log likelihood is defined as
/// \f[ L(\theta) = -\frac 1 N \sum_{i=1}^N \log(p_{\theta}(x_i)) \f]
/// where \f$ \theta \f$ is the vector of parameters of the model \f$ p \f$ and \f$ x_i \f$ are the
/// datapoints of the training set. Minimizing this error
/// maximizes the probability of the dataset under \f$ p \f$. This error measure is
/// closely related to the Kullback-Leibler divergence.
///
/// For this error function, the model is only allowed to have a single output
/// - the probability of the sample. The distribution must be normalized, as otherwise
/// the likelihood is not meaningful.
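///
/// When the model provides a first parameter derivative, evalDerivative
/// computes the gradient, which by the chain rule is
/// \f[ \frac{\partial L(\theta)}{\partial \theta} = -\frac 1 N \sum_{i=1}^N \frac{1}{p_{\theta}(x_i)} \frac{\partial p_{\theta}(x_i)}{\partial \theta} \f]
/// where \f$ 1/p_{\theta}(x_i) \f$ is set to zero for probabilities below a
/// small lower bound to avoid numerical instabilities.
///
/// \par Example
/// A minimal usage sketch; MyDensityModel stands for any hypothetical model with a
/// single, normalized density output, loadSamples is a hypothetical data source,
/// and the choice of optimizer is illustrative:
/// \code
/// UnlabeledData<RealVector> data = loadSamples(); // hypothetical loader
/// MyDensityModel model;                           // hypothetical density model, output = p(x)
/// NegativeLogLikelihood nll(data, &model);
/// IRpropPlus optimizer;
/// optimizer.init(nll);
/// for(std::size_t step = 0; step != 100; ++step)
///     optimizer.step(nll);
/// \endcode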
class NegativeLogLikelihood : public AbstractObjectiveFunction< RealVector, double >
{
public:
    typedef UnlabeledData<RealVector> DatasetType;

    NegativeLogLikelihood(
        DatasetType const& data,
        AbstractModel<RealVector,RealVector>* model
    ):mep_model(model),m_data(data){
        if(mep_model->hasFirstParameterDerivative())
            m_features |= HAS_FIRST_DERIVATIVE;
        m_features |= CAN_PROPOSE_STARTING_POINT;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "NegativeLogLikelihood"; }

    SearchPointType proposeStartingPoint() const{
        return mep_model->parameterVector();
    }

    std::size_t numberOfVariables()const{
        return mep_model->numberOfParameters();
    }

    ResultType eval(RealVector const& input) const{
        SIZE_CHECK(input.size() == numberOfVariables());
        m_evaluationCounter++;
        mep_model->setParameterVector(input);

        double error = 0;
        double minProb = 1e-100;//numerical stability is only guaranteed for lower-bounded probabilities
        SHARK_PARALLEL_FOR(int i = 0; i < (int)m_data.numberOfBatches(); ++i){
            RealMatrix predictions = (*mep_model)(m_data.batch(i));
            SIZE_CHECK(predictions.size2() == 1);
            double logLikelihoodOfSamples = sum(log(max(predictions,minProb)));
            SHARK_CRITICAL_REGION{
                error += logLikelihoodOfSamples;
            }
        }
        error /= m_data.numberOfElements();//compute the mean
        return -error;//negative log likelihood
    }

    ResultType evalDerivative(
        SearchPointType const& input,
        FirstOrderDerivative & derivative
    ) const{
        SIZE_CHECK(input.size() == numberOfVariables());
        m_evaluationCounter++;
        mep_model->setParameterVector(input);
        derivative.resize(input.size());
        derivative.clear();

        //compute partitioning over threads
        std::size_t numBatches = m_data.numberOfBatches();
        std::size_t numElements = m_data.numberOfElements();
        std::size_t numThreads = std::min(SHARK_NUM_THREADS,numBatches);
        //calculate optimal partitioning
        std::size_t batchesPerThread = numBatches/numThreads;
        std::size_t leftOver = numBatches - batchesPerThread*numThreads;
        double error = 0;
        double minProb = 1e-100;//numerical stability is only guaranteed for lower-bounded probabilities
        SHARK_PARALLEL_FOR(int ti = 0; ti < (int)numThreads; ++ti){//MSVC does not support unsigned integrals in parallel loops
            std::size_t t = ti;
            //get start and end index of the batch range of this thread
            std::size_t start = t*batchesPerThread+std::min(t,leftOver);
            std::size_t end = (t+1)*batchesPerThread+std::min(t+1,leftOver);

            //calculate error and derivative of the current thread
            FirstOrderDerivative threadDerivative(input.size(),0.0);
            double threadError = 0;
            boost::shared_ptr<State> state = mep_model->createState();
            RealVector batchDerivative;
            RealMatrix predictions;
            for(std::size_t i = start; i != end; ++i){
                mep_model->eval(m_data.batch(i),predictions,*state);
                SIZE_CHECK(predictions.size2() == 1);
                threadError += sum(log(max(predictions,minProb)));
                //the coefficients 1/p(x) handle numeric instabilities: probabilities
                //below minProb contribute a zero coefficient instead of overflowing
                RealMatrix coeffs(predictions.size1(),predictions.size2(),0.0);
                for(std::size_t j = 0; j != predictions.size1(); ++j){
                    for(std::size_t k = 0; k != predictions.size2(); ++k){
                        if(predictions(j,k) >= minProb){
                            coeffs(j,k) = 1.0/predictions(j,k);
                        }
                    }
                }
                mep_model->weightedParameterDerivative(
                    m_data.batch(i),predictions, coeffs,*state,batchDerivative
                );
                threadDerivative += batchDerivative;
            }

            //accumulate the results of all threads
            SHARK_CRITICAL_REGION{
                error += threadError;
                noalias(derivative) += threadDerivative;
            }
        }

        error /= numElements;
        derivative /= numElements;
        derivative *= -1;
        return -error;//negative log likelihood
    }

private:
    AbstractModel<RealVector,RealVector>* mep_model;
    UnlabeledData<RealVector> m_data;
};

}
#endif