//===========================================================================
/*!
*
*
* \brief Polynomial kernel.
*
*
*
* \author T. Glasmachers
* \date 2011
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#include <shark/Models/Kernels/AbstractKernelFunction.h>
namespace shark {
/// \brief Polynomial kernel.
///
/// \par
/// The polynomial kernel is defined as
/// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$
/// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset
/// \f$ b \geq 0 \f$. For n=1 and b=0 it matches the linear kernel
/// (standard inner product).
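///
/// \par
/// A minimal usage sketch (assuming shark's RealVector type; evaluation
/// goes through AbstractKernelFunction's operator()):
/// \code
/// PolynomialKernel<> kernel(2, 1.0); // k(x, y) = (<x, y> + 1)^2
/// RealVector x(2), y(2);
/// x(0) = 1.0; x(1) = 2.0;
/// y(0) = 3.0; y(1) = 4.0;
/// double k = kernel(x, y);           // (1*3 + 2*4 + 1)^2 = 144
/// \endcode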
template<class InputType = RealVector>
class PolynomialKernel : public AbstractKernelFunction<InputType>
{
private:
typedef AbstractKernelFunction<InputType> base_type;
struct InternalState: public State{
RealMatrix base;//stores the inner products <x1_i, x2_j> plus the offset
RealMatrix exponentedProd;//pow(base, m_degree)
void resize(std::size_t sizeX1, std::size_t sizeX2){
base.resize(sizeX1, sizeX2);
exponentedProd.resize(sizeX1, sizeX2);
}
};
public:
typedef typename base_type::BatchInputType BatchInputType;
typedef typename base_type::ConstInputReference ConstInputReference;
typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;
/// Constructor.
///
/// \param degree exponent of the polynomial
/// \param offset constant added to the standard inner product
/// \param degree_is_parameter should the degree be a regular model parameter? If yes, the kernel is not differentiable with respect to its parameters, since the degree is discrete.
/// \param unconstrained should the offset internally be represented as exponential of the externally visible parameter?
PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false )
: m_degree( degree ),
m_offset( offset ),
m_degreeIsParam( degree_is_parameter ),
m_unconstrained( unconstrained ) {
SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive");
this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
this->m_features |= base_type::SUPPORTS_VARIABLE_INPUT_SIZE;
if ( !m_degreeIsParam )
this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
}
/// \brief From INameable: return the class name.
std::string name() const
{ return "PolynomialKernel"; }
void setDegree( unsigned int deg ) {
RANGE_CHECK( deg > 0 );
SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter.");
m_degree = deg;
}
unsigned int degree() const {
return m_degree;
}
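/// \brief Returns the parameter vector of the kernel.
///
/// If the degree is treated as a parameter, the vector is (degree, offset),
/// otherwise it holds only the offset. In unconstrained mode the offset is
/// encoded as its logarithm.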
RealVector parameterVector() const {
if ( m_degreeIsParam ) {
RealVector ret(2);
ret(0) = m_degree;
if ( m_unconstrained )
ret(1) = std::log( m_offset );
else
ret(1) = m_offset;
return ret;
} else {
RealVector ret(1);
if ( m_unconstrained )
ret(0) = std::log( m_offset );
else
ret(0) = m_offset;
return ret;
}
}
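/// \brief Sets the parameters of the kernel, using the same encoding as parameterVector().
///
/// A minimal sketch of the encoding (values are illustrative only):
/// \code
/// PolynomialKernel<> kernel(2, 1.0, true, false); // degree and offset are parameters
/// RealVector p(2);
/// p(0) = 3.0; // new degree n = 3 (must be a positive integer)
/// p(1) = 0.5; // new offset b = 0.5 (must be non-negative)
/// kernel.setParameterVector(p);
/// \endcode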
void setParameterVector(RealVector const& newParameters) {
if ( m_degreeIsParam ) {
SIZE_CHECK(newParameters.size() == 2);
SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits<double>::epsilon());
RANGE_CHECK(newParameters(0) >= 1.0);
m_degree = (unsigned int)newParameters(0);
if ( m_unconstrained ) {
m_offset = std::exp(newParameters(1));
} else {
RANGE_CHECK(newParameters(1) >= 0.0);
m_offset = newParameters(1);
}
} else {
SIZE_CHECK(newParameters.size() == 1);
if ( m_unconstrained ) {
m_offset = std::exp(newParameters(0));
} else {
RANGE_CHECK(newParameters(0) >= 0.0);
m_offset = newParameters(0);
}
}
}
std::size_t numberOfParameters() const {
if ( m_degreeIsParam )
return 2;
else
return 1;
}
///\brief creates the internal state of the kernel
boost::shared_ptr<State> createState()const{
return boost::shared_ptr<State>(new InternalState());
}
/////////////////////////EVALUATION//////////////////////////////
/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
double eval(ConstInputReference x1, ConstInputReference x2) const{
SIZE_CHECK(x1.size() == x2.size());
double base = inner_prod(x1, x2) + m_offset;
return std::pow(base,m_degree);
}
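/// \brief Evaluates the kernel for two batches of inputs; result(i, j) = k(x1_i, x2_j).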
void eval(ConstBatchInputReference batchX1,ConstBatchInputReference batchX2, RealMatrix& result) const {
SIZE_CHECK(batchX1.size2() == batchX2.size2());
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
result.resize(sizeX1,sizeX2);
//calculate the inner product
noalias(result) = prod(batchX1,trans(batchX2));
result += m_offset;
//now do exponentiation
if(m_degree != 1)
noalias(result) = pow(result,m_degree);
}
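/// \brief Evaluates the kernel for two batches of inputs and stores the
/// intermediate values needed by the derivative functions in the state object.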
void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
SIZE_CHECK(batchX1.size2() == batchX2.size2());
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
InternalState& s = state.toState<InternalState>();
s.resize(sizeX1,sizeX2);
result.resize(sizeX1,sizeX2);
//calculate the inner product
noalias(s.base) = prod(batchX1,trans(batchX2));
s.base += m_offset;
//now do exponentiation
if(m_degree != 1)
noalias(result) = pow(s.base,m_degree);
else
noalias(result) = s.base;
noalias(s.exponentedProd) = result;
}
/////////////////////DERIVATIVES////////////////////////////////////
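/// \brief Weighted derivative of the kernel with respect to the offset parameter b.
///
/// \f$ \frac{\partial k(x_1, x_2)}{\partial b} = n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \f$
///
/// In unconstrained mode the encoded parameter is \f$ \log b \f$, so the chain
/// rule contributes an additional factor of \f$ b \f$.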
void weightedParameterDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficients,
State const& state,
RealVector& gradient
) const{
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
gradient.resize(1);
InternalState const& s = state.toState<InternalState>();
//internal checks
SIZE_CHECK(batchX1.size2() == batchX2.size2());
SIZE_CHECK(s.base.size1() == sizeX1);
SIZE_CHECK(s.base.size2() == sizeX2);
SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
//m_degree == 1 is easy: d k(x_i, x_j) / d b = 1
if(m_degree == 1){
gradient(0) = sum(coefficients);
if ( m_unconstrained )
gradient(0) *= m_offset;
return;
}
gradient(0) = m_degree * sum(safe_div(s.exponentedProd,s.base,0.0) * coefficients);
if ( m_unconstrained )
gradient(0) *= m_offset;
}
/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
///
/// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot (\langle x_1, x_2 \rangle + b)^{n-1} \right] \cdot x_2 \f$
void weightedInputDerivative(
ConstBatchInputReference batchX1,
ConstBatchInputReference batchX2,
RealMatrix const& coefficientsX2,
State const& state,
BatchInputType& gradient
) const{
std::size_t sizeX1 = batchX1.size1();
std::size_t sizeX2 = batchX2.size1();
gradient.resize(sizeX1,batchX1.size2());
InternalState const& s = state.toState<InternalState>();
//internal checks
SIZE_CHECK(batchX1.size2() == batchX2.size2());
SIZE_CHECK(s.base.size1() == sizeX1);
SIZE_CHECK(s.base.size2() == sizeX2);
SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
SIZE_CHECK(coefficientsX2.size1() == sizeX1);
SIZE_CHECK(coefficientsX2.size2() == sizeX2);
//again m_degree == 1 is easy: the i-th row of the gradient
//is just sum_j coefficients(i,j) * x2_j
if(m_degree == 1){
noalias(gradient) = prod(coefficientsX2,batchX2);
return;
}
//first calculate weights(i,j) = coefficients(i,j)*exponentedProd(i,j)/base(i,j);
//we have to take the usual division by 0 into account
RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd,s.base,0.0);
//the derivative for input i of batch x1 is then
//g_i = sum_j m_degree*weights(i,j)*x2_j
//we now sum over j which is a matrix-matrix product
noalias(gradient) = m_degree * prod(weights,batchX2);
}
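/// From ISerializable: read the kernel from an archive.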
void read(InArchive& ar){
ar >> m_degree;
ar >> m_offset;
ar >> m_degreeIsParam;
ar >> m_unconstrained;
}
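/// From ISerializable: write the kernel to an archive.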
void write(OutArchive& ar) const{
ar << m_degree;
ar << m_offset;
ar << m_degreeIsParam;
ar << m_unconstrained;
}
protected:
unsigned int m_degree; ///< exponent n
double m_offset; ///< offset b
bool m_degreeIsParam; ///< is the degree a model parameter?
bool m_unconstrained; ///< is the offset internally represented as exponential of the parameter?
};
typedef PolynomialKernel<> DensePolynomialKernel;
typedef PolynomialKernel<CompressedRealVector> CompressedPolynomialKernel;
}
#endif