//===========================================================================
/*!
*
*
* \brief Do special kernel evaluation by skipping missing features
*
*
*
* \author B. Li
* \date 2012
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H
#define SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H
#include "shark/Core/Exception.h"
#include "shark/LinAlg/Base.h"
#include "shark/Models/Kernels/AbstractKernelFunction.h"
#include "shark/Models/Kernels/LinearKernel.h"
#include "shark/Models/Kernels/MonomialKernel.h"
#include "shark/Models/Kernels/PolynomialKernel.h"
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/optional.hpp>
#include <algorithm>
#include <cstddef>
#include <vector>
namespace shark {
/// Does a kernel function evaluation with Missing features in the inputs
/// @param kernelFunction The kernel function used to do evaluation
/// @param inputA a input
/// @param inputB another input
///
/// The kernel k(x,y) is evaluated taking missing features into account. For this it is checked whether a feature
/// of x or y is nan and in this case the corresponding features in @a inputA and @a inputB won't be considered.
template
double evalSkipMissingFeatures(
const AbstractKernelFunction& kernelFunction,
const InputTypeT1& inputA,
const InputTypeT2& inputB)
{
SIZE_CHECK(inputA.size() == inputB.size());
// Do kernel type check
SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size.");
// Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness
// Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then
// construct another two InputTypes with them.
typedef typename InputType::value_type InputValueType;
std::vector tempInputA;
std::vector tempInputB;
tempInputA.reserve(inputA.size());
tempInputB.reserve(inputB.size());
for (std::size_t index = 0; index < inputA.size(); ++index)
{
//using namespace boost::math;
if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index)))
{
tempInputA.push_back(inputA(index));
tempInputB.push_back(inputB(index));
}
}
SIZE_CHECK(tempInputA.size() == tempInputB.size());
SIZE_CHECK(tempInputA.size() > 0);
InputType validInputA(tempInputA.size());
InputType validInputB(tempInputA.size());
std::copy(tempInputA.begin(),tempInputA.end(),validInputA.begin());
std::copy(tempInputB.begin(),tempInputB.end(),validInputB.begin());
// And then pass them to the kernel for calculation
return kernelFunction.eval(validInputA, validInputB);
}
/// Do kernel function evaluation while Missing features in the inputs
/// @param kernelFunction The kernel function used to do evaluation
/// @param inputA a input
/// @param inputB another input
/// @param missingness
/// used to decide which features in the inputs to take into consideration for the purpose of evaluation.
/// If a feature is NaN, then the corresponding features in @a inputA and @a inputB won't be considered.
template
double evalSkipMissingFeatures(
const AbstractKernelFunction& kernelFunction,
const InputTypeT1& inputA,
const InputTypeT2& inputB,
InputTypeT3 const& missingness)
{
SIZE_CHECK(inputA.size() == inputB.size());
//SIZE_CHECK(inputA.size() == missingness.size());
// Do kernel type check
SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size.");
// Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness
// Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then
// construct another two InputTypes with them.
typedef typename InputType::value_type InputValueType;
std::vector tempInputA;
std::vector tempInputB;
tempInputA.resize(inputA.size());
tempInputB.resize(inputB.size());
for (std::size_t index = 0; index < inputA.size(); ++index)
{
if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index)) && !boost::math::isnan(missingness(index)))
{
tempInputA.push_back(inputA(index));
tempInputB.push_back(inputB(index));
}
}
SIZE_CHECK(tempInputA.size() == tempInputB.size());
SIZE_CHECK(tempInputA.size() > 0);
InputType validInputA(tempInputA.size());
InputType validInputB(tempInputA.size());
for (std::size_t i = 0; i < tempInputA.size(); ++i)
{
validInputA(i) = tempInputA[i];
validInputB(i) = tempInputB[i];
}
// And then pass them to the kernel for calculation
return kernelFunction.eval(validInputA, validInputB);
}
} // namespace shark {
#endif // SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H