//=========================================================================== /*! * * * \brief Do special kernel evaluation by skipping missing features * * * * \author B. Li * \date 2012 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H #define SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H #include "shark/Core/Exception.h" #include "shark/LinAlg/Base.h" #include "shark/Models/Kernels/AbstractKernelFunction.h" #include "shark/Models/Kernels/LinearKernel.h" #include "shark/Models/Kernels/MonomialKernel.h" #include "shark/Models/Kernels/PolynomialKernel.h" #include #include #include namespace shark { /// Does a kernel function evaluation with Missing features in the inputs /// @param kernelFunction The kernel function used to do evaluation /// @param inputA a input /// @param inputB another input /// /// The kernel k(x,y) is evaluated taking missing features into account. For this it is checked whether a feature /// of x or y is nan and in this case the corresponding features in @a inputA and @a inputB won't be considered. template double evalSkipMissingFeatures( const AbstractKernelFunction& kernelFunction, const InputTypeT1& inputA, const InputTypeT2& inputB) { SIZE_CHECK(inputA.size() == inputB.size()); // Do kernel type check SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size."); // Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness // Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then // construct another two InputTypes with them. typedef typename InputType::value_type InputValueType; std::vector tempInputA; std::vector tempInputB; tempInputA.reserve(inputA.size()); tempInputB.reserve(inputB.size()); for (std::size_t index = 0; index < inputA.size(); ++index) { //using namespace boost::math; if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index))) { tempInputA.push_back(inputA(index)); tempInputB.push_back(inputB(index)); } } SIZE_CHECK(tempInputA.size() == tempInputB.size()); SIZE_CHECK(tempInputA.size() > 0); InputType validInputA(tempInputA.size()); InputType validInputB(tempInputA.size()); std::copy(tempInputA.begin(),tempInputA.end(),validInputA.begin()); std::copy(tempInputB.begin(),tempInputB.end(),validInputB.begin()); // And then pass them to the kernel for calculation return kernelFunction.eval(validInputA, validInputB); } /// Do kernel function evaluation while Missing features in the inputs /// @param kernelFunction The kernel function used to do evaluation /// @param inputA a input /// @param inputB another input /// @param missingness /// used to decide which features in the inputs to take into consideration for the purpose of evaluation. /// If a feature is NaN, then the corresponding features in @a inputA and @a inputB won't be considered. template double evalSkipMissingFeatures( const AbstractKernelFunction& kernelFunction, const InputTypeT1& inputA, const InputTypeT2& inputB, InputTypeT3 const& missingness) { SIZE_CHECK(inputA.size() == inputB.size()); //SIZE_CHECK(inputA.size() == missingness.size()); // Do kernel type check SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size."); // Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness // Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then // construct another two InputTypes with them. typedef typename InputType::value_type InputValueType; std::vector tempInputA; std::vector tempInputB; tempInputA.resize(inputA.size()); tempInputB.resize(inputB.size()); for (std::size_t index = 0; index < inputA.size(); ++index) { if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index)) && !boost::math::isnan(missingness(index))) { tempInputA.push_back(inputA(index)); tempInputB.push_back(inputB(index)); } } SIZE_CHECK(tempInputA.size() == tempInputB.size()); SIZE_CHECK(tempInputA.size() > 0); InputType validInputA(tempInputA.size()); InputType validInputB(tempInputA.size()); for (std::size_t i = 0; i < tempInputA.size(); ++i) { validInputA(i) = tempInputA[i]; validInputB(i) = tempInputB[i]; } // And then pass them to the kernel for calculation return kernelFunction.eval(validInputA, validInputB); } } // namespace shark { #endif // SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H