//===========================================================================
/*!
*
*
* \brief Tree for nearest neighbor search in kernel-induced feature spaces.
*
*
*
* \author T. Glasmachers
* \date 2011
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see .
*
*/
//===========================================================================
#ifndef SHARK_ALGORITHMS_NEARESTNEIGHBORS_KHCTREE_H
#define SHARK_ALGORITHMS_NEARESTNEIGHBORS_KHCTREE_H
#include
#include
#include
namespace shark {
///
/// \brief KHC-tree, a binary space-partitioning tree
///
/// \par
/// KHC-tree data structure for efficient nearest
/// neighbor searches in kernel-induced feature
/// spaces.
/// "KHC" stands for Kernel Hierarchical Clustering.
/// The space separation is based on distances to
/// cluster centers.
///
/// \par
/// The tree is constructed from data by splitting
/// along the pair of data points with largest
/// distance. This quantity is approximated using
/// a given number of randomly sampled pairs, which
/// is controlled by the template parameter
/// CuttingAccuracy.
///
/// \par
/// The bucket size for the tree is always one,
/// such that there is a unique point in each leaf
/// cell.
///
///Since the KHCTree needs direct access to the elements, it's template parameter is not the actual point type
///But the Range, the points are stored in. Be aware that this range should be a View when a Dataset is used as storage,
///since during construction, the KHC-Tree needs random access to the elements.
template
class KHCTree : public BinaryTree
{
public:
typedef IndexedIterator::type> const_iterator;
typedef typename Container::value_type value_type;
typedef AbstractKernelFunction kernel_type;
typedef BinaryTree base_type;
/// Construct the tree from data.
/// It is assumed that the container exceeds
/// the lifetime of the KHCTree (which holds
/// only references to the points), and that
/// the memory locations of the points remain
/// unchanged.
KHCTree(Container const& points, kernel_type const* kernel, TreeConstruction tc = TreeConstruction())
: base_type(points.size())
, mep_kernel(kernel)
, m_normalInvNorm(1.0)
{
//create a list to the iterator elements as temporary storage
//we need indexed operators to have a fast lookup of the position of the elements in the container
std::vector elements(m_size);
boost::iota(elements,const_iterator(boost::begin(points),0));
buildTree(tc,elements);
//after the creation of the trees, the iterators in the array are sorted in the order,
//they are referenced by the nodes.so we can create the indexList using the indizes of the iterators
for(std::size_t i = 0; i != m_size; ++i){
mp_indexList[i] = elements[i].index();
}
}
/// \par
/// Compute the squared distance of this cell to
/// the given reference point, or alternatively
/// a lower bound on this value.
double squaredDistanceLowerBound(value_type const& reference) const{
double dist = 0.0;
KHCTree const* t = this;
KHCTree const* p = (KHCTree const*)mep_parent;
while (p != NULL){
double v = p->distanceFromPlane(reference);
if (t == p->mp_right)
v = -v;
if (v > dist)
dist = v;
t = p;
p = (KHCTree const*)p->mep_parent;
}
return (dist * dist);
}
protected:
using base_type::mep_parent;
using base_type::mp_left;
using base_type::mp_right;
using base_type::mp_indexList;
using base_type::m_size;
using base_type::m_nodes;
/// (internal) construction of a non-root node
KHCTree(KHCTree* parent, std::size_t* list, std::size_t size)
: base_type(parent, list, size)
, mep_kernel(parent->mep_kernel)
, m_normalInvNorm(1.0)
{ }
/// (internal) construction method:
/// median-cuts of the dimension with widest spread
template
void buildTree(TreeConstruction tc, Range& points){
typedef typename Range::iterator range_iterator;
//check whether we are finished
if (tc.maxDepth() == 0 || m_size <= tc.maxBucketSize()) {
m_nodes = 1;
return;
}
// use only a subset of size at most CuttingAccuracy
// to estimate the vector along the longest
// distance
if (m_size <= CuttingAccuracy){
calculateNormal(points);
}
else{
boost::array samples;
for(std::size_t i = 0; i != CuttingAccuracy; i++)
samples[i] = points[m_size * (2*i+1) / (2*CuttingAccuracy)];
calculateNormal(samples);
}
//calculate the distance from the plane for every point in the list
std::vector distance(m_size);
for(std::size_t i = 0; i != m_size; ++i){
distance[i] = funct(*points[i]);
}
// split the list into sub-cells
range_iterator split = this->splitList(distance,points);
range_iterator begin = boost::begin(points);
range_iterator end = boost::end(points);
if (split == end) {//can't split points.
m_nodes = 1;
return;
}
// create sub-nodes
std::size_t leftSize = split-begin;
mp_left = new KHCTree(this, mp_indexList, leftSize);
mp_right = new KHCTree(this, mp_indexList + leftSize, m_size - leftSize);
// recurse
boost::iterator_range left(begin,split);
boost::iterator_range right(split,end);
((KHCTree*)mp_left)->buildTree(tc.nextDepthLevel(),left);
((KHCTree*)mp_right)->buildTree(tc.nextDepthLevel(),right);
m_nodes = 1 + mp_left->nodes() + mp_right->nodes();
}
template
void calculateNormal(Range const& samples){
std::size_t numSamples =batchSize(samples);
double best_dist2 = -1.0;
for (std::size_t i=1; i != numSamples; i++){
for (std::size_t j = 0; j != i; j++){
double dist2 = mep_kernel->featureDistanceSqr(*samples[i], *samples[j]);
if (dist2 > best_dist2){
best_dist2 = dist2;
mep_positive = samples[i];
mep_negative = samples[j];
}
}
}
m_normalInvNorm = 1.0 / std::sqrt(best_dist2);
if (! (boost::math::isfinite)(m_normalInvNorm))
m_normalInvNorm = 1.0;
}
/// function describing the separating hyperplane
double funct(value_type const& reference) const{
double result = mep_kernel->eval(*mep_positive, reference);
result -= mep_kernel->eval(*mep_negative, reference);
result *= m_normalInvNorm;
return result;
}
/// kernel function
kernel_type const* mep_kernel;
/// "positive" side cluster center
const_iterator mep_positive;
/// "negative" side cluster center
const_iterator mep_negative;
/// one divided by squared distance between "positive" and "negative" cluster center
double m_normalInvNorm;
};
}
#endif